author     dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
committer  dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
commit     5ef8fd3549d38e883a31881636be3dc2a275de20 (patch)
tree       bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib
parent     77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff)
parent     782067d0278612ee75d024b9b135c221c327e9e8 (diff)
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 78
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/AliasDebugger.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 9
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/AssumptionCache.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 195
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 54
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 84
-rw-r--r--  contrib/llvm/lib/Analysis/CFGPrinter.cpp | 4
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp | 301
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 287
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 90
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 337
-rw-r--r--  contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp | 50
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp | 52
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/InlineCost.cpp | 195
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 348
-rw-r--r--  contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp | 95
-rw-r--r--  contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 118
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallSemantics.cpp | 27
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 560
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 28
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 1427
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 105
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/MemDepPrinter.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp | 70
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 67
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 187
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 68
-rw-r--r--  contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/PHITransAddr.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPass.cpp | 27
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 1251
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 223
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp (renamed from contrib/llvm/lib/Target/TargetLibraryInfo.cpp) | 609
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 656
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 22
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 1290
-rw-r--r--  contrib/llvm/lib/AsmParser/LLLexer.cpp | 53
-rw-r--r--  contrib/llvm/lib/AsmParser/LLLexer.h | 1
-rw-r--r--  contrib/llvm/lib/AsmParser/LLParser.cpp | 1145
-rw-r--r--  contrib/llvm/lib/AsmParser/LLParser.h | 58
-rw-r--r--  contrib/llvm/lib/AsmParser/LLToken.h | 11
-rw-r--r--  contrib/llvm/lib/AsmParser/Parser.cpp | 5
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1267
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h | 365
-rw-r--r--  contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp | 40
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 595
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 16
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 39
-rw-r--r--  contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 902
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 192
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 71
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 218
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 91
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 133
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 64
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 277
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1133
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 223
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 131
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 64
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h | 26
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 842
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 292
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 33
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 76
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp | 433
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 123
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 632
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 74
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 22
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 771
-rw-r--r--  contrib/llvm/lib/CodeGen/CoreCLRGC.cpp | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 133
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/ErlangGC.cpp | 50
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp | 374
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadata.cpp | 125
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 354
-rw-r--r--  contrib/llvm/lib/CodeGen/GCStrategy.cpp | 397
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 298
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 90
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/JumpInstrTables.cpp | 296
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 126
-rw-r--r--  contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 127
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 529
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 72
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 33
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 484
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp | 66
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 88
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 212
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 328
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 62
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 262
-rw-r--r--  contrib/llvm/lib/CodeGen/OcamlGC.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/Passes.cpp | 95
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 207
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.h | 78
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 255
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 797
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 200
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4024
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 66
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 514
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 568
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 477
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 402
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 90
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 357
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 914
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3016
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 297
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 40
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 242
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 592
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 271
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGC.cpp | 412
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 460
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 388
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 64
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 210
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 324
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 435
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 26
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 2482
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFAcceleratorTable.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp) | 9
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFContext.cpp) | 69
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp) | 8
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp) | 213
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp) | 27
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp) | 5
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp) | 2
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp) | 108
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp) | 9
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp (renamed from contrib/llvm/lib/DebugInfo/DWARFUnit.cpp) | 20
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp (renamed from contrib/llvm/lib/DebugInfo/SyntaxHighlighting.cpp) | 0
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h (renamed from contrib/llvm/lib/DebugInfo/SyntaxHighlighting.h) | 0
-rw-r--r--  contrib/llvm/lib/DebugInfo/DWARF/module.modulemap | 1
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp | 73
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp | 53
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp | 50
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp | 50
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp | 54
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp | 75
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 1095
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp | 205
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp | 67
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp | 32
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDB.cpp | 39
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp | 133
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp | 339
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp | 28
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp | 146
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp | 151
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp | 23
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp | 31
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp | 28
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp | 27
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp | 22
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 102
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp | 22
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp | 22
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp | 29
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp | 26
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp | 34
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp | 90
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp | 29
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp | 22
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp | 24
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp | 23
-rw-r--r--  contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp | 25
-rw-r--r--  contrib/llvm/lib/DebugInfo/module.modulemap | 1
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h | 68
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp | 212
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp | 17
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp | 2
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp | 6
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c | 5
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp | 33
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 9
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h | 2
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp | 104
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h | 86
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp | 2
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp | 102
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 183
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp | 128
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 356
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp | 138
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 281
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp | 88
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h | 50
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 47
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h | 3
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 548
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 52
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 82
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 36
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 14
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h | 216
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h | 5
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 4
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h | 5
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h | 5
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp (renamed from contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp) | 0
-rw-r--r--  contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp | 14
-rw-r--r--  contrib/llvm/lib/IR/AsmWriter.cpp | 929
-rw-r--r--  contrib/llvm/lib/IR/AsmWriter.h | 129
-rw-r--r--  contrib/llvm/lib/IR/AttributeImpl.h | 21
-rw-r--r--  contrib/llvm/lib/IR/Attributes.cpp | 192
-rw-r--r--  contrib/llvm/lib/IR/AutoUpgrade.cpp | 363
-rw-r--r--  contrib/llvm/lib/IR/BasicBlock.cpp | 54
-rw-r--r--  contrib/llvm/lib/IR/ConstantFold.cpp | 177
-rw-r--r--  contrib/llvm/lib/IR/ConstantFold.h | 4
-rw-r--r--  contrib/llvm/lib/IR/ConstantRange.cpp | 45
-rw-r--r--  contrib/llvm/lib/IR/Constants.cpp | 192
-rw-r--r--  contrib/llvm/lib/IR/ConstantsContext.h | 108
-rw-r--r--  contrib/llvm/lib/IR/Core.cpp | 43
-rw-r--r--  contrib/llvm/lib/IR/DIBuilder.cpp | 1458
-rw-r--r--  contrib/llvm/lib/IR/DataLayout.cpp | 153
-rw-r--r--  contrib/llvm/lib/IR/DebugInfo.cpp | 1375
-rw-r--r--  contrib/llvm/lib/IR/DebugInfoMetadata.cpp | 551
-rw-r--r--  contrib/llvm/lib/IR/DebugLoc.cpp | 131
-rw-r--r--  contrib/llvm/lib/IR/DiagnosticInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/IR/Dominators.cpp | 8
-rw-r--r--  contrib/llvm/lib/IR/Function.cpp | 139
-rw-r--r--  contrib/llvm/lib/IR/GCOV.cpp | 335
-rw-r--r--  contrib/llvm/lib/IR/Globals.cpp | 41
-rw-r--r--  contrib/llvm/lib/IR/IRBuilder.cpp | 140
-rw-r--r--  contrib/llvm/lib/IR/IRPrintingPasses.cpp | 19
-rw-r--r--  contrib/llvm/lib/IR/InlineAsm.cpp | 17
-rw-r--r--  contrib/llvm/lib/IR/Instruction.cpp | 17
-rw-r--r--  contrib/llvm/lib/IR/Instructions.cpp | 396
-rw-r--r--  contrib/llvm/lib/IR/LLVMContext.cpp | 13
-rw-r--r--  contrib/llvm/lib/IR/LLVMContextImpl.cpp | 77
-rw-r--r--  contrib/llvm/lib/IR/LLVMContextImpl.h | 799
-rw-r--r--  contrib/llvm/lib/IR/LeaksContext.h | 103
-rw-r--r--  contrib/llvm/lib/IR/LegacyPassManager.cpp | 7
-rw-r--r--  contrib/llvm/lib/IR/MDBuilder.cpp | 26
-rw-r--r--  contrib/llvm/lib/IR/Mangler.cpp | 6
-rw-r--r--  contrib/llvm/lib/IR/Metadata.cpp | 697
-rw-r--r--  contrib/llvm/lib/IR/MetadataImpl.h | 46
-rw-r--r--  contrib/llvm/lib/IR/MetadataTracking.cpp | 7
-rw-r--r--  contrib/llvm/lib/IR/Module.cpp | 40
-rw-r--r--  contrib/llvm/lib/IR/Operator.cpp | 44
-rw-r--r--  contrib/llvm/lib/IR/Pass.cpp | 4
-rw-r--r--  contrib/llvm/lib/IR/Statepoint.cpp | 26
-rw-r--r--  contrib/llvm/lib/IR/Type.cpp | 5
-rw-r--r--  contrib/llvm/lib/IR/TypeFinder.cpp | 2
-rw-r--r--  contrib/llvm/lib/IR/UseListOrder.cpp | 43
-rw-r--r--  contrib/llvm/lib/IR/Value.cpp | 151
-rw-r--r--  contrib/llvm/lib/IR/ValueSymbolTable.cpp | 2
-rw-r--r--  contrib/llvm/lib/IR/ValueTypes.cpp | 2
-rw-r--r--  contrib/llvm/lib/IR/Verifier.cpp | 2829
-rw-r--r--  contrib/llvm/lib/LTO/LTOCodeGenerator.cpp | 205
-rw-r--r--  contrib/llvm/lib/LTO/LTOModule.cpp | 13
-rw-r--r--  contrib/llvm/lib/Linker/LinkModules.cpp | 170
-rw-r--r--  contrib/llvm/lib/MC/ConstantPools.cpp | 15
-rw-r--r--  contrib/llvm/lib/MC/ELFObjectWriter.cpp | 1333
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp | 25
-rw-r--r--  contrib/llvm/lib/MC/MCAsmInfoELF.cpp | 6
-rw-r--r--  contrib/llvm/lib/MC/MCAsmStreamer.cpp | 59
-rw-r--r--  contrib/llvm/lib/MC/MCAssembler.cpp | 354
-rw-r--r--  contrib/llvm/lib/MC/MCCodeGenInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCContext.cpp | 323
-rw-r--r--  contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp | 50
-rw-r--r--  contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp | 16
-rw-r--r--  contrib/llvm/lib/MC/MCDwarf.cpp | 159
-rw-r--r--  contrib/llvm/lib/MC/MCELF.cpp | 4
-rw-r--r--  contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp | 21
-rw-r--r--  contrib/llvm/lib/MC/MCELFStreamer.cpp | 201
-rw-r--r--  contrib/llvm/lib/MC/MCExpr.cpp | 162
-rw-r--r--  contrib/llvm/lib/MC/MCInst.cpp | 15
-rw-r--r--  contrib/llvm/lib/MC/MCInstPrinter.cpp | 6
-rw-r--r--  contrib/llvm/lib/MC/MCInstrDesc.cpp | 70
-rw-r--r--  contrib/llvm/lib/MC/MCLinkerOptimizationHint.cpp | 6
-rw-r--r--  contrib/llvm/lib/MC/MCMachOStreamer.cpp | 108
-rw-r--r--  contrib/llvm/lib/MC/MCNullStreamer.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCObjectFileInfo.cpp | 676
-rw-r--r--  contrib/llvm/lib/MC/MCObjectStreamer.cpp | 91
-rw-r--r--  contrib/llvm/lib/MC/MCObjectWriter.cpp | 31
-rw-r--r--  contrib/llvm/lib/MC/MCParser/AsmLexer.cpp | 2
-rw-r--r--  contrib/llvm/lib/MC/MCParser/AsmParser.cpp | 184
-rw-r--r--  contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp | 12
-rw-r--r--  contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 12
-rw-r--r--  contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp | 88
-rw-r--r--  contrib/llvm/lib/MC/MCSection.cpp | 86
-rw-r--r--  contrib/llvm/lib/MC/MCSectionELF.cpp | 17
-rw-r--r--  contrib/llvm/lib/MC/MCSectionMachO.cpp | 6
-rw-r--r--  contrib/llvm/lib/MC/MCStreamer.cpp | 62
-rw-r--r--  contrib/llvm/lib/MC/MCSubtargetInfo.cpp | 19
-rw-r--r--  contrib/llvm/lib/MC/MCSymbol.cpp | 31
-rw-r--r--  contrib/llvm/lib/MC/MCValue.cpp | 8
-rw-r--r--  contrib/llvm/lib/MC/MCWin64EH.cpp | 11
-rw-r--r--  contrib/llvm/lib/MC/MCWinEH.cpp | 20
-rw-r--r--  contrib/llvm/lib/MC/MachObjectWriter.cpp | 256
-rw-r--r--  contrib/llvm/lib/MC/SubtargetFeature.cpp | 30
-rw-r--r--  contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp | 349
-rw-r--r--  contrib/llvm/lib/MC/WinCOFFStreamer.cpp | 36
-rw-r--r--  contrib/llvm/lib/Object/Archive.cpp | 110
-rw-r--r--  contrib/llvm/lib/Object/Binary.cpp | 1
-rw-r--r--  contrib/llvm/lib/Object/COFFObjectFile.cpp | 81
-rw-r--r--  contrib/llvm/lib/Object/ELFYAML.cpp | 214
-rw-r--r--  contrib/llvm/lib/Object/IRObjectFile.cpp | 16
-rw-r--r--  contrib/llvm/lib/Object/MachOObjectFile.cpp | 136
-rw-r--r--  contrib/llvm/lib/Object/ObjectFile.cpp | 1
-rw-r--r--  contrib/llvm/lib/Object/RecordStreamer.cpp | 2
-rw-r--r--  contrib/llvm/lib/Object/RecordStreamer.h | 2
-rw-r--r--  contrib/llvm/lib/Object/SymbolicFile.cpp | 1
-rw-r--r--  contrib/llvm/lib/Option/Arg.cpp | 37
-rw-r--r--  contrib/llvm/lib/Option/ArgList.cpp | 40
-rw-r--r--  contrib/llvm/lib/Option/OptTable.cpp | 14
-rw-r--r--  contrib/llvm/lib/Option/Option.cpp | 8
-rw-r--r--  contrib/llvm/lib/Passes/PassBuilder.cpp | 412
-rw-r--r--  contrib/llvm/lib/Passes/PassRegistry.def | 77
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMapping.cpp | 157
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 397
-rw-r--r--  contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp | 8
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProf.cpp | 8
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProfReader.cpp | 41
-rw-r--r--  contrib/llvm/lib/ProfileData/InstrProfWriter.cpp | 31
-rw-r--r--  contrib/llvm/lib/Support/APFloat.cpp | 10
-rw-r--r--  contrib/llvm/lib/Support/APInt.cpp | 109
-rw-r--r--  contrib/llvm/lib/Support/Allocator.cpp | 5
-rw-r--r--  contrib/llvm/lib/Support/COM.cpp | 23
-rw-r--r--  contrib/llvm/lib/Support/CommandLine.cpp | 444
-rw-r--r--  contrib/llvm/lib/Support/Compression.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/ConvertUTFWrapper.cpp | 45
-rw-r--r--  contrib/llvm/lib/Support/CrashRecoveryContext.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp | 26
-rw-r--r--  contrib/llvm/lib/Support/DataStream.cpp | 6
-rw-r--r--  contrib/llvm/lib/Support/Debug.cpp | 72
-rw-r--r--  contrib/llvm/lib/Support/Dwarf.cpp | 364
-rw-r--r--  contrib/llvm/lib/Support/FileOutputBuffer.cpp | 14
-rw-r--r--  contrib/llvm/lib/Support/FoldingSet.cpp | 11
-rw-r--r--  contrib/llvm/lib/Support/FormattedStream.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/GraphWriter.cpp | 28
-rw-r--r--  contrib/llvm/lib/Support/Host.cpp | 237
-rw-r--r--  contrib/llvm/lib/Support/IsInf.cpp | 49
-rw-r--r--  contrib/llvm/lib/Support/IsNAN.cpp | 33
-rw-r--r--  contrib/llvm/lib/Support/LockFileManager.cpp | 73
-rw-r--r--  contrib/llvm/lib/Support/MemoryBuffer.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/Path.cpp | 18
-rw-r--r--  contrib/llvm/lib/Support/PrettyStackTrace.cpp | 50
-rw-r--r--  contrib/llvm/lib/Support/Process.cpp | 21
-rw-r--r--  contrib/llvm/lib/Support/Program.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/RandomNumberGenerator.cpp | 6
-rw-r--r--  contrib/llvm/lib/Support/Regex.cpp | 6
-rw-r--r--  contrib/llvm/lib/Support/ScaledNumber.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/SmallPtrSet.cpp | 19
-rw-r--r--  contrib/llvm/lib/Support/SourceMgr.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/SpecialCaseList.cpp | 60
-rw-r--r--  contrib/llvm/lib/Support/StreamingMemoryObject.cpp | 24
-rw-r--r--  contrib/llvm/lib/Support/StringExtras.cpp | 1
-rw-r--r--  contrib/llvm/lib/Support/StringMap.cpp | 5
-rw-r--r--  contrib/llvm/lib/Support/SystemUtils.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/TargetParser.cpp | 467
-rw-r--r--  contrib/llvm/lib/Support/TargetRegistry.cpp | 53
-rw-r--r--  contrib/llvm/lib/Support/Timer.cpp | 2
-rw-r--r--  contrib/llvm/lib/Support/Triple.cpp | 303
-rw-r--r--  contrib/llvm/lib/Support/Twine.cpp | 13
-rw-r--r--  contrib/llvm/lib/Support/Unix/COM.inc | 27
-rw-r--r--  contrib/llvm/lib/Support/Unix/Host.inc | 24
-rw-r--r--  contrib/llvm/lib/Support/Unix/Process.inc | 16
-rw-r--r--  contrib/llvm/lib/Support/Unix/Program.inc | 18
-rw-r--r--  contrib/llvm/lib/Support/Unix/Signals.inc | 70
-rw-r--r--  contrib/llvm/lib/Support/Valgrind.cpp | 3
-rw-r--r--  contrib/llvm/lib/Support/Windows/COM.inc | 37
-rw-r--r--  contrib/llvm/lib/Support/Windows/Path.inc | 57
-rw-r--r--  contrib/llvm/lib/Support/Windows/Process.inc | 19
-rw-r--r--  contrib/llvm/lib/Support/Windows/Program.inc | 3
-rw-r--r--  contrib/llvm/lib/Support/Windows/Signals.inc | 285
-rw-r--r--  contrib/llvm/lib/Support/Windows/TimeValue.inc | 1
-rw-r--r--  contrib/llvm/lib/Support/Windows/WindowsSupport.h | 7
-rw-r--r--  contrib/llvm/lib/Support/YAMLParser.cpp | 356
-rw-r--r--  contrib/llvm/lib/Support/YAMLTraits.cpp | 115
-rw-r--r--  contrib/llvm/lib/Support/raw_ostream.cpp | 95
-rw-r--r--  contrib/llvm/lib/Support/regcomp.c | 4
-rw-r--r--  contrib/llvm/lib/TableGen/Error.cpp | 5
-rw-r--r--  contrib/llvm/lib/TableGen/Main.cpp | 36
-rw-r--r--  contrib/llvm/lib/TableGen/Record.cpp | 527
-rw-r--r--  contrib/llvm/lib/TableGen/SetTheory.cpp | 43
-rw-r--r--  contrib/llvm/lib/TableGen/TGLexer.h | 3
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.cpp | 246
-rw-r--r--  contrib/llvm/lib/TableGen/TGParser.h | 4
-rw-r--r--  contrib/llvm/lib/TableGen/TableGenBackend.cpp | 35
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.h | 3
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64.td | 11
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 56
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 118
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h | 2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td | 2
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp | 110
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 260
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 311
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1140
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h | 47
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td | 228
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 29
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 7
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td | 415
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 224
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp | 157
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 77
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 22
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td | 20
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h | 11
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 62
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h | 9
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp | 23
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h | 7
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 327
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 147
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 402
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 123
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 112
-rw-r--r--  contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 131
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h | 6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 66
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 54
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 31
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 4
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 140
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 29
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 161
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h | 42
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 1510
-rw-r--r--  contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 125
-rw-r--r--  contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARM.td | 71
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 588
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h | 22
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 100
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 126
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 29
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMCallingConv.h | 14
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 191
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFPUName.def | 34
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 49
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 78
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFrameLowering.h | 7
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 348
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 1411
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 58
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrFormats.td | 7
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp | 49
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrInfo.td | 143
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrNEON.td | 236
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb.td | 48
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td | 120
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMInstrVFP.td | 111
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 117
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 21
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td | 10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 190
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 13
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp | 95
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 25
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp | 164
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetMachine.h | 7
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 215
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 134
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 819
-rw-r--r--  contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 430
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 730
-rw-r--r--  contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 223
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def | 50
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h | 27
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 247
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 32
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 8
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 200
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 47
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 111
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 72
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 44
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h | 4
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 27
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h | 6
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp | 53
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h | 38
-rw-r--r--  contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 37
-rw-r--r--  contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp (renamed from contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp) | 150
-rw-r--r--  contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h (renamed from contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h) | 14
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPF.h (renamed from contrib/llvm/lib/Target/ARM/ARMFPUName.h) | 22
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPF.td | 31
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 87
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFCallingConv.td | 29
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFFrameLowering.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFFrameLowering.h | 41
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 160
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp | 640
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFISelLowering.h | 90
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFInstrFormats.td | 33
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp | 168
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFInstrInfo.h | 60
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFInstrInfo.td | 560
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFMCInstLower.cpp | 77
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFMCInstLower.h | 43
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp | 88
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h | 40
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFRegisterInfo.td | 41
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp | 31
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFSubtarget.h | 64
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp | 69
-rw-r--r--  contrib/llvm/lib/Target/BPF/BPFTargetMachine.h | 43
-rw-r--r--  contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp | 88
-rw-r--r--  contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h | 42
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 85
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 53
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 36
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 165
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 110
-rw-r--r--  contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 59
-rw-r--r--  contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp (renamed from contrib/llvm/lib/DebugInfo/DIContext.cpp) | 12
-rw-r--r--  contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp | 31
-rw-r--r--  contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h | 17
-rw-r--r--  contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Hexagon.h | 19
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Hexagon.td | 42
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 41
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h | 9
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp | 206
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h | 187
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp | 207
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp | 1348
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 195
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 136
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp | 1416
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h | 99
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp | 953
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1821
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 1778
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h | 180
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td | 95
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td | 94
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 866
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 43
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 2835
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td | 142
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 2848
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td | 1095
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td | 483
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td | 4530
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 4
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td | 51
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td | 578
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td | 506
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp | 13
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 9
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h | 8
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp | 56
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonOperands.td | 505
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp | 323
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h | 49
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td | 8
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 100
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 235
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 24
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h | 26
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 51
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h | 3
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h | 11
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 166
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h | 149
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h | 137
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 53
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h | 15
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 467
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 18
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 223
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 108
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 248
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 122
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 39
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 8
-rw-r--r--  contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 3
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 7
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 2
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h | 2
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 73
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h | 11
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp | 17
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h | 2
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1205
-rw-r--r--  contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 510
-rw-r--r--  contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 3
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 15
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 113
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 79
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h | 20
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 38
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 9
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 200
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 25
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 15
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 5
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 138
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 27
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 162
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 14
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 20
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 262
-rw-r--r--  contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td | 223
-rw-r--r--  contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td | 329
-rw-r--r--  contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td | 45
-rw-r--r--  contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td | 118
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips.h | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips.td | 61
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp | 185
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16HardFloat.h | 43
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp | 63
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h | 4
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td | 19
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td | 181
-rw-r--r--  contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td | 127
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 299
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h | 37
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsCCState.h | 4
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsCallingConv.td | 11
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsCondMov.td | 7
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp | 178
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsFastISel.cpp | 474
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsFrameLowering.h | 7
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 37
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h | 15
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp | 410
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsISelLowering.h | 22
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsInstrFPU.td | 113
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsInstrFormats.td | 27
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsInstrInfo.h | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsInstrInfo.td | 100
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp | 36
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td | 161
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp | 47
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsMachineFunction.h | 5
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 38
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h | 58
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsOptionRecord.h | 8
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsOs16.cpp | 129
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsOs16.h | 47
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 23
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h | 17
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td | 19
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 161
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h | 6
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 182
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h | 16
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 479
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp | 11
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 22
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSchedule.td | 4
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp | 116
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSubtarget.h | 56
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp | 108
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsTargetMachine.h | 9
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp | 42
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h | 18
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h | 32
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 59
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.h | 1
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.td | 22
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h | 28
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 528
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 61
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 265
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 11
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 285
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 18
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 70
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 49
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 29
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h | 50
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h | 11
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSection.h | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 20
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 64
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 16
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 19
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 140
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 76
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXVector.td | 8
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp | 90
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXutil.h | 25
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp | 38
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 315
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 87
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 86
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 43
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 132
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 46
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 47
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.h | 16
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.td | 121
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 651
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 26
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td | 47
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 202
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 110
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 157
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h | 50
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 387
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2723
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 190
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td148
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td248
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td264
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td172
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp802
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td391
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td1192
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td177
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp232
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp390
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp246
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h52
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td398
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp88
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h58
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp170
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp156
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp153
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp256
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h104
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp176
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp335
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp821
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.h6
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.td72
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp275
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h8
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp5
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp501
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp456
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h21
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp37
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h13
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td24
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstructions.td95
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp41
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp4
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp28
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp43
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h35
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp236
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h26
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp97
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h78
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp26
-rw-r--r--contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp1137
-rw-r--r--contrib/llvm/lib/Target/R600/CIInstructions.td6
-rw-r--r--contrib/llvm/lib/Target/R600/CaymanInstructions.td2
-rw-r--r--contrib/llvm/lib/Target/R600/EvergreenInstructions.td20
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp35
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h7
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp11
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp65
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h7
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp16
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp145
-rw-r--r--contrib/llvm/lib/Target/R600/Processors.td27
-rw-r--r--contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp46
-rw-r--r--contrib/llvm/lib/Target/R600/R600ISelLowering.cpp320
-rw-r--r--contrib/llvm/lib/Target/R600/R600ISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/R600/R600Instructions.td32
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp7
-rw-r--r--contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp4
-rw-r--r--contrib/llvm/lib/Target/R600/R600Packetizer.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp10
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp4
-rw-r--r--contrib/llvm/lib/Target/R600/R700Instructions.td2
-rw-r--r--contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp7
-rw-r--r--contrib/llvm/lib/Target/R600/SIDefines.h3
-rw-r--r--contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp96
-rw-r--r--contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp14
-rw-r--r--contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp1
-rw-r--r--contrib/llvm/lib/Target/R600/SIFoldOperands.cpp15
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.cpp513
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.h14
-rw-r--r--contrib/llvm/lib/Target/R600/SIInsertWaits.cpp6
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrFormats.td292
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.cpp299
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.h25
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.td1185
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstructions.td775
-rw-r--r--contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp67
-rw-r--r--contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp132
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp34
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.td65
-rw-r--r--contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp25
-rw-r--r--contrib/llvm/lib/Target/R600/SITypeRewriter.cpp5
-rw-r--r--contrib/llvm/lib/Target/R600/VIInstrFormats.td9
-rw-r--r--contrib/llvm/lib/Target/R600/VIInstructions.td81
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp158
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp17
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp88
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp48
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h40
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp71
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp8
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp47
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h6
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp121
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h14
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td7
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp85
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp12
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp44
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp194
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h10
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td15
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td12
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td17
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td143
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMCInstLower.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h15
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td41
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp30
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp60
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h19
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp11
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp311
-rw-r--r--contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp118
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp118
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp59
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp52
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp60
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h27
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp135
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h61
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td42
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp230
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp2510
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h215
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td20
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td864
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp70
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td85
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td1097
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp143
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp9
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td137
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td278
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td14
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td46
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td147
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp47
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp139
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp72
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp258
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h78
-rw-r--r--contrib/llvm/lib/Target/Target.cpp21
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp61
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp71
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp52
-rw-r--r--contrib/llvm/lib/Target/TargetSubtargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp37
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp173
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h5
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h54
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp304
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp26
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h14
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp82
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h16
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp319
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp36
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h8
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp35
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h34
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp425
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp116
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp254
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h51
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp195
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp10
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp84
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h12
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h14
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td30
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp146
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp266
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td141
-rw-r--r--contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp189
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp392
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp16
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp725
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h39
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp239
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp9402
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h221
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td2512
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td8
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td214
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td38
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td9
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td20
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td72
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td206
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp882
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h21
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td122
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td68
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSGX.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td2041
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td38
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td206
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h474
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp278
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp20
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp107
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h17
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td20
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td6
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp51
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h132
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp112
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h14
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp44
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h23
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp326
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h112
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp16
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp374
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp24
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp49
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp25
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp160
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h11
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td11
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp8
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp16
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h7
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp107
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h27
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.cpp80
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h72
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp108
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp198
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp26
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp125
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp143
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp732
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp66
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp110
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp119
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp53
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp392
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp264
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp231
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h (renamed from contrib/llvm/lib/Transforms/InstCombine/InstCombine.h)305
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp561
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp46
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp345
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp274
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp129
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h107
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp824
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp954
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp156
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp142
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp33
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp413
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp334
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp201
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp673
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h123
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h50
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h108
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp101
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h21
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h266
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h4
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp499
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp1530
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp254
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h9
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp404
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/PtrState.h210
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp70
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp410
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp654
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp540
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp366
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp176
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp1495
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp439
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp39
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp976
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp1300
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp1287
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp93
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp381
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp597
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp55
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp (renamed from contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp)118
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp226
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp481
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp993
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp2506
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp195
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp22
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp21
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp211
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp28
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp199
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp112
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp243
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp710
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp76
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp45
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp215
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp63
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp161
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp224
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp30
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp234
-rw-r--r--contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp80
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp46
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp499
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp240
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp164
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp661
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp136
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp348
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp225
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp72
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2702
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp521
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp2
1380 files changed, 138093 insertions, 79702 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 6eea817..f54e234 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
// Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) {
AA->addEscapingUse(U);
}
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ // We may have two calls
+ if (auto CS = ImmutableCallSite(I)) {
+ // Check if the two calls modify the same memory
+ return getModRefInfo(Call, CS);
+ } else {
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+ if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+ return AliasAnalysis::ModRef;
+ }
+ return AliasAnalysis::NoModRef;
+}
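For context, a minimal sketch of a client of the overload added above; the helper name and surrounding setup are assumptions, not part of this change:

    // Hypothetical helper: true when Call may read or write memory that
    // instruction I touches, via the new getModRefInfo overload. AA must
    // already be initialized.
    static bool callClobbersOrReads(AliasAnalysis *AA, Instruction *I,
                                    ImmutableCallSite Call) {
      return AA->getModRefInfo(I, Call) != AliasAnalysis::NoModRef;
    }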
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (!alias(getLocation(L), Loc))
+ if (Loc.Ptr && !alias(getLocation(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
if (!S->isUnordered())
return ModRef;
- // If the store address cannot alias the pointer in question, then the
- // specified memory cannot be modified by the store.
- if (!alias(getLocation(S), Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this store.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ }
// Otherwise, a store just writes.
return Mod;
@@ -359,15 +379,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
- // If the va_arg address cannot alias the pointer in question, then the
- // specified memory cannot be accessed by the va_arg.
- if (!alias(getLocation(V), Loc))
- return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this va_arg.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+ }
// Otherwise, a va_arg reads and writes.
return ModRef;
@@ -380,7 +403,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
return ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
- if (!alias(getLocation(CX), Loc))
+ if (Loc.Ptr && !alias(getLocation(CX), Loc))
return NoModRef;
return ModRef;
@@ -393,7 +416,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
return ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
- if (!alias(getLocation(RMW), Loc))
+ if (Loc.Ptr && !alias(getLocation(RMW), Loc))
return NoModRef;
return ModRef;
@@ -407,9 +430,10 @@ AliasAnalysis::ModRefResult
AliasAnalysis::callCapturesBefore(const Instruction *I,
const AliasAnalysis::Location &MemLoc,
DominatorTree *DT) {
- if (!DT || !DL) return AliasAnalysis::ModRef;
+ if (!DT)
+ return AliasAnalysis::ModRef;
- const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
return AliasAnalysis::ModRef;
@@ -462,10 +486,10 @@ AliasAnalysis::~AliasAnalysis() {}
/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
/// AliasAnalysis interface before any other methods are called.
///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
+ DL = NewDL;
+ auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &P->getAnalysis<AliasAnalysis>();
}
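The new signature ripples into every AliasAnalysis subclass; a sketch of the updated call site in a hypothetical module pass, matching the pattern the in-tree implementations below adopt:

    // Before this change, InitializeAliasAnalysis(this) looked up the
    // DataLayoutPass itself; now the module's layout is passed explicitly.
    bool MyAAPass::runOnModule(Module &M) {
      InitializeAliasAnalysis(this, &M.getDataLayout());
      return false;
    }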
@@ -494,7 +518,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
/// execution of the specified instructions to mod/ref (according to the
/// mode) the location Loc. The instructions to consider are all
/// of the instructions in the range of [I1,I2] INCLUSIVE.
-/// I1 and I2 must be in the same basic block.
+/// I1 and I2 must be in the same basic block.
bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2,
const Location &Loc,
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
index b860914..a1bfba1 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -43,7 +44,7 @@ namespace {
errs() << " " << Val << " " << Desc << " responses ("
<< Val*100/Sum << "%)\n";
}
- ~AliasAnalysisCounter() {
+ ~AliasAnalysisCounter() override {
unsigned AASum = No+May+Partial+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
@@ -76,7 +77,7 @@ namespace {
bool runOnModule(Module &M) override {
this->M = &M;
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
return false;
}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index fe4bd4c..273eacc 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (CallSite CS = cast<Value>(&Inst)) {
+ if (auto CS = CallSite(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
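The rewritten line leans on CallSite's explicit construction from an Instruction* instead of the old implicit conversion from Value*; a standalone sketch of the idiom (the helper is illustrative only):

    // CallSite(Instruction*) is "null" for anything that is not a call or
    // invoke, so one if statement covers both the kind test and the use.
    static bool isDirectCall(Instruction *I) {
      if (auto CS = CallSite(I))
        return isa<Function>(CS.getCalledValue()->stripPointerCasts());
      return false;
    }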
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
index 5d61cf9..f98b578 100644
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -44,7 +44,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this); // set up super class
+ InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
for(Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 45442b0..50890c1 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -182,12 +182,13 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
return false;
}
-bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
+ AliasAnalysis &AA) const {
if (!Inst->mayReadOrWriteMemory())
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- CallSite C1 = getUnknownInst(i), C2 = Inst;
+ ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
if (!C1 || !C2 ||
AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
@@ -242,7 +243,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
/// containsPointer - Return true if the specified location is represented by
/// this alias set, false otherwise. This does not modify the AST object or
/// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
@@ -250,7 +251,7 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
return false;
}
-bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
return true;
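The const-correct overloads work because ImmutableCallSite, used in the rewritten aliasesUnknownInst above, accepts a const Instruction*; a sketch under that assumption (the helper name is invented):

    // Conservative like the tracker code: if either instruction is not a
    // call or invoke, assume the two may interfere.
    static bool callsMayInterfere(const Instruction *A, const Instruction *B,
                                  AliasAnalysis &AA) {
      ImmutableCallSite C1(A), C2(B);
      return !C1 || !C2 ||
             AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
             AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef;
    }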
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index f64bf0e..842ff0a 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -49,12 +50,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
- initializeJumpInstrTableInfoPass(Registry);
initializeLazyValueInfoPass(Registry);
initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
- initializeLoopInfoPass(Registry);
+ initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
+ initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
@@ -65,7 +66,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeScalarEvolutionPass(Registry);
initializeScalarEvolutionAliasAnalysisPass(Registry);
- initializeTargetTransformInfoAnalysisGroup(Registry);
+ initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
initializeScopedNoAliasAAPass(Registry);
}
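These registrations only take effect once a tool primes the pass registry; a minimal sketch of the tool-side call (a real driver would also run the other initialize* entry points):

    // Typically done once at startup, e.g. in main() of an opt-like tool.
    PassRegistry &Registry = *PassRegistry::getPassRegistry();
    initializeAnalysis(Registry);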
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index da5ba18..f468a43 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -73,6 +74,20 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
#endif
}
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ OS << "Cached assumptions for function: " << F.getName() << "\n";
+ for (auto &VH : AC.assumptions())
+ if (VH)
+ OS << " " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+ return PreservedAnalyses::all();
+}
+
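The printer's loop doubles as the general pattern for consuming the cache; a sketch assuming an AssumptionCache AC is already in hand:

    // Each tracked handle wraps a call to @llvm.assume; a handle goes null
    // when its call is erased, hence the check before dereferencing.
    for (auto &VH : AC.assumptions())
      if (VH)
        errs() << "assume: " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";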
void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
if (I != ACT->AssumptionCaches.end())
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a9efc5a..a61faca 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -38,7 +39,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -103,7 +103,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
+ if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -221,7 +221,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
BOp, DT))
break;
// FALL THROUGH.
@@ -292,7 +292,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout *DL,
+ bool &MaxLookupReached, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
@@ -341,16 +341,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
- // If we are lacking DataLayout information, we can't compute the offsets of
- // elements computed by GEPs. However, we can handle bitcast equivalent
- // GEPs.
- if (!DL) {
- if (!GEPOp->hasAllZeroIndices())
- return V;
- V = GEPOp->getOperand(0);
- continue;
- }
-
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
@@ -363,30 +353,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
- BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
+ BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
- BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL->getTypeAllocSize(*GTI);
+ uint64_t Scale = DL.getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- if (DL->getPointerSizeInBits(AS) > Width)
+ if (DL.getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0, AC, DT);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
+ 0, AC, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -408,7 +398,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
+ if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
@@ -461,14 +451,12 @@ namespace {
initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
@@ -591,7 +579,7 @@ INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
@@ -612,7 +600,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
if (!Visited.insert(V).second) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -649,8 +637,8 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
@@ -706,7 +694,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
return DoesNotAccessMemory;
// For intrinsics, we can check the table.
- if (unsigned iid = F->getIntrinsicID()) {
+ if (Intrinsic::ID iid = F->getIntrinsicID()) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
#include "llvm/IR/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
@@ -718,7 +706,8 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
Min = OnlyAccessesArgumentPointees;
@@ -730,7 +719,8 @@ AliasAnalysis::Location
BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
ModRefResult &Mask) {
Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
if (II != nullptr)
switch (II->getIntrinsicID()) {
@@ -813,6 +803,11 @@ static bool isAssumeIntrinsic(ImmutableCallSite CS) {
return false;
}
+bool BasicAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
@@ -823,7 +818,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
@@ -888,6 +883,99 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
+/// operators, both having the exact same pointer operand.
+static AliasAnalysis::AliasResult
+aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
+ const GEPOperator *GEP2, uint64_t V2Size,
+ const DataLayout &DL) {
+
+ assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+ "Expected GEPs with the same pointer operand");
+
+ // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+ // such that the struct field accesses provably cannot alias.
+ // We also need at least two indices (the pointer, and the struct field).
+ if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+ GEP1->getNumIndices() < 2)
+ return AliasAnalysis::MayAlias;
+
+ // If we don't know the size of the accesses through both GEPs, we can't
+ // determine whether the struct fields accessed can't alias.
+ if (V1Size == AliasAnalysis::UnknownSize ||
+ V2Size == AliasAnalysis::UnknownSize)
+ return AliasAnalysis::MayAlias;
+
+ ConstantInt *C1 =
+ dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+ ConstantInt *C2 =
+ dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+ // If the last (struct) indices aren't constants, we can't say anything.
+ // If they're identical, the other indices might also be dynamically
+ // equal, so the GEPs can alias.
+ if (!C1 || !C2 || C1 == C2)
+ return AliasAnalysis::MayAlias;
+
+ // Find the last-indexed type of the GEP, i.e., the type you'd get if
+ // you stripped the last index.
+ // On the way, look at each indexed type. If there's something other
+ // than an array, different indices can lead to different final types.
+ SmallVector<Value *, 8> IntermediateIndices;
+
+ // Insert the first index; we don't need to check the type indexed
+ // through it as it only drops the pointer indirection.
+ assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+ IntermediateIndices.push_back(GEP1->getOperand(1));
+
+ // Insert all the remaining indices but the last one.
+ // Also, check that they all index through arrays.
+ for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+ if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices)))
+ return AliasAnalysis::MayAlias;
+ IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+ }
+
+ StructType *LastIndexedStruct =
+ dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices));
+
+ if (!LastIndexedStruct)
+ return AliasAnalysis::MayAlias;
+
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a struct;
+ // - said indices are different, hence, the pointed-to fields are different;
+ // - both GEPs only index through arrays prior to that.
+ //
+ // This lets us determine that the struct that GEP1 indexes into and the
+ // struct that GEP2 indexes into must either precisely overlap or be
+ // completely disjoint. Because they cannot partially overlap, indexing into
+ // different non-overlapping fields of the struct will never alias.
+
+ // Therefore, the only remaining thing needed to show that both GEPs can't
+ // alias is that the fields are not overlapping.
+ const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+ const uint64_t StructSize = SL->getSizeInBytes();
+ const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+ const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+ auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+ uint64_t V2Off, uint64_t V2Size) {
+ return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+ ((V2Off + V2Size <= StructSize) ||
+ (V2Off + V2Size - StructSize <= V1Off));
+ };
+
+ if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+ EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+ return AliasAnalysis::NoAlias;
+
+ return AliasAnalysis::MayAlias;
+}
+
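For intuition, here is a minimal standalone sketch of the overlap check used in aliasSameBasePointerGEPs; the struct layout and access sizes are assumptions for illustration, not taken from the patch:

    #include <cassert>
    #include <cstdint>

    // Mirror of the EltsDontOverlap lambda, instantiated for a
    // hypothetical struct S { int A; int B; } (offsets 0 and 4, size 8)
    // laid out in an array, as in &Arr[i].A vs. &Arr[j].B.
    int main() {
      const uint64_t StructSize = 8;
      auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
                                          uint64_t V2Off, uint64_t V2Size) {
        return V1Off < V2Off && V1Off + V1Size <= V2Off &&
               ((V2Off + V2Size <= StructSize) ||
                (V2Off + V2Size - StructSize <= V1Off));
      };
      // 4-byte accesses to fields A and B never overlap: NoAlias,
      // even when the array indices i and j are unknown.
      assert(EltsDontOverlap(0, 4, 4, 4));
      // A 5-byte access at offset 4 wraps past the struct into the next
      // array element's field A, so it cannot be proven disjoint.
      assert(!EltsDontOverlap(0, 4, 4, 5));
    }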
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -947,10 +1035,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -979,14 +1067,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -995,6 +1083,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
+
+ // If we know the two GEPs are based off of the exact same pointer (and not
+ // just the same underlying object), see if that tells us anything about
+ // the resulting pointers.
+ if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+ // If we couldn't find anything interesting, don't abandon just yet.
+ if (R != MayAlias)
+ return R;
+ }
+
// If the max search depth is reached the result is undefined
if (GEP2MaxLookupReached || GEP1MaxLookupReached)
return MayAlias;
@@ -1025,7 +1124,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1094,7 +1193,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *V = GEP1VariableIndices[i].V;
bool SignKnownZero, SignKnownOne;
- ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
0, AC1, nullptr, DT);
// Zero-extension widens the variable, and so forces the sign
@@ -1239,8 +1338,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
SmallPtrSet<Value*, 4> UniqueSrc;
SmallVector<Value*, 4> V1Srcs;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PV1 = PN->getIncomingValue(i);
+ for (Value *PV1 : PN->incoming_values()) {
if (isa<PHINode>(PV1))
// If any of the source itself is a PHI, return MayAlias conservatively
// to avoid compile time explosion. The worst possible case is if both
@@ -1290,6 +1388,11 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
V1 = V1->stripPointerCasts();
V2 = V2->stripPointerCasts();
+ // If V1 or V2 is undef, the result is NoAlias because we can always pick a
+ // value for undef that aliases nothing in the program.
+ if (isa<UndefValue>(V1) || isa<UndefValue>(V2))
+ return NoAlias;
+
// Are we checking for alias of the same value?
// Because we look 'through' phi nodes we could look at "Value" pointers from
// different iterations. We must therefore make sure that this is not the
@@ -1303,8 +1406,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1427,6 +1530,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
if (!Inst)
return true;
+ if (VisitedPhiBBs.empty())
+ return true;
+
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
return false;
@@ -1434,7 +1540,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 8ed8e3e..3d819eb 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << ":";
+ OS << Node->getName() << ":";
switch (ViewBlockFreqPropagationDAG) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
@@ -108,7 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
@@ -123,13 +123,13 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<BranchProbabilityInfo>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- LoopInfo &LI = getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
if (!BFI)
BFI.reset(new ImplType);
BFI->doFunction(&F, &BPI, &LI);
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 278073c..456cee1 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
return true;
}
-/// \brief Get the maximum allowed loop scale.
-///
-/// Gives the maximum number of estimated iterations allowed for a loop. Very
-/// large numbers cause problems downstream (even within 64-bits).
-static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
-
/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+ // Infinite loops need special handling. If we give the back edge an infinite
+  // mass, it may saturate all the other scales in the function down to 1,
+ // making all the other region temperatures look exactly the same. Choose an
+ // arbitrary scale to avoid these issues.
+ //
+ // FIXME: An alternate way would be to select a symbolic scale which is later
+ // replaced to be the maximum of all computed scales plus 1. This would
+ // appropriately describe the loop as having a large scale, without skewing
+ // the final frequency computation.
+  const Scaled64 InfiniteLoopScale(1, 12);
+
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
- // Block scale stores the inverse of the scale.
- Loop.Scale = ExitMass.toScaled().inverse();
+ // Block scale stores the inverse of the scale. If this is an infinite loop,
+ // its exit mass will be zero. In this case, use an arbitrary scale for the
+ // loop scale.
+ Loop.Scale =
+      ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
<< " - " << Loop.BackedgeMass << ")\n"
<< " - scale = " << Loop.Scale << "\n");
-
- if (Loop.Scale > getMaxLoopScale()) {
- Loop.Scale = getMaxLoopScale();
- DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
- }
}
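A small worked example of the scale computation above, with assumed masses:

    // BackedgeMass = 3/4 of full  =>  ExitMass = 1/4
    //   Loop.Scale = (1/4)^-1 = 4, i.e. roughly 4 expected iterations.
    // BackedgeMass = full         =>  ExitMass = 0 (infinite loop)
    //   Loop.Scale = InfiniteLoopScale = Scaled64(1, 12) = 1 * 2^12 = 4096,
    //   an arbitrary finite stand-in instead of an unbounded scale.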
/// \brief Package up a loop.
@@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
// Scale the Factor to a size that creates integers. Ideally, integers would
// be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, the register allocator currently deals poorly
- // with large numbers. Instead, push Min up a little from 1 to give some
- // room to differentiate small, unequal numbers.
- //
- // TODO: fix issues downstream so that ScalingFactor can be
- // Scaled64(1,64)/Max.
- Scaled64 ScalingFactor = Min.inverse();
- if ((Max / Min).lg() < 60)
+ // differentiated. However, in the presence of large frequency values, small
+ // frequencies are scaled down to 1, making it impossible to differentiate
+ // small, unequal numbers. When the spread between Min and Max frequencies
+ // fits well within MaxBits, we make the scale be at least 8.
+ const unsigned MaxBits = 64;
+ const unsigned SpreadBits = (Max / Min).lg();
+ Scaled64 ScalingFactor;
+ if (SpreadBits <= MaxBits - 3) {
+ // If the values are small enough, make the scaling factor at least 8 to
+ // allow distinguishing small values.
+ ScalingFactor = Min.inverse();
ScalingFactor <<= 3;
+ } else {
+ // If the values need more than MaxBits to be represented, saturate small
+ // frequency values down to 1 by using a scaling factor that benefits large
+ // frequency values.
+ ScalingFactor = Scaled64(1, MaxBits) / Max;
+ }
// Translate the floats to integers.
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
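A numeric sketch of the two scaling-factor branches above; the Min/Max frequencies are assumptions for illustration:

    // Min = 2^-4, Max = 2^40  =>  SpreadBits = lg(2^44) = 44 <= 61
    //   ScalingFactor = Min.inverse() << 3 = 2^4 * 8 = 128,
    //   so Min maps to 8 and nearby small values stay distinguishable.
    // Min = 2^-10, Max = 2^60  =>  SpreadBits = lg(2^70) = 70 > 61
    //   ScalingFactor = 2^64 / Max = 2^4,
    //   so Max lands near the top of the 64-bit range, while values
    //   below 2^-4 saturate down to 1.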
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 2b39d47..091943b 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -28,7 +29,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -114,11 +115,6 @@ static const uint32_t NORMAL_WEIGHT = 16;
// Minimum weight of an edge. Please note, that weight is NEVER 0.
static const uint32_t MIN_WEIGHT = 1;
-static uint32_t getMaxWeightFor(BasicBlock *BB) {
- return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
-}
-
-
/// \brief Calculate edge weights for successors that lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
@@ -184,15 +180,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
if (!WeightsNode)
return false;
+ // Check that the number of successors is manageable.
+ assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
+
// Ensure there are weights for all of the successors. Note that the first
// operand to the metadata node is a name, not a weight.
if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
return false;
- // Build up the final weights that will be used in a temporary buffer, but
- // don't add them until all weihts are present. Each weight value is clamped
- // to [1, getMaxWeightFor(BB)].
- uint32_t WeightLimit = getMaxWeightFor(BB);
+ // Build up the final weights that will be used in a temporary buffer.
+ // Compute the sum of all weights to later decide whether they need to
+ // be scaled to fit in 32 bits.
+ uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
@@ -200,12 +199,26 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
- Weights.push_back(
- std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit)));
+ assert(Weight->getValue().getActiveBits() <= 32 &&
+ "Too many bits for uint32_t");
+ Weights.push_back(Weight->getZExtValue());
+ WeightSum += Weights.back();
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeWeight(BB, i, Weights[i]);
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ uint64_t ScalingFactor =
+ (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
+
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ uint32_t W = Weights[i] / ScalingFactor;
+ WeightSum += W;
+ setEdgeWeight(BB, i, W);
+ }
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
return true;
}
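To see why the scaling keeps the weight sum within 32 bits, a minimal standalone sketch with assumed metadata weights:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Two weights whose sum exceeds UINT32_MAX (~4.29e9).
      uint64_t Weights[] = {3000000000u, 3000000000u};
      uint64_t WeightSum = 6000000000u;
      uint64_t ScalingFactor =
          (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
      assert(ScalingFactor == 2);
      uint64_t Scaled = 0;
      for (uint64_t W : Weights)
        Scaled += W / ScalingFactor; // each weight becomes 1500000000
      assert(Scaled <= UINT32_MAX);  // the scaled sum now fits
    }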
@@ -378,6 +391,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!CV)
return false;
+ // If the LHS is the result of AND'ing a value with a single bit bitmask,
+ // we don't have information about probabilities.
+ if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
+ if (LHS->getOpcode() == Instruction::And)
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (AndRHS->getUniqueInteger().isPowerOf2())
+ return false;
+
bool isProb;
if (CV->isZero()) {
switch (CI->getPredicate()) {
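A sketch of the bitmask case excluded by the check added above; the IR names are illustrative assumptions:

    // Hypothetical IR:
    //   %t = and i32 %flags, 8   ; 8 is a power of two: one mask bit
    //   %c = icmp eq i32 %t, 0
    // Whether a single flag bit is set is a coin flip as far as these
    // heuristics know, so calcZeroHeuristics declines to guess here.
    // By contrast, icmp eq i32 %x, 0 on an arbitrary value is usually
    // false, which the heuristic does exploit.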
@@ -484,7 +505,7 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -492,31 +513,29 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
- for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
- E = po_end(&F.getEntryBlock());
- I != E; ++I) {
- DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
- if (calcUnreachableHeuristics(*I))
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
continue;
- if (calcMetadataWeights(*I))
+ if (calcMetadataWeights(BB))
continue;
- if (calcColdCallHeuristics(*I))
+ if (calcColdCallHeuristics(BB))
continue;
- if (calcLoopBranchHeuristics(*I))
+ if (calcLoopBranchHeuristics(BB))
continue;
- if (calcPointerHeuristics(*I))
+ if (calcPointerHeuristics(BB))
continue;
- if (calcZeroHeuristics(*I))
+ if (calcZeroHeuristics(BB))
continue;
- if (calcFloatingPointHeuristics(*I))
+ if (calcFloatingPointHeuristics(BB))
continue;
- calcInvokeHeuristics(*I);
+ calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
@@ -546,7 +565,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
uint32_t PrevSum = Sum;
Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
+ assert(Sum >= PrevSum); (void) PrevSum;
}
return Sum;
@@ -609,14 +628,17 @@ uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
uint32_t BranchProbabilityInfo::
getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
uint32_t Weight = 0;
+ bool FoundWeight = false;
DenseMap<Edge, uint32_t>::const_iterator MapI;
for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
- if (MapI != Weights.end())
+ if (MapI != Weights.end()) {
+ FoundWeight = true;
Weight += MapI->second;
+ }
}
- return (Weight == 0) ? DEFAULT_WEIGHT : Weight;
+ return (!FoundWeight) ? DEFAULT_WEIGHT : Weight;
}
/// Set the edge weight for a given edge specified by PredBlock and an index
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index 89787f8..c86f1f5 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
@@ -111,7 +111,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
index 88bb84a..84b31df 100644
--- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -43,14 +43,19 @@
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
+#include <memory>
#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "cfl-aa"
+
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
@@ -74,7 +79,7 @@ static Optional<Value *> getTargetValue(Instruction *);
static bool hasUsefulEdges(Instruction *);
const StratifiedIndex StratifiedLink::SetSentinel =
- std::numeric_limits<StratifiedIndex>::max();
+ std::numeric_limits<StratifiedIndex>::max();
namespace {
// StratifiedInfo Attribute things.
@@ -82,11 +87,13 @@ typedef unsigned StratifiedAttr;
LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3;
LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
// \brief StratifiedSets call for knowledge of "direction", so this is how we
@@ -141,9 +148,8 @@ struct FunctionInfo {
// Lots of functions have < 4 returns. Adjust as necessary.
SmallVector<Value *, 4> ReturnedValues;
- FunctionInfo(StratifiedSets<Value *> &&S,
- SmallVector<Value *, 4> &&RV)
- : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
};
struct CFLAliasAnalysis;
@@ -155,7 +161,7 @@ struct FunctionHandle : public CallbackVH {
assert(CFLAA != nullptr);
}
- virtual ~FunctionHandle() {}
+ ~FunctionHandle() override {}
void deleted() override { removeSelfFromCache(); }
void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
@@ -183,7 +189,7 @@ public:
initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- virtual ~CFLAliasAnalysis() {}
+ ~CFLAliasAnalysis() override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AliasAnalysis::getAnalysisUsage(AU);
@@ -226,14 +232,22 @@ public:
// Comparisons between global variables and other constants should be
// handled by BasicAA.
+ // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
+ // a GlobalValue and ConstantExpr, but every query needs to have at least
+ // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
+ // are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
- return MayAlias;
+ return AliasAnalysis::alias(LocA, LocB);
}
- return query(LocA, LocB);
+ AliasResult QueryResult = query(LocA, LocB);
+ if (QueryResult == MayAlias)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ return QueryResult;
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
};
void FunctionHandle::removeSelfFromCache() {
@@ -256,9 +270,19 @@ public:
llvm_unreachable("Unsupported instruction encountered");
}
+ void visitPtrToIntInst(PtrToIntInst &Inst) {
+ auto *Ptr = Inst.getOperand(0);
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &Inst) {
+ auto *Ptr = &Inst;
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
void visitCastInst(CastInst &Inst) {
- Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
- AttrNone));
+ Output.push_back(
+ Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone));
}
void visitBinaryOperator(BinaryOperator &Inst) {
@@ -281,8 +305,7 @@ public:
}
void visitPHINode(PHINode &Inst) {
- for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) {
- Value *Val = Inst.getIncomingValue(I);
+ for (Value *Val : Inst.incoming_values()) {
Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone));
}
}
@@ -295,8 +318,11 @@ public:
}
void visitSelectInst(SelectInst &Inst) {
- auto *Condition = Inst.getCondition();
- Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+    // The condition is not processed here (the instruction producing
+    // the condition value is processed elsewhere). For a select, the
+    // condition is evaluated, but it is not loaded, stored, or assigned
+    // simply by virtue of being the condition of the select.
+
auto *TrueVal = Inst.getTrueValue();
Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
auto *FalseVal = Inst.getFalseValue();
@@ -367,7 +393,7 @@ public:
// I put this here to give us an upper bound on time taken by IPA. Is it
// really (realistically) needed? Keep in mind that we do have an n^2 algo.
- if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+ if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs)
return false;
// Exit early if we'll fail anyway
@@ -419,7 +445,7 @@ public:
}
if (AddEdge)
Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
- StratifiedAttrs().flip()));
+ StratifiedAttrs().flip()));
}
if (Parameters.size() != Arguments.size())
@@ -561,8 +587,7 @@ private:
EdgeTypeT Weight;
Node Other;
- Edge(const EdgeTypeT &W, const Node &N)
- : Weight(W), Other(N) {}
+ Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {}
bool operator==(const Edge &E) const {
return Weight == E.Weight && Other == E.Other;
@@ -725,6 +750,25 @@ static Level directionOfEdgeType(EdgeType);
static void buildGraphFrom(CFLAliasAnalysis &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+// Gets the edges of a ConstantExpr as if it was an Instruction. This
+// function also acts on any nested ConstantExprs, adding the edges
+// of those to the given SmallVector as well.
+static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+ SmallVectorImpl<Edge> &);
+
+// Given an Instruction, this will add it to the graph, along with any
+// Instructions that are potentially only available from said Instruction
+// For example, given the following line:
+// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+// addInstructionToGraph would add both the `load` and `getelementptr`
+// instructions to the graph appropriately.
+static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+ SmallVectorImpl<Value *> &, NodeMapT &,
+ GraphT &);
+
+// Notes whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val);
+
// Builds the graph + StratifiedSets for a function.
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
@@ -768,13 +812,16 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
return AttrGlobalIndex;
if (auto *Arg = dyn_cast<Argument>(Val))
- if (!Arg->hasNoAliasAttr())
+ // Only pointer arguments should have the argument attribute,
+ // because things can't escape through scalars without us seeing a
+ // cast, and thus, interaction with them doesn't matter.
+ if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
return argNumberToAttrIndex(Arg->getArgNo());
return NoneType();
}
static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
- if (ArgNum > AttrMaxNumArgs)
+ if (ArgNum >= AttrMaxNumArgs)
return AttrAllIndex;
return ArgNum + AttrFirstArgIndex;
}
@@ -793,6 +840,8 @@ static EdgeType flipWeight(EdgeType Initial) {
static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(Inst) &&
+ "Expected instructions to have 'useful' edges");
GetEdgesVisitor v(Analysis, Output);
v.visit(Inst);
}
@@ -809,13 +858,41 @@ static Level directionOfEdgeType(EdgeType Weight) {
llvm_unreachable("Incomplete switch coverage");
}
-// Aside: We may remove graph construction entirely, because it doesn't really
-// buy us much that we don't already have. I'd like to add interprocedural
-// analysis prior to this however, in case that somehow requires the graph
-// produced by this for efficient execution
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
- SmallVectorImpl<Value *> &ReturnedValues,
- NodeMapT &Map, GraphT &Graph) {
+static void constexprToEdges(CFLAliasAnalysis &Analysis,
+ ConstantExpr &CExprToCollapse,
+ SmallVectorImpl<Edge> &Results) {
+ SmallVector<ConstantExpr *, 4> Worklist;
+ Worklist.push_back(&CExprToCollapse);
+
+ SmallVector<Edge, 8> ConstexprEdges;
+ while (!Worklist.empty()) {
+ auto *CExpr = Worklist.pop_back_val();
+ std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction());
+
+ if (!hasUsefulEdges(Inst.get()))
+ continue;
+
+ ConstexprEdges.clear();
+ argsToEdges(Analysis, Inst.get(), ConstexprEdges);
+ for (auto &Edge : ConstexprEdges) {
+ if (Edge.From == Inst.get())
+ Edge.From = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+ Worklist.push_back(Nested);
+
+ if (Edge.To == Inst.get())
+ Edge.To = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+ Worklist.push_back(Nested);
+ }
+
+ Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
+ }
+}
+
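A sketch of the kind of nested ConstantExpr this worklist flattens; the global and the types are assumptions for illustration:

    // Hypothetical IR:
    //   %v = load i32* bitcast (i8* getelementptr
    //          ([8 x i8]* @buf, i32 0, i32 4) to i32*)
    // The bitcast ConstantExpr is materialized via getAsInstruction(),
    // its edges are collected, and any edge endpoint equal to that
    // temporary Instruction is rewritten back to the ConstantExpr.
    // Its getelementptr operand is itself a ConstantExpr, so it is
    // pushed onto the worklist and flattened the same way.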
+static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
auto &Iter = Pair.first;
@@ -826,42 +903,86 @@ static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
return Iter->second;
};
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (isa<ReturnInst>(&Inst))
+ ReturnedValues.push_back(&Inst);
+
+ if (!hasUsefulEdges(&Inst))
+ return;
+
SmallVector<Edge, 8> Edges;
- for (auto &Bb : Fn->getBasicBlockList()) {
- for (auto &Inst : Bb.getInstList()) {
- // We don't want the edges of most "return" instructions, but we *do* want
- // to know what can be returned.
- if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
- ReturnedValues.push_back(Ret);
-
- if (!hasUsefulEdges(&Inst))
- continue;
+ argsToEdges(Analysis, &Inst, Edges);
+
+ // In the case of an unused alloca (or similar), edges may be empty. Note
+ // that it exists so we can potentially answer NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ return;
+ }
- Edges.clear();
- argsToEdges(Analysis, &Inst, Edges);
+ const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ };
- // In the case of an unused alloca (or similar), edges may be empty. Note
- // that it exists so we can potentially answer NoAlias.
- if (Edges.empty()) {
- auto MaybeVal = getTargetValue(&Inst);
- assert(MaybeVal.hasValue());
- auto *Target = *MaybeVal;
- findOrInsertNode(Target);
- continue;
- }
+ SmallVector<ConstantExpr *, 4> ConstantExprs;
+ for (const Edge &E : Edges) {
+ addEdgeToGraph(E);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To))
+ ConstantExprs.push_back(Constexpr);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From))
+ ConstantExprs.push_back(Constexpr);
+ }
- for (const Edge &E : Edges) {
- auto To = findOrInsertNode(E.To);
- auto From = findOrInsertNode(E.From);
- auto FlippedWeight = flipWeight(E.Weight);
- auto Attrs = E.AdditionalAttrs;
- Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
- std::make_pair(FlippedWeight, Attrs));
- }
- }
+ for (ConstantExpr *CE : ConstantExprs) {
+ Edges.clear();
+ constexprToEdges(Analysis, *CE, Edges);
+ std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph);
}
}
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this however, in case that somehow requires the graph
+// produced by this for efficient execution
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ for (auto &Bb : Fn->getBasicBlockList())
+ for (auto &Inst : Bb.getInstList())
+ addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph);
+}
+
+static bool canSkipAddingToSets(Value *Val) {
+ // Constants can share instances, which may falsely unify multiple
+ // sets, e.g. in
+ // store i32* null, i32** %ptr1
+ // store i32* null, i32** %ptr2
+ // clearly ptr1 and ptr2 should not be unified into the same set, so
+ // we should filter out the (potentially shared) instance to
+ // i32* null.
+ if (isa<Constant>(Val)) {
+ bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) ||
+ isa<ConstantStruct>(Val);
+ // TODO: Because all of these things are constant, we can determine whether
+ // the data is *actually* mutable at graph building time. This will probably
+ // come for free/cheap with offset awareness.
+ bool CanStoreMutableData =
+ isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+ return !CanStoreMutableData;
+ }
+
+ return false;
+}
+
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
NodeMapT Map;
GraphT Graph;
@@ -893,7 +1014,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
while (!Worklist.empty()) {
auto Node = Worklist.pop_back_val();
auto *CurValue = findValueOrDie(Node);
- if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+ if (canSkipAddingToSets(CurValue))
continue;
for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
@@ -902,7 +1023,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
auto &OtherNode = std::get<1>(EdgeTuple);
auto *OtherValue = findValueOrDie(OtherNode);
- if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+ if (canSkipAddingToSets(OtherValue))
continue;
bool Added;
@@ -918,16 +1039,16 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
break;
}
- if (Added) {
- auto Aliasing = Weight.second;
- if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
- Aliasing.set(*MaybeCurIndex);
- if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
- Aliasing.set(*MaybeOtherIndex);
- Builder.noteAttributes(CurValue, Aliasing);
- Builder.noteAttributes(OtherValue, Aliasing);
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+
+ if (Added)
Worklist.push_back(OtherNode);
- }
}
}
}
@@ -937,7 +1058,12 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
// things that were present during construction being present in the graph.
// So, we add all present arguments here.
for (auto &Arg : Fn->args()) {
- Builder.add(&Arg);
+ if (!Builder.add(&Arg))
+ continue;
+
+ auto Attrs = valueToAttrIndex(&Arg);
+ if (Attrs.hasValue())
+ Builder.noteAttributes(&Arg, *Attrs);
}
return FunctionInfo(Builder.build(), std::move(ReturnedValues));
@@ -964,8 +1090,10 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto MaybeFnA = parentFunctionOfValue(ValA);
auto MaybeFnB = parentFunctionOfValue(ValB);
if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
- llvm_unreachable("Don't know how to extract the parent function "
- "from values A or B");
+ // The only times this is known to happen are when globals + InlineAsm
+ // are involved
+ DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+ return AliasAnalysis::MayAlias;
}
if (MaybeFnA.hasValue()) {
@@ -991,23 +1119,36 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto SetA = *MaybeA;
auto SetB = *MaybeB;
-
- if (SetA.Index == SetB.Index)
- return AliasAnalysis::PartialAlias;
-
auto AttrsA = Sets.getLink(SetA.Index).Attrs;
auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
// Stratified set attributes are used as markers to signify whether a member
- // of a StratifiedSet (or a member of a set above the current set) has
+ // of a StratifiedSet (or a member of a set above the current set) has
// interacted with either arguments or globals. "Interacted with" meaning
- // its value may be different depending on the value of an argument or
+ // its value may be different depending on the value of an argument or
// global. The thought behind this is that, because arguments and globals
// may alias each other, if AttrsA and AttrsB have touched args/globals,
- // we must conservatively say that they alias. However, if at least one of
- // the sets has no values that could legally be altered by changing the value
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
// of an argument or global, then we don't have to be as conservative.
if (AttrsA.any() && AttrsB.any())
return AliasAnalysis::MayAlias;
+ // We currently unify things even if the accesses to them may not be in
+ // bounds, so we can't return partial alias here because we don't
+ // know whether the pointer is really within the object or not.
+  // I.e., given an out-of-bounds GEP and an alloca'd pointer, we may
+  // unify the two, and we do not currently track enough information to
+  // differentiate them, so matching set indices conservatively yield
+  // MayAlias below.
+
+ if (SetA.Index == SetB.Index)
+ return AliasAnalysis::MayAlias;
+
return AliasAnalysis::NoAlias;
}
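An assumed case behind answering MayAlias rather than PartialAlias for matching set indices:

    // Hypothetical IR:
    //   %a   = alloca i32
    //   %oob = getelementptr i32* %a, i64 10   ; out of bounds of %a
    // %a and %oob can be unified into the same stratified set, yet they
    // need not overlap at runtime, so PartialAlias (which asserts an
    // overlap) would be wrong; MayAlias is the safe answer.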
+
+bool CFLAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index fa5683c..46a2c43 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "code-metrics"
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index fd8f2ae..2f4c6a9 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <cerrno>
#include <cmath>
@@ -50,8 +50,7 @@ using namespace llvm;
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, Type *DestTy,
- const DataLayout &TD) {
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
@@ -84,11 +83,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
- unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
APInt Result(IT->getBitWidth(), 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
Result <<= BitShift;
- if (TD.isLittleEndian())
+ if (DL.isLittleEndian())
Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
else
Result |= CDV->getElementAsInteger(i);
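A worked example of the element-insertion loop above, with an assumed input:

    // bitcast <2 x i32> <i32 0xAAAAAAAA, i32 0xBBBBBBBB> to i64,
    // BitShift = 32:
    //   little-endian inserts element 1 first, then element 0:
    //     Result = (0xBBBBBBBB << 32) | 0xAAAAAAAA
    //   big-endian inserts element 0 first:
    //     Result = (0xAAAAAAAA << 32) | 0xBBBBBBBB
    // i.e. element 0 lands in the low bits exactly when the target is
    // little-endian.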
@@ -106,7 +105,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
- return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
}
// If this is a bitcast from constant vector -> vector, fold it.
@@ -138,7 +137,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Type *DestIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD);
+ C = FoldBitCast(C, DestIVTy, DL);
// Finally, IR can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
@@ -162,7 +161,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// of the same size, and that their #elements is not the same. Do the
// conversion here, which depends on whether the input or output has
// more elements.
- bool isLittleEndian = TD.isLittleEndian();
+ bool isLittleEndian = DL.isLittleEndian();
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
@@ -198,7 +197,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
+ unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
@@ -235,10 +234,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
- APInt &Offset, const DataLayout &TD) {
+ APInt &Offset, const DataLayout &DL) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
@@ -251,22 +250,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::AddrSpaceCast)
- return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
if (!GEP)
return false;
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
return false;
// Otherwise, add any offset that our operands provide.
- if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ if (!GEP->accumulateConstantOffset(DL, TmpOffset))
return false;
Offset = TmpOffset;
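For intuition, a small assumed example of the recursion above:

    // For i32* getelementptr ([5 x i32]* @a, i32 0, i32 3), the
    // recursion bottoms out at @a with TmpOffset = 0, and
    // accumulateConstantOffset then adds 3 * 4 bytes, so the caller
    // sees GV = @a and Offset = 12.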
@@ -276,11 +275,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
/// Recursive helper to read bits out of global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// the CurPtr buffer. DL is the DataLayout.
static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned char *CurPtr, unsigned BytesLeft,
- const DataLayout &TD) {
- assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ const DataLayout &DL) {
+ assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
"Out of range access");
// If this element is zero or undefined, we can just return since *CurPtr is
@@ -298,7 +297,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
int n = ByteOffset;
- if (!TD.isLittleEndian())
+ if (!DL.isLittleEndian())
n = IntBytes - n - 1;
CurPtr[i] = (unsigned char)(Val >> (n * 8));
++ByteOffset;
@@ -308,22 +307,22 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
- C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isFloatTy()){
- C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isHalfTy()){
- C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
return false;
}
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- const StructLayout *SL = TD.getStructLayout(CS->getType());
+ const StructLayout *SL = DL.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
@@ -331,11 +330,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
while (1) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
- uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+ uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
if (ByteOffset < EltSize &&
!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
++Index;
@@ -362,7 +361,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
Type *EltTy = C->getType()->getSequentialElementType();
- uint64_t EltSize = TD.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
uint64_t NumElts;
@@ -373,7 +372,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
uint64_t BytesWritten = EltSize - Offset;
@@ -390,9 +389,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
+ CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
- BytesLeft, TD);
+ BytesLeft, DL);
}
}
@@ -401,7 +400,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
- const DataLayout &TD) {
+ const DataLayout &DL) {
PointerType *PTy = cast<PointerType>(C->getType());
Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
@@ -423,14 +422,13 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
MapTy = PointerType::getIntNPtrTy(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy),
- AS);
+ DL.getTypeAllocSizeInBits(LoadTy), AS);
} else
return nullptr;
- C = FoldBitCast(C, MapTy, TD);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
- return FoldBitCast(Res, LoadTy, TD);
+ C = FoldBitCast(C, MapTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL))
+ return FoldBitCast(Res, LoadTy, DL);
return nullptr;
}
@@ -440,7 +438,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
GlobalValue *GVal;
APInt Offset;
- if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL))
return nullptr;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
@@ -455,16 +453,16 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
- TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ DL.getTypeAllocSize(GV->getInitializer()->getType()))
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
- BytesLoaded, TD))
+ BytesLoaded, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
- if (TD.isLittleEndian()) {
+ if (DL.isLittleEndian()) {
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
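A worked example of the byte reassembly, with assumed bytes:

    // RawBytes = {0x78, 0x56, 0x34, 0x12}, BytesLoaded = 4,
    // little-endian: start from RawBytes[3] = 0x12, then repeatedly
    // shift left by 8 and OR in the next lower-address byte:
    //   0x12 -> 0x1234 -> 0x123456 -> 0x12345678
    // so the byte at the lowest address ends up least significant.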
@@ -482,9 +480,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
- const DataLayout *DL) {
- if (!DL)
- return nullptr;
+ const DataLayout &DL) {
auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
if (!DestPtrTy)
return nullptr;
@@ -499,7 +495,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
// If the type sizes are the same and a cast is legal, just directly
// cast the constant.
- if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+ if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
Instruction::CastOps Cast = Instruction::BitCast;
// If we are going from a pointer to int or vice versa, we spell the cast
// differently.
@@ -530,7 +526,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
/// Return the value that a load from C would produce if it is constant and
/// determinable. If this is not determinable, return null.
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
- const DataLayout *TD) {
+ const DataLayout &DL) {
// First, try the easy cases:
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
@@ -552,13 +548,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL))
return LoadedC;
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
StringRef Str;
- if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ if (getConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -568,7 +564,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
(isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
APInt StrVal(NumBits, 0);
APInt SingleChar(NumBits, 0);
- if (TD->isLittleEndian()) {
+ if (DL.isLittleEndian()) {
for (signed i = StrLen-1; i >= 0; i--) {
SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
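A worked example of the string-to-integer fold, with an assumed string:

    // Loading an i16 from "ab" (bytes 0x61, 0x62) on a little-endian
    // target walks the string backwards:
    //   StrVal = 0x62, then (0x62 << 8) | 0x61 = 0x6261
    // which matches the i16 that a byte-wise load of "ab" produces.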
@@ -593,7 +589,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
if (GlobalVariable *GV =
- dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
Type *ResTy = cast<PointerType>(C->getType())->getElementType();
if (GV->getInitializer()->isNullValue())
@@ -604,16 +600,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
// Try hard to fold loads from bitcasted strange and non-type-safe things.
- if (TD)
- return FoldReinterpretLoadFromConstPtr(CE, *TD);
- return nullptr;
+ return FoldReinterpretLoadFromConstPtr(CE, DL);
}
-static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
+static Constant *ConstantFoldLoadInst(const LoadInst *LI,
+ const DataLayout &DL) {
if (LI->isVolatile()) return nullptr;
if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
- return ConstantFoldLoadFromConstPtr(C, TD);
+ return ConstantFoldLoadFromConstPtr(C, DL);
return nullptr;
}
@@ -623,16 +618,16 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const DataLayout *DL){
+ Constant *Op1,
+ const DataLayout &DL) {
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
// bits.
-
- if (Opc == Instruction::And && DL) {
- unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
+ if (Opc == Instruction::And) {
+ unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
@@ -655,14 +650,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
- if (Opc == Instruction::Sub && DL) {
+ if (Opc == Instruction::Sub) {
GlobalValue *GV1, *GV2;
APInt Offs1, Offs2;
- if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
- if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
- GV1 == GV2) {
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
+ unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
@@ -677,21 +671,19 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TD)
- return nullptr;
-
- Type *IntPtrTy = TD->getIntPtrType(ResultTy);
+ Type *IntPtrTy = DL.getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
- Ops[0]->getType(),
- Ops.slice(1, i - 1)))) &&
+ cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -706,9 +698,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
}
@@ -732,15 +724,15 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
}
/// If we can symbolically evaluate the GEP constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
+ if (!Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return nullptr;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
@@ -760,19 +752,19 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
+ Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
return Res;
}
}
return nullptr;
}
- unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
APInt Offset =
- APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
- makeArrayRef((Value *const*)
- Ops.data() + 1,
- Ops.size() - 1)));
+ APInt(BitWidth,
+ DL.getIndexedOffset(
+ Ptr->getType(),
+ makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
@@ -790,8 +782,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
break;
Ptr = cast<Constant>(GEP->getOperand(0));
- Offset += APInt(BitWidth,
- TD->getIndexedOffset(Ptr->getType(), NestedOps));
+ Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps));
Ptr = StripPtrCastKeepAS(Ptr);
}
@@ -831,7 +822,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
}
// Determine which element of the array the offset points into.
- APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType()));
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -850,7 +841,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// can't re-form this GEP in a regular form, so bail out. The pointer
// operand likely went through casts that are necessary to make the GEP
// sensible.
- const StructLayout &SL = *TD->getStructLayout(STy);
+ const StructLayout &SL = *DL.getStructLayout(STy);
if (Offset.uge(SL.getSizeInBytes()))
break;
@@ -875,14 +866,14 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return nullptr;
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
if (Ty != ResultElementTy)
- C = FoldBitCast(C, ResultTy, *TD);
+ C = FoldBitCast(C, ResultTy, DL);
return C;
}
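One step of the index-recovery loop above, written as a hypothetical standalone helper (splitOffset is illustrative, not part of the patch): peel a single struct field off a byte offset using the DataLayout's StructLayout, as the loop does for struct types.

    static void splitOffset(const DataLayout &DL, StructType *STy,
                            APInt &Offset, SmallVectorImpl<unsigned> &Idxs) {
      const StructLayout &SL = *DL.getStructLayout(STy);
      // Pick the field containing Offset, then rebase Offset to that field.
      unsigned Field = SL.getElementContainingOffset(Offset.getZExtValue());
      Idxs.push_back(Field);
      Offset -= APInt(Offset.getBitWidth(), SL.getElementOffset(Field));
    }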
@@ -898,15 +889,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I,
- const DataLayout *TD,
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PN->getIncomingValue(i);
+ for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself, we choose not to
// because that would break the rule that constant folding only applies if
@@ -919,7 +908,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
return nullptr;
// Fold the PHI's operands.
if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, TD, TLI);
+ C = ConstantFoldConstantExpression(NewC, DL, TLI);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -942,17 +931,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
// Fold the Instruction's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+ Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
Ops.push_back(Op);
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
+ DL, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return ConstantFoldLoadInst(LI, TD);
+ return ConstantFoldLoadInst(LI, DL);
if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
@@ -967,11 +956,11 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
EVI->getIndices());
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
}
static Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
const TargetLibraryInfo *TLI,
SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
@@ -982,25 +971,25 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+ NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
}
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+ DL, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
}
/// Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+ return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
}
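A usage sketch for the reference-based signature (hypothetical caller; F and TLI are assumed to be in scope, and erasing the now-dead instructions is left to the caller):

    const DataLayout &DL = F.getParent()->getDataLayout();
    for (Instruction &I : inst_range(F))
      if (Constant *C = ConstantFoldInstruction(&I, DL, TLI))
        I.replaceAllUsesWith(C); // I is dead; the caller may erase it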
/// Attempt to constant fold an instruction with the
@@ -1015,12 +1004,12 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
return C;
}
@@ -1040,10 +1029,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// If the input is an inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
if (PtrWidth < InWidth) {
Constant *Mask =
ConstantInt::get(CE->getContext(),
@@ -1061,15 +1050,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// This requires knowing the width of a pointer, so it can't be done in
// ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *SrcPtr = CE->getOperand(0);
- unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
if (MidIntSize >= SrcPtrSize) {
unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
if (SrcAS == DestTy->getPointerAddressSpace())
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ return FoldBitCast(CE->getOperand(0), DestTy, DL);
}
}
}
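The symmetric PtrToInt-of-IntToPtr case earlier in this function is cut off mid-expression by its hunk; a plausible reconstruction of the elided narrowing, under the PtrWidth < InWidth condition shown there:

    // Emulate inttoptr's implicit truncation with an explicit mask ...
    Constant *Mask = ConstantInt::get(
        CE->getContext(), APInt::getLowBitsSet(InWidth, PtrWidth));
    Input = ConstantExpr::getAnd(Input, Mask);
    // ... then zext/trunc the result to the destination width.
    return ConstantExpr::getIntegerCast(Input, DestTy, /*isSigned=*/false);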
@@ -1087,9 +1076,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
- if (TD)
- return FoldBitCast(Ops[0], DestTy, *TD);
- return ConstantExpr::getBitCast(Ops[0], DestTy);
+ return FoldBitCast(Ops[0], DestTy, DL);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
@@ -1098,13 +1085,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+ case Instruction::GetElementPtr: {
+ Type *SrcTy = nullptr;
+ if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+ if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+ }
}
}
@@ -1113,43 +1102,44 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
/// returns a constant expression of the specified operands.
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
- // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // FIXME: The following comment is out of date; the DataLayout is always available now.
+ // ConstantExpr::getCompare cannot do this, because it doesn't have DL
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
- if (TD && Ops1->isNullValue()) {
+ if (Ops1->isNullValue()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
// Only do this transformation if the int is IntPtrTy in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
- if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ if (CE0->getOpcode() == CE1->getOpcode()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
@@ -1157,20 +1147,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
}
// Only do this transformation if the int is IntPtrTy in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
- return ConstantFoldCompareInstOperands(Predicate,
- CE0->getOperand(0),
- CE1->getOperand(0),
- TD,
- TLI);
+ return ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
}
}
}
@@ -1180,16 +1167,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
- Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
- TD, TLI);
- Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
- TD, TLI);
+ Constant *LHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), Ops1, DL, TLI);
+ Constant *RHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(1), Ops1, DL, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI);
}
}
@@ -1451,26 +1436,16 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
default: break;
case Intrinsic::fabs:
return ConstantFoldFP(fabs, V, Ty);
-#if HAVE_LOG2
case Intrinsic::log2:
- return ConstantFoldFP(log2, V, Ty);
-#endif
-#if HAVE_LOG
+ return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log:
return ConstantFoldFP(log, V, Ty);
-#endif
-#if HAVE_LOG10
case Intrinsic::log10:
return ConstantFoldFP(log10, V, Ty);
-#endif
-#if HAVE_EXP
case Intrinsic::exp:
return ConstantFoldFP(exp, V, Ty);
-#endif
-#if HAVE_EXP2
case Intrinsic::exp2:
return ConstantFoldFP(exp2, V, Ty);
-#endif
case Intrinsic::floor:
return ConstantFoldFP(floor, V, Ty);
case Intrinsic::ceil:
@@ -1568,8 +1543,8 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
APFloat Val(APFloat::IEEEhalf, Op->getValue());
bool lost = false;
- APFloat::opStatus status =
- Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+ APFloat::opStatus status = Val.convert(
+ Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
// Conversion is always precise.
(void)status;
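A self-contained sketch of the APFloat widening used in this fold (widenHalf is a hypothetical helper; conversion out of IEEE half into any wider format is exact, which is why the status can be discarded):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    static APFloat widenHalf(const APInt &HalfBits, const fltSemantics &Dst) {
      APFloat Val(APFloat::IEEEhalf, HalfBits); // reinterpret the 16 bits
      bool LosesInfo = false;
      Val.convert(Dst, APFloat::rmNearestTiesToEven, &LosesInfo);
      assert(!LosesInfo && "widening from half is exact");
      return Val;
    }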
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 1b74f8c..b529c1a 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -83,7 +83,8 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
CostModelAnalysis::runOnFunction(Function &F) {
this->F = &F;
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
return false;
}
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
index 9334ceb..d603b7b 100644
--- a/contrib/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -59,14 +59,14 @@ public:
void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
bool Delinearization::runOnFunction(Function &F) {
this->F = &F;
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -141,7 +141,7 @@ char Delinearization::ID = 0;
static const char delinearization_name[] = "Delinearization";
INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 092df5c..808a38b 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -52,6 +52,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -114,7 +116,7 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(DependenceAnalysis, "da",
@@ -132,7 +134,7 @@ bool DependenceAnalysis::runOnFunction(Function &F) {
this->F = &F;
AA = &getAnalysis<AliasAnalysis>();
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -145,7 +147,7 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
AU.addRequiredTransitive<ScalarEvolution>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
@@ -225,13 +227,11 @@ bool Dependence::isScalar(unsigned level) const {
//===----------------------------------------------------------------------===//
// FullDependence methods
-FullDependence::FullDependence(Instruction *Source,
- Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
bool PossiblyLoopIndependent,
- unsigned CommonLevels) :
- Dependence(Source, Destination),
- Levels(CommonLevels),
- LoopIndependent(PossiblyLoopIndependent) {
+ unsigned CommonLevels)
+ : Dependence(Source, Destination), Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
}
@@ -625,14 +625,12 @@ void Dependence::dump(raw_ostream &OS) const {
OS << "!\n";
}
-
-
-static
-AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
- const Value *A,
- const Value *B) {
- const Value *AObj = GetUnderlyingObject(A);
- const Value *BObj = GetUnderlyingObject(B);
+static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const DataLayout &DL,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A, DL);
+ const Value *BObj = GetUnderlyingObject(B, DL);
return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
BObj, AA->getTypeStoreSize(BObj->getType()));
}
@@ -832,6 +830,14 @@ bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
return isLoopInvariant(Src, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
if (!isLoopInvariant(Step, LoopNest))
return false;
Loops.set(mapSrcLoop(AddRec->getLoop()));
@@ -850,6 +856,14 @@ bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
return isLoopInvariant(Dst, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
if (!isLoopInvariant(Step, LoopNest))
return false;
Loops.set(mapDstLoop(AddRec->getLoop()));
@@ -944,13 +958,15 @@ bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
// All subscripts are all the same type.
// Loop bound may be smaller (e.g., a char).
// Should zero extend loop bound, since it's always >= 0.
-// This routine collects upper bound and extends if needed.
+// This routine collects the upper bound and extends or truncates if needed.
+// Truncating is safe when subscripts are known not to wrap. Cases without
+// nowrap flags should have been rejected earlier.
// Return null if no bound available.
const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
Type *T) const {
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
const SCEV *UB = SE->getBackedgeTakenCount(L);
- return SE->getNoopOrZeroExtend(UB, T);
+ return SE->getTruncateOrZeroExtend(UB, T);
}
return nullptr;
}
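A minimal sketch of the two directions the new call covers (SE, L, and T assumed in scope):

    const SCEV *UB = SE->getBackedgeTakenCount(L);
    // Wider T: zero-extend, since a trip count is never negative.
    // Narrower T: truncate; this is only reached when the subscript
    // recurrences carry nowrap flags (rejected above otherwise), so the
    // narrowed bound still covers every index value that occurs.
    const SCEV *Bound = SE->getTruncateOrZeroExtend(UB, T);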
@@ -3314,7 +3330,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr)) {
case AliasAnalysis::MayAlias:
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
@@ -3347,9 +3364,9 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
@@ -3472,8 +3489,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
LI->getLoopFor(Dst->getParent()),
Pair[SI].Loops);
Result.Consistent = false;
- }
- else if (Pair[SI].Classification == Subscript::ZIV) {
+ } else if (Pair[SI].Classification == Subscript::ZIV) {
// always separable
Separable.set(SI);
}
@@ -3525,8 +3541,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << ", SIV\n");
unsigned Level;
const SCEV *SplitIter = nullptr;
- if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
break;
}
@@ -3574,8 +3590,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
unsigned Level;
const SCEV *SplitIter = nullptr;
DEBUG(dbgs() << "SIV\n");
- if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
@@ -3651,8 +3667,10 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
// update Result.DV from constraint vector
DEBUG(dbgs() << " updating\n");
- for (int SJ = ConstrainedLevels.find_first();
- SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
+ SJ = ConstrainedLevels.find_next(SJ)) {
+ if (SJ > (int)CommonLevels)
+ break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
return nullptr;
@@ -3759,8 +3777,8 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
assert(isLoadOrStore(Dst));
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
- AliasAnalysis::MustAlias);
+ assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr) == AliasAnalysis::MustAlias);
// establish loop nesting levels
establishNestingLevels(Src, Dst);
@@ -3775,9 +3793,9 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
new file mode 100644
index 0000000..e5ee295
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -0,0 +1,337 @@
+//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines divergence analysis which determines whether a branch in a
+// GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can unfortunately be harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local addresses as divergent. This can be improved by leveraging
+// pointer analysis.
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+#include "llvm/IR/Dominators.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+namespace {
+class DivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ DivergenceAnalysis() : FunctionPass(ID) {
+ initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent.
+ bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ // Returns true if V is uniform/non-divergent.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+private:
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End of anonymous namespace
+
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(TerminatorInst *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : inst_range(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+ BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
+ Worklist.push_back(I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search all the
+ // dominators of TI until the walk leaves the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+ std::vector<BasicBlock *> InfluenceStack;
+ InfluenceStack.push_back(Start);
+ InfluenceRegion.insert(Start);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (End != Succ && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+ // Terminators with fewer than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (TI->getNumSuccessors() > 1)
+ exploreSyncDependency(TI);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+ return new DivergenceAnalysis();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergencePropagator DP(F, TTI,
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTree>(), DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if (DivergentValues.empty())
+ return;
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ const Function *F;
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+
+ // Dump all divergent values in F: arguments first, then instructions.
+ for (auto &Arg : F->args()) {
+ if (DivergentValues.count(&Arg))
+ OS << "DIVERGENT: " << Arg << "\n";
+ }
+ // Iterate instructions using inst_range to ensure a deterministic order.
+ for (auto &I : inst_range(F)) {
+ if (DivergentValues.count(&I))
+ OS << "DIVERGENT:" << I << "\n";
+ }
+}
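A usage sketch for the new pass, assuming a target whose TTI reports branch divergence (for example a GPU backend); on other targets it runs but marks nothing:

    #include "llvm/Analysis/Passes.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void analyzeDivergence(Module &M) {
      legacy::PassManager PM;
      PM.add(createDivergenceAnalysisPass());
      PM.run(M);
      // The "DIVERGENT:" lines come from the pass's print() hook,
      // reachable from the command line via `opt -divergence -analyze`.
    }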
diff --git a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp
deleted file mode 100644
index a686bec..0000000
--- a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
-// forward along the current Function so that we can make target specific
-// decisions based on the particular subtarget specified for each Function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "function-tti"
-static const char ftti_name[] = "Function TargetTransformInfo";
-INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-char FunctionTargetTransformInfo::ID = 0;
-
-namespace llvm {
-FunctionPass *createFunctionTargetTransformInfoPass() {
- return new FunctionTargetTransformInfo();
-}
-}
-
-FunctionTargetTransformInfo::FunctionTargetTransformInfo()
- : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
- initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
-}
-
-void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<TargetTransformInfo>();
-}
-
-void FunctionTargetTransformInfo::releaseMemory() {}
-
-bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
- Fn = &F;
- TTI = &getAnalysis<TargetTransformInfo>();
- return false;
-}
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
index ded1de7..65ba1c7 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -49,7 +49,7 @@ public:
explicit CGPassManager()
: ModulePass(ID), PMDataManager() { }
- /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M) override;
@@ -142,9 +142,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
FPPassManager *FPP = (FPPassManager*)P;
// Run pass P on all functions in the current SCC.
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I) {
- if (Function *F = (*I)->getFunction()) {
+ for (CallGraphNode *CGN : CurSCC) {
+ if (Function *F = CGN->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
{
TimeRegion PassTimer(getPassTimer(FPP));
@@ -165,7 +164,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
}
-/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// Scan the functions in the specified SCC and resync the
/// callgraph with the call sites found in it. This is used after
/// FunctionPasses have potentially munged the callgraph, and can be used after
/// CallGraphSCC passes to verify that they correctly updated the callgraph.
@@ -181,9 +180,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
);
bool MadeChange = false;
@@ -214,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// list of the same call.
CallSites.count(I->first) ||
- // If the call edge is not from a call or invoke, then the function
- // pass RAUW'd a call with another value. This can happen when
- // constant folding happens of well known functions etc.
- !CallSite(I->first)) {
+ // If the call edge is not from a call or invoke, or it is an
+ // intrinsic call, then the function pass RAUW'd a call with
+ // another value. This can happen when well-known functions are
+ // constant folded, etc.
+ !CallSite(I->first) ||
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic())) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -357,9 +358,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(if (MadeChange) {
dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
if (DevirtualizedCall)
dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
@@ -372,15 +372,15 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
return DevirtualizedCall;
}
-/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
-/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// Execute the body of the entire pass manager on the specified SCC.
+/// This keeps track of whether a function pass devirtualizes
/// any calls and returns it in DevirtualizedCall.
bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall) {
bool Changed = false;
- // CallGraphUpToDate - Keep track of whether the callgraph is known to be
- // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // Keep track of whether the callgraph is known to be up-to-date or not.
+ // The CGSCC pass manager runs two types of passes:
// CallGraphSCC Passes and other random function passes. Because other
// random function passes are not CallGraph aware, they may clobber the
// call graph by introducing new calls or deleting other ones. This flag
@@ -433,7 +433,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
return Changed;
}
-/// run - Execute all of the passes scheduled for execution. Keep track of
+/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -519,7 +519,7 @@ bool CGPassManager::doFinalization(CallGraph &CG) {
// CallGraphSCC Implementation
//===----------------------------------------------------------------------===//
-/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// This informs the SCC and the pass manager that the specified
/// Old node has been deleted, and New is to be used in its place.
void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
assert(Old != New && "Should not replace node with self");
@@ -578,8 +578,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
CGP->add(this);
}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraphWrapperPass>();
@@ -609,9 +609,9 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- if ((*I)->getFunction())
- (*I)->getFunction()->print(Out);
+ for (CallGraphNode *CGN : SCC) {
+ if (CGN->getFunction())
+ CGN->getFunction()->print(Out);
else
Out << "\nPrinting <null> Function\n";
}
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 607c068..018ae99 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -96,7 +96,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
// Find non-addr taken globals.
AnalyzeGlobals(M);
@@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (CallSite CS = I) {
+ } else if (auto CS = CallSite(I)) {
// Make sure that this is just the function being called, not a pointer
// being passed into the function.
if (!CS.isCallee(&U)) {
@@ -322,7 +322,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
continue;
// Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
if (!isAllocLikeFn(Ptr, TLI))
return false; // Too hard to analyze.
@@ -481,8 +482,8 @@ AliasAnalysis::AliasResult
GlobalsModRef::alias(const Location &LocA,
const Location &LocB) {
// Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
@@ -540,8 +541,9 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
index 86e7fc2..2bd959d 100644
--- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
@@ -45,9 +45,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
- // DataLayout if available, or null.
- const DataLayout *const DL;
-
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
@@ -67,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
+ bool HasFrameEscape;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -145,18 +143,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
+ CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
+ Function &Callee, int Threshold)
+ : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -244,10 +242,8 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- if (!DL)
- return false;
-
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -263,12 +259,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
continue;
}
- APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
+ APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
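A worked illustration of the accumulation above, with hypothetical names (DL, STy for a {i32, [4 x i16]} struct, and Int16Ty) and a 64-bit DataLayout assumed:

    APInt Offset(64, 0);
    const StructLayout *SL = DL.getStructLayout(STy);
    Offset += APInt(64, SL->getElementOffset(1));          // field 1 -> +4
    Offset += APInt(64, 3 * DL.getTypeAllocSize(Int16Ty)); // idx 3  -> +6
    // Offset is now 10: ten bytes past the base pointer.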
@@ -289,9 +285,9 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
- AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
- Ty->getPrimitiveSizeInBits());
+ AllocatedSize += DL.getTypeAllocSize(Ty);
}
// We will happily inline static alloca instructions.
@@ -327,7 +323,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Try to fold GEPs of constant-offset call site argument pointers. This
// requires target data and inbounds GEPs.
- if (DL && I.isInBounds()) {
+ if (I.isInBounds()) {
// Check if we have a base + offset for the pointer.
Value *Ptr = I.getPointerOperand();
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
@@ -396,7 +392,6 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through ptrtoint.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -410,7 +405,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize >= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -433,7 +429,6 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through inttoptr.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -448,7 +443,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize <= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -485,12 +481,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Constant *COp = dyn_cast<Constant>(Operand);
if (!COp)
COp = SimplifiedValues.lookup(Operand);
- if (COp)
+ if (COp) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
COp, DL)) {
SimplifiedValues[&I] = C;
return true;
}
+ }
// Disable any SROA on the argument to arbitrary unary operators.
disableSROA(Operand);
@@ -595,13 +593,20 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (!isa<Constant>(LHS))
if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
LHS = SimpleLHS;
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
- Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
SimplifiedValues[&I] = C;
return true;
@@ -617,7 +622,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
bool CallAnalyzer::visitLoad(LoadInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -632,7 +637,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
bool CallAnalyzer::visitStore(StoreInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -713,8 +718,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
- !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice)) {
+ !F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
@@ -740,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
+ case Intrinsic::frameescape:
+ HasFrameEscape = true;
+ return false;
}
}
@@ -783,7 +790,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -907,6 +914,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
+ // If the instruction is floating point, and the target says this operation is
+ // expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
+ if (I->getType()->isFloatingPointTy()) {
+ bool hasSoftFloatAttr = false;
+
+ // If the function has the "use-soft-float" attribute, mark it as expensive.
+ if (F.hasFnAttribute("use-soft-float")) {
+ Attribute Attr = F.getFnAttribute("use-soft-float");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ hasSoftFloatAttr = true;
+ }
+
+ if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+ hasSoftFloatAttr)
+ Cost += InlineConstants::CallPenalty;
+ }
+
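
The added check reduces to a small predicate: charge a call penalty when the target reports the floating-point operation as expensive, or when the function's "use-soft-float" string attribute is "true". A standalone sketch of just that decision (plain C++, not the LLVM API; the names are illustrative):

#include <cassert>
#include <string>

// Treat an FP operation as a potential library call when the target marks
// FP ops expensive or the function carries use-soft-float="true".
static bool chargeFPCallPenalty(bool TargetSaysExpensive,
                                const std::string &SoftFloatAttr) {
  bool HasSoftFloatAttr = (SoftFloatAttr == "true");
  return TargetSaysExpensive || HasSoftFloatAttr;
}

int main() {
  assert(chargeFPCallPenalty(false, "true"));  // soft-float forces the penalty
  assert(chargeFPCallPenalty(true, ""));       // so does an expensive FP op
  assert(!chargeFPCallPenalty(false, "false"));
  return 0;
}
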
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we
@@ -919,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// If visiting this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -929,16 +955,9 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
return false;
- if (NumVectorInstructions > NumInstructions/2)
- VectorBonus = FiftyPercentVectorBonus;
- else if (NumVectorInstructions > NumInstructions/10)
- VectorBonus = TenPercentVectorBonus;
- else
- VectorBonus = 0;
-
- // Check if we've past the threshold so we don't spin in huge basic
- // blocks that will never inline.
- if (Cost > (Threshold + VectorBonus))
+ // Check if we've passed the maximum possible threshold so we don't spin in
+ // huge basic blocks that will never inline.
+ if (Cost > Threshold)
return false;
}
@@ -952,10 +971,11 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
- if (!DL || !V->getType()->isPointerTy())
+ if (!V->getType()->isPointerTy())
return nullptr;
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -979,7 +999,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
- Type *IntPtrTy = DL->getIntPtrType(V->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
@@ -993,33 +1013,42 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
bool CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
- // Track whether the post-inlining function would have more than one basic
- // block. A single basic block is often intended for inlining. Balloon the
- // threshold by 50% until we pass the single-BB phase.
- bool SingleBB = true;
- int SingleBBBonus = Threshold / 2;
- Threshold += SingleBBBonus;
-
// Perform some tweaks to the cost and threshold based on the direct
// callsite information.
// We want to more aggressively inline vector-dense kernels, so up the
// threshold, and we'll lower it if the % of vector instructions gets too
- // low.
+ // low. Note that these bonuses are somewhat arbitrary and evolved over time
+ // by accident as much as because they are principled bonuses.
+ //
+ // FIXME: It would be nice to remove all such bonuses. At least it would be
+ // nice to base the bonus values on something more scientific.
assert(NumInstructions == 0);
assert(NumVectorInstructions == 0);
- FiftyPercentVectorBonus = Threshold;
- TenPercentVectorBonus = Threshold / 2;
+ FiftyPercentVectorBonus = 3 * Threshold / 2;
+ TenPercentVectorBonus = 3 * Threshold / 4;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. A single basic block is often intended for inlining. Balloon the
+ // threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+
+ // Speculatively apply all possible bonuses to Threshold. If cost ever
+ // exceeds this Threshold, and since cost cannot decrease, we can stop processing
+ // the rest of the function body.
+ Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
// Give out bonuses per argument, as the instructions setting them up will
// be gone after inlining.
for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (DL && CS.isByValArgument(I)) {
+ if (CS.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = DL->getPointerSizeInBits();
+ unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL.getPointerSizeInBits();
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
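
The "ceiling division" here is the usual integer idiom; a worked sketch with made-up sizes:

#include <cassert>

static unsigned CeilDiv(unsigned A, unsigned B) { return (A + B - 1) / B; }

int main() {
  // A 96-bit byval aggregate with 32-bit pointers needs 96/32 = 3 stores.
  assert(CeilDiv(96, 32) == 3);
  // 65 bits also rounds up to 3 words rather than truncating to 2.
  assert(CeilDiv(65, 32) == 3);
  return 0;
}
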
@@ -1053,9 +1082,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Instruction *Instr = CS.getInstruction();
if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Threshold = 1;
+ Threshold = 0;
} else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
- Threshold = 1;
+ Threshold = 0;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1127,7 +1156,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- if (Cost > (Threshold + VectorBonus))
+ if (Cost > Threshold)
break;
BasicBlock *BB = BBWorklist[Idx];
@@ -1147,7 +1176,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// returns false, and we can bail out.
if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1205,7 +1234,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
return false;
- Threshold += VectorBonus;
+ // We applied the maximum possible vector bonus at the beginning. Now,
+ // subtract the excess bonus, if any, from the Threshold before
+ // comparing against Cost.
+ if (NumVectorInstructions <= NumInstructions / 10)
+ Threshold -= FiftyPercentVectorBonus;
+ else if (NumVectorInstructions <= NumInstructions / 2)
+ Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
return Cost < Threshold;
}
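
Numerically, the reworked scheme grants the maximum vector bonus (3*Threshold/2) up front and pays back whatever was not earned at the end. A sketch of only that arithmetic (the single-BB bonus is adjusted elsewhere and omitted here):

// T is the base threshold before any bonuses.
static int vectorAdjustedThreshold(int T, unsigned NumInstructions,
                                   unsigned NumVectorInstructions) {
  int FiftyPercentVectorBonus = 3 * T / 2;
  int TenPercentVectorBonus = 3 * T / 4;
  int Threshold = T + FiftyPercentVectorBonus;  // speculative maximum
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= FiftyPercentVectorBonus;       // barely any vector code
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
  return Threshold;  // vector-dense bodies keep the whole bonus
}

int main() {
  // Base threshold 100, no vector code: 100 + 150 - 150 == 100.
  return vectorAdjustedThreshold(100, 40, 0) == 100 ? 0 : 1;
}
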
@@ -1220,19 +1255,19 @@ void CallAnalyzer::dump() {
DEBUG_PRINT_STAT(NumConstantPtrCmps);
DEBUG_PRINT_STAT(NumConstantPtrDiffs);
DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(NumInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
DEBUG_PRINT_STAT(Cost);
DEBUG_PRINT_STAT(Threshold);
- DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT
}
#endif
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@@ -1246,12 +1281,12 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ACT = &getAnalysis<AssumptionCacheTracker>();
return false;
}
@@ -1262,16 +1297,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
/// \brief Test that two functions either both have or both lack the given
/// attribute, with matching values when the attribute is present.
-static bool attributeMatches(Function *F1, Function *F2,
- Attribute::AttrKind Attr) {
- return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+template<typename AttrKind>
+static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
+ return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee) {
- return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ return attributeMatches(Caller, Callee, "target-cpu") &&
+ attributeMatches(Caller, Callee, "target-features") &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}
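
The templated helper can serve both Attribute::AttrKind and string keys because it compares the looked-up attribute values for equality. A standalone analogue of the pattern (a std::map stands in for the attribute list; this is not the LLVM API):

#include <cassert>
#include <map>
#include <string>

using AttrMap = std::map<std::string, std::string>;

static std::string getFnAttribute(const AttrMap &F, const std::string &Key) {
  auto It = F.find(Key);
  return It == F.end() ? std::string() : It->second;
}

// Both absent, or both present with equal values.
static bool attributeMatches(const AttrMap &F1, const AttrMap &F2,
                             const std::string &Key) {
  return getFnAttribute(F1, Key) == getFnAttribute(F2, Key);
}

int main() {
  AttrMap Caller{{"target-cpu", "x86-64"}};
  AttrMap Callee{{"target-cpu", "x86-64"}};
  assert(attributeMatches(Caller, Callee, "target-cpu"));
  Callee["target-features"] = "+avx2";
  assert(!attributeMatches(Caller, Callee, "target-features"));
  return 0;
}
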
@@ -1309,8 +1346,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI,
- ACT, *Callee, Threshold);
+ CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1325,9 +1361,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
}
bool InlineCostAnalysis::isInlineViable(Function &F) {
- bool ReturnsTwice =
- F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice);
+ bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches or
// blockaddresses.
@@ -1349,6 +1383,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (!ReturnsTwice && CS.isCall() &&
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
+
+ // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // correctly would require major changes to the inliner.
+ if (CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ llvm::Intrinsic::frameescape)
+ return false;
}
}
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index 6b5f370..b88b249 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +34,7 @@ using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
@@ -113,6 +114,8 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
if (!Processed.insert(I).second)
@@ -124,14 +127,14 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// IVUsers is used by LSR which assumes that all SCEV expressions are safe to
// pass to SCEVExpander. Expressions are not safe to expand if they represent
// operations that are not safe to speculate, namely integer division.
- if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL))
+ if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I))
return false;
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
- if (Width > 64 || (DL && !DL->isLegalInteger(Width)))
+ if (Width > 64 || !DL.isLegalInteger(Width))
return false;
// Get the symbolic expression for this instruction.
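
The width gate is easy to state in isolation. A sketch assuming a target whose legal integer widths are 8/16/32/64 (the real set comes from DataLayout; this one is illustrative):

#include <cstdint>

static bool isLegalInteger(uint64_t Width) {
  return Width == 8 || Width == 16 || Width == 32 || Width == 64;
}

// Mirrors the gate above: LSR rejects IVs wider than 64 bits as well as
// widths the target has no native register for.
static bool lsrCanHandleWidth(uint64_t Width) {
  return Width <= 64 && isLegalInteger(Width);
}

int main() {
  return (lsrCanHandleWidth(32) && !lsrCanHandleWidth(65) &&
          !lsrCanHandleWidth(24)) ? 0 : 1;
}
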
@@ -241,7 +244,7 @@ IVUsers::IVUsers()
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.setPreservesAll();
@@ -250,11 +253,9 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 3fbbd7c..097b99e 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -45,13 +45,13 @@ STATISTIC(NumReassoc, "Number of reassociations");
namespace {
struct Query {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
AssumptionCache *AC;
const Instruction *CxtI;
- Query(const DataLayout *DL, const TargetLibraryInfo *tli,
+ Query(const DataLayout &DL, const TargetLibraryInfo *tli,
const DominatorTree *dt, AssumptionCache *ac = nullptr,
const Instruction *cxti = nullptr)
: DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
@@ -61,6 +61,8 @@ struct Query {
static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
@@ -467,8 +469,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
- for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PI->getIncomingValue(i);
+ for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
@@ -508,8 +509,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// Evaluate the CmpInst on the incoming phi values.
Value *CommonValue = nullptr;
- for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PI->getIncomingValue(i);
+ for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
@@ -582,7 +582,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -599,17 +599,11 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
- Value *&V,
+static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
- // Without DataLayout, just be conservative for now. Theoretically, more could
- // be done in this case.
- if (!DL)
- return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
-
- Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType();
+ Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
// Even though we don't look through PHI nodes, we could be called on an
@@ -619,7 +613,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if ((!AllowNonInbounds && !GEP->isInBounds()) ||
- !GEP->accumulateConstantOffset(*DL, Offset))
+ !GEP->accumulateConstantOffset(DL, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -644,8 +638,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
/// \brief Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout *DL,
- Value *LHS, Value *RHS) {
+static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
+ Value *RHS) {
Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
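
The identity behind computePointerDifference: once both sides strip down to the same base, the pointer difference is just the difference of the accumulated constant offsets. A concrete check in plain C++:

#include <cassert>

int main() {
  char Buf[16];
  // Model (Base + 5) - (Base + 2): strip both to Buf, subtract the offsets.
  char *LHS = Buf + 5;
  char *RHS = Buf + 2;
  assert(LHS - RHS == 5 - 2);
  return 0;
}
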
@@ -781,7 +775,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -960,7 +954,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -969,7 +963,7 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -978,7 +972,7 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -986,7 +980,7 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1090,7 +1084,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1108,7 +1102,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1116,8 +1110,8 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
RecursionLimit);
}
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
- unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1126,14 +1120,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 / X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
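
The guards on the new 0 / X -> 0 fold can be checked directly with IEEE doubles: without no-NaNs, X could itself be zero and the quotient is NaN; without no-signed-zeros, the result's sign depends on the sign of X. The same reasoning backs the matching 0 % X fold in SimplifyFRemInst below.

#include <cassert>
#include <cmath>

int main() {
  assert(std::isnan(0.0 / 0.0));     // X == 0: quotient is NaN, not 0
  assert(std::signbit(0.0 / -1.0));  // X < 0: quotient is -0.0, not +0.0
  assert(0.0 / 4.0 == 0.0);          // only under nnan+nsz is 0/X simply 0
  return 0;
}
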
@@ -1208,7 +1209,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1226,7 +1227,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1234,8 +1235,8 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
- unsigned) {
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &, unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1244,14 +1245,21 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 % X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1371,7 +1379,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -1395,7 +1403,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1429,7 +1437,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1580,9 +1588,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op1;
}
@@ -1627,7 +1637,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1815,7 +1825,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1872,7 +1882,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1932,10 +1942,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
// If the C and C++ standards are ever made sufficiently restrictive in this
// area, it may be possible to update LLVM's semantics accordingly and reinstate
// this optimization.
-static Constant *computePointerICmp(const DataLayout *DL,
+static Constant *computePointerICmp(const DataLayout &DL,
const TargetLibraryInfo *TLI,
- CmpInst::Predicate Pred,
- Value *LHS, Value *RHS) {
+ CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
@@ -2353,8 +2363,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) &&
- Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
@@ -2966,10 +2976,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// what constant folding can make out of it.
Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesLHS);
SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesRHS);
return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
}
}
@@ -3008,7 +3020,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
Instruction *CxtI) {
@@ -3038,8 +3050,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(GetCompareTy(LHS), 1);
- if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
- return UndefValue::get(GetCompareTy(LHS));
+ // fcmp pred x, undef and fcmp pred undef, x
+ // fold to true if unordered, false if ordered
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fail.
+ return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ }
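
Choosing NaN as the model for undef makes the fold easy to sanity-check: C++'s built-in comparisons correspond to LLVM's ordered o-predicates, and != corresponds to the unordered une.

#include <cassert>
#include <cmath>

int main() {
  double NaN = std::nan("");
  double X = 1.0;
  // Every ordered predicate fails against NaN...
  assert(!(X < NaN) && !(X > NaN) && !(X == NaN));
  // ...and every unordered one succeeds.
  assert(X != NaN);
  assert(std::isunordered(X, NaN));
  return 0;
}
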
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
@@ -3050,44 +3067,57 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
// If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
return ConstantInt::getFalse(CFP->getContext());
- assert(FCmpInst::isUnordered(Pred) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ConstantInt::getTrue(CFP->getContext());
- }
- // Check whether the constant is an infinity.
- if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
- switch (Pred) {
- case FCmpInst::FCMP_OLT:
- // No value is ordered and less than negative infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_UGE:
- // All values are unordered with or at least negative infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
- } else {
- switch (Pred) {
- case FCmpInst::FCMP_OGT:
- // No value is ordered and greater than infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_ULE:
- // All values are unordered with and at most infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
}
}
}
+ if (CFP->getValueAPF().isZero()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_UGE:
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getTrue(CFP->getContext());
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < 0
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getFalse(CFP->getContext());
+ break;
+ default:
+ break;
+ }
+ }
}
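
A quick numeric check of the infinity and zero cases above, with plain doubles; CannotBeOrderedLessThanZero is modeled here by fabs:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double NegInf = -std::numeric_limits<double>::infinity();
  double PosInf = std::numeric_limits<double>::infinity();
  double X = -123.0;
  double NaN = std::nan("");
  assert(!(X < NegInf));  // FCMP_OLT -inf: nothing is ordered and < -inf
  assert(!(X > PosInf));  // FCMP_OGT +inf: nothing is ordered and > +inf
  // FCMP_UGE -inf is always true: any value is >= -inf or unordered.
  assert(X >= NegInf && std::isunordered(NaN, NegInf));
  // FCMP_UGE 0.0 folds to true when the LHS cannot be ordered-less-than zero.
  assert(std::fabs(X) >= 0.0);
  return 0;
}
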
// If the comparison is with the result of a select instruction, check whether
@@ -3106,7 +3136,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3201,7 +3231,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3211,17 +3241,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
- unsigned AS = PtrTy->getAddressSpace();
+ unsigned AS =
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
// Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
@@ -3234,11 +3265,11 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (match(Ops[1], m_Zero()))
return Ops[0];
- Type *Ty = PtrTy->getElementType();
- if (Q.DL && Ty->isSized()) {
+ Type *Ty = SrcTy;
+ if (Ty->isSized()) {
Value *P;
uint64_t C;
- uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
// getelementptr P, N -> P if P points to a type of zero size.
if (TyAllocSize == 0)
return Ops[0];
@@ -3246,7 +3277,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
- Q.DL->getPointerSizeInBits(AS)) {
+ Q.DL.getPointerSizeInBits(AS)) {
auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
if (match(P, m_Zero()))
return Constant::getNullValue(GEPTy);
@@ -3288,14 +3319,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (!isa<Constant>(Ops[i]))
return nullptr;
- return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
}
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(),
+ Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -3328,7 +3362,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
}
Value *llvm::SimplifyInsertValueInst(
- Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL,
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
@@ -3341,8 +3375,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// with the common value.
Value *CommonValue = nullptr;
bool HasUndefInput = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PN->getIncomingValue(i);
+ for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PN) continue;
if (isa<UndefValue>(Incoming)) {
@@ -3376,7 +3409,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
return nullptr;
}
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3408,10 +3441,12 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem:
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
Q, MaxRecurse);
@@ -3451,14 +3486,42 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
+/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+/// In contrast to SimplifyBinOp, try to use FastMathFlags when folding the
+/// result. In cases where FastMathFlags are not needed, simply fall back to
+/// SimplifyBinOp.
+static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const Query &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ default:
+ return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+ }
+}
+
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
+Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
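
A caller-side sketch of the new entry point, mirroring the visitBinaryOperator change earlier in this patch. This is a fragment, not a standalone program: I, DL, and the LLVM headers are assumed to be in scope.

// Prefer the FMF-aware path for FP math; everything else falls back to the
// plain SimplifyBinOp, just as the static helper does internally.
Value *SimpleV = nullptr;
if (auto *FPI = dyn_cast<FPMathOperator>(&I))
  SimpleV = SimplifyFPBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1),
                            FPI->getFastMathFlags(), DL);
else
  SimpleV = SimplifyBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1), DL);
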
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3469,7 +3532,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
@@ -3493,14 +3556,53 @@ static bool IsIdempotent(Intrinsic::ID ID) {
}
template <typename IterTy>
-static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd,
+static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
const Query &Q, unsigned MaxRecurse) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
+ Type *ReturnType = F->getReturnType();
+
+ // Binary Ops
+ if (NumOperands == 2) {
+ Value *LHS = *ArgBegin;
+ Value *RHS = *(ArgBegin + 1);
+ if (IID == Intrinsic::usub_with_overflow ||
+ IID == Intrinsic::ssub_with_overflow) {
+ // X - X -> { 0, false }
+ if (LHS == RHS)
+ return Constant::getNullValue(ReturnType);
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::uadd_with_overflow ||
+ IID == Intrinsic::sadd_with_overflow) {
+ // X + undef -> undef
+ if (isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::umul_with_overflow ||
+ IID == Intrinsic::smul_with_overflow) {
+ // X * 0 -> { 0, false }
+ if (match(RHS, m_Zero()))
+ return Constant::getNullValue(ReturnType);
+
+ // X * undef -> { 0, false }
+ if (match(RHS, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ }
+ }
+
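
The X - X and X * 0 folds encode exactly what checked arithmetic computes anyway. A quick demonstration using the GCC/Clang overflow builtins (compiler-specific; shown only to illustrate the {value, overflow-bit} result pair these intrinsics return):

#include <cassert>

int main() {
  int X = 42, Res = -1;
  // X - X -> { 0, false }: the subtraction cannot overflow, value is zero.
  bool Ov = __builtin_sub_overflow(X, X, &Res);
  assert(Res == 0 && !Ov);
  // X * 0 -> { 0, false }: likewise for the mul.with.overflow fold.
  bool Ov2 = __builtin_mul_overflow(X, 0, &Res);
  assert(Res == 0 && !Ov2);
  return 0;
}
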
// Perform idempotent optimizations
if (!IsIdempotent(IID))
return nullptr;
// Unary Ops
- if (std::distance(ArgBegin, ArgEnd) == 1)
+ if (NumOperands == 1)
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
if (II->getIntrinsicID() == IID)
return II;
@@ -3524,9 +3626,8 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
if (!F)
return nullptr;
- if (unsigned IID = F->getIntrinsicID())
- if (Value *Ret =
- SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse))
+ if (F->isIntrinsic())
+ if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
return Ret;
if (!canConstantFoldCallTo(F))
@@ -3545,7 +3646,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
}
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
- User::op_iterator ArgEnd, const DataLayout *DL,
+ User::op_iterator ArgEnd, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT,
AssumptionCache *AC, const Instruction *CxtI) {
return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
@@ -3553,7 +3654,7 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCall(V, Args.begin(), Args.end(),
@@ -3562,7 +3663,7 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC) {
Value *Result;
@@ -3608,8 +3709,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
AC, I);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::SRem:
Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
@@ -3620,8 +3721,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
AC, I);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
@@ -3710,12 +3811,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
+ const DataLayout &DL = I->getModule()->getDataLayout();
// If we have an explicit value to collapse to, do that round of the
// simplification loop by hand initially.
@@ -3763,19 +3864,18 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL,
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
}
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
new file mode 100644
index 0000000..9f1edd2
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -0,0 +1,95 @@
+//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include <queue>
+
+using namespace llvm;
+
+void IDFCalculator::calculate(SmallVectorImpl<BasicBlock *> &PHIBlocks) {
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
+ DFI != DFE; ++DFI) {
+ DomLevels[*DFI] = DFI.getPathLength() - 1;
+ }
+ }
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (BasicBlock *BB : *DefBlocks) {
+ if (DomTreeNode *Node = DT.getNode(BB))
+ PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ }
+
+ SmallVector<DomTreeNode *, 32> Worklist;
+ SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+ SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+ VisitedWorklist.insert(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (auto Succ : successors(BB)) {
+ DomTreeNode *SuccNode = DT.getNode(Succ);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!VisitedPQ.insert(SuccNode).second)
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (useLiveIn && !LiveInBlocks->count(SuccBB))
+ continue;
+
+ PHIBlocks.emplace_back(SuccBB);
+ if (!DefBlocks->count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (auto DomChild : *Node) {
+ if (VisitedWorklist.insert(DomChild).second)
+ Worklist.push_back(DomChild);
+ }
+ }
+ }
+}
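
The queue discipline is the heart of the new file: nodes are paired with their dominator-tree level and popped deepest-first, so frontier work proceeds bottom-up. A standalone analogue of llvm::less_second under std::priority_queue's max-heap ordering:

#include <cassert>
#include <queue>
#include <utility>
#include <vector>

// Pop the pair with the greatest second element first.
struct LessSecond {
  bool operator()(const std::pair<int, unsigned> &A,
                  const std::pair<int, unsigned> &B) const {
    return A.second < B.second;
  }
};

int main() {
  std::priority_queue<std::pair<int, unsigned>,
                      std::vector<std::pair<int, unsigned>>, LessSecond> PQ;
  PQ.push({1, 2});  // node 1 at level 2
  PQ.push({2, 5});  // node 2 at level 5, the deepest
  PQ.push({3, 0});  // node 3 at the root
  assert(PQ.top().first == 2);  // deepest node comes out first
  PQ.pop();
  assert(PQ.top().first == 1);
  PQ.pop();
  assert(PQ.top().first == 3);
  return 0;
}
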
diff --git a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp b/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp
deleted file mode 100644
index 7aae2a5..0000000
--- a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Information about jump-instruction tables that have been created by
-/// JumpInstrTables pass.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jiti"
-
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/MathExtras.h"
-
-using namespace llvm;
-
-INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info",
- "Jump-Instruction Table Info", true, true)
-char JumpInstrTableInfo::ID = 0;
-
-ImmutablePass *llvm::createJumpInstrTableInfoPass() {
- return new JumpInstrTableInfo();
-}
-
-ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) {
- // This cast is always safe, since Bound is always in a subset of uint64_t.
- uint64_t B = static_cast<uint64_t>(Bound);
- return new JumpInstrTableInfo(B);
-}
-
-JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign)
- : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) {
- if (!llvm::isPowerOf2_64(ByteAlign)) {
- // Note that we don't explicitly handle overflow here, since we handle the 0
- // case explicitly when a caller actually tries to create jumptable entries,
- // and this is the return value on overflow.
- ByteAlignment = llvm::NextPowerOf2(ByteAlign);
- }
-
- initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTableInfo::~JumpInstrTableInfo() {}
-
-void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target,
- Function *Jump) {
- Tables[TableFunTy].push_back(JumpPair(Target, Jump));
-}
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index 4de56f1..e6f586a 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
@@ -29,7 +30,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <map>
#include <stack>
using namespace llvm;
@@ -41,7 +41,7 @@ char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -191,7 +191,7 @@ public:
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
- bool mergeIn(const LVILatticeVal &RHS) {
+ bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
if (RHS.isUndefined() || isOverdefined()) return false;
if (RHS.isOverdefined()) return markOverdefined();
@@ -215,11 +215,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getConstant(),
- RHS.getNotConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
if (Res->isOne())
return markNotConstant(RHS.getNotConstant());
@@ -241,11 +239,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getNotConstant(),
- RHS.getConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
if (Res->isOne())
return false;
@@ -346,21 +342,17 @@ namespace {
/// Push BV onto BlockValueStack unless it's already in there.
/// Returns true on success.
bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
- if (BlockValueSet.count(BV))
+ if (!BlockValueSet.insert(BV).second)
return false; // It's already in the stack.
BlockValueStack.push(BV);
- BlockValueSet.insert(BV);
return true;
}
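
The rewrite is the standard set-insert idiom: a single insert() both tests and records membership, saving the count()-then-insert() double lookup. A minimal demonstration:

#include <cassert>
#include <initializer_list>
#include <set>
#include <vector>

int main() {
  std::set<int> Seen;
  std::vector<int> Stack;
  for (int V : {1, 2, 1, 3, 2})
    if (Seen.insert(V).second)  // .second is true only for a new element
      Stack.push_back(V);
  assert((Stack == std::vector<int>{1, 2, 3}));
  return 0;
}
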
- /// A pointer to the cache of @llvm.assume calls.
- AssumptionCache *AC;
- /// An optional DL pointer.
- const DataLayout *DL;
- /// An optional DT pointer.
- DominatorTree *DT;
-
+ AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls.
+ const DataLayout &DL; ///< A mandatory DataLayout
+ DominatorTree *DT; ///< An optional DT pointer.
+
friend struct LVIValueHandle;
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
@@ -426,7 +418,7 @@ namespace {
OverDefinedCache.clear();
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr,
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
@@ -579,11 +571,13 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
return L->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(L->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(L->getPointerOperand(),
+ L->getModule()->getDataLayout()) == Ptr;
}
if (StoreInst *S = dyn_cast<StoreInst>(I)) {
return S->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(S->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(S->getPointerOperand(),
+ S->getModule()->getDataLayout()) == Ptr;
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (MI->isVolatile()) return false;
@@ -593,11 +587,13 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (!Len || Len->isZero()) return false;
if (MI->getDestAddressSpace() == 0)
- if (GetUnderlyingObject(MI->getRawDest()) == Ptr)
+ if (GetUnderlyingObject(MI->getRawDest(),
+ MI->getModule()->getDataLayout()) == Ptr)
return true;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
if (MTI->getSourceAddressSpace() == 0)
- if (GetUnderlyingObject(MTI->getRawSource()) == Ptr)
+ if (GetUnderlyingObject(MTI->getRawSource(),
+ MTI->getModule()->getDataLayout()) == Ptr)
return true;
}
return false;
@@ -614,10 +610,11 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (isKnownNonNull(Val)) {
NotNull = true;
} else {
- Value *UnderlyingVal = GetUnderlyingObject(Val);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ Value *UnderlyingVal = GetUnderlyingObject(Val, DL);
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
- if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) {
for (Instruction &I : *BB) {
if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
NotNull = true;
@@ -651,7 +648,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -696,7 +693,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -735,7 +732,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (!AssumeVH)
continue;
auto *I = cast<CallInst>(AssumeVH);
- if (!isValidAssumeForContext(I, BBI, DL, DT))
+ if (!isValidAssumeForContext(I, BBI, DT))
continue;
Value *C = I->getArgOperand(0);
@@ -745,7 +742,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (BBLV.isOverdefined())
BBLV = Result;
else
- BBLV.mergeIn(Result);
+ BBLV.mergeIn(Result, DL);
}
}
}
@@ -857,10 +854,10 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that would satisfy the comparison.
+    // Calculate the range of values that are allowed by the comparison.
ConstantRange CmpRange(CI->getValue());
ConstantRange TrueValues =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+ ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
if (NegOffset) // Apply the offset from above.
TrueValues = TrueValues.subtract(NegOffset->getValue());
@@ -1104,27 +1101,27 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
/// This lazily constructs the LazyValueInfoCache.
static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL = nullptr,
+ const DataLayout *DL,
DominatorTree *DT = nullptr) {
- if (!PImpl)
- PImpl = new LazyValueInfoCache(AC, DL, DT);
+ if (!PImpl) {
+ assert(DL && "getCache() called with a null DataLayout");
+ PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ }
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (PImpl)
- getCache(PImpl, AC, DL, DT).clear();
+ getCache(PImpl, AC, &DL, DT).clear();
// Fully lazy.
return false;
@@ -1133,21 +1130,22 @@ bool LazyValueInfo::runOnFunction(Function &F) {
void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC);
+ delete &getCache(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1164,8 +1162,9 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1177,9 +1176,10 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
-static LazyValueInfo::Tristate
-getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
- const DataLayout *DL, TargetLibraryInfo *TLI) {
+static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
+ LVILatticeVal &Result,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
@@ -1250,8 +1250,9 @@ LazyValueInfo::Tristate
LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1259,18 +1260,23 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
- LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI);
+ const DataLayout &DL = CxtI->getModule()->getDataLayout();
+ LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl) {
+ const DataLayout &DL = PredBB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ }
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).eraseBlock(BB);
+ if (PImpl) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ }
}
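
The recurring pattern in the LazyValueInfo hunks above is that the DataLayout is no longer an optional pointer threaded through the pass but is fetched from the IR itself, which always carries one. Reduced to a minimal sketch (hypothetical helper, not part of the patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Module.h"

    // Every BasicBlock lives in a Function inside a Module, so a valid
    // DataLayout reference is always reachable; no null checks are needed.
    static unsigned pointerSizeInBits(llvm::BasicBlock *BB) {
      const llvm::DataLayout &DL = BB->getModule()->getDataLayout();
      return DL.getPointerSizeInBits();
    }
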
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
index 016f8c5..f6025e3 100644
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -36,7 +36,11 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll(); // Does not transform code
}
-
+bool LibCallAliasAnalysis::runOnFunction(Function &F) {
+  // Set up the superclass.
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
+ return false;
+}
/// AnalyzeLibCallDetails - Given a call to a function with the specified
/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
index 23639e7..e98540b 100644
--- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Function.h"
using namespace llvm;
@@ -61,3 +62,29 @@ LibCallInfo::getFunctionInfo(const Function *F) const {
return Map->lookup(F->getName());
}
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) {
+ const LandingPadInst *LP = II->getLandingPadInst();
+ EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+  // function wants to catch asynchronous exceptions. The nounwind attribute only
+ // implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
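
A usage sketch for the two new helpers (hypothetical caller, slightly stricter than canSimplifyInvokeNoUnwind in that it also rejects unknown personalities):

    #include "llvm/Analysis/LibCallSemantics.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // An invoke of a nounwind callee can be relaxed to a plain call only
    // under a synchronous personality; the MSVC x86 SEH personalities may
    // deliver exceptions at arbitrary instructions, so nounwind alone is
    // not sufficient there.
    static bool mayDropInvoke(const InvokeInst *II) {
      EHPersonality P =
          classifyEHPersonality(II->getLandingPadInst()->getPersonalityFn());
      return P != EHPersonality::Unknown && !isAsynchronousEHPersonality(P);
    }
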
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index b5c7245..65a90d7 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -36,12 +36,14 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -49,19 +51,18 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
namespace {
namespace MemRef {
- static unsigned Read = 1;
- static unsigned Write = 2;
- static unsigned Callee = 4;
- static unsigned Branchee = 8;
+ static const unsigned Read = 1;
+ static const unsigned Write = 2;
+ static const unsigned Callee = 4;
+ static const unsigned Branchee = 8;
}
class Lint : public FunctionPass, public InstVisitor<Lint> {
@@ -73,6 +74,8 @@ namespace {
void visitMemoryReference(Instruction &I, Value *Ptr,
uint64_t Size, unsigned Align,
Type *Ty, unsigned Flags);
+ void visitEHBeginCatch(IntrinsicInst *II);
+ void visitEHEndCatch(IntrinsicInst *II);
void visitCallInst(CallInst &I);
void visitInvokeInst(InvokeInst &I);
@@ -95,8 +98,8 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
- Value *findValue(Value *V, bool OffsetOk) const;
- Value *findValueImpl(Value *V, bool OffsetOk,
+ Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const;
public:
@@ -104,7 +107,6 @@ namespace {
AliasAnalysis *AA;
AssumptionCache *AC;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
std::string Messages;
@@ -121,32 +123,38 @@ namespace {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
void print(raw_ostream &O, const Module *M) const override {}
- void WriteValue(const Value *V) {
- if (!V) return;
- if (isa<Instruction>(V)) {
- MessagesStr << *V << '\n';
- } else {
- V->printAsOperand(MessagesStr, true, Mod);
- MessagesStr << '\n';
+ void WriteValues(ArrayRef<const Value *> Vs) {
+ for (const Value *V : Vs) {
+ if (!V)
+ continue;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ V->printAsOperand(MessagesStr, true, Mod);
+ MessagesStr << '\n';
+ }
}
}
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message,
- const Value *V1 = nullptr, const Value *V2 = nullptr,
- const Value *V3 = nullptr, const Value *V4 = nullptr) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
+    /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set
+ /// a breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
+ CheckFailed(Message);
+ WriteValues({V1, Vs...});
}
};
}
@@ -155,23 +163,15 @@ char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
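
The shape of this change, detached from the Lint class (a standalone C++11 sketch with iostream stand-ins, not the real CheckFailed):

    #include <iostream>

    // Base case: message only -- one convenient spot for a breakpoint.
    static void checkFailed(const char *Message) {
      std::cerr << Message << '\n';
    }

    static void printValues() {}
    template <typename T1, typename... Ts>
    static void printValues(const T1 &V1, const Ts &... Vs) {
      std::cerr << V1 << '\n';
      printValues(Vs...);
    }

    // Variadic case: one overload replaces a fixed-arity Assert1..Assert4
    // family by printing the message, then each value in turn.
    template <typename T1, typename... Ts>
    static void checkFailed(const char *Message, const T1 &V1, const Ts &... Vs) {
      checkFailed(Message);
      printValues(V1, Vs...);
    }
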
// Lint::run - This is the main Analysis entry point for a
// function.
@@ -181,9 +181,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -193,8 +191,8 @@ bool Lint::runOnFunction(Function &F) {
void Lint::visitFunction(Function &F) {
// This isn't undefined behavior, it's just a little unusual, and it's a
// fairly common mistake to neglect to name a function.
- Assert1(F.hasName() || F.hasLocalLinkage(),
- "Unusual: Unnamed function with non-local linkage", &F);
+ Assert(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
// TODO: Check for irreducible control flow.
}
@@ -202,27 +200,30 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
+ const DataLayout &DL = CS->getModule()->getDataLayout();
visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Callee);
- if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
- Assert1(CS.getCallingConv() == F->getCallingConv(),
- "Undefined behavior: Caller and callee calling convention differ",
- &I);
+ if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
+ /*OffsetOk=*/false))) {
+ Assert(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
FunctionType *FT = F->getFunctionType();
unsigned NumActualArgs = CS.arg_size();
- Assert1(FT->isVarArg() ?
- FT->getNumParams() <= NumActualArgs :
- FT->getNumParams() == NumActualArgs,
- "Undefined behavior: Call argument count mismatches callee "
- "argument count", &I);
+ Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+ : FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count",
+ &I);
- Assert1(FT->getReturnType() == I.getType(),
- "Undefined behavior: Call return type mismatches "
- "callee return type", &I);
+ Assert(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type",
+ &I);
// Check argument types (in case the callee was casted) and attributes.
// TODO: Verify that caller and callee attributes are compatible.
@@ -232,9 +233,10 @@ void Lint::visitCallSite(CallSite CS) {
Value *Actual = *AI;
if (PI != PE) {
Argument *Formal = PI++;
- Assert1(Formal->getType() == Actual->getType(),
- "Undefined behavior: Call argument type mismatches "
- "callee parameter type", &I);
+ Assert(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type",
+ &I);
// Check that noalias arguments don't alias other arguments. This is
// not fully precise because we don't know the sizes of the dereferenced
@@ -243,9 +245,9 @@ void Lint::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
- Assert1(Result != AliasAnalysis::MustAlias &&
- Result != AliasAnalysis::PartialAlias,
- "Unusual: noalias argument aliases another argument", &I);
+ Assert(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
}
// Check that an sret argument points to valid memory.
@@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {
Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
- DL ? DL->getABITypeAlignment(Ty) : 0,
- Ty, MemRef::Read | MemRef::Write);
+ DL.getABITypeAlignment(Ty), Ty,
+ MemRef::Read | MemRef::Write);
}
}
}
@@ -263,10 +265,11 @@ void Lint::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = findValue(*AI, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Undefined behavior: Call with \"tail\" keyword references "
- "alloca", &I);
+ Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
}
@@ -291,13 +294,13 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
uint64_t Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(findValue(MCI->getLength(),
- /*OffsetOk=*/false)))
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
+ /*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
- Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
- AliasAnalysis::MustAlias,
- "Undefined behavior: memcpy source and destination overlap", &I);
+ Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memmove: {
@@ -321,9 +324,9 @@ void Lint::visitCallSite(CallSite CS) {
}
case Intrinsic::vastart:
- Assert1(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
+ Assert(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
@@ -346,6 +349,13 @@ void Lint::visitCallSite(CallSite CS) {
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
break;
+
+ case Intrinsic::eh_begincatch:
+ visitEHBeginCatch(II);
+ break;
+ case Intrinsic::eh_endcatch:
+ visitEHEndCatch(II);
+ break;
}
}
@@ -359,14 +369,13 @@ void Lint::visitInvokeInst(InvokeInst &I) {
void Lint::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
- Assert1(!F->doesNotReturn(),
- "Unusual: Return statement in function with noreturn attribute",
- &I);
+ Assert(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
- Value *Obj = findValue(V, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Unusual: Returning alloca value", &I);
+ Value *Obj =
+ findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -380,45 +389,47 @@ void Lint::visitMemoryReference(Instruction &I,
if (Size == 0)
return;
- Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
- Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
- "Undefined behavior: Null pointer dereference", &I);
- Assert1(!isa<UndefValue>(UnderlyingObject),
- "Undefined behavior: Undef pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
- "Unusual: All-ones pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isOne(),
- "Unusual: Address one pointer dereference", &I);
+ Value *UnderlyingObject =
+ findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
if (Flags & MemRef::Write) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
- Assert1(!GV->isConstant(),
- "Undefined behavior: Write to read-only memory", &I);
- Assert1(!isa<Function>(UnderlyingObject) &&
- !isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Write to text section", &I);
+ Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+ &I);
+ Assert(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
}
if (Flags & MemRef::Read) {
- Assert1(!isa<Function>(UnderlyingObject),
- "Unusual: Load from function body", &I);
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Load from block address", &I);
+ Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+ &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
}
if (Flags & MemRef::Callee) {
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Call to block address", &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
}
if (Flags & MemRef::Branchee) {
- Assert1(!isa<Constant>(UnderlyingObject) ||
- isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Branch to non-blockaddress", &I);
+ Assert(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
}
// Check for buffer overflows and misalignment.
// Only handles memory references that read/write something simple like an
// alloca instruction or a global variable.
+ auto &DL = I.getModule()->getDataLayout();
int64_t Offset = 0;
if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
@@ -429,37 +440,37 @@ void Lint::visitMemoryReference(Instruction &I,
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
Type *ATy = AI->getAllocatedType();
- if (DL && !AI->isArrayAllocation() && ATy->isSized())
- BaseSize = DL->getTypeAllocSize(ATy);
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = DL.getTypeAllocSize(ATy);
BaseAlign = AI->getAlignment();
- if (DL && BaseAlign == 0 && ATy->isSized())
- BaseAlign = DL->getABITypeAlignment(ATy);
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = DL.getABITypeAlignment(ATy);
} else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// If the global may be defined differently in another compilation unit
// then don't warn about funky memory accesses.
if (GV->hasDefinitiveInitializer()) {
Type *GTy = GV->getType()->getElementType();
- if (DL && GTy->isSized())
- BaseSize = DL->getTypeAllocSize(GTy);
+ if (GTy->isSized())
+ BaseSize = DL.getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
- if (DL && BaseAlign == 0 && GTy->isSized())
- BaseAlign = DL->getABITypeAlignment(GTy);
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = DL.getABITypeAlignment(GTy);
}
}
// Accesses from before the start or after the end of the object are not
// defined.
- Assert1(Size == AliasAnalysis::UnknownSize ||
- BaseSize == AliasAnalysis::UnknownSize ||
- (Offset >= 0 && Offset + Size <= BaseSize),
- "Undefined behavior: Buffer overflow", &I);
+ Assert(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
// Accesses that say that the memory is more aligned than it is are not
// defined.
- if (DL && Align == 0 && Ty && Ty->isSized())
- Align = DL->getABITypeAlignment(Ty);
- Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
- "Undefined behavior: Memory reference address is misaligned", &I);
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = DL.getABITypeAlignment(Ty);
+ Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
@@ -477,39 +488,219 @@ void Lint::visitStoreInst(StoreInst &I) {
}
void Lint::visitXor(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: xor(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
}
void Lint::visitSub(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: sub(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
}
void Lint::visitLShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(1), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
+
+static bool
+allPredsCameFromLandingPad(BasicBlock *BB,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ if (BB->isLandingPad())
+ return true;
+ // If we find a block with no predecessors, the search failed.
+ if (pred_empty(BB))
+ return false;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+static bool
+allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
+ IntrinsicInst **SecondBeginCatch,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
+ return true;
+ // If we find another begincatch while looking for an endcatch,
+ // that's also an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
+ *SecondBeginCatch = IC;
+ return false;
+ }
+ }
+
+ // If we reach a block with no successors while searching, the
+ // search has failed.
+ if (succ_empty(BB))
+ return false;
+ // Otherwise, search all of the successors.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+ if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHBeginCatch(IntrinsicInst *II) {
+ // The checks in this function make a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+  // In general, if we encounter a block that isn't dominated by the catch
+  // block while we are searching the catch block's successors for a call
+  // to the end catch intrinsic, then it is possible that it will be legal for
+ // a path through this block to never reach a call to llvm.eh.endcatch.
+ // An analogous statement could be made about our search for a landing
+ // pad among the catch block's predecessors.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.begincatch without having previously visited
+ // a landingpad instruction and that no path is possible at runtime that
+ // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
+ // (mentally adjusting for the fact that in reality these calls will be
+ // removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
+
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ BasicBlock *CatchBB = II->getParent();
+
+ // The begin catch must occur in a landing pad block or all paths
+ // to it must have come from a landing pad.
+ Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
+ "llvm.eh.begincatch may be reachable without passing a landingpad",
+ II);
+
+ // Reset the visited block list.
+ VisitedBlocks.clear();
+
+ IntrinsicInst *SecondBeginCatch = nullptr;
+
+  // The search has to run before the asserts so that SecondBeginCatch is
+  // populated; otherwise the first assert below could never fire.
+ bool EndCatchFound = allSuccessorsReachEndCatch(
+ CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
+ &SecondBeginCatch, VisitedBlocks);
+ Assert(
+ SecondBeginCatch == nullptr,
+ "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
+ II, SecondBeginCatch);
+ Assert(EndCatchFound,
+ "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
+ II);
+}
+
+static bool allPredCameFromBeginCatch(
+ BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
+ IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ // Look for a begincatch in this block.
+ for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
+ ++RI) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
+ return true;
+ // If we find another end catch before we find a begin catch, that's
+ // an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
+ *SecondEndCatch = IC;
+ return false;
+ }
+ // If we encounter a landingpad instruction, the search failed.
+ if (isa<LandingPadInst>(*RI))
+ return false;
+ }
+  // If while searching we find a block with no predecessors,
+ // the search failed.
+ if (pred_empty(BB))
+ return false;
+ // Search any predecessors we haven't seen before.
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHEndCatch(IntrinsicInst *II) {
+ // The check in this function makes a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+  // In general, if we encounter a block that isn't post-dominated by the
+ // end catch block while we are searching the end catch block's predecessors
+ // for a call to the begin catch intrinsic, then it is possible that it will
+ // be legal for a path to reach the end catch block without ever having
+ // called llvm.eh.begincatch.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.endcatch without having previously visited
+ // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
+ // reality these calls will be removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
+
+ BasicBlock *EndCatchBB = II->getParent();
+
+  // All paths to the end catch call must pass through a begin catch call.
+
+  // If llvm.eh.begincatch wasn't called in the current block, recursively
+  // look for it in the predecessors via the helper above.
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ IntrinsicInst *SecondEndCatch = nullptr;
+
+  // The search has to run before the asserts so that SecondEndCatch is
+  // populated; otherwise the first assert below could never fire.
+ bool BeginCatchFound =
+ allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
+ &SecondEndCatch, VisitedBlocks);
+ Assert(
+ SecondEndCatch == nullptr,
+ "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
+ II, SecondEndCatch);
+ Assert(BeginCatchFound,
+ "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
+ II);
}
-static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
+static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
AssumptionCache *AC) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
@@ -550,30 +741,30 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitAllocaInst(AllocaInst &I) {
if (isa<ConstantInt>(I.getArraySize()))
// This isn't undefined behavior, it's just an obvious pessimization.
- Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
- "Pessimization: Static alloca outside of entry block", &I);
+ Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
// TODO: Check for an unusual size (MSB set?)
}
@@ -587,32 +778,33 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
nullptr, MemRef::Branchee);
- Assert1(I.getNumDestinations() != 0,
- "Undefined behavior: indirectbr with no destinations", &I);
+ Assert(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
- "Undefined result: extractelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(2),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getType()->getNumElements()),
- "Undefined result: insertelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(2), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert1(&I == I.getParent()->begin() ||
- std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
- "Unusual: unreachable immediately preceded by instruction without "
- "side effects", &I);
+ Assert(&I == I.getParent()->begin() ||
+ std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects",
+ &I);
}
/// findValue - Look through bitcasts and simple memory reference patterns
@@ -622,13 +814,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
SmallPtrSet<Value *, 4> Visited;
- return findValueImpl(V, OffsetOk, Visited);
+ return findValueImpl(V, DL, OffsetOk, Visited);
}
/// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
if (!Visited.insert(V).second)
@@ -649,7 +841,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
break;
if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
BB, BBI, 6, AA))
- return findValueImpl(U, OffsetOk, Visited);
+ return findValueImpl(U, DL, OffsetOk, Visited);
if (BBI != BB->begin()) break;
BB = BB->getUniquePredecessor();
if (!BB) break;
@@ -658,40 +850,38 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (Value *W = PN->hasConstantValue())
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
if (CI->isNoopCast(DL))
- return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
Ex->getIndices()))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// Same as above, but for ConstantExpr instead of Instruction.
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
- CE->getOperand(0)->getType(),
- CE->getType(),
- DL ? DL->getIntPtrType(V->getType()) :
- Type::getInt64Ty(V->getContext())))
- return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ CE->getOperand(0)->getType(), CE->getType(),
+ DL.getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
return V;
@@ -711,7 +901,7 @@ void llvm::lintFunction(const Function &f) {
Function &F = const_cast<Function&>(f);
assert(!F.isDeclaration() && "Cannot lint external functions");
- FunctionPassManager FPM(F.getParent());
+ legacy::FunctionPassManager FPM(F.getParent());
Lint *V = new Lint();
FPM.add(V);
FPM.run(F);
@@ -720,7 +910,7 @@ void llvm::lintFunction(const Function &f) {
/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
- PassManager PM;
+ legacy::PassManager PM;
Lint *V = new Lint();
PM.add(V);
PM.run(const_cast<Module&>(M));
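
Caller-side, the two entry points are meant to be used like this (hypothetical wrapper; lintFunction asserts on declarations, hence the guard):

    #include "llvm/Analysis/Lint.h"
    #include "llvm/IR/Function.h"

    // Run the lint checks over one function; the pass itself prints any
    // findings to stderr.
    static void lintIfDefined(const llvm::Function &F) {
      if (!F.isDeclaration())
        llvm::lintFunction(F);
    }
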
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 5042eb9..aed3b04 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
using namespace llvm;
@@ -62,7 +63,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const DataLayout *DL) {
+ unsigned Align) {
+ const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
int64_t ByteOffset = 0;
Value *Base = V;
Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
@@ -87,19 +89,19 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
}
PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+ uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
- // case, so only try this when we have the DataLayout available.
- if (BaseType && BaseType->isSized() && DL) {
+ // case.
+ if (BaseType && BaseType->isSized()) {
if (BaseAlign == 0)
- BaseAlign = DL->getPrefTypeAlignment(BaseType);
+ BaseAlign = DL.getPrefTypeAlignment(BaseType);
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
+ if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
}
@@ -133,16 +135,13 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
else
continue;
- // Handle trivial cases even w/o DataLayout or other work.
+ // Handle trivial cases.
if (AccessedPtr == V)
return true;
- if (!DL)
- continue;
-
auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
- LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType()))
return true;
}
return false;
@@ -176,13 +175,10 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- // Try to get the DataLayout for this module. This may be null, in which case
- // the optimizations will be limited.
- const DataLayout *DL = ScanBB->getDataLayout();
+ const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy)
- : AA ? AA->getTypeStoreSize(AccessTy) : 0;
+ uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
new file mode 100644
index 0000000..b70de00
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -0,0 +1,1427 @@
+//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the loop memory dependence analysis that was
+// originally developed for the loop vectorizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-accesses"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave;
+
+static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
+ "runtime-memory-check-threshold", cl::Hidden,
+ cl::desc("When performing memory disambiguation checks at runtime do not "
+ "generate more than this number of comparisons (default = 8)."),
+ cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
+unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+/// \brief We collect interesting dependences up to this threshold.
+static cl::opt<unsigned> MaxInterestingDependence(
+ "max-interesting-dependences", cl::Hidden,
+ cl::desc("Maximum number of interesting dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
+
+bool VectorizerParams::isInterleaveForced() {
+ return ::VectorizationInterleave.getNumOccurrences() > 0;
+}
+
+void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
+ const Function *TheFunction,
+ const Loop *TheLoop,
+ const char *PassName) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (const Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
+ *TheFunction, DL, Message.str());
+}
+
+Value *llvm::stripIntegerCast(Value *V) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if (CI->getOperand(0)->getType()->isIntegerTy())
+ return CI->getOperand(0);
+ return V;
+}
+
+const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+ const ValueToValueMap &PtrToStride,
+ Value *Ptr, Value *OrigPtr) {
+
+ const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+
+ // If there is an entry in the map return the SCEV of the pointer with the
+ // symbolic stride replaced by one.
+ ValueToValueMap::const_iterator SI =
+ PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ if (SI != PtrToStride.end()) {
+ Value *StrideVal = SI->second;
+
+ // Strip casts.
+ StrideVal = stripIntegerCast(StrideVal);
+
+ // Replace symbolic stride by one.
+ Value *One = ConstantInt::get(StrideVal->getType(), 1);
+ ValueToValueMap RewriteMap;
+ RewriteMap[StrideVal] = One;
+
+ const SCEV *ByOne =
+ SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ << "\n");
+ return ByOne;
+ }
+
+ // Otherwise, just return the SCEV of the original pointer.
+ return SE->getSCEV(Ptr);
+}
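
Concretely, for a pointer whose SCEV is {%A,+,%Stride}<%loop>, an entry mapping the pointer to %Stride makes the function return {%A,+,1}<%loop>, which downstream checks can treat as consecutive access. A caller sketch under that assumption:

    #include "llvm/Analysis/LoopAccessAnalysis.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Map the pointer to its symbolic stride; the presence of the entry is
    // what triggers the rewrite of that stride to the constant 1.
    static const SCEV *strideOneForm(ScalarEvolution *SE, Value *Ptr,
                                     Value *StrideVal) {
      ValueToValueMap Strides;
      Strides[Ptr] = StrideVal;
      return replaceSymbolicStrideSCEV(SE, Strides, Ptr);
    }
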
+
+void LoopAccessInfo::RuntimePointerCheck::insert(
+ ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+ unsigned ASId, const ValueToValueMap &Strides) {
+ // Get the stride replaced scev.
+ const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
+ DependencySetId.push_back(DepSetId);
+ AliasSetId.push_back(ASId);
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+ unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+ // No need to check if two readonly pointers intersect.
+ if (!IsWritePtr[I] && !IsWritePtr[J])
+ return false;
+
+ // Only need to check pointers between two different dependency sets.
+ if (DependencySetId[I] == DependencySetId[J])
+ return false;
+
+ // Only need to check pointers in the same alias set.
+ if (AliasSetId[I] != AliasSetId[J])
+ return false;
+
+  // If PtrPartition is set, omit checks between pointers of the same partition.
+ // Partition number -1 means that the pointer is used in multiple partitions.
+ // In this case we can't omit the check.
+ if (PtrPartition && (*PtrPartition)[I] != -1 &&
+ (*PtrPartition)[I] == (*PtrPartition)[J])
+ return false;
+
+ return true;
+}
+
+void LoopAccessInfo::RuntimePointerCheck::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+ if (NumPointers == 0)
+ return;
+
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ unsigned N = 0;
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition)) {
+ OS.indent(Depth) << N++ << ":\n";
+ OS.indent(Depth + 2) << *Pointers[I];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[I] << ")";
+ OS << "\n";
+ OS.indent(Depth + 2) << *Pointers[J];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[J] << ")";
+ OS << "\n";
+ }
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
+}
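
Stripped of the surrounding class, the three unconditional filters compose as below (plain containers standing in for the RuntimePointerCheck members; the optional partition filter is omitted):

    #include <vector>

    // A pair of pointers needs a runtime check only if at least one side is
    // written, the two fall in different dependency sets (no compile-time
    // dependence result covers them), and they share an alias set.
    static bool needsCheck(const std::vector<bool> &IsWritePtr,
                           const std::vector<unsigned> &DepSetId,
                           const std::vector<unsigned> &AliasSetId,
                           unsigned I, unsigned J) {
      if (!IsWritePtr[I] && !IsWritePtr[J])
        return false; // two reads can never conflict
      if (DepSetId[I] == DepSetId[J])
        return false; // already handled by dependence analysis
      if (AliasSetId[I] != AliasSetId[J])
        return false; // proven disjoint by alias analysis
      return true;
    }
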
+
+namespace {
+/// \brief Analyses memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
+ MemoryDepChecker::DepCandidates &DA)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
+
+ /// \brief Register a store.
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ processMemAccesses();
+ }
+
+ bool isRTCheckNeeded() { return IsRTCheckNeeded; }
+
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+
+ /// We decided that no dependence analysis would be used. Reset the state.
+ void resetDepChecks(MemoryDepChecker &DepChecker) {
+ CheckDeps.clear();
+ DepChecker.clearInterestingDependences();
+ }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+
+ /// \brief Go over all memory accesses and check whether runtime pointer
+ /// checks are needed and build sets of dependency check candidates.
+ void processMemAccesses();
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ const DataLayout &DL;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ /// An alias set tracker to partition the access set by underlying object and
+ /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
+ LoopInfo *LI;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+ /// underlying pointer. The set "CheckDeps" identifies which sets really need
+ /// a dependence check.
+ MemoryDepChecker::DepCandidates &DepCands;
+
+ bool IsRTCheckNeeded;
+};
+
+} // end anonymous namespace
+
+/// \brief Check whether a pointer can participate in a runtime bounds check.
+static bool hasComputableBounds(ScalarEvolution *SE,
+ const ValueToValueMap &Strides, Value *Ptr) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
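+ // For example, {%base,+,4}<%loop> is an affine AddRec and has computable
+ // bounds, while a pointer reloaded from memory on every iteration has no
+ // AddRec and is rejected above.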
+ return AR->isAffine();
+}
+
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap);
+
+bool AccessAnalysis::canCheckPtrAtRT(
+ LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
+ ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+ NumComparisons = 0;
+
+ // We assign a consecutive id to accesses from different alias sets.
+ // Accesses in different groups don't need to be checked against each other.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+ // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ CanDoRT = false;
+ }
+ }
+
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
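+ // For example, an alias set with 2 write pointers and 3 read pointers
+ // spread over several dependence sets adds 2 * (3 + 2 - 1) = 8 comparisons.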
+
+ ++ASId;
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ return CanDoRT;
+}
+
+void AccessAnalysis::processMemAccesses() {
+ // We process the set twice: first we process read-write pointers, then we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
+
+ DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves use
+ // linked lists internally and so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto AV : AS) {
+ Value *Ptr = AV.getValue();
+
+ // For a single memory access in AliasSetTracker, Accesses may contain
+ // both read and write, and they both need to be handled for CheckDeps.
+ for (auto AC : S) {
+ if (AC.getPointer() != Ptr)
+ continue;
+
+ bool IsWrite = AC.getInt();
+
+ // If we're using the deferred access set, then it contains only
+ // reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a
+ // read or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in
+ // the first round (they need to be checked after we have seen all
+ // write pointers). Note: we also mark pointers that are not
+ // consecutive as "read-only" pointers (so that we check
+ // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
+
+ // If this is a write, check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr - this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
+
+ if (IsWrite)
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
+ }
+ }
+ }
+ }
+ }
+}
+
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap) {
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+ << *Ptr << "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The access function must stride over the innermost loop.
+ if (Lp != AR->getLoop()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+ *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap by definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute and with unit stride would
+ // have to access the pointer value "0", which is undefined behavior in
+ // address space 0; therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
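+ // For example, an i32 element (Size = 4) with a step of 4 bytes per
+ // iteration yields Stride = 1 (consecutive); a step of 8 yields Stride = 2;
+ // a step of 6 leaves Rem = 2 and is rejected by the check above.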
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
+
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case BackwardVectorizable:
+ return true;
+
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ return false;
+
+ case BackwardVectorizable:
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case ForwardButPreventsForwarding:
+ return false;
+
+ case Unknown:
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor, store-load forwarding does not take place.
+ // Positive dependences might cause trouble because vectorizing them might
+ // prevent store-load forwarding, making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
+ // hence on your typical architecture store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
+ // Maximum vector factor.
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectorizerParams::MaxVectorWidth * TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+ MaxVFWithoutSLForwardIssues = (vf >>= 1);
+ break;
+ }
+ }
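+ // For example, with TypeByteSize = 4 and Distance = 12 the loop starts at
+ // vf = 8: 12 % 8 != 0 and 12 / 8 < 32 cycles, so the maximum VF is capped
+ // at 4, which falls below 2 * TypeByteSize and is reported as a conflict
+ // below.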
+
+ if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+ DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
+
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
+ assert(AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return Dependence::NoDep;
+
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return Dependence::Unknown;
+
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+
+ int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative we have to invert source and sink of the
+ // dependence.
+ if (StrideAPtr < 0) {
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need consecutive accesses. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ return Dependence::Unknown;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return Dependence::Unknown;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
+ const APInt &Val = C->getValue()->getValue();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
+ return Dependence::ForwardButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
+ return Dependence::Forward;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return Dependence::NoDep;
+ DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ return Dependence::Unknown;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LAA: ReadWrite-Write positive dependency with different types\n");
+ return Dependence::Unknown;
+ }
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
+ VectorizerParams::VectorizationFactor : 1);
+ unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
+ VectorizerParams::VectorizationInterleave : 1);
+
+ // The distance must be bigger than the size needed for a vectorized version
+ // of the operation and the size of the vectorized operation must not be
+ // bigger than the current maximum size.
+ if (Distance < 2*TypeByteSize ||
+ 2*TypeByteSize > MaxSafeDepDistBytes ||
+ Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
+ DEBUG(dbgs() << "LAA: Failure because of Positive distance "
+ << Val.getSExtValue() << '\n');
+ return Dependence::Backward;
+ }
+
+ // Positive distance bigger than max vectorization factor.
+ MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
+ Distance : MaxSafeDepDistBytes;
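+
+ // For example, i32 accesses a[i] (load) and a[i+4] (store) have
+ // Distance = 16 and TypeByteSize = 4; with the default forced factors this
+ // passes the checks above and, barring store-load forwarding issues, the
+ // dependence is backward but vectorizable with max VF = 16 / 4 = 4 elements.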
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return Dependence::BackwardVectorizableButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+
+ return Dependence::BackwardVectorizable;
+}
+
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ const ValueToValueMap &Strides) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ auto A = std::make_pair(&*AI, *I1);
+ auto B = std::make_pair(&*OI, *I2);
+
+ assert(*I1 != *I2);
+ if (*I1 > *I2)
+ std::swap(A, B);
+
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+ // Gather dependences unless we accumulated MaxInterestingDependence
+ // dependences. In that case return as soon as we find the first
+ // unsafe dependence. This puts a limit on this quadratic
+ // algorithm.
+ if (RecordInterestingDependences) {
+ if (Dependence::isInterestingDependence(Type))
+ InterestingDependences.push_back(
+ Dependence(A.second, B.second, Type));
+
+ if (InterestingDependences.size() >= MaxInterestingDependence) {
+ RecordInterestingDependences = false;
+ InterestingDependences.clear();
+ DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ }
+ }
+ if (!RecordInterestingDependences && !SafeForVectorization)
+ return false;
+ }
+ ++OI;
+ }
+ ++AI;
+ }
+ }
+
+ DEBUG(dbgs() << "Total Interesting Dependences: "
+ << InterestingDependences.size() << "\n");
+ return SafeForVectorization;
+}
+
+SmallVector<Instruction *, 4>
+MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
+ MemAccessInfo Access(Ptr, isWrite);
+ auto &IndexVector = Accesses.find(Access)->second;
+
+ SmallVector<Instruction *, 4> Insts;
+ std::transform(IndexVector.begin(), IndexVector.end(),
+ std::back_inserter(Insts),
+ [&](unsigned Idx) { return this->InstMap[Idx]; });
+ return Insts;
+}
+
+const char *MemoryDepChecker::Dependence::DepName[] = {
+ "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
+ "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+
+void MemoryDepChecker::Dependence::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<Instruction *> &Instrs) const {
+ OS.indent(Depth) << DepName[Type] << ":\n";
+ OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
+ OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LAA: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
+
+ // We can only analyze innermost loops.
+ if (!TheLoop->empty()) {
+ DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
+ emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We only handle bottom-tested loops, i.e., loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport() <<
+ "could not determine number of loop iterations");
+ DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ return true;
+}
+
+void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
+
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+
+ // Holds all the different accesses in the loop.
+ unsigned NumReads = 0;
+ unsigned NumReadWrites = 0;
+
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
+ // If the function has an explicit vectorized counterpart, we can safely
+ // assume that it can be vectorized.
+ if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
+ TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
+ continue;
+
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(LoopAccessReport(Ld)
+ << "read with atomic ordering or volatile read");
+ DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumLoads++;
+ Loads.push_back(Ld);
+ DepChecker.addAccess(Ld);
+ continue;
+ }
+
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) {
+ emitAnalysis(LoopAccessReport(it) <<
+ "instruction cannot be vectorized");
+ CanVecMem = false;
+ return;
+ }
+ if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(LoopAccessReport(St)
+ << "write with atomic ordering or volatile write");
+ DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumStores++;
+ Stores.push_back(St);
+ DepChecker.addAccess(St);
+ }
+ } // Next instr.
+ } // Next block.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ MemoryDepChecker::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
+ AA, LI, DependentAccesses);
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
+ // If we did *not* see this pointer before, insert it to the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr).second) {
+ ++NumReadWrites;
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
+ }
+ }
+
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ CanVecMem = true;
+ return;
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it to the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x, because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+ // other reads in this loop then it is safe to vectorize.
+ if (NumReadWrites == 1 && NumReads == 0) {
+ DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+ bool NeedRTCheck = Accesses.isRTCheckNeeded();
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = false;
+ if (NeedRTCheck)
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+ Strides);
+
+ DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+ " pointer comparisons.\n");
+
+ // If we only have one set of dependences to check pointers among, we don't
+ // need a runtime check.
+ if (NumComparisons == 0 && NeedRTCheck)
+ NeedRTCheck = false;
+
+ // Check that we found the bounds for the pointer.
+ if (CanDoRT)
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ else if (NeedRTCheck) {
+ emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
+
+ CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks(DepChecker);
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, Strides, true);
+ // Check that we found the bounds for the pointer.
+ if (!CanDoRT && NumComparisons > 0) {
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
+ DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ CanVecMem = true;
+ }
+ }
+
+ if (CanVecMem)
+ DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (NeedRTCheck ? "" : " don't")
+ << " need a runtime memory check.\n");
+ else {
+ emitAnalysis(LoopAccessReport() <<
+ "unsafe dependent memory operations in loop");
+ DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ }
+}
+
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
+void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+ assert(!Report && "Multiple reports generated");
+ Report = Message;
+}
+
+bool LoopAccessInfo::isUniform(Value *V) const {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+// FIXME: this function is currently a duplicate of the one in
+// LoopVectorize.cpp.
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
+}
+
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+ Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
+ if (!PtrRtCheck.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ unsigned NumPointers = PtrRtCheck.Pointers.size();
+ SmallVector<TrackingVH<Value> , 2> Starts;
+ SmallVector<TrackingVH<Value> , 2> Ends;
+
+ LLVMContext &Ctx = Loc->getContext();
+ SCEVExpander Exp(*SE, DL, "induction");
+ Instruction *FirstInst = nullptr;
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck.Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ continue;
+
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
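+
+ // Each pair check above is an interval-overlap test: the ranges
+ // [Start0, End0) and [Start1, End1) may conflict iff Start0 <= End1 and
+ // Start1 <= End0, and OR-ing the per-pair results yields true when any
+ // pair may overlap.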
+
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+ // constant expression in which case there is no Instruction anchored in
+ // the block.
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
+
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides)
+ : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
+ if (canAnalyzeLoop())
+ analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+ if (CanVecMem) {
+ if (PtrRtCheck.Need)
+ OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
+ }
+
+ if (Report)
+ OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+ if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
+ OS.indent(Depth) << "Interesting Dependences:\n";
+ for (auto &Dep : *InterestingDependences) {
+ Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
+ OS << "\n";
+ }
+ } else
+ OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+
+ // List the pairs of accesses that need run-time checks to prove independence.
+ PtrRtCheck.print(OS, Depth);
+ OS << "\n";
+
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+ auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+ assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+ "Symbolic strides changed for loop");
+#endif
+
+ if (!LAI) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
+ Strides);
+#ifndef NDEBUG
+ LAI->NumSymbolicStrides = Strides.size();
+#endif
+ }
+ return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+ LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+ ValueToValueMap NoSymbolicStrides;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ OS.indent(2) << L->getHeader()->getName() << ":\n";
+ auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+ LAI.print(OS, 4);
+ }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ return false;
+}
+
+void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+}
+
+char LoopAccessAnalysis::ID = 0;
+static const char laa_name[] = "Loop Access Analysis";
+#define LAA_NAME "loop-accesses"
+
+INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+
+namespace llvm {
+ Pass *createLAAPass() {
+ return new LoopAccessAnalysis();
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index b1f62c4..6b6faf8 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -26,8 +26,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -45,11 +47,6 @@ static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
-char LoopInfo::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
-
// Loop identifier metadata name.
static const char *const LoopMDName = "llvm.loop";
@@ -59,20 +56,16 @@ static const char *const LoopMDName = "llvm.loop";
/// isLoopInvariant - Return true if the specified value is loop invariant
///
-bool Loop::isLoopInvariant(Value *V) const {
- if (Instruction *I = dyn_cast<Instruction>(V))
+bool Loop::isLoopInvariant(const Value *V) const {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
return !contains(I);
return true; // All non-instructions are loop invariant
}
/// hasLoopInvariantOperands - Return true if all the operands of the
/// specified instruction are loop invariant.
-bool Loop::hasLoopInvariantOperands(Instruction *I) const {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (!isLoopInvariant(I->getOperand(i)))
- return false;
-
- return true;
+bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
+ return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
}
/// makeLoopInvariant - If the given value is an instruction inside of the
@@ -609,15 +602,6 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-//===----------------------------------------------------------------------===//
-// LoopInfo implementation
-//
-bool LoopInfo::runOnFunction(Function &) {
- releaseMemory();
- LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- return false;
-}
-
/// updateUnloop - The last backedge has been removed from a loop--now the
/// "unloop". Find a new parent for the blocks contained within unloop and
/// update the loop tree. We don't necessarily have valid dominators at this
@@ -631,7 +615,8 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
if (!Unloop->getParentLoop()) {
// Since BBLoop had no parent, Unloop blocks are no longer in a loop.
for (Loop::block_iterator I = Unloop->block_begin(),
- E = Unloop->block_end(); I != E; ++I) {
+ E = Unloop->block_end();
+ I != E; ++I) {
// Don't reparent blocks in subloops.
if (getLoopFor(*I) != Unloop)
@@ -639,21 +624,21 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
// Blocks no longer have a parent but are still referenced by Unloop until
// the Unloop object is deleted.
- LI.changeLoopFor(*I, nullptr);
+ changeLoopFor(*I, nullptr);
}
// Remove the loop from the top-level LoopInfo object.
- for (LoopInfo::iterator I = LI.begin();; ++I) {
- assert(I != LI.end() && "Couldn't find loop");
+ for (iterator I = begin();; ++I) {
+ assert(I != end() && "Couldn't find loop");
if (*I == Unloop) {
- LI.removeLoop(I);
+ removeLoop(I);
break;
}
}
// Move all of the subloops to the top-level.
while (!Unloop->empty())
- LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
+ addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
return;
}
@@ -680,35 +665,59 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
}
}
-void LoopInfo::verifyAnalysis() const {
- // LoopInfo is a FunctionPass, but verifying every loop in the function
- // each time verifyAnalysis is called is very expensive. The
- // -verify-loop-info option can enable this. In order to perform some
- // checking by default, LoopPass has been taught to call verifyLoop
- // manually during loop pass sequences.
+char LoopAnalysis::PassID;
+
+LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
+ // FIXME: Currently we create a LoopInfo from scratch for every function.
+ // This may prove to be too wasteful due to deallocating and re-allocating
+ // memory each time for the underlying map and vector datastructures. At some
+ // point it may prove worthwhile to use a freelist and recycle LoopInfo
+ // objects. I don't want to add that kind of complexity until the scope of
+ // the problem is better understood.
+ LoopInfo LI;
+ LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ return LI;
+}
- if (!VerifyLoopInfo) return;
+PreservedAnalyses LoopPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AM->getResult<LoopAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
- DenseSet<const Loop*> Loops;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
- (*I)->verifyLoopNest(&Loops);
- }
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
- // Verify that blocks are mapped to valid loops.
- for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
- E = LI.BBMap.end(); I != E; ++I) {
- assert(Loops.count(I->second) && "orphaned loop");
- assert(I->second->contains(I->first) && "orphaned block");
- }
+char LoopInfoWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+
+bool LoopInfoWrapperPass::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
+
+void LoopInfoWrapperPass::verifyAnalysis() const {
+ // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the
+ // function each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop manually
+ // during loop pass sequences.
+ if (VerifyLoopInfo)
+ LI.verify();
}
-void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTreeWrapperPass>();
}
-void LoopInfo::print(raw_ostream &OS, const Module*) const {
+void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 190abc7..e9fcf02 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "loop-pass-manager"
@@ -187,14 +188,15 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
- Info.addRequired<LoopInfo>();
+ Info.addRequired<LoopInfoWrapperPass>();
Info.setPreservesAll();
}
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
bool LPPassManager::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
+ auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
+ LI = &LIWP.getLoopInfo();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -262,7 +264,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// loop in the function every time. That level of checking can be
// enabled with the -verify-loop-info option.
{
- TimeRegion PassTimer(getPassTimer(LI));
+ TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index ffc9fe6..da3b829 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -96,8 +96,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
// All this code uses non-const interfaces because MemDep is not
// const-friendly, though nothing is actually modified.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- Instruction *Inst = &*I;
+ for (auto &I : inst_range(F)) {
+ Instruction *Inst = &I;
if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
continue;
@@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (CallSite CS = cast<Value>(Inst)) {
+ } else if (auto CS = CallSite(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
MDA.getNonLocalCallDependency(CS);
@@ -135,8 +135,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
- const Instruction *Inst = &*I;
+ for (const auto &I : inst_range(*F)) {
+ const Instruction *Inst = &I;
DepSetMap::const_iterator DI = Deps.find(Inst);
if (DI == Deps.end())
@@ -144,11 +144,10 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
const DepSet &InstDeps = DI->second;
- for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
- I != E; ++I) {
- const Instruction *DepInst = I->first.getPointer();
- DepType type = I->first.getInt();
- const BasicBlock *DepBB = I->second;
+ for (const auto &I : InstDeps) {
+ const Instruction *DepInst = I.first.getPointer();
+ DepType type = I.first.getInt();
+ const BasicBlock *DepBB = I.second;
OS << " ";
OS << DepTypeStr[type];
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
new file mode 100644
index 0000000..fa292a2
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -0,0 +1,70 @@
+//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDerefPrinter : public FunctionPass {
+ SmallVector<Value *, 4> Vec;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDerefPrinter() : FunctionPass(ID) {
+ initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void releaseMemory() override {
+ Vec.clear();
+ }
+ };
+}
+
+char MemDerefPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenceability of pointers in function", false, true)
+INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenceability of pointers in function", false, true)
+
+FunctionPass *llvm::createMemDerefPrinter() {
+ return new MemDerefPrinter();
+}
+
+bool MemDerefPrinter::runOnFunction(Function &F) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &I: inst_range(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *PO = LI->getPointerOperand();
+ if (isDereferenceablePointer(PO, DL))
+ Vec.push_back(PO);
+ }
+ }
+ return false;
+}
+
+void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
+ OS << "The following are dereferenceable:\n";
+ for (auto &V: Vec) {
+ V->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 08b41fe..8ddac8f 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -206,7 +206,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
+static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -214,12 +214,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !DL)
+ if (!T || !T->isSized())
return nullptr;
- unsigned ElementSize = DL->getTypeAllocSize(T);
+ unsigned ElementSize = DL.getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -232,23 +232,6 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
return nullptr;
}
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
-/// is a call to malloc whose array size can be determined and the array size
-/// is not constant 1. Otherwise, return NULL.
-const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, DL, TLI);
-
- if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
- if (ConstSize->isOne())
- return CI;
-
- // CI is a non-array malloc or we can't figure out that it is an array malloc.
- return nullptr;
-}
-
/// getMallocType - Returns the PointerType resulting from the malloc call.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the calls' return type.
@@ -297,7 +280,7 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
@@ -319,7 +302,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
if (!CI || isa<IntrinsicInst>(CI))
return nullptr;
Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr || !Callee->isDeclaration())
+ if (Callee == nullptr)
return nullptr;
StringRef FnName = Callee->getName();
@@ -367,11 +350,8 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!DL)
- return false;
-
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
@@ -399,17 +379,17 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
- IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
+ IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
Zero = APInt::getNullValue(IntTyBits);
V = V->stripPointerCasts();
@@ -449,7 +429,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -468,7 +448,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -541,7 +521,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -557,7 +537,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -593,19 +573,18 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context,
- bool RoundToAlign)
-: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
- RoundToAlign(RoundToAlign) {
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
+ const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
+ bool RoundToAlign)
+ : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
- IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
@@ -687,7 +666,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- DL->getTypeAllocSize(I.getAllocatedType()));
+ DL.getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -739,7 +718,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
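Similarly for getObjectSize: with DataLayout mandatory, the "no target data" failure mode is gone, and only genuinely unknown sizes return false. A minimal illustrative wrapper (names are mine, not from the patch):

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// True if Ptr's pointee object size is statically known; Size gets bytes.
static bool objectSizeInBytes(const Value *Ptr, const Module &M,
                              const TargetLibraryInfo *TLI, uint64_t &Size) {
  return getObjectSize(Ptr, Size, M.getDataLayout(), TLI,
                       /*RoundToAlign=*/false);
}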
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index c505aa4..3c1826a 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -65,7 +65,7 @@ INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
MemoryDependenceAnalysis::MemoryDependenceAnalysis()
- : FunctionPass(ID), PredCache() {
+ : FunctionPass(ID) {
initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
@@ -79,11 +79,9 @@ void MemoryDependenceAnalysis::releaseMemory() {
ReverseLocalDeps.clear();
ReverseNonLocalDeps.clear();
ReverseNonLocalPtrDeps.clear();
- PredCache->clear();
+ PredCache.clear();
}
-
-
/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -95,13 +93,9 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- if (!PredCache)
- PredCache.reset(new PredIteratorCache());
return false;
}
@@ -227,7 +221,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
continue;
}
- if (CallSite InstCS = cast<Value>(Inst)) {
+ if (auto InstCS = CallSite(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
@@ -265,22 +259,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase and MemLocOffs are lazily computed here the first time the
/// base/offset of MemLoc is needed.
-static bool
-isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI,
- const DataLayout *DL) {
- // If we have no target data, we can't do this.
- if (!DL) return false;
+static bool isLoadLoadClobberIfExtendedToFullWidth(
+ const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase,
+ int64_t &MemLocOffs, const LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
// If we haven't already computed the base/offset of MemLoc, do so now.
if (!MemLocBase)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
- LI, *DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ MemLocBase, MemLocOffs, MemLoc.Size, LI);
return Size != 0;
}
@@ -291,23 +280,23 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use. If not, this returns zero.
-unsigned MemoryDependenceAnalysis::
-getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
- unsigned MemLocSize, const LoadInst *LI,
- const DataLayout &DL) {
+unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
+ const LoadInst *LI) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
// Load widening is hostile to ThreadSanitizer: it may cause false positives
// or make the reports more cryptic (access sizes are wrong).
- if (LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
@@ -346,9 +335,9 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
!DL.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
- if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
@@ -362,6 +351,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
}
}
+static bool isVolatile(Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->isVolatile();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->isVolatile();
+ else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AI->isVolatile();
+ return false;
+}
+
+
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
@@ -405,14 +405,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
- bool HasSeenAcquire = false;
+ // If the load is invariant, we "know" that it doesn't alias *any* write. We
+ // do want to respect mustalias results since defs are useful for value
+ // forwarding, but any mayalias write can be assumed to be noalias.
+ // Arguably, this logic should be pushed inside AliasAnalysis itself.
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
isInvariantLoad = true;
}
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
Instruction *Inst = --ScanIt;
@@ -448,14 +453,28 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// does not alias with when this atomic load indicates that another thread may
// be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+
+      // While volatile accesses cannot be eliminated, they do not have to
+      // clobber non-aliasing locations: normal accesses, for example, can be
+      // safely reordered with volatile accesses.
+ if (LI->isVolatile()) {
+ if (!QueryInst)
+ // Original QueryInst *may* be volatile
+ return MemDepResult::getClobber(LI);
+ if (isVolatile(QueryInst))
+ // Ordering required if QueryInst is itself volatile
+ return MemDepResult::getClobber(LI);
+ // Otherwise, volatile doesn't imply any special ordering
+ }
+
// Atomic loads have complications involved.
// A Monotonic (or higher) load is OK if the query inst is itself not atomic.
- // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
- // release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered()) {
+ if (LI->isAtomic() && LI->getOrdering() > Unordered) {
if (!QueryInst)
return MemDepResult::getClobber(LI);
+ if (LI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(LI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(LI);
@@ -465,18 +484,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(LI);
}
-
- if (isAtLeastAcquire(LI->getOrdering()))
- HasSeenAcquire = true;
}
- // FIXME: this is overly conservative.
- // While volatile access cannot be eliminated, they do not have to clobber
- // non-aliasing locations, as normal accesses can for example be reordered
- // with volatile accesses.
- if (LI->isVolatile())
- return MemDepResult::getClobber(LI);
-
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
@@ -490,12 +499,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
- if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI, DL))
+ MemLocOffset, LI))
return MemDepResult::getClobber(Inst);
-
+ }
continue;
}
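Boiled down, the hunk above implements the following decision procedure for a scanned load. This is a condensed sketch for exposition only: the parameter names are mine, the real code returns MemDepResult, and the mayReadOrWriteMemory fallback is folded into QueryIsSimple.

// When does a scanned LoadInst clobber the query?
enum class Verdict { Clobber, KeepScanning };

static Verdict classifyScannedLoad(bool LIIsVolatile, bool HaveQuery,
                                   bool QueryIsVolatile, bool LIOrderedAtomic,
                                   bool LIIsMonotonic, bool QueryIsSimple) {
  // Volatile loads clobber unless the query is known and non-volatile.
  if (LIIsVolatile && (!HaveQuery || QueryIsVolatile))
    return Verdict::Clobber;
  // Atomic loads above unordered clobber unless they are monotonic and
  // the query is a simple load or store.
  if (LIOrderedAtomic && (!HaveQuery || !LIIsMonotonic || !QueryIsSimple))
    return Verdict::Clobber;
  return Verdict::KeepScanning;
}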
@@ -534,12 +543,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// A Monotonic store is OK if the query inst is itself not atomic.
- // A Release (or higher) store further requires that no acquire load
- // has been seen.
// FIXME: This is overly conservative.
if (!SI->isUnordered()) {
if (!QueryInst)
return MemDepResult::getClobber(SI);
+ if (SI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(SI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(SI);
@@ -549,9 +558,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(SI);
}
-
- if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
- return MemDepResult::getClobber(SI);
}
// FIXME: this is overly conservative.
@@ -597,6 +603,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
// Be conservative if the accessed pointer may alias the allocation.
if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
return MemDepResult::getClobber(Inst);
@@ -607,6 +615,9 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
continue;
}
+ if (isInvariantLoad)
+ continue;
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
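The invariant-load fast paths added above key off the !invariant.load metadata checked earlier in this function. A sketch of how such a load gets tagged in the first place (assuming the 3.7-era metadata API):

#include "llvm/ADT/None.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// After this patch, memdep walks straight past may-alias stores, calls,
// and allocations when the querying load carries this tag.
static void markInvariant(LoadInst *LI) {
  LLVMContext &Ctx = LI->getContext();
  LI->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, None));
}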
@@ -757,8 +768,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
- for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
- DirtyBlocks.push_back(*PI);
+ for (BasicBlock *Pred : PredCache.get(QueryBB))
+ DirtyBlocks.push_back(Pred);
++NumUncacheNonLocal;
}
@@ -843,8 +854,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
- for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
- DirtyBlocks.push_back(*PI);
+ for (BasicBlock *Pred : PredCache.get(DirtyBB))
+ DirtyBlocks.push_back(Pred);
}
}
@@ -861,23 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
-
- auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
- if (auto *I = dyn_cast<LoadInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<StoreInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<VAArgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
- return AA->getLocation(I);
- else
- llvm_unreachable("unsupported memory instruction");
- };
-
- const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ const AliasAnalysis::Location Loc = AA->getLocation(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
@@ -890,14 +885,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
  // Doing so would require piping the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
// non-volatile accesses.
- auto isVolatile = [](Instruction *Inst) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- return LI->isVolatile();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- return SI->isVolatile();
- }
- return false;
- };
+
// We currently give up on any instruction which is ordered, but we do handle
// atomic instructions which are unordered.
// TODO: Handle ordered instructions
@@ -915,8 +903,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
const_cast<Value *>(Loc.Ptr)));
return;
}
-
-
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
// This is the set of blocks we've inspected, and the pointer we consider in
@@ -924,7 +911,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
// a block with multiple different pointers. This can happen during PHI
// translation.
DenseMap<BasicBlock*, Value*> Visited;
- if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
Result, Visited, true))
return;
Result.clear();
@@ -938,7 +925,8 @@ getNonLocalPointerDependency(Instruction *QueryInst,
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+GetNonLocalInfoForBlock(Instruction *QueryInst,
+ const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
@@ -979,7 +967,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
}
// Scan the block for the dependency.
- MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+ QueryInst);
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
@@ -1052,7 +1041,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+getNonLocalPointerDepFromBB(Instruction *QueryInst,
+ const PHITransAddr &Pointer,
const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepResult> &Result,
@@ -1091,7 +1081,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
} else if (CacheInfo->Size > Loc.Size) {
// This query's Size is less than the cached one. Conservatively restart
// the query using the greater size.
- return getNonLocalPointerDepFromBB(Pointer,
+ return getNonLocalPointerDepFromBB(QueryInst, Pointer,
Loc.getWithNewSize(CacheInfo->Size),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
@@ -1111,7 +1101,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->NonLocalDeps.clear();
}
if (Loc.AATags)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
+ return getNonLocalPointerDepFromBB(QueryInst,
+ Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1214,7 +1205,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
DEBUG(AssertSorted(*Cache, NumSortedEntries));
- MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+ Loc, isLoad, BB, Cache,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
@@ -1238,13 +1230,13 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
SmallVector<BasicBlock*, 16> NewBlocks;
- for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ for (BasicBlock *Pred : PredCache.get(BB)) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
- InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
- NewBlocks.push_back(*PI);
+ NewBlocks.push_back(Pred);
continue;
}
@@ -1280,8 +1272,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
Cache = nullptr;
PredList.clear();
- for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : PredCache.get(BB)) {
PredList.push_back(std::make_pair(Pred, Pointer));
// Get the PHI translated pointer in this predecessor. This can fail if
@@ -1348,7 +1339,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// result conflicted with the Visited list; we have to conservatively
// assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
- getNonLocalPointerDepFromBB(PredPointer,
+ getNonLocalPointerDepFromBB(QueryInst, PredPointer,
Loc.getWithNewPtr(PredPtrVal),
isLoad, Pred,
Result, Visited)) {
@@ -1471,7 +1462,7 @@ void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
/// This needs to be done when the CFG changes, e.g., due to splitting
/// critical edges.
void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
- PredCache->clear();
+ PredCache.clear();
}
/// removeInstruction - Remove an instruction from the dependence analysis,
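Across this file the PredIteratorCache rewrite replaces the old null-terminated BasicBlock** walk with a by-value cache handing back a range. A minimal sketch of the new idiom (header path per the 3.7 tree):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/PredIteratorCache.h"

using namespace llvm;

// old: for (BasicBlock **PI = Cache->GetPreds(BB); *PI; ++PI) use(*PI);
// new: a cached range -- no sentinel and no heap-allocated cache object.
static unsigned countPreds(PredIteratorCache &Cache, BasicBlock *BB) {
  unsigned N = 0;
  for (BasicBlock *Pred : Cache.get(BB)) {
    (void)Pred;
    ++N;
  }
  return N;
}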
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f645558..36c4714 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -55,28 +55,72 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
return false;
}
+static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
+ unsigned Line = 0) {
+ if (Filename.empty())
+ return;
+
+ O << " from ";
+ if (!Directory.empty())
+ O << Directory << "/";
+ O << Filename;
+ if (Line)
+ O << ":" << Line;
+}
+
void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
- for (DICompileUnit CU : Finder.compile_units()) {
- O << "Compile Unit: ";
- CU.print(O);
+ // Printing the nodes directly isn't particularly helpful (since they
+ // reference other nodes that won't be printed, particularly for the
+ // filenames), so just print a few useful things.
+ for (DICompileUnit *CU : Finder.compile_units()) {
+ O << "Compile unit: ";
+ if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ O << Lang;
+ else
+ O << "unknown-language(" << CU->getSourceLanguage() << ")";
+ printFile(O, CU->getFilename(), CU->getDirectory());
O << '\n';
}
- for (DISubprogram S : Finder.subprograms()) {
- O << "Subprogram: ";
- S.print(O);
+ for (DISubprogram *S : Finder.subprograms()) {
+ O << "Subprogram: " << S->getName();
+ printFile(O, S->getFilename(), S->getDirectory(), S->getLine());
+ if (!S->getLinkageName().empty())
+ O << " ('" << S->getLinkageName() << "')";
O << '\n';
}
- for (DIGlobalVariable GV : Finder.global_variables()) {
- O << "GlobalVariable: ";
- GV.print(O);
+ for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ O << "Global variable: " << GV->getName();
+ printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
+ if (!GV->getLinkageName().empty())
+ O << " ('" << GV->getLinkageName() << "')";
O << '\n';
}
- for (DIType T : Finder.types()) {
- O << "Type: ";
- T.print(O);
+ for (const DIType *T : Finder.types()) {
+ O << "Type:";
+ if (!T->getName().empty())
+ O << ' ' << T->getName();
+ printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
+ if (auto *BT = dyn_cast<DIBasicType>(T)) {
+ O << " ";
+ if (const char *Encoding =
+ dwarf::AttributeEncodingString(BT->getEncoding()))
+ O << Encoding;
+ else
+ O << "unknown-encoding(" << BT->getEncoding() << ')';
+ } else {
+ O << ' ';
+ if (const char *Tag = dwarf::TagString(T->getTag()))
+ O << Tag;
+ else
+ O << "unknown-tag(" << T->getTag() << ")";
+ }
+ if (auto *CT = dyn_cast<DICompositeType>(T)) {
+ if (auto *S = CT->getRawIdentifier())
+ O << " (identifier: '" << S->getString() << "')";
+ }
O << '\n';
}
}
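With these changes the pass prints compact one-line summaries instead of raw metadata node dumps. The output is shaped roughly like the following (illustrative only, for a hypothetical C99 translation unit):

Compile unit: DW_LANG_C99 from /tmp/t.c
Subprogram: main from /tmp/t.c:3
Global variable: g from /tmp/t.c:1
Type: int DW_ATE_signed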
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
index c214d3c..203e1da 100644
--- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -33,11 +34,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {}
- void initializePass() override {
+ bool doInitialization(Module &M) override {
// Note: NoAA does not call InitializeAliasAnalysis because it's
// special and does not support chaining.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &M.getDataLayout();
+ return true;
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index a534418..177684f 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -404,10 +404,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
GEPOps.push_back(OpVal);
}
- GetElementPtrInst *Result =
- GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
+ GetElementPtrInst *Result = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
Result->setIsInBounds(GEP->isInBounds());
NewInsts.push_back(Result);
return Result;
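This is part of the opaque-pointer groundwork: GetElementPtrInst::Create now takes the source element type explicitly rather than deriving it from the pointer operand's type. The new call shape, as a sketch (names illustrative):

#include "llvm/IR/Instructions.h"

using namespace llvm;

static GetElementPtrInst *makeGEP(Type *EltTy, Value *BasePtr,
                                  ArrayRef<Value *> Indices,
                                  Instruction *InsertBefore) {
  // Before this change the element type was read out of BasePtr's
  // pointer type; now the caller must spell it out.
  return GetElementPtrInst::Create(EltTy, BasePtr, Indices, "gep",
                                   InsertBefore);
}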
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index 6fa7b2e..5e1cdd4 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
@@ -83,9 +84,11 @@ bool RGPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *P = (RegionPass*)getContainedPass(Index);
- dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
- CurrentRegion->getNameStr());
- dumpRequiredSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+ }
initializeAnalysisImpl(P);
@@ -96,11 +99,13 @@ bool RGPassManager::runOnFunction(Function &F) {
Changed |= P->runOnRegion(CurrentRegion, *this);
}
- if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr());
- dumpPreservedSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+ }
if (!skipThisRegion) {
// Manually check that this region is still healthy. This is done
@@ -120,8 +125,8 @@ bool RGPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr(),
+ (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
+ "<deleted>" : CurrentRegion->getNameStr(),
ON_REGION_MSG);
if (skipThisRegion)
@@ -194,7 +199,7 @@ public:
bool runOnRegion(Region *R, RGPassManager &RGM) override {
Out << Banner;
- for (const auto &BB : R->blocks()) {
+ for (const auto *BB : R->blocks()) {
if (BB)
BB->print(Out);
else
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index ad83113..d7f5109 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
- for (const auto &BB : R.blocks())
+ for (auto *BB : R.blocks())
if (RI.getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
<< static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index ee42737..0e9f812 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -68,6 +68,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -87,7 +88,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -117,9 +117,9 @@ VerifySCEV("verify-scev",
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -726,6 +726,13 @@ public:
return;
}
+    // A simple case: N/1. The quotient is N and the remainder is zero.
+ if (Denominator->isOne()) {
+ *Quotient = Numerator;
+ *Remainder = D.Zero;
+ return;
+ }
+
// Split the Denominator when it is a product.
if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
const SCEV *Q, *R;
@@ -788,6 +795,14 @@ public:
assert(Numerator->isAffine() && "Numerator should be affine");
divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ // Bail out if the types do not match.
+ Type *Ty = Denominator->getType();
+ if (Ty != StartQ->getType() || Ty != StartR->getType() ||
+ Ty != StepQ->getType() || Ty != StepR->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
Numerator->getNoWrapFlags());
Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
@@ -1102,13 +1117,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
// trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
- // eliminate all the truncates.
+  // eliminate all the truncates, or if we replace other casts with truncates.
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SA->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1117,13 +1133,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
- // eliminate all the truncates.
+  // eliminate all the truncates, or if we replace other casts with truncates.
if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SM->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1148,6 +1165,262 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return S;
}
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the
+// loop does not exceed this limit before incrementing.
+static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return nullptr;
+}
+
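A worked instance of the signed case, using 8 bits for readability (hand-computed, not from the patch): with Step known positive and a signed max of 3, Pred is ICMP_SLT and the limit is SignedMin - 3 evaluated in wrapping 8-bit arithmetic, i.e. 125; any value below 125 can absorb the increment without overflow. A standalone check of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t SignedMin = 0x80; // -128 as an 8-bit pattern
  const uint8_t StepMax = 3;
  const int8_t Limit = static_cast<int8_t>(SignedMin - StepMax);
  assert(Limit == 125);           // (-128 - 3) mod 256 == 125
  // v < 125 (signed) implies v + 3 <= 127: no signed overflow.
  return 0;
}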
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// unsigned overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ *Pred = ICmpInst::ICMP_ULT;
+
+ return SE->getConstant(APInt::getMinValue(BitWidth) -
+ SE->getUnsignedRange(Step).getUnsignedMax());
+}
+
+namespace {
+
+struct ExtendOpTraitsBase {
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+};
+
+// Used to make code generic over signed and unsigned overflow.
+template <typename ExtendOp> struct ExtendOpTraits {
+ // Members present:
+ //
+ // static const SCEV::NoWrapFlags WrapType;
+ //
+ // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
+ //
+ // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ // ICmpInst::Predicate *Pred,
+ // ScalarEvolution *SE);
+};
+
+template <>
+struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getSignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+
+template <>
+struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getUnsignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+}
+
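The traits classes above let one template body drive both extension kinds through a pointer-to-member function; the only moving part is the (SE->*GetExtendExpr)(...) call. A tiny standalone analogue of the dispatch pattern (Engine, sext, and zext are stand-ins, not LLVM API):

#include <iostream>

struct Engine {
  int sext(int v) { return v - 8; }
  int zext(int v) { return v + 8; }
};

struct SExtTraits { static auto fn() { return &Engine::sext; } };
struct ZExtTraits { static auto fn() { return &Engine::zext; } };

template <typename Traits> int extendVia(Engine &E, int v) {
  return (E.*Traits::fn())(v); // same member-pointer dispatch as the patch
}

int main() {
  Engine E;
  std::cout << extendVia<SExtTraits>(E, 10) << ' '
            << extendVia<ZExtTraits>(E, 10) << '\n'; // prints "2 18"
  return 0;
}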
+// The recurrence AR has been shown to have no signed/unsigned wrap or something
+// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
+// easily prove NSW/NUW for its preincrement or postincrement sibling. This
+// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
+// Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
+// expression "Step + sext/zext(PreIncAR)" is congruent with
+// "sext/zext(PostIncAR)"
+template <typename ExtendOpTy>
+static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return nullptr;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
+ if (DiffOps.size() == SA->getNumOperands())
+ return nullptr;
+
+ // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
+ // `Step`:
+
+ // 1. NSW/NUW flags on the step increment.
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
+ // "S+X does not sign/unsign-overflow".
+ //
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
+ !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
+ (SE->*GetExtendExpr)(Step, WideTy));
+ if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ if (PreAR && AR->getNoWrapFlags(WrapType)) {
+ // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
+ // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
+ // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+ }
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return nullptr;
+}
+
+// Get the normalized zero or sign extended expression for this AddRec's Start.
+template <typename ExtendOpTy>
+static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ if (!PreStart)
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
+ (SE->*GetExtendExpr)(PreStart, Ty));
+}
+
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+// {S,+,X} == {S-T,+,X} + T
+// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+// == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+// == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration. Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+ const SCEV *Step,
+ const Loop *L) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+ // We restrict `Start` to a constant to prevent SCEV from spending too much
+ // time here. It is correct (but more expensive) to continue with a
+ // non-constant `Start` and do a general SCEV subtraction to compute
+ // `PreStart` below.
+ //
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+ if (!StartC)
+ return false;
+
+ APInt StartAI = StartC->getValue()->getValue();
+
+ for (unsigned Delta : {-2, -1, 1, 2}) {
+ const SCEV *PreStart = getConstant(StartAI - Delta);
+
+ // Give up if we don't already have the add recurrence we need because
+ // actually constructing an add recurrence is relatively expensive.
+ const SCEVAddRecExpr *PreAR = [&]() {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ return static_cast<SCEVAddRecExpr *>(
+ this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ }();
+
+ if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
+ const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+ DeltaS, &Pred, this);
+ if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
+ return true;
+ }
+ }
+
+ return false;
+}
+
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1201,9 +1474,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNUW))
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1240,9 +1513,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@@ -1255,9 +1528,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1275,9 +1548,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@@ -1290,12 +1563,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -1307,104 +1587,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
-// Get the limit of a recurrence such that incrementing by Step cannot cause
-// signed overflow as long as the value of the recurrence within the loop does
-// not exceed this limit before incrementing.
-static const SCEV *getOverflowLimitForStep(const SCEV *Step,
- ICmpInst::Predicate *Pred,
- ScalarEvolution *SE) {
- unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
- if (SE->isKnownPositive(Step)) {
- *Pred = ICmpInst::ICMP_SLT;
- return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMax());
- }
- if (SE->isKnownNegative(Step)) {
- *Pred = ICmpInst::ICMP_SGT;
- return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMin());
- }
- return nullptr;
-}
-
-// The recurrence AR has been shown to have no signed wrap. Typically, if we can
-// prove NSW for AR, then we can just as easily prove NSW for its preincrement
-// or postincrement sibling. This allows normalizing a sign extended AddRec as
-// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
-// result, the expression "Step + sext(PreIncAR)" is congruent with
-// "sext(PostIncAR)"
-static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const Loop *L = AR->getLoop();
- const SCEV *Start = AR->getStart();
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Check for a simple looking step prior to loop entry.
- const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
- if (!SA)
- return nullptr;
-
- // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
- // subtraction is expensive. For this purpose, perform a quick and dirty
- // difference, by checking for Step in the operand list.
- SmallVector<const SCEV *, 4> DiffOps;
- for (const SCEV *Op : SA->operands())
- if (Op != Step)
- DiffOps.push_back(Op);
-
- if (DiffOps.size() == SA->getNumOperands())
- return nullptr;
-
- // This is a postinc AR. Check for overflow on the preinc recurrence using the
- // same three conditions that getSignExtendedExpr checks.
-
- // 1. NSW flags on the step increment.
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
- const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
- SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
-
- if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
- return PreStart;
-
- // 2. Direct overflow check on the step operation's expression.
- unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
- const SCEV *OperandExtendedStart =
- SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
- SE->getSignExtendExpr(Step, WideTy));
- if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
- // Cache knowledge of PreAR NSW.
- if (PreAR)
- const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
- // FIXME: this optimization needs a unit test
- DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
- return PreStart;
- }
-
- // 3. Loop precondition.
- ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
-
- if (OverflowLimit &&
- SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
- return PreStart;
- }
- return nullptr;
-}
-
-// Get the normalized sign-extended expression for this AddRec's Start.
-static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
- if (!PreStart)
- return SE->getSignExtendExpr(AR->getStart(), Ty);
-
- return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
- SE->getSignExtendExpr(PreStart, Ty));
-}
-
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1483,9 +1665,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1522,9 +1704,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@@ -1533,12 +1715,20 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
if (SAdd == OperandExtendedAdd) {
- // Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // If AR wraps around then
+ //
+ // abs(Step) * MaxBECount > unsigned-max(AR->getType())
+ // => SAdd != OperandExtendedAdd
+ //
+ // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
+ // (SAdd == OperandExtendedAdd => AR is NW)
+
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1547,7 +1737,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ const SCEV *OverflowLimit =
+ getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
(isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
@@ -1555,9 +1746,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
OverflowLimit)))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
// If Start and Step are constants, check if we can apply this
@@ -1576,6 +1767,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -2169,8 +2367,7 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) {
if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
- for (const SCEV *Operand : CurrentNAry->operands())
- Ops.push_back(Operand);
+ Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
}
}
return false;
@@ -2729,6 +2926,56 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return S;
}
+const SCEV *
+ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
+ const SmallVectorImpl<const SCEV *> &IndexExprs,
+ bool InBounds) {
+ // getSCEV(Base)->getType() has the same address space as Base->getType()
+ // because SCEV::getType() preserves the address space.
+ Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
+ // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
+ // instruction to its SCEV, because the Instruction may be guarded by control
+ // flow and the no-overflow bits may not be valid for the expression in any
+ // context.
+ SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+  // The address space is unimportant; the first thing we do with CurTy is
+  // get its element type.
+ Type *CurTy = PointerType::getUnqual(PointeeType);
+ for (const SCEV *IndexExpr : IndexExprs) {
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+ // For a struct, add the member offset.
+ ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
+ unsigned FieldNo = Index->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+
+ // Update CurTy to the type of the field at Index.
+ CurTy = STy->getTypeAtIndex(Index);
+ } else {
+ // Update CurTy to its element type.
+ CurTy = cast<SequentialType>(CurTy)->getElementType();
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
+ // Getelementptr indices are signed.
+ IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseExpr, TotalOffset, Wrap);
+}
+
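The loop above is ordinary pointer arithmetic: a sequential (array) index is
scaled by the element size, while a struct index contributes a constant field
offset. As a standalone sketch (plain C++ over a made-up LP64 layout, not the
SCEV API):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical GEP &p[3].b[2] over struct S { int32_t a; int64_t b[4]; },
      // assuming sizeof(S) == 40 and offsetof(S, b) == 8 on an LP64 target.
      int64_t TotalOffset = 0;
      TotalOffset += 3 * 40; // first index: scaled by sizeof(S)
      TotalOffset += 8;      // struct index: constant offset of field 'b'
      TotalOffset += 2 * 8;  // array index: scaled by the element size
      std::printf("byte offset = %lld\n", (long long)TotalOffset); // 144
      return 0;
    }

getGEPExpr accumulates exactly this sum, except that every term may be
symbolic rather than constant.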
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops;
@@ -2950,39 +3197,23 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL)
- return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
-
- Constant *C = ConstantExpr::getSizeOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- assert(Ty == IntTy && "Effective SCEV type doesn't match");
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(IntTy,
+ F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL) {
- return getConstant(IntTy,
- DL->getStructLayout(STy)->getElementOffset(FieldNo));
- }
-
- Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
-
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(
+ IntTy,
+ F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
+ FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3024,19 +3255,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
-
- // If we have a DataLayout, use it!
- if (DL)
- return DL->getTypeSizeInBits(Ty);
-
- // Integer types have fixed sizes.
- if (Ty->isIntegerTy())
- return Ty->getPrimitiveSizeInBits();
-
- // The only other support type is pointer. Without DataLayout, conservatively
- // assume pointers are 64-bit.
- assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
- return 64;
+ return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3052,12 +3271,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
// The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-
- if (DL)
- return DL->getIntPtrType(Ty);
-
- // Without DataLayout, conservatively assume pointers are 64-bit.
- return Type::getInt64Ty(getContext());
+ return F->getParent()->getDataLayout().getIntPtrType(Ty);
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
@@ -3444,10 +3658,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// If the increment doesn't overflow, then neither the addrec nor
// the post-increment will overflow.
if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
@@ -3455,7 +3671,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
- if (GEP->isInBounds()) {
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
@@ -3521,7 +3737,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
+ if (Value *V =
+ SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3533,52 +3750,16 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!Base->getType()->getPointerElementType()->isSized())
return getUnknown(GEP);
- // Don't blindly transfer the inbounds flag from the GEP instruction to the
- // Add expression, because the Instruction may be guarded by control flow
- // and the no-overflow bits may not be valid for the expression in any
- // context.
- SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
-
- const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
- E = GEP->op_end();
- I != E; ++I) {
- Value *Index = *I;
- // Compute the (potentially symbolic) offset in bytes for this index.
- if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
- // For a struct, add the member offset.
- unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
-
- // Add the field offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, FieldOffset);
- } else {
- // For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
- const SCEV *IndexS = getSCEV(Index);
- // Getelementptr indices are signed.
- IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
-
- // Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
-
- // Add the element offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, LocalOffset);
- }
- }
-
- // Get the SCEV for the GEP base.
- const SCEV *BaseS = getSCEV(Base);
-
- // Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset, Wrap);
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ IndexExprs.push_back(getSCEV(*Index));
+ return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
+ GEP->isInBounds());
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3653,7 +3834,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3688,79 +3870,93 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
return None;
}
-/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+/// getRange - Determine the range for a particular SCEV. If SignHint is
+/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
+/// with a "cleaner" unsigned (resp. signed) representation.
///
ConstantRange
-ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ScalarEvolution::getRange(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
+ DenseMap<const SCEV *, ConstantRange> &Cache =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+ : SignedRanges;
+
// See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
- if (I != UnsignedRanges.end())
+ DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
+ if (I != Cache.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+ return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
- // If the value has known zeros, the maximum unsigned value will have those
- // known zeros as well.
+ // If the value has known zeros, the maximum value will have those known zeros
+ // as well.
uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getMinValue(BitWidth),
- APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ if (TZ != 0) {
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ else
+ ConservativeResult = ConstantRange(
+ APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+ }
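The trailing-zeros refinement clears the low TZ bits of the type's maximum: a
value known to be a multiple of 2^TZ can never exceed that. An 8-bit
illustration (plain C++, TZ chosen arbitrarily):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned TZ = 2; // value known to be a multiple of 4
      // Unsigned case, mirroring getMaxValue(BitWidth).lshr(TZ).shl(TZ):
      uint8_t UMax = (uint8_t)((0xFFu >> TZ) << TZ); // 0xFC == 252
      // Signed case, the same trick applied to the signed maximum 0x7F:
      int8_t SMax = (int8_t)((0x7F >> TZ) << TZ);    // 0x7C == 124
      std::printf("unsigned max %u, signed max %d\n", (unsigned)UMax, SMax);
      return 0;
    }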
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ ConstantRange X = getRange(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getUnsignedRange(Add->getOperand(i)));
- return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ X = X.add(getRange(Add->getOperand(i), SignHint));
+ return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ ConstantRange X = getRange(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
- return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ ConstantRange X = getRange(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getUnsignedRange(SMax->getOperand(i)));
- return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ ConstantRange X = getRange(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getUnsignedRange(UMax->getOperand(i)));
- return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getUnsignedRange(UDiv->getLHS());
- ConstantRange Y = getUnsignedRange(UDiv->getRHS());
- return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ ConstantRange X = getRange(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ return setRange(UDiv, SignHint,
+ ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(ZExt->getOperand());
- return setUnsignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ return setRange(ZExt, SignHint,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(SExt->getOperand());
- return setUnsignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ return setRange(SExt, SignHint,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getUnsignedRange(Trunc->getOperand());
- return setUnsignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ return setRange(Trunc, SignHint,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3773,143 +3969,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
ConservativeResult.intersectWith(
ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
- // TODO: non-affine addrec
- if (AddRec->isAffine()) {
- Type *Ty = AddRec->getType();
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
- MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
-
- const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*this);
-
- ConstantRange StartRange = getUnsignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setUnsignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
- EndRange.getUnsignedMin());
- APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
- EndRange.getUnsignedMax());
- if (Min.isMinValue() && Max.isMaxValue())
- return setUnsignedRange(AddRec, ConservativeResult);
- return setUnsignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
- }
- }
-
- return setUnsignedRange(AddRec, ConservativeResult);
- }
-
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // Check if the IR explicitly contains !range metadata.
- Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
- if (MDRange.hasValue())
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
-
- // For a SCEVUnknown, ask ValueTracking.
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
- if (Ones == ~Zeros + 1)
- return setUnsignedRange(U, ConservativeResult);
- return setUnsignedRange(U,
- ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
- }
-
- return setUnsignedRange(S, ConservativeResult);
-}
-
-/// getSignedRange - Determine the signed range for a particular SCEV.
-///
-ConstantRange
-ScalarEvolution::getSignedRange(const SCEV *S) {
- // See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
- if (I != SignedRanges.end())
- return I->second;
-
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
-
- unsigned BitWidth = getTypeSizeInBits(S->getType());
- ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
-
- // If the value has known zeros, the maximum signed value will have those
- // known zeros as well.
- uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
-
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getSignedRange(Add->getOperand(0));
- for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getSignedRange(Add->getOperand(i)));
- return setSignedRange(Add, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getSignedRange(Mul->getOperand(0));
- for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getSignedRange(Mul->getOperand(i)));
- return setSignedRange(Mul, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getSignedRange(SMax->getOperand(0));
- for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getSignedRange(SMax->getOperand(i)));
- return setSignedRange(SMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getSignedRange(UMax->getOperand(0));
- for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getSignedRange(UMax->getOperand(i)));
- return setSignedRange(UMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getSignedRange(UDiv->getLHS());
- ConstantRange Y = getSignedRange(UDiv->getRHS());
- return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
- }
-
- if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getSignedRange(ZExt->getOperand());
- return setSignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
- }
-
- if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getSignedRange(SExt->getOperand());
- return setSignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
- }
-
- if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getSignedRange(Trunc->getOperand());
- return setSignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
- }
-
- if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
@@ -3935,41 +3994,66 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange ZExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*this);
+ ConstantRange StepSRange = getSignedRange(Step);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+
+ ConstantRange StartURange = getUnsignedRange(Start);
+ ConstantRange EndURange =
+ StartURange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for unsigned overflow.
+ ConstantRange ZExtStartURange =
+ StartURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ ZExtEndURange) {
+ APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
+ EndURange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
+ EndURange.getUnsignedMax());
+ bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
- ConstantRange StartRange = getSignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setSignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::smin(StartRange.getSignedMin(),
- EndRange.getSignedMin());
- APInt Max = APIntOps::smax(StartRange.getSignedMax(),
- EndRange.getSignedMax());
- if (Min.isMinSignedValue() && Max.isMaxSignedValue())
- return setSignedRange(AddRec, ConservativeResult);
- return setSignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ ConstantRange StartSRange = getSignedRange(Start);
+ ConstantRange EndSRange =
+ StartSRange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for signed overflow. This must be done with ConstantRange
+ // arithmetic because we could be called from within the ScalarEvolution
+ // overflow checking code.
+ ConstantRange SExtStartSRange =
+ StartSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ SExtEndSRange) {
+ APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
+ EndSRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
+ EndSRange.getSignedMax());
+ bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
}
}
- return setSignedRange(AddRec, ConservativeResult);
+ return setRange(AddRec, SignHint, ConservativeResult);
}
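The "check for overflow" comments describe recomputing Start + MaxBECount *
Step in a type wide enough (2 * BitWidth + 1 bits) that it cannot wrap, then
comparing against the narrow result. A minimal sketch of that test with 8-bit
values and a 32-bit stand-in for the wide type (plain C++, arbitrary numbers):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // {Start,+,Step} taken for at most MaxBECount iterations, all 8-bit.
      uint8_t Start = 200, Step = 10, MaxBECount = 7;

      // End value as 8-bit arithmetic computes it (may wrap) ...
      uint8_t End = (uint8_t)(Start + MaxBECount * Step); // 270 wraps to 14
      // ... and in a type where it cannot wrap.
      uint32_t WideEnd = (uint32_t)Start + (uint32_t)MaxBECount * Step; // 270

      if ((uint32_t)End != WideEnd)
        std::puts("wrapped: fall back to the conservative range");
      else
        std::printf("endpoints %u and %u are trustworthy\n",
                    (unsigned)Start, (unsigned)End);
      return 0;
    }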
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -3978,18 +4062,31 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
if (MDRange.hasValue())
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
- // For a SCEVUnknown, ask ValueTracking.
- if (!U->getValue()->getType()->isIntegerTy() && !DL)
- return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
- if (NS <= 1)
- return setSignedRange(U, ConservativeResult);
- return setSignedRange(U, ConservativeResult.intersectWith(
- ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ // Split here to avoid paying the compile-time cost of calling both
+ // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
+ // if needed.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ if (Ones != ~Zeros + 1)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ } else {
+ assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
+ "generalize as needed!");
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+ if (NS > 1)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ }
+
+ return setRange(U, SignHint, ConservativeResult);
}
- return setSignedRange(S, ConservativeResult);
+ return setRange(S, SignHint, ConservativeResult);
}
/// createSCEV - We know that there is no SCEV for the specified value.
@@ -4088,8 +4185,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
- nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4280,9 +4377,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
// a >s b ? b+x : a+x -> smin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4303,9 +4401,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
// a >u b ? b+x : a+x -> umin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4320,11 +4419,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4335,11 +4434,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, One);
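All of these ICmp cases rest on one identity: when both select arms add the
same offset, the select is a max or min plus that offset. A quick check of
the smax form over a few values (plain C++):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int A : {-3, 0, 7})
        for (int B : {-5, 0, 9})
          for (int X : {-2, 4}) {
            int Select = (A > B) ? A + X : B + X;
            assert(Select == std::max(A, B) + X && "select folds to smax + x");
          }
      return 0;
    }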
@@ -5238,12 +5337,9 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
- if (L->getHeader() == I->getParent())
- return true;
- else
- // We don't currently keep track of the control flow needed to evaluate
- // PHIs, so we cannot handle PHIs inside of loops.
- return false;
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return L->getHeader() == I->getParent();
}
// If we won't be able to constant fold this expression even if the operands
@@ -5314,7 +5410,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -5403,6 +5499,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
@@ -5410,8 +5507,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
- TLI);
+ Constant *NextPHI =
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -5487,12 +5584,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// Okay, we find a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
-
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
- ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
- DL, TLI));
+ ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
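ComputeExitCountExhaustively is a bounded interpreter: it starts from the
PHIs' constant initial values and re-folds the loop body once per iteration
until the exit condition reaches the sought value or the budget runs out. A
toy single-PHI version (plain C++; evolve and exitCond are hypothetical
stand-ins for the folded loop body and exit test):

    #include <cstdio>
    #include <optional>

    static int evolve(int I) { return I + 3; }     // i = i + 3
    static bool exitCond(int I) { return I > 20; } // exit when i > 20

    static std::optional<unsigned> exitCountExhaustively(unsigned MaxIter) {
      int PhiVal = 0; // constant start value of the PHI
      for (unsigned Iter = 0; Iter != MaxIter; ++Iter) {
        if (exitCond(PhiVal))
          return Iter;     // condition reached its exit value
        PhiVal = evolve(PhiVal);
      }
      return std::nullopt; // analysis limit hit: CouldNotCompute
    }

    int main() {
      if (auto Count = exitCountExhaustively(100))
        std::printf("exits after %u iterations\n", *Count); // 7 (i == 21)
      return 0;
    }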
@@ -5623,7 +5719,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
- C = ConstantExpr::getGetElementPtr(C, C2);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
} else
C = ConstantExpr::getAdd(C, C2);
}
@@ -5725,16 +5821,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], DL,
- TLI);
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], DL, TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
} else
- C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, DL, TLI);
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
+ DL, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6016,7 +6112,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
R1->getValue(),
R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// We can only use this value if the chrec ends up with an exact zero
@@ -6631,6 +6727,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
}
+ struct ClearWalkingBEDominatingCondsOnExit {
+ ScalarEvolution &SE;
+
+ explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
+      : SE(SE) {}
+
+ ~ClearWalkingBEDominatingCondsOnExit() {
+ SE.WalkingBEDominatingConds = false;
+ }
+ };
+
+ // We don't want more than one activation of the following loop on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ WalkingBEDominatingConds = true;
+ ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
+
+ // If the loop is not reachable from the entry block, we risk running into an
+ // infinite loop as we walk up into the dom tree. These loops do not matter
+ // anyway, so we just return a conservative answer when we see them.
+ if (!DT->isReachableFromEntry(L->getHeader()))
+ return false;
+
+ for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
+ DTN != HeaderDTN;
+ DTN = DTN->getIDom()) {
+
+ assert(DTN && "should reach the loop header before reaching the root!");
+
+ BasicBlock *BB = DTN->getBlock();
+ BasicBlock *PBB = BB->getSinglePredecessor();
+ if (!PBB)
+ continue;
+
+ BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!ContinuePredicate || !ContinuePredicate->isConditional())
+ continue;
+
+ Value *Condition = ContinuePredicate->getCondition();
+
+ // If we have an edge `E` within the loop body that dominates the only
+ // latch, the condition guarding `E` also guards the backedge. This
+ // reasoning works only for loops with a single latch.
+
+ BasicBlockEdge DominatingEdge(PBB, BB);
+ if (DominatingEdge.isSingleEdge()) {
+ // We're constructively (and conservatively) enumerating edges within the
+ // loop body that dominate the latch. The dominator tree better agree
+ // with us on this:
+ assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+
+ if (isImpliedCond(Pred, LHS, RHS, Condition,
+ BB != ContinuePredicate->getSuccessor(0)))
+ return true;
+ }
+ }
+
return false;
}
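ClearWalkingBEDominatingCondsOnExit is the usual RAII re-entrancy guard: set
the flag on entry and let a destructor reset it on every path out of the
function, early returns included. A stripped-down sketch of the pattern
(plain C++, hypothetical names):

    #include <cstdio>

    struct Analysis {
      bool WalkingConds = false;

      bool walkConditions(int Depth) {
        if (WalkingConds) // refuse to re-enter; nesting could get expensive
          return false;
        WalkingConds = true;
        struct ClearOnExit {
          Analysis &A;
          explicit ClearOnExit(Analysis &A) : A(A) {}
          ~ClearOnExit() { A.WalkingConds = false; }
        } Clear(*this);

        if (Depth > 3) // any early return still resets the flag
          return false;
        return walkConditions(Depth + 1); // recursion hits the guard above
      }
    };

    int main() {
      Analysis A;
      std::printf("result %d, flag %d\n", A.walkConditions(0), A.WalkingConds);
      return 0;
    }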
@@ -6726,15 +6881,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
- // Bail if the ICmp's operands' types are wider than the needed type
- // before attempting to call getSCEV on them. This avoids infinite
- // recursion, since the analysis of widening casts can require loop
- // exit condition information for overflow checking, which would
- // lead back here.
- if (getTypeSizeInBits(LHS->getType()) <
- getTypeSizeInBits(ICI->getOperand(0)->getType()))
- return false;
-
// Now that we've found a conditional branch that dominates the loop or
// controls the loop latch, check to see if it is the comparison we are
// looking for.
ICmpInst::Predicate FoundPred;
@@ -6746,9 +6892,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- // Balance the types. The case where FoundLHS' type is wider than
- // LHS' type is checked for above.
- if (getTypeSizeInBits(LHS->getType()) >
+ // Balance the types.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+ RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+ } else {
+ LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+ RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+ }
+ } else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
@@ -6874,6 +7028,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
+ if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -7011,8 +7168,49 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-// Verify if an linear IV with positive stride can overflow when in a
-// less-than comparison, knowing the invariant term of the comparison, the
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to prove cases such as "X `sgt` 0 => X - 1 `sgt` -1".
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+    // The restriction on `FoundRHS` can be lifted easily -- it exists only to
+ // reduce the compile time impact of this optimization.
+ return false;
+
+ const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+ if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+ !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ return false;
+
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+
+ // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+ // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+ ConstantRange FoundLHSRange =
+ ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+ // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+ // for `LHS`:
+ APInt Addend =
+ cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+ // We can also compute the range of values for `LHS` that satisfy the
+ // consequent, "`LHS` `Pred` `RHS`":
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+ ConstantRange SatisfyingLHSRange =
+ ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+ // The antecedent implies the consequent if every value of `LHS` that
+ // satisfies the antecedent also satisfies the consequent.
+ return SatisfyingLHSRange.contains(LHSRange);
+}
+
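Replaying the range argument with concrete intervals shows the proof
obligation. A hand-worked sketch for "X `sgt` 0 => X - 1 `sgt` -1" over
int8_t (plain C++; intervals kept as [lo, hi] pairs instead of
ConstantRanges):

    #include <cassert>

    int main() {
      // Antecedent X sgt 0: the allowed region for X is [1, 127].
      int XLo = 1, XHi = 127;
      // LHS is X + (-1); shifting [1, 127] by -1 cannot wrap, giving [0, 126].
      int LHSLo = XLo - 1, LHSHi = XHi - 1;
      // Consequent LHS sgt -1: the satisfying region is [0, 127].
      int SatLo = 0, SatHi = 127;
      // The implication holds iff every possible LHS value satisfies it.
      assert(SatLo <= LHSLo && LHSHi <= SatHi && "X > 0 implies X - 1 > -1");
      return 0;
    }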
+// Verify if a linear IV with positive stride can overflow when in a
+// less-than comparison, knowing the invariant term of the comparison, the
// stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
@@ -7040,7 +7238,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
}
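The .ult() test above encodes "the largest IV value that can still take the
backedge, plus one more stride, must fit in the type". Worked through with
8-bit unsigned numbers (plain C++, arbitrary bounds):

    #include <cassert>

    int main() {
      // i < RHS with i += Stride, 8-bit unsigned, no NUW known.
      unsigned MaxValue = 0xFF, MaxStride = 10, MaxRHS = 250;
      // The IV is at most MaxRHS - 1 == 249 when the backedge is taken, so
      // the next value can reach 259, which does not fit in 8 bits.
      bool MayOverflow = MaxValue - (MaxStride - 1) < MaxRHS; // 246 < 250
      assert(MayOverflow && "must give up on this backedge-taken count");
      return 0;
    }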
-// Verify if an linear IV with negative stride can overflow when in a
+// Verify if a linear IV with negative stride can overflow when in a
// greater-than comparison, knowing the invariant term of the comparison,
// the stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -7071,7 +7269,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
// Compute the backedge taken count knowing the interval difference, the
// stride and presence of the equality in the comparison.
-const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
const SCEV *One = getConstant(Step->getType(), 1);
Delta = Equality ? getAddExpr(Delta, Step)
@@ -7111,7 +7309,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+  // exploit NoWrapFlags, allowing optimization in the presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7191,7 +7389,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+  // exploit NoWrapFlags, allowing optimization in the presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7239,7 +7437,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
else
- MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
@@ -7339,7 +7537,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
R1->getValue(), R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// Make sure the root is not off by one. The returned iteration should
@@ -7858,18 +8056,16 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
- BlockDispositions(64), FirstUnknown(nullptr) {
+ : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
+ LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- LI = &getAnalysis<LoopInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
}
@@ -7892,6 +8088,7 @@ void ScalarEvolution::releaseMemory() {
}
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
@@ -7907,9 +8104,9 @@ void ScalarEvolution::releaseMemory() {
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -7969,6 +8166,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
OS << " --> ";
const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
+ if (!isa<SCEVCouldNotCompute>(SV)) {
+ OS << " U: ";
+ SE.getUnsignedRange(SV).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(SV).print(OS);
+ }
const Loop *L = LI->getLoopFor((*I).getParent());
@@ -7976,6 +8179,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
+ if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ OS << " U: ";
+ SE.getUnsignedRange(AtUse).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(AtUse).print(OS);
+ }
}
if (L) {
@@ -8000,17 +8209,17 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == L)
- return Values[u].second;
+ auto &Values = LoopDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == L)
+ return V.getInt();
}
- Values.push_back(std::make_pair(L, LoopVariant));
+ Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == L) {
- Values2[u - 1].second = D;
+ auto &Values2 = LoopDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == L) {
+ V.setInt(D);
break;
}
}
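The new cache entries evidently pack the 2-bit disposition into the unused
alignment bits of the pointer (hence getPointer/getInt/setInt), halving the
entry size relative to std::pair. A freestanding sketch of the packing trick
(plain C++; assumes a pointee aligned to at least 4 bytes, which
llvm::PointerIntPair normally derives from traits):

    #include <cassert>
    #include <cstdint>

    enum Disposition : uintptr_t { Variant = 0, Invariant = 1, Computable = 2 };

    struct PackedEntry {
      uintptr_t Bits;
      PackedEntry(const int *P, Disposition D)
          : Bits(reinterpret_cast<uintptr_t>(P) | D) {
        assert((reinterpret_cast<uintptr_t>(P) & 3) == 0 && "need alignment");
      }
      const int *getPointer() const {
        return reinterpret_cast<const int *>(Bits & ~uintptr_t(3));
      }
      Disposition getInt() const { return Disposition(Bits & 3); }
      void setInt(Disposition D) { Bits = (Bits & ~uintptr_t(3)) | D; }
    };

    int main() {
      alignas(4) static const int Loop = 42; // stand-in for a Loop *
      PackedEntry E(&Loop, Variant);
      E.setInt(Computable);
      assert(E.getPointer() == &Loop && E.getInt() == Computable);
      return 0;
    }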
@@ -8106,17 +8315,17 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == BB)
- return Values[u].second;
+ auto &Values = BlockDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == BB)
+ return V.getInt();
}
- Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
+ Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == BB) {
- Values2[u - 1].second = D;
+ auto &Values2 = BlockDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == BB) {
+ V.setInt(D);
break;
}
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 5c339ee..ccec0a8 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -79,7 +80,7 @@ ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
SE = &getAnalysis<ScalarEvolution>();
return false;
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 7e9e351..0bd427b 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -23,7 +23,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -204,11 +206,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV *&S,
- const SCEV *&Remainder,
- const SCEV *Factor,
- ScalarEvolution &SE,
- const DataLayout *DL) {
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
// Everything is divisible by one.
if (Factor->isOne())
return true;
@@ -248,35 +248,17 @@ static bool FactorOutConstant(const SCEV *&S,
// In a Mul, check if there is a constant operand which is a multiple
// of the given factor.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
- if (DL) {
- // With DataLayout, the size is known. Check if there is a constant
- // operand which is a multiple of the given factor. If so, we can
- // factor it.
- const SCEVConstant *FC = cast<SCEVConstant>(Factor);
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[0] =
- SE.getConstant(C->getValue()->getValue().sdiv(
- FC->getValue()->getValue()));
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
- } else {
- // Without DataLayout, check if Factor can be factored out of any of the
- // Mul's operands. If so, we can just remove it.
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV *SOp = M->getOperand(i);
- const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
- if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
- Remainder->isZero()) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[i] = SOp;
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
+    // The size is known; check if there is a constant operand which is a multiple
+ // of the given factor. If so, we can factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] = SE.getConstant(
+ C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
}
- }
}
// In an AddRec, check if both start and step are divisible.
@@ -393,7 +375,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
PointerType *PTy,
Type *Ty,
Value *V) {
- Type *ElTy = PTy->getElementType();
+ Type *OriginalElTy = PTy->getElementType();
+ Type *ElTy = OriginalElTy;
SmallVector<Value *, 4> GepIndices;
SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
@@ -402,9 +385,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
- Type *IntPtrTy = SE.DL
- ? SE.DL->getIntPtrType(PTy)
- : Type::getInt64Ty(PTy->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(PTy);
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
@@ -422,7 +403,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
const SCEV *Op = Ops[i];
const SCEV *Remainder = SE.getConstant(Ty, 0);
- if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
// Op now has ElSize factored out.
ScaledOps.push_back(Op);
if (!Remainder->isZero())
@@ -456,43 +437,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
bool FoundFieldNo = false;
// An empty struct has no fields.
if (STy->getNumElements() == 0) break;
- if (SE.DL) {
- // With DataLayout, field offsets are known. See if a constant offset
- // falls within any of the struct fields.
- if (Ops.empty()) break;
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
- if (SE.getTypeSizeInBits(C->getType()) <= 64) {
- const StructLayout &SL = *SE.DL->getStructLayout(STy);
- uint64_t FullOffset = C->getValue()->getZExtValue();
- if (FullOffset < SL.getSizeInBytes()) {
- unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(
- ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
- ElTy = STy->getTypeAtIndex(ElIdx);
- Ops[0] =
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- }
- }
- } else {
- // Without DataLayout, just check for an offsetof expression of the
- // appropriate struct type.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
- Type *CTy;
- Constant *FieldNo;
- if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
- GepIndices.push_back(FieldNo);
- ElTy =
- STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
- Ops[i] = SE.getConstant(Ty, 0);
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- break;
- }
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
}
- }
+ }
// If no struct field offsets were found, tentatively assume that
// field zero was selected (since the zero offset would obviously
// be folded away).
@@ -526,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
unsigned ScanLimit = 6;
@@ -561,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
rememberInstruction(GEP);
return GEP;
@@ -597,7 +561,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(Casted,
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
GepIndices,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
@@ -1063,6 +1027,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
return false;
}
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
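IsIncrementNSW and IsIncrementNUW above use the standard widening argument:
an add does not wrap exactly when extend-then-add agrees with add-then-extend
in a double-width type. The same check for plain 8-bit integers (a sketch,
not the SCEV form; the narrowing cast relies on two's-complement wrap):

    #include <cstdint>
    #include <cstdio>

    static bool addIsNSW(int8_t A, int8_t B) {
      int16_t OpAfterExtend = (int16_t)A + (int16_t)B;  // extend, then add
      int16_t ExtendAfterOp = (int16_t)(int8_t)(A + B); // add, then extend
      return OpAfterExtend == ExtendAfterOp;
    }

    int main() {
      std::printf("100 + 50: %s\n", addIsNSW(100, 50) ? "nsw" : "wraps");
      std::printf("100 - 20: %s\n", addIsNSW(100, -20) ? "nsw" : "wraps");
      return 0;
    }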
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
@@ -1188,6 +1180,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
// Create the PHI.
BasicBlock *Header = L->getHeader();
Builder.SetInsertPoint(Header, Header->begin());
@@ -1213,10 +1211,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
if (isa<OverflowingBinaryOperator>(IncV)) {
- if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+ if (IncrementIsNUW)
cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
- if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+ if (IncrementIsNSW)
cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
}
PN->addIncoming(IncV, Pred);
@@ -1711,7 +1710,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
+ if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
@@ -1806,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ if (!Processed.insert(S).second)
+ return false;
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two and the SCEV type fits in a native
+    // integer, consider the division cheap irrespective of whether it occurs in
+ // the user code since it can be lowered into a right shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+ if (SC->getValue()->getValue().isPowerOf2()) {
+ const DataLayout &DL =
+ L->getHeader()->getParent()->getParent()->getDataLayout();
+ unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+ return DL.isIllegalInteger(Width);
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+    // simple searching, assume the former and consider UDivExpr expensive to
+ // compute.
+ BasicBlock *ExitingBB = L->getExitingBlock();
+ if (!ExitingBB)
+ return true;
+
+ BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!ExitingBI || !ExitingBI->isConditional())
+ return true;
+
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+ if (!OrigCond)
+ return true;
+
+ const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+ RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+ if (RHS != S) {
+ const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+ LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+ if (LHS != S)
+ return true;
+ }
+ }
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+  // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansionHelper(*I, L, Processed))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
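
The power-of-two branch above is cheap for a concrete reason: such a udiv lowers to a single logical shift, so expansion only hurts when DL.isIllegalInteger(Width) reports that the width is not native and the shift itself must be legalized into several instructions. A trivial illustration:

```cpp
#include <cstdint>

// After lowering, an unsigned divide by a power of two is one shift:
static uint64_t divBy16(uint64_t X) { return X >> 4; } // same as X / 16
```
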
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index c6ea3af..02f8b0b 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -80,7 +80,7 @@ public:
initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -119,6 +119,11 @@ ImmutablePass *llvm::createScopedNoAliasAAPass() {
return new ScopedNoAliasAA();
}
+bool ScopedNoAliasAA::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index c0abdbd..635c50c 100644
--- a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -11,350 +11,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
-// Register the default implementation.
-INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
- "Target Library Information", false, true)
-char TargetLibraryInfo::ID = 0;
-
-void TargetLibraryInfo::anchor() { }
-
-const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
- {
- "_IO_getc",
- "_IO_putc",
- "_ZdaPv",
- "_ZdaPvRKSt9nothrow_t",
- "_ZdaPvj",
- "_ZdaPvm",
- "_ZdlPv",
- "_ZdlPvRKSt9nothrow_t",
- "_ZdlPvj",
- "_ZdlPvm",
- "_Znaj",
- "_ZnajRKSt9nothrow_t",
- "_Znam",
- "_ZnamRKSt9nothrow_t",
- "_Znwj",
- "_ZnwjRKSt9nothrow_t",
- "_Znwm",
- "_ZnwmRKSt9nothrow_t",
- "__cospi",
- "__cospif",
- "__cxa_atexit",
- "__cxa_guard_abort",
- "__cxa_guard_acquire",
- "__cxa_guard_release",
- "__isoc99_scanf",
- "__isoc99_sscanf",
- "__memcpy_chk",
- "__memmove_chk",
- "__memset_chk",
- "__sincospi_stret",
- "__sincospif_stret",
- "__sinpi",
- "__sinpif",
- "__sqrt_finite",
- "__sqrtf_finite",
- "__sqrtl_finite",
- "__stpcpy_chk",
- "__stpncpy_chk",
- "__strcpy_chk",
- "__strdup",
- "__strncpy_chk",
- "__strndup",
- "__strtok_r",
- "abs",
- "access",
- "acos",
- "acosf",
- "acosh",
- "acoshf",
- "acoshl",
- "acosl",
- "asin",
- "asinf",
- "asinh",
- "asinhf",
- "asinhl",
- "asinl",
- "atan",
- "atan2",
- "atan2f",
- "atan2l",
- "atanf",
- "atanh",
- "atanhf",
- "atanhl",
- "atanl",
- "atof",
- "atoi",
- "atol",
- "atoll",
- "bcmp",
- "bcopy",
- "bzero",
- "calloc",
- "cbrt",
- "cbrtf",
- "cbrtl",
- "ceil",
- "ceilf",
- "ceill",
- "chmod",
- "chown",
- "clearerr",
- "closedir",
- "copysign",
- "copysignf",
- "copysignl",
- "cos",
- "cosf",
- "cosh",
- "coshf",
- "coshl",
- "cosl",
- "ctermid",
- "exp",
- "exp10",
- "exp10f",
- "exp10l",
- "exp2",
- "exp2f",
- "exp2l",
- "expf",
- "expl",
- "expm1",
- "expm1f",
- "expm1l",
- "fabs",
- "fabsf",
- "fabsl",
- "fclose",
- "fdopen",
- "feof",
- "ferror",
- "fflush",
- "ffs",
- "ffsl",
- "ffsll",
- "fgetc",
- "fgetpos",
- "fgets",
- "fileno",
- "fiprintf",
- "flockfile",
- "floor",
- "floorf",
- "floorl",
- "fmax",
- "fmaxf",
- "fmaxl",
- "fmin",
- "fminf",
- "fminl",
- "fmod",
- "fmodf",
- "fmodl",
- "fopen",
- "fopen64",
- "fprintf",
- "fputc",
- "fputs",
- "fread",
- "free",
- "frexp",
- "frexpf",
- "frexpl",
- "fscanf",
- "fseek",
- "fseeko",
- "fseeko64",
- "fsetpos",
- "fstat",
- "fstat64",
- "fstatvfs",
- "fstatvfs64",
- "ftell",
- "ftello",
- "ftello64",
- "ftrylockfile",
- "funlockfile",
- "fwrite",
- "getc",
- "getc_unlocked",
- "getchar",
- "getenv",
- "getitimer",
- "getlogin_r",
- "getpwnam",
- "gets",
- "gettimeofday",
- "htonl",
- "htons",
- "iprintf",
- "isascii",
- "isdigit",
- "labs",
- "lchown",
- "ldexp",
- "ldexpf",
- "ldexpl",
- "llabs",
- "log",
- "log10",
- "log10f",
- "log10l",
- "log1p",
- "log1pf",
- "log1pl",
- "log2",
- "log2f",
- "log2l",
- "logb",
- "logbf",
- "logbl",
- "logf",
- "logl",
- "lstat",
- "lstat64",
- "malloc",
- "memalign",
- "memccpy",
- "memchr",
- "memcmp",
- "memcpy",
- "memmove",
- "memrchr",
- "memset",
- "memset_pattern16",
- "mkdir",
- "mktime",
- "modf",
- "modff",
- "modfl",
- "nearbyint",
- "nearbyintf",
- "nearbyintl",
- "ntohl",
- "ntohs",
- "open",
- "open64",
- "opendir",
- "pclose",
- "perror",
- "popen",
- "posix_memalign",
- "pow",
- "powf",
- "powl",
- "pread",
- "printf",
- "putc",
- "putchar",
- "puts",
- "pwrite",
- "qsort",
- "read",
- "readlink",
- "realloc",
- "reallocf",
- "realpath",
- "remove",
- "rename",
- "rewind",
- "rint",
- "rintf",
- "rintl",
- "rmdir",
- "round",
- "roundf",
- "roundl",
- "scanf",
- "setbuf",
- "setitimer",
- "setvbuf",
- "sin",
- "sinf",
- "sinh",
- "sinhf",
- "sinhl",
- "sinl",
- "siprintf",
- "snprintf",
- "sprintf",
- "sqrt",
- "sqrtf",
- "sqrtl",
- "sscanf",
- "stat",
- "stat64",
- "statvfs",
- "statvfs64",
- "stpcpy",
- "stpncpy",
- "strcasecmp",
- "strcat",
- "strchr",
- "strcmp",
- "strcoll",
- "strcpy",
- "strcspn",
- "strdup",
- "strlen",
- "strncasecmp",
- "strncat",
- "strncmp",
- "strncpy",
- "strndup",
- "strnlen",
- "strpbrk",
- "strrchr",
- "strspn",
- "strstr",
- "strtod",
- "strtof",
- "strtok",
- "strtok_r",
- "strtol",
- "strtold",
- "strtoll",
- "strtoul",
- "strtoull",
- "strxfrm",
- "system",
- "tan",
- "tanf",
- "tanh",
- "tanhf",
- "tanhl",
- "tanl",
- "times",
- "tmpfile",
- "tmpfile64",
- "toascii",
- "trunc",
- "truncf",
- "truncl",
- "uname",
- "ungetc",
- "unlink",
- "unsetenv",
- "utime",
- "utimes",
- "valloc",
- "vfprintf",
- "vfscanf",
- "vprintf",
- "vscanf",
- "vsnprintf",
- "vsprintf",
- "vsscanf",
- "write"
- };
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+ "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+ cl::init(TargetLibraryInfoImpl::NoLibrary),
+ cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+ "No vector functions library"),
+ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+ "Accelerate framework"),
+ clEnumValEnd));
+
+const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+#define TLI_DEFINE_STRING
+#include "llvm/Analysis/TargetLibraryInfo.def"
+};
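
The name table is now generated with the X-macro idiom: TargetLibraryInfo.def lists every libcall once, and each including site defines TLI_DEFINE_STRING (or a sibling macro) to extract the field it needs, so the enum and the string table cannot drift apart. A self-contained sketch of the pattern, with hypothetical names rather than the real .def contents:

```cpp
// funcs.def holds one HANDLE_LIBFUNC(...) line per function, e.g.:
//   HANDLE_LIBFUNC(sinf)
//   HANDLE_LIBFUNC(sqrtf)

// First inclusion builds the enum...
enum Func {
#define HANDLE_LIBFUNC(name) name##_id,
#include "funcs.def"
#undef HANDLE_LIBFUNC
  NumFuncs
};

// ...and a second inclusion builds the parallel name table.
static const char *const Names[NumFuncs] = {
#define HANDLE_LIBFUNC(name) #name,
#include "funcs.def"
#undef HANDLE_LIBFUNC
};
```
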
static bool hasSinCosPiStret(const Triple &T) {
// Only Darwin variants have _stret versions of combined trig functions.
@@ -377,15 +51,13 @@ static bool hasSinCosPiStret(const Triple &T) {
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
-static void initialize(TargetLibraryInfo &TLI, const Triple &T,
- const char **StandardNames) {
- initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
-
+static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
+ const char *const *StandardNames) {
#ifndef NDEBUG
// Verify that the StandardNames array is in alphabetical order.
for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
- llvm_unreachable("TargetLibraryInfo function names must be sorted");
+ llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
}
#endif // !NDEBUG
@@ -683,63 +355,76 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::statvfs64);
TLI.setUnavailable(LibFunc::tmpfile64);
}
-}
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
+}
-TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
initialize(*this, Triple(), StandardNames);
}
-TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
// Default to everything being available.
memset(AvailableArray, -1, sizeof(AvailableArray));
-
+
initialize(*this, T, StandardNames);
}
-TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
- : ImmutablePass(ID) {
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
+ : CustomNames(TLI.CustomNames) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
- CustomNames = TLI.CustomNames;
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
}
-namespace {
-struct StringComparator {
- /// Compare two strings and return true if LHS is lexicographically less than
- /// RHS. Requires that RHS doesn't contain any zero bytes.
- bool operator()(const char *LHS, StringRef RHS) const {
- // Compare prefixes with strncmp. If prefixes match we know that LHS is
- // greater or equal to RHS as RHS can't contain any '\0'.
- return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
- }
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
+ : CustomNames(std::move(TLI.CustomNames)) {
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
+}
- // Provided for compatibility with MSVC's debug mode.
- bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
- bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
- bool operator()(const char *LHS, const char *RHS) const {
- return std::strcmp(LHS, RHS) < 0;
- }
-};
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
+  CustomNames = TLI.CustomNames;
+  memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+  VectorDescs = TLI.VectorDescs;
+  ScalarDescs = TLI.ScalarDescs;
+  return *this;
}
-bool TargetLibraryInfo::getLibFunc(StringRef funcName,
- LibFunc::Func &F) const {
- const char **Start = &StandardNames[0];
- const char **End = &StandardNames[LibFunc::NumLibFuncs];
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
+  CustomNames = std::move(TLI.CustomNames);
+  std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+            AvailableArray);
+  VectorDescs = std::move(TLI.VectorDescs);
+  ScalarDescs = std::move(TLI.ScalarDescs);
+  return *this;
+}
+static StringRef sanitizeFunctionName(StringRef funcName) {
  // Filter out empty names and names containing null bytes; those can't be in
// our table.
if (funcName.empty() || funcName.find('\0') != StringRef::npos)
- return false;
+ return StringRef();
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
- if (funcName.front() == '\01')
- funcName = funcName.substr(1);
- const char **I = std::lower_bound(Start, End, funcName, StringComparator());
+ return GlobalValue::getRealLinkageName(funcName);
+}
+
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char *const *Start = &StandardNames[0];
+ const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ const char *const *I = std::lower_bound(
+ Start, End, funcName, [](const char *LHS, StringRef RHS) {
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ });
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
return true;
@@ -747,8 +432,172 @@ bool TargetLibraryInfo::getLibFunc(StringRef funcName,
return false;
}
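
getLibFunc binary-searches the sorted table; the strncmp lambda is a valid ordering because the table entries contain no NUL bytes before their terminators, and sanitizeFunctionName has already rejected queries with embedded NULs. The same lookup in isolation, over a toy table of our own:

```cpp
#include <algorithm>
#include <cstring>
#include <string>

static const char *const Table[] = {"cos", "exp", "sin", "sqrt"}; // sorted

// Index of Name in Table, or -1; mirrors the getLibFunc pattern.
static int lookup(const std::string &Name) {
  const char *const *End = Table + 4;
  const char *const *I = std::lower_bound(
      Table, End, Name, [](const char *LHS, const std::string &RHS) {
        // Prefix comparison suffices: a shorter LHS ends in '\0', which
        // compares below any continuation of RHS.
        return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
      });
  return (I != End && *I == Name) ? int(I - Table) : -1;
}
```
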
-/// disableAllFunctions - This disables all builtins, which is used for options
-/// like -fno-builtin.
-void TargetLibraryInfo::disableAllFunctions() {
+void TargetLibraryInfoImpl::disableAllFunctions() {
memset(AvailableArray, 0, sizeof(AvailableArray));
}
+
+static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
+ std::strlen(RHS.ScalarFnName)) < 0;
+}
+
+static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
+ std::strlen(RHS.VectorFnName)) < 0;
+}
+
+static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+}
+
+static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
+ VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
+ std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+
+ ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
+ std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+ enum VectorLibrary VecLib) {
+ switch (VecLib) {
+ case Accelerate: {
+ const VecDesc VecFuncs[] = {
+ // Floating-Point Arithmetic and Auxiliary Functions
+ {"ceilf", "vceilf", 4},
+ {"fabsf", "vfabsf", 4},
+ {"llvm.fabs.f32", "vfabsf", 4},
+ {"floorf", "vfloorf", 4},
+ {"sqrtf", "vsqrtf", 4},
+ {"llvm.sqrt.f32", "vsqrtf", 4},
+
+ // Exponential and Logarithmic Functions
+ {"expf", "vexpf", 4},
+ {"llvm.exp.f32", "vexpf", 4},
+ {"expm1f", "vexpm1f", 4},
+ {"logf", "vlogf", 4},
+ {"llvm.log.f32", "vlogf", 4},
+ {"log1pf", "vlog1pf", 4},
+ {"log10f", "vlog10f", 4},
+ {"llvm.log10.f32", "vlog10f", 4},
+ {"logbf", "vlogbf", 4},
+
+ // Trigonometric Functions
+ {"sinf", "vsinf", 4},
+ {"llvm.sin.f32", "vsinf", 4},
+ {"cosf", "vcosf", 4},
+ {"llvm.cos.f32", "vcosf", 4},
+ {"tanf", "vtanf", 4},
+ {"asinf", "vasinf", 4},
+ {"acosf", "vacosf", 4},
+ {"atanf", "vatanf", 4},
+
+ // Hyperbolic Functions
+ {"sinhf", "vsinhf", 4},
+ {"coshf", "vcoshf", 4},
+ {"tanhf", "vtanhf", 4},
+ {"asinhf", "vasinhf", 4},
+ {"acoshf", "vacoshf", 4},
+ {"atanhf", "vatanhf", 4},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case NoLibrary:
+ break;
+ }
+}
+
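
A vectorizer consults these tables through the query functions defined next. Under an Accelerate-configured impl the calls behave roughly as follows (a usage sketch):

```cpp
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

static void querySketch() {
  TargetLibraryInfoImpl TLII;
  TLII.addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);

  bool CanVec = TLII.isFunctionVectorizable("sinf");     // true
  StringRef V4 = TLII.getVectorizedFunction("sinf", 4);  // "vsinf"
  StringRef V8 = TLII.getVectorizedFunction("sinf", 8);  // "" (no VF=8 entry)
  (void)CanVec; (void)V4; (void)V8;
}
```
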
+bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), funcName,
+ compareWithScalarFnName);
+ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+}
+
+StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+ unsigned VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
+ if (I->VectorizationFactor == VF)
+ return I->VectorFnName;
+ ++I;
+ }
+ return StringRef();
+}
+
+StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
+ unsigned &VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+  if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F)
+ return StringRef();
+ VF = I->VectorizationFactor;
+ return I->ScalarFnName;
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(
+ lookupInfoImpl(Triple(F.getParent()->getTargetTriple())));
+}
+
+TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) {
+ std::unique_ptr<TargetLibraryInfoImpl> &Impl =
+ Impls[T.normalize()];
+ if (!Impl)
+ Impl.reset(new TargetLibraryInfoImpl(T));
+
+ return *Impl;
+}
+
+
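
lookupInfoImpl memoizes one impl per normalized triple, so every function in a module, and every module sharing a triple, reuses a single initialized table. The caching shape, reduced to standard containers (names are ours):

```cpp
#include <map>
#include <memory>
#include <string>

struct InfoImpl {
  explicit InfoImpl(const std::string & /*Triple*/) {} // expensive in real life
};

// Construct on first lookup, hand out the same reference thereafter.
static InfoImpl &lookup(std::map<std::string, std::unique_ptr<InfoImpl>> &Cache,
                        const std::string &NormalizedTriple) {
  std::unique_ptr<InfoImpl> &Slot = Cache[NormalizedTriple];
  if (!Slot)
    Slot.reset(new InfoImpl(NormalizedTriple));
  return *Slot;
}
```
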
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
+ : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T)
+ : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
+ const TargetLibraryInfoImpl &TLIImpl)
+ : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char TargetLibraryAnalysis::PassID;
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfoWrapperPass::ID = 0;
+
+void TargetLibraryInfoWrapperPass::anchor() {}
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index ef3909b..e1744d1 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -8,11 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
@@ -20,637 +22,303 @@ using namespace llvm;
#define DEBUG_TYPE "tti"
-// Setup the analysis group to manage the TargetTransformInfo passes.
-INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
-char TargetTransformInfo::ID = 0;
-
-TargetTransformInfo::~TargetTransformInfo() {
+namespace {
+/// \brief No-op implementation of the TTI interface using the utility base
+/// classes.
+///
+/// This is used when no target specific information is available.
+struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
+ explicit NoTTIImpl(const DataLayout *DL)
+ : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
+};
}
-void TargetTransformInfo::pushTTIStack(Pass *P) {
- TopTTI = this;
- PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+ : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
- // Walk up the chain and update the top TTI pointer.
- for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
- PTTI->TopTTI = this;
-}
+TargetTransformInfo::~TargetTransformInfo() {}
-void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
+TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
+ : TTIImpl(std::move(Arg.TTIImpl)) {}
+
+TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
+ TTIImpl = std::move(RHS.TTIImpl);
+ return *this;
}
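
Every query below now forwards through TTIImpl, which replaces the old PrevTTI/TopTTI analysis-group chaining with the concept/model type-erasure idiom: any object exposing the expected members is wrapped in a Model<T> behind an abstract Concept. The idiom in miniature (a sketch, not the real class):

```cpp
#include <memory>
#include <utility>

class TTI {
  struct Concept {                 // abstract interface, one vtable total
    virtual ~Concept() = default;
    virtual unsigned getOperationCost(unsigned Opcode) = 0;
  };
  template <typename T> struct Model final : Concept {
    T Impl;                        // concrete target implementation
    explicit Model(T I) : Impl(std::move(I)) {}
    unsigned getOperationCost(unsigned Opcode) override {
      return Impl.getOperationCost(Opcode); // non-virtual call into T
    }
  };
  std::unique_ptr<Concept> Impl;

public:
  template <typename T>
  explicit TTI(T I) : Impl(new Model<T>(std::move(I))) {}
  unsigned getOperationCost(unsigned Opcode) const {
    return Impl->getOperationCost(Opcode);
  }
};
```

Targets no longer inherit from a pass or chain getAnalysisUsage; the virtual dispatch is confined to this one thin wrapper.
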
unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
Type *OpTy) const {
- return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
-}
-
-unsigned TargetTransformInfo::getGEPCost(
- const Value *Ptr, ArrayRef<const Value *> Operands) const {
- return PrevTTI->getGEPCost(Ptr, Operands);
+ return TTIImpl->getOperationCost(Opcode, Ty, OpTy);
}
unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
int NumArgs) const {
- return PrevTTI->getCallCost(FTy, NumArgs);
+ return TTIImpl->getCallCost(FTy, NumArgs);
}
-unsigned TargetTransformInfo::getCallCost(const Function *F,
- int NumArgs) const {
- return PrevTTI->getCallCost(F, NumArgs);
-}
-
-unsigned TargetTransformInfo::getCallCost(
- const Function *F, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getCallCost(F, Arguments);
-}
-
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+unsigned
+TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getCallCost(F, Arguments);
}
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+unsigned
+TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
}
unsigned TargetTransformInfo::getUserCost(const User *U) const {
- return PrevTTI->getUserCost(U);
+ return TTIImpl->getUserCost(U);
}
bool TargetTransformInfo::hasBranchDivergence() const {
- return PrevTTI->hasBranchDivergence();
+ return TTIImpl->hasBranchDivergence();
+}
+
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+ return TTIImpl->isSourceOfDivergence(V);
}
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
- return PrevTTI->isLoweredToCall(F);
+ return TTIImpl->isLoweredToCall(F);
}
-void
-TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- PrevTTI->getUnrollingPreferences(F, L, UP);
+void TargetTransformInfo::getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
- return PrevTTI->isLegalAddImmediate(Imm);
+ return TTIImpl->isLegalAddImmediate(Imm);
}
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
- return PrevTTI->isLegalICmpImmediate(Imm);
+ return TTIImpl->isLegalICmpImmediate(Imm);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
- int Consecutive) const {
- return false;
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale) const {
+ return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale);
}
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
int Consecutive) const {
- return false;
+ return TTIImpl->isLegalMaskedStore(DataType, Consecutive);
}
-
-bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset,
- bool HasBaseReg,
- int64_t Scale) const {
- return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale);
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ int Consecutive) const {
+ return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
}
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale) const {
- return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale);
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return PrevTTI->isTruncateFree(Ty1, Ty2);
+ return TTIImpl->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
+ return TTIImpl->isProfitableToHoist(I);
}
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
- return PrevTTI->isTypeLegal(Ty);
+ return TTIImpl->isTypeLegal(Ty);
}
unsigned TargetTransformInfo::getJumpBufAlignment() const {
- return PrevTTI->getJumpBufAlignment();
+ return TTIImpl->getJumpBufAlignment();
}
unsigned TargetTransformInfo::getJumpBufSize() const {
- return PrevTTI->getJumpBufSize();
+ return TTIImpl->getJumpBufSize();
}
bool TargetTransformInfo::shouldBuildLookupTables() const {
- return PrevTTI->shouldBuildLookupTables();
+ return TTIImpl->shouldBuildLookupTables();
+}
+
+bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
+ return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
- return PrevTTI->getPopcntSupport(IntTyWidthInBit);
+ return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
- return PrevTTI->haveFastSqrt(Ty);
+ return TTIImpl->haveFastSqrt(Ty);
+}
+
+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ return TTIImpl->getFPOpCost(Ty);
}
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Imm, Ty);
+ return TTIImpl->getIntImmCost(Imm, Ty);
}
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
- return PrevTTI->getNumberOfRegisters(Vector);
+ return TTIImpl->getNumberOfRegisters(Vector);
}
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
- return PrevTTI->getRegisterBitWidth(Vector);
+ return TTIImpl->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
- return PrevTTI->getMaxInterleaveFactor();
+unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+ return TTIImpl->getMaxInterleaveFactor(VF);
}
unsigned TargetTransformInfo::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) const {
- return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
-unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
+unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty,
int Index, Type *SubTp) const {
- return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
+ return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
}
unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) const {
- return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
+ return TTIImpl->getCastInstrCost(Opcode, Dst, Src);
}
unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
- return PrevTTI->getCFInstrCost(Opcode);
+ return TTIImpl->getCFInstrCost(Opcode);
}
unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const {
- return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
- return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
+ return TTIImpl->getVectorInstrCost(Opcode, Val, Index);
}
unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
unsigned
-TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
- Type *RetTy,
+TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned
+TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
+ return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
+unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCallInstrCost(F, RetTy, Tys);
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
- return PrevTTI->getNumberOfParts(Tp);
+ return TTIImpl->getNumberOfParts(Tp);
}
unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
bool IsComplex) const {
- return PrevTTI->getAddressComputationCost(Tp, IsComplex);
+ return TTIImpl->getAddressComputationCost(Tp, IsComplex);
}
unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const {
- return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+ bool IsPairwiseForm) const {
+ return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
}
-unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
- const {
- return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+unsigned
+TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
-namespace {
+bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) const {
+ return TTIImpl->getTgtMemIntrinsic(Inst, Info);
+}
-struct NoTTI final : ImmutablePass, TargetTransformInfo {
- const DataLayout *DL;
-
- NoTTI() : ImmutablePass(ID), DL(nullptr) {
- initializeNoTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- // Note that this subclass is special, and must *not* call initializeTTI as
- // it does not chain.
- TopTTI = this;
- PrevTTI = nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Note that this subclass is special, and must *not* call
- // TTI::getAnalysisUsage as it breaks the recursion.
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- unsigned getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy) const override {
- switch (Opcode) {
- default:
- // By default, just classify everything as 'basic'.
- return TCC_Basic;
-
- case Instruction::GetElementPtr:
- llvm_unreachable("Use getGEPCost for GEP operations!");
-
- case Instruction::BitCast:
- assert(OpTy && "Cast instructions must provide the operand type");
- if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
- // Identity and pointer-to-pointer casts are free.
- return TCC_Free;
-
- // Otherwise, the default basic cost is used.
- return TCC_Basic;
-
- case Instruction::IntToPtr: {
- if (!DL)
- return TCC_Basic;
-
- // An inttoptr cast is free so long as the input is a legal integer type
- // which doesn't contain values outside the range of a pointer.
- unsigned OpSize = OpTy->getScalarSizeInBits();
- if (DL->isLegalInteger(OpSize) &&
- OpSize <= DL->getPointerTypeSizeInBits(Ty))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::PtrToInt: {
- if (!DL)
- return TCC_Basic;
-
- // A ptrtoint cast is free so long as the result is large enough to store
- // the pointer, and a legal integer type.
- unsigned DestSize = Ty->getScalarSizeInBits();
- if (DL->isLegalInteger(DestSize) &&
- DestSize >= DL->getPointerTypeSizeInBits(OpTy))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::Trunc:
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
- return TCC_Free;
-
- return TCC_Basic;
- }
- }
-
- unsigned getGEPCost(const Value *Ptr,
- ArrayRef<const Value *> Operands) const override {
- // In the basic model, we just assume that all-constant GEPs will be folded
- // into their uses via addressing modes.
- for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
- if (!isa<Constant>(Operands[Idx]))
- return TCC_Basic;
-
- return TCC_Free;
- }
-
- unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override
- {
- assert(FTy && "FunctionType must be provided to this routine.");
-
- // The target-independent implementation just measures the size of the
- // function by approximating that each argument will take on average one
- // instruction to prepare.
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = FTy->getNumParams();
-
- return TCC_Basic * (NumArgs + 1);
- }
-
- unsigned getCallCost(const Function *F, int NumArgs = -1) const override
- {
- assert(F && "A concrete function must be provided to this routine.");
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = F->arg_size();
-
- if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
- FunctionType *FTy = F->getFunctionType();
- SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
- return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
- }
-
- if (!TopTTI->isLoweredToCall(F))
- return TCC_Basic; // Give a basic cost if it will be lowered directly.
-
- return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
- }
-
- unsigned getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const override {
- // Simply delegate to generic handling of the call.
- // FIXME: We should use instsimplify or something else to catch calls which
- // will constant fold with these arguments.
- return TopTTI->getCallCost(F, Arguments.size());
- }
-
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const override {
- switch (IID) {
- default:
- // Intrinsics rarely (if ever) have normal argument setup constraints.
- // Model them as having a basic instruction cost.
- // FIXME: This is wrong for libc intrinsics.
- return TCC_Basic;
-
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
- case Intrinsic::experimental_gc_relocate:
- // These intrinsics don't actually represent code after lowering.
- return TCC_Free;
- }
- }
-
- unsigned
- getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const override {
- // Delegate to the generic intrinsic handling code. This mostly provides an
- // opportunity for targets to (for example) special case the cost of
- // certain intrinsics based on constants used as arguments.
- SmallVector<Type *, 8> ParamTys;
- ParamTys.reserve(Arguments.size());
- for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
- ParamTys.push_back(Arguments[Idx]->getType());
- return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
- }
-
- unsigned getUserCost(const User *U) const override {
- if (isa<PHINode>(U))
- return TCC_Free; // Model all PHI nodes as free.
-
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices);
- }
-
- if (ImmutableCallSite CS = U) {
- const Function *F = CS.getCalledFunction();
- if (!F) {
- // Just use the called value type.
- Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
- return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
- }
-
- SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
- return TopTTI->getCallCost(F, Arguments);
- }
-
- if (const CastInst *CI = dyn_cast<CastInst>(U)) {
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return TCC_Free;
- }
-
- // Otherwise delegate to the fully generic implementations.
- return getOperationCost(Operator::getOpcode(U), U->getType(),
- U->getNumOperands() == 1 ?
- U->getOperand(0)->getType() : nullptr);
- }
-
- bool hasBranchDivergence() const override { return false; }
-
- bool isLoweredToCall(const Function *F) const override {
- // FIXME: These should almost certainly not be handled here, and instead
- // handled with the help of TLI or the target itself. This was largely
- // ported from existing analysis heuristics here so that such refactorings
- // can take place in the future.
-
- if (F->isIntrinsic())
- return false;
-
- if (F->hasLocalLinkage() || !F->hasName())
- return true;
-
- StringRef Name = F->getName();
-
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
- Name == "fmin" || Name == "fminf" || Name == "fminl" ||
- Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
- Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
- Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
- return false;
-
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
- Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
- "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
- Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
- return false;
-
- return true;
- }
-
- void getUnrollingPreferences(const Function *, Loop *,
- UnrollingPreferences &) const override {}
-
- bool isLegalAddImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalICmpImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override
- {
- // Guess that reg+reg addressing is allowed. This heuristic is taken from
- // the implementation of LSR.
- return !BaseGV && BaseOffset == 0 && Scale <= 1;
- }
-
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override {
- // Guess that all legal addressing mode are free.
- if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
- return 0;
- return -1;
- }
-
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override {
- return false;
- }
-
- bool isTypeLegal(Type *Ty) const override {
- return false;
- }
-
- unsigned getJumpBufAlignment() const override {
- return 0;
- }
-
- unsigned getJumpBufSize() const override {
- return 0;
- }
-
- bool shouldBuildLookupTables() const override {
- return true;
- }
-
- PopcntSupportKind
- getPopcntSupport(unsigned IntTyWidthInBit) const override {
- return PSK_Software;
- }
-
- bool haveFastSqrt(Type *Ty) const override {
- return false;
- }
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override {
- return TCC_Basic;
- }
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- return 8;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- return 32;
- }
-
- unsigned getMaxInterleaveFactor() const override {
- return 1;
- }
-
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override {
- return 1;
- }
-
- unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index = 0, Type *SubTp = nullptr) const override {
- return 1;
- }
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override {
- return 1;
- }
-
- unsigned getCFInstrCost(unsigned Opcode) const override {
- return 1;
- }
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const override {
- return 1;
- }
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index = -1) const override {
- return 1;
- }
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override {
- return 1;
- }
-
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys) const override {
- return 1;
- }
-
- unsigned getNumberOfParts(Type *Tp) const override {
- return 0;
- }
-
- unsigned getAddressComputationCost(Type *Tp, bool) const override {
- return 0;
- }
-
- unsigned getReductionCost(unsigned, Type *, bool) const override {
- return 1;
- }
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
- return 0;
- }
+Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
+ IntrinsicInst *Inst, Type *ExpectedType) const {
+ return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+}
-};
+TargetTransformInfo::Concept::~Concept() {}
+
+TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
+
+TargetIRAnalysis::TargetIRAnalysis(
+ std::function<Result(Function &)> TTICallback)
+ : TTICallback(TTICallback) {}
-} // end anonymous namespace
+TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
+ return TTICallback(F);
+}
+
+char TargetIRAnalysis::PassID;
+
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
+ return Result(&F.getParent()->getDataLayout());
+}
-INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
- "No target information", true, true, true)
-char NoTTI::ID = 0;
+// Register the basic pass.
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
+ "Target Transform Information", false, true)
+char TargetTransformInfoWrapperPass::ID = 0;
+
+void TargetTransformInfoWrapperPass::anchor() {}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
+ : ImmutablePass(ID) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
+ TargetIRAnalysis TIRA)
+ : ImmutablePass(ID), TIRA(std::move(TIRA)) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) {
+ TTI = TIRA.run(F);
+ return *TTI;
+}
-ImmutablePass *llvm::createNoTargetTransformInfoPass() {
- return new NoTTI();
+ImmutablePass *
+llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
+ return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index ff89558..1158725 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -129,6 +129,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SetVector.h"
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
@@ -282,9 +283,7 @@ namespace {
initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -321,6 +320,11 @@ ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
return new TypeBasedAliasAnalysis();
}
+bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -575,18 +579,22 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!B) return nullptr;
}
- SmallVector<MDNode *, 4> PathA;
+ SmallSetVector<MDNode *, 4> PathA;
MDNode *T = A;
while (T) {
- PathA.push_back(T);
+ if (PathA.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathA.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
- SmallVector<MDNode *, 4> PathB;
+ SmallSetVector<MDNode *, 4> PathB;
T = B;
while (T) {
- PathB.push_back(T);
+ if (PathB.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathB.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
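
Replacing the SmallVector paths with SmallSetVector turns what used to be an infinite loop on malformed metadata into a clean fatal error: seeing a node twice while climbing the TBAA parent chain proves the chain is cyclic. The guard in miniature, over a generic node type of our own:

```cpp
#include <set>

struct Node { Node *Parent = nullptr; };

// Walk the parent chain; revisiting a node proves a cycle.
static bool chainIsAcyclic(Node *N) {
  std::set<Node *> Seen;
  for (; N; N = N->Parent)
    if (!Seen.insert(N).second)
      return false; // the old code would have looped here forever
  return true;
}
```
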
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 5d90917..a55712c 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
@@ -39,13 +41,41 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits. The individual options below control how
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
+/// run into compile-time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+ cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+ cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+ cl::Hidden,
+ cl::init(20000));
+
+// Controls how many uses of the value are scanned when searching for
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+ cl::Hidden, cl::init(2000));
+
+// If true, only consider compares whose sole use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+ cl::Hidden, cl::init(false));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
-static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
+ return DL.getPointerTypeSizeInBits(Ty);
}
// Many of these functions have internal versions that take an assumption
@@ -97,73 +127,88 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
}
static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
+ ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
+bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ assert(LHS->getType()->isIntOrIntVectorTy() &&
+ "LHS and RHS should be integers");
+ IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
+ APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
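
haveNoCommonBitsSet succeeds when the two known-zero masks jointly cover every bit position: the operands can never both have a bit set, so the addition produces no carries and x + y equals x | y. A concrete instance on plain integers (a sketch):

```cpp
#include <cstdint>

static void noCommonBits(uint8_t A, uint8_t B) {
  uint8_t X = uint8_t(A << 4); // low four bits known zero
  uint8_t Y = B & 0x0F;        // high four bits known zero
  // KnownZero(X) | KnownZero(Y) covers all eight bits, so no carries:
  bool Same = uint8_t(X + Y) == (X | Y); // always true
  (void)Same;
}
```
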
static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
+ ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q);
+ const Query &Q, const DataLayout &DL);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(AC, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT), DL);
}
-static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD,
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, TD, Depth,
+ return ::MaskedValueIsZero(V, Mask, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
@@ -175,7 +220,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -194,8 +239,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
// If an initial sequence of bits in the result is not needed, the
// corresponding bits in the operands are not needed.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
APInt CarryIn(BitWidth, 0);
@@ -243,11 +288,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -268,9 +313,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
// negative or zero.
if (!isKnownNonNegative)
isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, TD, Depth, Q)) ||
+ isKnownNonZero(Op0, DL, Depth, Q)) ||
(isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, TD, Depth, Q));
+ isKnownNonZero(Op1, DL, Depth, Q));
}
}
@@ -382,8 +427,7 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Query &Q,
- const DataLayout *DL) {
+static bool isValidAssumeForContext(Value *V, const Query &Q) {
Instruction *Inv = cast<Instruction>(V);
// There are two restrictions on the use of an assume:
@@ -403,8 +447,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -428,8 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -440,10 +482,9 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
bool llvm::isValidAssumeForContext(const Instruction *I,
const Instruction *CxtI,
- const DataLayout *DL,
const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction*>(I),
- Query(nullptr, CxtI, DT), DL);
+ return ::isValidAssumeForContext(const_cast<Instruction *>(I),
+ Query(nullptr, CxtI, DT));
}
template<typename LHS, typename RHS>
@@ -474,9 +515,181 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction). This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // TODO: We could potentially be more aggressive here. This would be worth
+ // evaluating. If we can, explore commoning this code with the assume
+ // handling logic.
+ if (LHS != V && RHS != V)
+ return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) {
+ default:
+ // We know nothing from this condition
+ break;
+ // TODO: implement unsigned bound from below (known one bits)
+ // TODO: common condition check implementations with assumes
+ // TODO: implement other patterns from assume (e.g. V & B == A)
+ case ICmpInst::ICMP_SGT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+ // We know that the sign bit is zero.
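+        // (RHS is known to be either -1 or non-negative here, and V >s RHS
+        // with RHS >=s -1 implies V >=s 0.)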
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // The known zero bits carry over
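+      // (e.g. i8: if the top three bits of RHS are known zero, then RHS <=u
+      // 0b00011111, and V <=u RHS forces the top three bits of V to zero too.)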
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // Whatever high bits in RHS are zero are known to be zero (if RHS is a
+      // power of 2, then one more).
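+      // (e.g. i8: V <u 8 means V <=u 7 == 0b00000111; the four leading zeros
+      // of 8 plus one more for the power of two give five known-zero high
+      // bits in V.)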
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+ SignBits++;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+  }
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction. In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction. This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth,
+ const Query &Q) {
+ // Need both the dominator tree and the query location to do anything useful
+ if (!Q.DT || !Q.CxtI)
+ return;
+ Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+ // Avoid useless work
+ if (auto VI = dyn_cast<Instruction>(V))
+ if (VI->getParent() == Cxt->getParent())
+ return;
+
+  // Note: We currently implement two options. It's not clear which of these
+  // will survive long term; we need data for that.
+  // Option 1 - Try walking the dominator tree looking for conditions which
+  // might apply. This works well for local conditions (loop guards, etc.),
+  // but not as well for things far from the context instruction (presuming a
+  // low limit on the number of blocks explored). If we can set a high enough
+  // limit, this would be all we need.
+  // Option 2 - We restrict our search to those conditions which are uses of
+  // the value we're interested in. This is independent of the dom structure,
+  // but is slightly less powerful without looking through lots of use chains.
+  // It does handle conditions far from the context instruction (e.g. early
+  // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree
+ unsigned NumBlocksExplored = 0;
+ BasicBlock *Current = Cxt->getParent();
+ while (true) {
+ // Stop searching if we've gone too far up the chain
+ if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+ break;
+ NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom())
+ break;
+ Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+ if (!Current)
+      // Found function entry
+ break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context
+ // instruction. Finding a condition where one path dominates the context
+ // isn't enough because both the true and false cases could merge before
+ // the context instruction we're actually interested in. Instead, we need
+ // to ensure that the taken *edge* dominates the context instruction.
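+    // (e.g. for "if (c) { A } else { B } C": both arms merge at C, so the
+    // branch on c proves nothing at C; only blocks dominated by the edge into
+    // A may assume c holds.)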
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+
+ // Option 2 - Search the other uses of V
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI || BI->isUnconditional())
+ continue;
+ // We're looking for conditions that are guaranteed to hold at the
+ // context instruction. Finding a condition where one path dominates
+ // the context isn't enough because both the true and false cases could
+ // merge before the context instruction we're actually interested in.
+ // Instead, we need to ensure that the taken *edge* dominates the context
+ // instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+ }
+}
+
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
- APInt &KnownOne,
- const DataLayout *DL,
+ APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
@@ -498,14 +711,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
  // We're running this loop once for each value queried, resulting in a
  // runtime of ~O(#assumes * #values).
- assert(isa<IntrinsicInst>(I) &&
- dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
-
+
Value *Arg = I->getArgOperand(0);
- if (Arg == V &&
- isValidAssumeForContext(I, Q, DL)) {
+ if (Arg == V && isValidAssumeForContext(I, Q)) {
assert(BitWidth == 1 && "assume operand is not i1?");
KnownZero.clearAllBits();
KnownOne.setAllBits();
@@ -525,15 +736,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
ConstantInt *C;
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
KnownZero |= RHSKnownZero;
KnownOne |= RHSKnownOne;
// assume(v & b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -546,7 +757,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v & b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -557,9 +768,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & MaskKnownOne;
KnownOne |= RHSKnownZero & MaskKnownOne;
// assume(v | b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -572,7 +783,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v | b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -583,9 +794,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & BKnownZero;
KnownOne |= RHSKnownZero & BKnownZero;
// assume(v ^ b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -601,7 +812,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v ^ b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -617,7 +828,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v << c = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -627,7 +838,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -637,10 +848,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V,
- m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ m_AShr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -649,10 +859,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownOne << C->getZExtValue();
// assume(~(v >> c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
- m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C)))),
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -661,8 +871,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownZero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -672,8 +881,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -683,8 +891,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -694,8 +901,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -705,8 +911,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -715,14 +920,13 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
// assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I)))
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL))
KnownZero |=
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
else
@@ -743,13 +947,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, the known zero and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -757,8 +960,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert((V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
"Not integer or pointer type!");
- assert((!TD ||
- TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
@@ -797,7 +999,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// The address of an aligned GlobalValue has trailing zeros.
if (auto *GO = dyn_cast<GlobalObject>(V)) {
unsigned Align = GO->getAlignment();
- if (Align == 0 && TD) {
+ if (Align == 0) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getType()->getElementType();
if (ObjectType->isSized()) {
@@ -805,9 +1007,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
- Align = TD->getPreferredAlignment(GVar);
+ Align = DL.getPreferredAlignment(GVar);
else
- Align = TD->getABITypeAlignment(ObjectType);
+ Align = DL.getABITypeAlignment(ObjectType);
}
}
}
@@ -823,11 +1025,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Argument *A = dyn_cast<Argument>(V)) {
unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
- if (!Align && TD && A->hasStructRetAttr()) {
+ if (!Align && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
- Align = TD->getABITypeAlignment(EltTy);
+ Align = DL.getABITypeAlignment(EltTy);
}
if (Align)
@@ -838,7 +1040,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Don't give up yet... there might be an assumption that provides more
// information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Or a dominating condition for that matter
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+ Depth, Q);
return;
}
@@ -854,12 +1061,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// the bits of its aliasee.
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->mayBeOverridden())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
return;
}
// Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
@@ -873,8 +1086,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -883,8 +1096,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -893,8 +1106,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -905,21 +1118,20 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -929,8 +1141,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Select:
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -946,8 +1158,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // We can't handle these if we don't know the pointer size.
- if (!TD) break;
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
@@ -956,17 +1166,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- if(TD) {
- SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
- } else {
- SrcBitWidth = SrcTy->getScalarSizeInBits();
- if (!SrcBitWidth) break;
- }
+ SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType());
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -980,7 +1185,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
break;
}
break;
@@ -991,7 +1196,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1007,7 +1212,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -1020,7 +1225,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
@@ -1034,7 +1239,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -1048,15 +1253,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::SRem:
@@ -1064,8 +1269,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1,
+ Q);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -1089,8 +1294,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// remainder is zero.
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL,
+ Depth + 1, Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -1102,8 +1307,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -1112,8 +1317,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -1125,8 +1330,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Alloca: {
AllocaInst *AI = cast<AllocaInst>(V);
unsigned Align = AI->getAlignment();
- if (Align == 0 && TD)
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
@@ -1136,8 +1341,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL,
+ Depth + 1, Q);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -1145,10 +1350,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) {
- TrailZ = 0;
- break;
- }
// Handle case when index is vector zeroinitializer
Constant *CIndex = cast<Constant>(Index);
@@ -1159,7 +1360,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Index = CIndex->getSplatValue();
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -1171,9 +1372,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
- uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1,
+ Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -1215,11 +1417,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);
+ computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -1242,16 +1444,16 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getAllOnesValue(BitWidth);
KnownOne = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ for (Value *IncValue : P->incoming_values()) {
// Skip direct self references.
- if (P->getIncomingValue(i) == P) continue;
+ if (IncValue == P) continue;
KnownZero2 = APInt(BitWidth, 0);
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1, Q);
+ computeKnownBits(IncValue, KnownZero2, KnownOne2, DL,
+ MaxDepth - 1, Q);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -1303,19 +1505,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
- false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth, Q);
+ computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
}
@@ -1328,9 +1530,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
- unsigned BitWidth = getBitWidth(V->getType(), TD);
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ unsigned BitWidth = getBitWidth(V->getType(), DL);
if (!BitWidth) {
KnownZero = false;
KnownOne = false;
@@ -1338,7 +1539,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
+ computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
@@ -1348,7 +1549,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q) {
+ const Query &Q, const DataLayout &DL) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1375,20 +1576,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return
- isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -1403,19 +1603,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL))
return true;
if (match(Y, m_And(m_Specific(X), m_Value())) ||
match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL))
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -1433,7 +1633,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
- Depth, Q);
+ Depth, Q, DL);
}
return false;
@@ -1445,7 +1645,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL,
unsigned Depth, const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1458,10 +1658,6 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
return true;
- // Past this, if we don't have DataLayout, we can't do much.
- if (!DL)
- return false;
-
// Walk the GEP operands and see if any operand introduces a non-zero offset.
// If so, then the GEP cannot produce a null pointer, as doing so would
// inherently violate the inbounds contract within address space zero.
@@ -1471,7 +1667,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
if (ElementOffset > 0)
return true;
@@ -1479,7 +1675,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
}
// If we have a zero-sized type, the index doesn't matter. Keep looping.
- if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+ if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
continue;
// Fast path the constant operand case both for efficiency and so we don't
@@ -1528,7 +1724,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges,
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
@@ -1561,21 +1757,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
if (isKnownNonNull(V))
return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, TD, Depth, Q))
+ if (isGEPKnownNonNull(GEP, DL, Depth, Q))
return true;
}
- unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL);
// X | Y != 0 if X != 0 or Y != 0.
Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
- return isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q);
// ext X != 0 if X != 0.
if (isa<SExtInst>(V) || isa<ZExtInst>(V))
- return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -1583,11 +1778,11 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shl nuw can't remove any non-zero bits.
OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if (KnownOne[0])
return true;
}
@@ -1597,29 +1792,28 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shr exact can only shift out zero bits.
PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
if (XKnownNegative)
return true;
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
bool XKnownNonNegative, XKnownNegative;
bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnownNonNegative && YKnownNonNegative)
- if (isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q))
+ if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -1630,22 +1824,22 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
if (XKnownNonNegative &&
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL))
return true;
if (YKnownNonNegative &&
- isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL))
return true;
}
// X * Y.
@@ -1654,21 +1848,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
- isKnownNonZero(X, TD, Depth, Q) &&
- isKnownNonZero(Y, TD, Depth, Q))
+ isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
- isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
+ if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
return true;
}
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return KnownOne != 0;
}
@@ -1677,15 +1870,14 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return (KnownZero & Mask) == Mask;
}
@@ -1699,14 +1891,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask,
///
/// 'Op' must have a scalar integer type.
///
-unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth, const Query &Q) {
- assert((TD || V->getType()->isIntOrIntVectorTy()) &&
- "ComputeNumSignBits requires a DataLayout object to operate "
- "on non-integer values!");
- Type *Ty = V->getType();
- unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
- Ty->getScalarSizeInBits();
+unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1721,10 +1908,63 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
+ return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp;
+
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
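+    // (e.g. i16: if X has at least 3 sign bits and C == 16, the quotient has
+    // at least min(16, 3 + 4) == 7 sign bits.)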
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
+ }
+
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+    // positive constant. This lets us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits =
+ ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+      // 1. The numerator is positive. The result range is [0,C), and every
+      //    value in [0,C) is u< (1 << ceilLogBase2(C)).
+      //
+      // 2. The numerator is negative. Then the result range is (-C,0], and
+      //    integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
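+      //
+      // (e.g. i8, C == 4: the result lies in (-4, 4), ceilLogBase2(4) == 2,
+      // so at least 8 - 2 == 6 sign bits are guaranteed.)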
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
+ }
case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1737,7 +1977,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1749,9 +1989,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -1760,22 +2000,23 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
break;
case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q);
return std::min(Tmp, Tmp2);
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
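        // (e.g. i8: X in {0, 1} gives X + (-1) in {-1, 0}, i.e. 0b11111111 or
        // 0b00000000, where every bit equals the sign bit.)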
@@ -1788,19 +2029,20 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1816,7 +2058,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
@@ -1830,12 +2072,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q);
for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
- Tmp = std::min(Tmp,
- ComputeNumSignBits(PN->getIncomingValue(i), TD,
- Depth+1, Q));
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q));
}
return Tmp;
}
@@ -1850,7 +2091,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -2000,8 +2241,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == 6)
- return 1; // Limit search depth.
+ return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
if (!I) return false;
@@ -2044,6 +2288,62 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
+bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeNegativeZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
+ if (Depth == 6)
+ return false; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::FMul:
+ // x*x is always non-negative or a NaN.
+ if (I->getOperand(0) == I->getOperand(1))
+ return true;
+ // Fall through
+ case Instruction::FAdd:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ // Widening/narrowing never change sign.
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt:
+ return true;
+ case Intrinsic::powi:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // powi(x,n) is non-negative if n is even.
+ if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ return true;
+ }
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ // x*x+y is non-negative if y is non-negative.
+ return I->getOperand(0) == I->getOperand(1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
+ }
+ break;
+ }
+ return false;
+}
+
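// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] A minimal,
// self-contained sketch of two of the rules CannotBeOrderedLessThanZero()
// applies, restated on plain doubles. The helper names are hypothetical.
#include <cmath>

// x*x is never ordered-less-than-zero: it is >= 0 or NaN, and a NaN compares
// false against everything, so !(p < 0.0) also covers the NaN case.
bool squareNotLessThanZero(double x) {
  double p = x * x;
  return !(p < 0.0);
}

// The powi rule: an even integer exponent makes the result >= 0 or NaN;
// otherwise fall back on the sign of the base.
bool powNotLessThanZero(double x, long long n) {
  if (n % 2 == 0)
    return true;
  return !(x < 0.0);
}
// ---------------------------------------------------------------------------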
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2068,26 +2368,14 @@ Value *llvm::isBytewiseValue(Value *V) {
// Don't handle long double formats, which have strange constraints.
}
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
+ // We can handle constant integers that are multiple of 8 bits.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2 = Val2.trunc(Val.getBitWidth()/2);
- Val = Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return nullptr;
- }
- return ConstantInt::get(V->getContext(), Val);
+ if (CI->getBitWidth() % 8 == 0) {
+ assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+ if (!CI->getValue().isSplat(8))
+ return nullptr;
+ return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
}
}
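// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] What
// APInt::isSplat(8) checks, sketched on a plain uint64_t: every byte must
// equal the lowest byte, which is exactly the memset-able case.
#include <cstdint>

bool isByteSplat64(uint64_t V) {
  uint64_t Byte = V & 0xff;
  // Multiplying by a repeated 0x01 pattern replicates the byte into all
  // eight lanes.
  return V == Byte * 0x0101010101010101ULL;
}
// isByteSplat64(0x0000000000000000) == true   (i64 0)
// isByteSplat64(0xffffffffffffffff) == true   (i64 -1)
// isByteSplat64(0x00ff00ff00ff00ff) == false  (not a single repeated byte)
// ---------------------------------------------------------------------------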
@@ -2286,23 +2574,19 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// Analyze the specified pointer to see if it can be expressed as a base
/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *DL) {
- // Without DataLayout, conservatively assume 64-bit offsets, which is
- // the widest we support.
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
+ const DataLayout &DL) {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- if (DL) {
- APInt GEPOffset(BitWidth, 0);
- if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
- break;
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ break;
- ByteOffset += GEPOffset;
- }
+ ByteOffset += GEPOffset;
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
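// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The invariant
// GetPointerBaseWithConstantOffset reports, checked with plain C++ pointer
// arithmetic: a GEP with constant indices decomposes into the base pointer
// plus a byte offset.
#include <cassert>
#include <cstdint>

void checkBasePlusOffset() {
  int Arr[8] = {};
  int *P = &Arr[3]; // corresponds to a GEP with constant index 3
  int64_t Offset = (char *)P - (char *)Arr;
  assert(Offset == 3 * (int64_t)sizeof(int)); // base Arr, byte offset 12
}
// ---------------------------------------------------------------------------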
@@ -2331,7 +2615,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Look through bitcast instructions and geps.
V = V->stripPointerCasts();
- // If the value is a GEP instructionor constant expression, treat it as an
+ // If the value is a GEP instruction or constant expression, treat it as an
// offset.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
@@ -2358,7 +2642,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
+ TrimAtNul);
}
// The GEP instruction, constant or instruction, must reference a global
@@ -2421,8 +2706,8 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
// If it was new, see if all the input strings are the same length.
uint64_t LenSoFar = ~0ULL;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ for (Value *IncValue : PN->incoming_values()) {
+ uint64_t Len = GetStringLengthH(IncValue, PHIs);
if (Len == 0) return 0; // Unknown length -> unknown.
if (Len == ~0ULL) continue;
@@ -2468,8 +2753,34 @@ uint64_t llvm::GetStringLength(Value *V) {
return Len == ~0ULL ? 1 : Len;
}
-Value *
-llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
+/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+/// previous iteration of the loop was referring to the same object as \p PN.
+static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+ // Find the loop-defined value.
+ Loop *L = LI->getLoopFor(PN->getParent());
+ if (PN->getNumIncomingValues() != 2)
+ return true;
+
+ // Find the value from previous iteration.
+ auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ return true;
+
+ // If a new pointer is loaded in the loop, the pointer references a different
+ // object in every iteration. E.g.:
+ // for (i)
+ // int *p = a[i];
+ // ...
+ if (auto *Load = dyn_cast<LoadInst>(PrevValue))
+ if (!L->isLoopInvariant(Load->getPointerOperand()))
+ return false;
+ return true;
+}
+
+Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
+ unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -2486,7 +2797,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
// TODO: Acquire a DominatorTree and AssumptionCache and use them.
- if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
+ if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
V = Simplified;
continue;
}
@@ -2498,17 +2809,15 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
return V;
}
-void
-llvm::GetUnderlyingObjects(Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout *TD,
- unsigned MaxLookup) {
+void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL, LoopInfo *LI,
+ unsigned MaxLookup) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
do {
Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObject(P, TD, MaxLookup);
+ P = GetUnderlyingObject(P, DL, MaxLookup);
if (!Visited.insert(P).second)
continue;
@@ -2520,8 +2829,20 @@ llvm::GetUnderlyingObjects(Value *V,
}
if (PHINode *PN = dyn_cast<PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ // If this PHI changes the underlying object in every iteration of the
+ // loop, don't look through it. Consider:
+ // int **A;
+ // for (i) {
+ // Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ // Curr = A[i];
+      //     *Prev, *Curr;
+      //   }
+      //
+ // Prev is tracking Curr one iteration behind so they refer to different
+ // underlying objects.
+ if (!LI || !LI->isLoopHeader(PN->getParent()) ||
+ isSameUnderlyingObjectInLoop(PN, LI))
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
@@ -2542,8 +2863,188 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
return true;
}
+static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
+ Type *Ty, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ assert(Offset.isNonNegative() && "offset can't be negative");
+ assert(Ty->isSized() && "must be sized");
+
+ APInt DerefBytes(Offset.getBitWidth(), 0);
+ bool CheckForNonNull = false;
+ if (const Argument *A = dyn_cast<Argument>(BV)) {
+ DerefBytes = A->getDereferenceableBytes();
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = A->getDereferenceableOrNullBytes();
+ CheckForNonNull = true;
+ }
+ } else if (auto CS = ImmutableCallSite(BV)) {
+ DerefBytes = CS.getDereferenceableBytes(0);
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = CS.getDereferenceableOrNullBytes(0);
+ CheckForNonNull = true;
+ }
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) {
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ if (!DerefBytes.getBoolValue()) {
+ if (MDNode *MD =
+ LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ CheckForNonNull = true;
+ }
+ }
+
+ if (DerefBytes.getBoolValue())
+ if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
+ if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
+ return true;
+
+ return false;
+}
+
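// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The byte
// arithmetic behind the check above, in isolation: dereferenceable(N) on the
// base pointer promises N valid bytes, so an access of StoreSize bytes at a
// non-negative Offset is covered iff Offset + StoreSize <= N. (The APInt
// arithmetic above works at full width, so overflow is a non-issue there;
// this sketch assumes the sum does not wrap.)
#include <cstdint>

bool coveredByDerefBytes(uint64_t DerefBytes, uint64_t Offset,
                         uint64_t StoreSize) {
  return DerefBytes >= Offset + StoreSize;
}
// coveredByDerefBytes(16, 8, 8)  == true   (bytes [8, 16) of a 16-byte promise)
// coveredByDerefBytes(16, 12, 8) == false  (would touch bytes up to 20)
// ---------------------------------------------------------------------------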
+static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
+}
+
+/// Return true if Value is always a dereferenceable pointer.
+///
+/// Test if V is always a pointer to allocated and suitably aligned memory for
+/// a simple load or store.
+static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ SmallPtrSetImpl<const Value *> &Visited) {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(V)) return true;
+
+ // It's not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However,
+  // if we're casting from a pointer to a type of larger size to a pointer
+  // to a type of smaller size (or the same size), and the alignment is at
+  // least as large as for the resulting pointer type, then we can look
+  // through the bitcast.
+ if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if (STy->isSized() && DTy->isSized() &&
+ (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
+ (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
+ return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
+ DT, TLI, Visited);
+ }
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are okay.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr())
+ return true;
+
+ if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
+ return true;
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!Visited.insert(GEP->getOperand(0)).second)
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI,
+ DT, TLI, Visited))
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // For gc.relocate, look through relocations
+ if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
+ if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
+ GCRelocateOperands RelocateInst(I);
+ return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
+ DT, TLI, Visited);
+ }
+
+ if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
+ DT, TLI, Visited);
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
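// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The bitcast
// rule used in the function above, factored out: a pointer bitcast preserves
// dereferenceability when the destination type needs no more bytes and no
// more alignment than the source type.
#include <cstdint>

bool bitcastPreservesDeref(uint64_t SrcStoreSize, uint64_t DstStoreSize,
                           uint64_t SrcAlign, uint64_t DstAlign) {
  return SrcStoreSize >= DstStoreSize && SrcAlign >= DstAlign;
}
// The i8* -> i32* example from the comment above fails this test
// (1 >= 4 is false), so a 4-byte load is never justified by a 1-byte alloca.
// ---------------------------------------------------------------------------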
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ // When dereferenceability information is provided by a dereferenceable
+ // attribute, we know exactly how many bytes are dereferenceable. If we can
+ // determine the exact offset to the attributed variable, we can use that
+ // information here.
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+ if (Ty->isSized()) {
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ if (Offset.isNonNegative())
+ if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
+ CtxI, DT, TLI))
+ return true;
+ }
+
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+}
+
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
- const DataLayout *TD) {
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
const Operator *Inst = dyn_cast<Operator>(V);
if (!Inst)
return false;
@@ -2567,20 +3068,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::SDiv:
case Instruction::SRem: {
// x / y is undefined if y == 0 or x == INT_MIN and y == -1
- const APInt *X, *Y;
- if (match(Inst->getOperand(1), m_APInt(Y))) {
- if (*Y != 0) {
- if (*Y == -1) {
- // The numerator can't be MinSignedValue if the denominator is -1.
- if (match(Inst->getOperand(0), m_APInt(X)))
- return !Y->isMinSignedValue();
- // The numerator *might* be MinSignedValue.
- return false;
- }
- // The denominator is not 0 or -1, it's safe to proceed.
- return true;
- }
- }
+ const APInt *Numerator, *Denominator;
+ if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+ return false;
+ // We cannot hoist this division if the denominator is 0.
+ if (*Denominator == 0)
+ return false;
+ // It's safe to hoist if the denominator is not 0 or -1.
+ if (*Denominator != -1)
+ return true;
+  // At this point we know that the denominator is -1. It is safe to hoist as
+  // long as we know that the numerator is not INT_MIN.
+ if (match(Inst->getOperand(0), m_APInt(Numerator)))
+ return !Numerator->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
return false;
}
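// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The guard
// rewritten above, as a self-contained predicate. It assumes both operands
// are known constants; the code above may only know the denominator, in
// which case it must conservatively answer false for a -1 denominator.
#include <cstdint>
#include <limits>

bool sdivIsSafeToSpeculate(int64_t Numerator, int64_t Denominator) {
  if (Denominator == 0)
    return false; // division by zero traps
  if (Denominator != -1)
    return true; // no remaining hazard
  // Denominator is -1: only INT_MIN / -1 overflows.
  return Numerator != std::numeric_limits<int64_t>::min();
}
// ---------------------------------------------------------------------------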
case Instruction::Load: {
@@ -2589,7 +3090,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
- return LI->getPointerOperand()->isDereferenceablePointer(TD);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -2669,7 +3171,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
return LI->getMetadata(LLVMContext::MD_nonnull);
- if (ImmutableCallSite CS = V)
+ if (auto CS = ImmutableCallSite(V))
if (CS.isReturnNonNull())
return true;
@@ -2680,8 +3182,62 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
return false;
}
+static bool isKnownNonNullFromDominatingCondition(const Value *V,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ const ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ const BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI)
+ continue;
+
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor = nullptr;
+ CmpInst::Predicate Pred;
+
+ if (match(const_cast<ICmpInst*>(Cmp),
+ m_c_ICmp(Pred, m_Specific(V), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ NonNullSuccessor = BI->getSuccessor(1);
+ else if (Pred == ICmpInst::ICMP_NE)
+ NonNullSuccessor = BI->getSuccessor(0);
+ }
+
+ if (NonNullSuccessor) {
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
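// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The successor
// selection performed above, in isolation. A branch on `icmp eq V, null`
// proves V non-null along its false edge; `icmp ne V, null` proves it along
// its true edge. The enum is a stand-in for the relevant CmpInst predicates.
enum class NullCmp { Eq, Ne, Other };

// Returns the successor index known to see a non-null V (0 = true edge,
// 1 = false edge), or -1 when the compare proves nothing.
int nonNullSuccessor(NullCmp Pred) {
  switch (Pred) {
  case NullCmp::Eq:
    return 1; // equality with null branches away from the non-null path
  case NullCmp::Ne:
    return 0; // inequality with null takes the non-null path when true
  default:
    return -1;
  }
}
// ---------------------------------------------------------------------------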
+bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
+ const DominatorTree *DT, const TargetLibraryInfo *TLI) {
+ if (isKnownNonNull(V, TLI))
+ return true;
+
+ return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+}
+
OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -2731,7 +3287,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
}
OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -2758,3 +3314,133 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
+
+static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ LHS = CmpLHS;
+ RHS = CmpRHS;
+
+ // (icmp X, Y) ? X : Y
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
+ switch (Pred) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ switch (Pred) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+ // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+ // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+ if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+ }
+
+ // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+ // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+ if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+ }
+ }
+
+  // Y >s C ? ~Y : ~C  ==>  ~Y <s ~C ? ~Y : ~C  ==>  SMIN(~Y, ~C)
+ if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() &&
+ (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
+ match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return SPF_SMIN;
+ }
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
+
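// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The canonical
// shapes the matcher above recognizes, written out in plain C++ so the flavor
// table is easy to check by hand (ignoring the INT64_MIN negation edge case,
// where negation wraps at the IR level).
#include <cstdint>

int64_t smin_shape(int64_t X, int64_t Y) {
  return X < Y ? X : Y; // (icmp slt X, Y) ? X : Y  ==> SPF_SMIN
}
int64_t smax_shape(int64_t X, int64_t Y) {
  return X < Y ? Y : X; // (icmp slt X, Y) ? Y : X  ==> SPF_SMAX
}
int64_t abs_shape(int64_t X) {
  return X > -1 ? X : -X; // (icmp sgt X, -1) ? X : -X  ==> SPF_ABS
}
// ---------------------------------------------------------------------------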
+static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
+ Instruction::CastOps *CastOp) {
+ CastInst *CI = dyn_cast<CastInst>(V1);
+ Constant *C = dyn_cast<Constant>(V2);
+ if (!CI || !C)
+ return nullptr;
+ *CastOp = CI->getOpcode();
+
+ if (isa<SExtInst>(CI) && CmpI->isSigned()) {
+ Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
+ // This is only valid if the truncated value can be sign-extended
+ // back to the original value.
+ if (ConstantExpr::getSExt(T, C->getType()) == C)
+ return T;
+ return nullptr;
+ }
+ if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
+ return ConstantExpr::getTrunc(C, CI->getSrcTy());
+
+ if (isa<TruncInst>(CI))
+ return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+
+ return nullptr;
+}
+
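// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The
// round-trip test used for the sext case above, on concrete widths: the
// truncation is only valid if sign-extending the result reproduces the
// original constant. (Assumes the usual two's-complement narrowing.)
#include <cstdint>

bool truncIsSignSafe(int64_t C) {
  int32_t T = static_cast<int32_t>(C); // trunc to the narrow type
  return static_cast<int64_t>(T) == C; // sext back and compare
}
// truncIsSignSafe(-1)               == true   (-1 survives the round trip)
// truncIsSignSafe(INT64_C(1) << 40) == false  (high bits are lost)
// ---------------------------------------------------------------------------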
+SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+ Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI) return SPF_UNKNOWN;
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (!CmpI) return SPF_UNKNOWN;
+
+ ICmpInst::Predicate Pred = CmpI->getPredicate();
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ // Bail out early.
+ if (CmpI->isEquality())
+ return SPF_UNKNOWN;
+
+ // Deal with type mismatches.
+ if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
+ if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ cast<CastInst>(TrueVal)->getOperand(0), C,
+ LHS, RHS);
+ if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ C, cast<CastInst>(FalseVal)->getOperand(0),
+ LHS, RHS);
+ }
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+ LHS, RHS);
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index de5db1a..05c2428 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -241,7 +241,7 @@ lltok::Kind LLLexer::LexToken() {
case ')': return lltok::rparen;
case ',': return lltok::comma;
case '*': return lltok::star;
- case '\\': return lltok::backslash;
+ case '|': return lltok::bar;
}
}
@@ -487,11 +487,11 @@ lltok::Kind LLLexer::LexIdentifier() {
if (!KeywordEnd) KeywordEnd = CurPtr;
CurPtr = KeywordEnd;
--StartChar;
- unsigned Len = CurPtr-StartChar;
-#define KEYWORD(STR) \
- do { \
- if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
- return lltok::kw_##STR; \
+ StringRef Keyword(StartChar, CurPtr - StartChar);
+#define KEYWORD(STR) \
+ do { \
+ if (Keyword == #STR) \
+ return lltok::kw_##STR; \
} while (0)
KEYWORD(true); KEYWORD(false);
@@ -597,7 +597,9 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(byval);
KEYWORD(inalloca);
KEYWORD(cold);
+ KEYWORD(convergent);
KEYWORD(dereferenceable);
+ KEYWORD(dereferenceable_or_null);
KEYWORD(inlinehint);
KEYWORD(inreg);
KEYWORD(jumptable);
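// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] What the
// StringRef comparison buys over the old Len/strlen/memcmp dance: a single
// operator== that compares lengths first and bytes second. Sketched with
// std::string_view (C++17); the helper name is hypothetical.
#include <string_view>

bool isKeyword(std::string_view Tok, std::string_view KW) {
  return Tok == KW; // length check, then content check; no strlen calls
}
// ---------------------------------------------------------------------------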
@@ -669,9 +671,13 @@ lltok::Kind LLLexer::LexIdentifier() {
#undef KEYWORD
// Keywords for types.
-#define TYPEKEYWORD(STR, LLVMTY) \
- if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
- TyVal = LLVMTY; return lltok::Type; }
+#define TYPEKEYWORD(STR, LLVMTY) \
+ do { \
+ if (Keyword == STR) { \
+ TyVal = LLVMTY; \
+ return lltok::Type; \
+ } \
+ } while (false)
TYPEKEYWORD("void", Type::getVoidTy(Context));
TYPEKEYWORD("half", Type::getHalfTy(Context));
TYPEKEYWORD("float", Type::getFloatTy(Context));
@@ -685,9 +691,13 @@ lltok::Kind LLLexer::LexIdentifier() {
#undef TYPEKEYWORD
// Keywords for instructions.
-#define INSTKEYWORD(STR, Enum) \
- if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
- UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+#define INSTKEYWORD(STR, Enum) \
+ do { \
+ if (Keyword == #STR) { \
+ UIntVal = Instruction::Enum; \
+ return lltok::kw_##STR; \
+ } \
+ } while (false)
INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
@@ -739,6 +749,25 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(landingpad, LandingPad);
#undef INSTKEYWORD
+#define DWKEYWORD(TYPE, TOKEN) \
+ do { \
+ if (Keyword.startswith("DW_" #TYPE "_")) { \
+ StrVal.assign(Keyword.begin(), Keyword.end()); \
+ return lltok::TOKEN; \
+ } \
+ } while (false)
+ DWKEYWORD(TAG, DwarfTag);
+ DWKEYWORD(ATE, DwarfAttEncoding);
+ DWKEYWORD(VIRTUALITY, DwarfVirtuality);
+ DWKEYWORD(LANG, DwarfLang);
+ DWKEYWORD(OP, DwarfOp);
+#undef DWKEYWORD
+
+ if (Keyword.startswith("DIFlag")) {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::DIFlag;
+ }
+
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
// the CFE to avoid forcing it to deal with 64-bit numbers.
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h
index 3343168..90bf17d 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.h
+++ b/contrib/llvm/lib/AsmParser/LLLexer.h
@@ -45,7 +45,6 @@ namespace llvm {
public:
explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
- ~LLLexer() {}
lltok::Kind Lex() {
return CurKind = LexToken();
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index d209a20..a52e20f 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -16,6 +16,8 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -23,6 +25,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
@@ -115,10 +118,10 @@ bool LLParser::ValidateEndOfModule() {
return Error(ForwardRefBlockAddresses.begin()->first.Loc,
"expected function name in blockaddress");
- for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
- if (NumberedTypes[i].second.isValid())
- return Error(NumberedTypes[i].second,
- "use of undefined type '%" + Twine(i) + "'");
+ for (const auto &NT : NumberedTypes)
+ if (NT.second.second.isValid())
+ return Error(NT.second.second,
+ "use of undefined type '%" + Twine(NT.first) + "'");
for (StringMap<std::pair<Type*, LocTy> >::iterator I =
NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
@@ -147,9 +150,10 @@ bool LLParser::ValidateEndOfModule() {
Twine(ForwardRefMDNodes.begin()->first) + "'");
// Resolve metadata cycles.
- for (auto &N : NumberedMetadata)
- if (auto *U = cast_or_null<UniquableMDNode>(N))
- U->resolveCycles();
+ for (auto &N : NumberedMetadata) {
+ if (N.second && !N.second->isResolved())
+ N.second->resolveCycles();
+ }
// Look for intrinsic functions and CallInst that need to be upgraded
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
@@ -302,9 +306,6 @@ bool LLParser::ParseUnnamedType() {
ParseToken(lltok::kw_type, "expected 'type' after '='"))
return true;
- if (TypeID >= NumberedTypes.size())
- NumberedTypes.resize(TypeID+1);
-
Type *Result = nullptr;
if (ParseStructDefinition(TypeLoc, "",
NumberedTypes[TypeID], Result)) return true;
@@ -359,13 +360,14 @@ bool LLParser::ParseDeclare() {
}
/// toplevelentity
-/// ::= 'define' FunctionHeader '{' ...
+/// ::= 'define' FunctionHeader (!dbg !56)* '{' ...
bool LLParser::ParseDefine() {
assert(Lex.getKind() == lltok::kw_define);
Lex.Lex();
Function *F;
return ParseFunctionHeader(F, true) ||
+ ParseOptionalFunctionMetadata(*F) ||
ParseFunctionBody(*F);
}
@@ -525,19 +527,17 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
return true;
// If not a forward reference, just return it now.
- if (MID < NumberedMetadata.size() && NumberedMetadata[MID] != nullptr) {
+ if (NumberedMetadata.count(MID)) {
Result = NumberedMetadata[MID];
return false;
}
// Otherwise, create MDNode forward reference.
- MDNodeFwdDecl *FwdNode = MDNodeFwdDecl::get(Context, None);
- ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+ auto &FwdRef = ForwardRefMDNodes[MID];
+ FwdRef = std::make_pair(MDTuple::getTemporary(Context, None), Lex.getLoc());
- if (NumberedMetadata.size() <= MID)
- NumberedMetadata.resize(MID+1);
- NumberedMetadata[MID].reset(FwdNode);
- Result = FwdNode;
+ Result = FwdRef.first.get();
+ NumberedMetadata[MID].reset(Result);
return false;
}
@@ -564,10 +564,7 @@ bool LLParser::ParseNamedMetadata() {
NMD->addOperand(N);
} while (EatIfPresent(lltok::comma));
- if (ParseToken(lltok::rbrace, "expected end of metadata node"))
- return true;
-
- return false;
+ return ParseToken(lltok::rbrace, "expected end of metadata node");
}
/// ParseStandaloneMetadata:
@@ -597,17 +594,12 @@ bool LLParser::ParseStandaloneMetadata() {
// See if this was forward referenced, if so, handle it.
auto FI = ForwardRefMDNodes.find(MetadataID);
if (FI != ForwardRefMDNodes.end()) {
- MDNodeFwdDecl *Temp = FI->second.first;
- Temp->replaceAllUsesWith(Init);
- delete Temp;
+ FI->second.first->replaceAllUsesWith(Init);
ForwardRefMDNodes.erase(FI);
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
} else {
- if (MetadataID >= NumberedMetadata.size())
- NumberedMetadata.resize(MetadataID+1);
-
- if (NumberedMetadata[MetadataID] != nullptr)
+ if (NumberedMetadata.count(MetadataID))
return TokError("Metadata id is already used");
NumberedMetadata[MetadataID].reset(Init);
}
@@ -668,13 +660,11 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
auto *PTy = dyn_cast<PointerType>(AliaseeType);
if (!PTy)
return Error(AliaseeLoc, "An alias must have pointer type");
- Type *Ty = PTy->getElementType();
- unsigned AddrSpace = PTy->getAddressSpace();
// Okay, create the alias but do not insert it into the module yet.
std::unique_ptr<GlobalAlias> GA(
- GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage,
- Name, Aliasee, /*Parent*/ nullptr));
+ GlobalAlias::create(PTy, (GlobalValue::LinkageTypes)Linkage, Name,
+ Aliasee, /*Parent*/ nullptr));
GA->setThreadLocalMode(TLM);
GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
@@ -756,7 +746,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return true;
}
- if (Ty->isFunctionTy() || Ty->isLabelTy())
+ if (Ty->isFunctionTy() || !PointerType::isValidElementType(Ty))
return Error(TyLoc, "invalid type for global variable");
GlobalValue *GVal = nullptr;
@@ -783,7 +773,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
Name, nullptr, GlobalVariable::NotThreadLocal,
AddrSpace);
} else {
- if (GVal->getType()->getElementType() != Ty)
+ if (GVal->getValueType() != Ty)
return Error(TyLoc,
"forward reference and definition of global have different types");
@@ -947,6 +937,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
+ case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break;
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
@@ -982,6 +973,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
break;
case lltok::kw_byval:
case lltok::kw_dereferenceable:
+ case lltok::kw_dereferenceable_or_null:
case lltok::kw_inalloca:
case lltok::kw_nest:
case lltok::kw_noalias:
@@ -1226,11 +1218,18 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDereferenceableBytes(Bytes))
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
+ case lltok::kw_dereferenceable_or_null: {
+ uint64_t Bytes;
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ return true;
+ B.addDereferenceableOrNullAttr(Bytes);
+ continue;
+ }
case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break;
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
@@ -1290,11 +1289,18 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
return HaveError;
case lltok::kw_dereferenceable: {
uint64_t Bytes;
- if (ParseOptionalDereferenceableBytes(Bytes))
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
return true;
B.addDereferenceableAttr(Bytes);
continue;
}
+ case lltok::kw_dereferenceable_or_null: {
+ uint64_t Bytes;
+ if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes))
+ return true;
+ B.addDereferenceableOrNullAttr(Bytes);
+ continue;
+ }
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
@@ -1481,31 +1487,53 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
return false;
}
+/// ParseMetadataAttachment
+/// ::= !dbg !42
+bool LLParser::ParseMetadataAttachment(unsigned &Kind, MDNode *&MD) {
+ assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata attachment");
+
+ std::string Name = Lex.getStrVal();
+ Kind = M->getMDKindID(Name);
+ Lex.Lex();
+
+ return ParseMDNode(MD);
+}
+
/// ParseInstructionMetadata
/// ::= !dbg !42 (',' !dbg !57)*
-bool LLParser::ParseInstructionMetadata(Instruction *Inst,
- PerFunctionState *PFS) {
+bool LLParser::ParseInstructionMetadata(Instruction &Inst) {
do {
if (Lex.getKind() != lltok::MetadataVar)
return TokError("expected metadata after comma");
- std::string Name = Lex.getStrVal();
- unsigned MDK = M->getMDKindID(Name);
- Lex.Lex();
-
+ unsigned MDK;
MDNode *N;
- if (ParseMDNode(N))
+ if (ParseMetadataAttachment(MDK, N))
return true;
- Inst->setMetadata(MDK, N);
+ Inst.setMetadata(MDK, N);
if (MDK == LLVMContext::MD_tbaa)
- InstsWithTBAATag.push_back(Inst);
+ InstsWithTBAATag.push_back(&Inst);
// If this is the end of the list, we're done.
} while (EatIfPresent(lltok::comma));
return false;
}
+/// ParseOptionalFunctionMetadata
+/// ::= (!dbg !57)*
+bool LLParser::ParseOptionalFunctionMetadata(Function &F) {
+ while (Lex.getKind() == lltok::MetadataVar) {
+ unsigned MDK;
+ MDNode *N;
+ if (ParseMetadataAttachment(MDK, N))
+ return true;
+
+ F.setMetadata(MDK, N);
+ }
+ return false;
+}
+
/// ParseOptionalAlignment
/// ::= /* empty */
/// ::= 'align' 4
@@ -1522,12 +1550,19 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
return false;
}
-/// ParseOptionalDereferenceableBytes
+/// ParseOptionalDerefAttrBytes
/// ::= /* empty */
-/// ::= 'dereferenceable' '(' 4 ')'
-bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) {
+/// ::= AttrKind '(' 4 ')'
+///
+/// where AttrKind is either 'dereferenceable' or 'dereferenceable_or_null'.
+bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind,
+ uint64_t &Bytes) {
+ assert((AttrKind == lltok::kw_dereferenceable ||
+ AttrKind == lltok::kw_dereferenceable_or_null) &&
+ "contract!");
+
Bytes = 0;
- if (!EatIfPresent(lltok::kw_dereferenceable))
+ if (!EatIfPresent(AttrKind))
return false;
LocTy ParenLoc = Lex.getLoc();
if (!EatIfPresent(lltok::lparen))
@@ -1640,6 +1675,7 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
while (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::MetadataVar) {
+ if (Indices.empty()) return TokError("expected index");
AteExtraComma = true;
return false;
}
@@ -1704,8 +1740,6 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
case lltok::LocalVarID: {
// Type ::= %4
- if (Lex.getUIntVal() >= NumberedTypes.size())
- NumberedTypes.resize(Lex.getUIntVal()+1);
std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
// If the type hasn't been defined yet, create a forward definition and
@@ -2277,13 +2311,13 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
/// forward reference record if needed.
BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(Name,
- Type::getLabelTy(F.getContext()), Loc));
+ return dyn_cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
}
BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(ID,
- Type::getLabelTy(F.getContext()), Loc));
+ return dyn_cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
}
/// DefineBB - Define the specified basic block, which is either named or
@@ -2519,7 +2553,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (!F) {
// Make a global variable as a placeholder for this reference.
- GlobalValue *&FwdRef = ForwardRefBlockAddresses[Fn][Label];
+ GlobalValue *&FwdRef =
+ ForwardRefBlockAddresses.insert(std::make_pair(
+ std::move(Fn),
+ std::map<ValID, GlobalValue *>()))
+ .first->second.insert(std::make_pair(std::move(Label), nullptr))
+ .first->second;
if (!FwdRef)
FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false,
GlobalValue::InternalLinkage, nullptr, "");
@@ -2616,8 +2655,15 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (!Val0->getType()->isAggregateType())
return Error(ID.Loc, "insertvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ Type *IndexedType =
+ ExtractValueInst::getIndexedType(Val0->getType(), Indices);
+ if (!IndexedType)
return Error(ID.Loc, "invalid indices for insertvalue");
+ if (IndexedType != Val1->getType())
+ return Error(ID.Loc, "insertvalue operand and field disagree in type: '" +
+ getTypeString(Val1->getType()) +
+ "' instead of '" + getTypeString(IndexedType) +
+ "'");
ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
ID.Kind = ValID::t_Constant;
return false;
@@ -2772,24 +2818,63 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Opc = Lex.getUIntVal();
SmallVector<Constant*, 16> Elts;
bool InBounds = false;
+ Type *Ty;
Lex.Lex();
+
if (Opc == Instruction::GetElementPtr)
InBounds = EatIfPresent(lltok::kw_inbounds);
- if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
- ParseGlobalValueVector(Elts) ||
+
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr"))
+ return true;
+
+ LocTy ExplicitTypeLoc = Lex.getLoc();
+ if (Opc == Instruction::GetElementPtr) {
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after getelementptr's type"))
+ return true;
+ }
+
+ if (ParseGlobalValueVector(Elts) ||
ParseToken(lltok::rparen, "expected ')' in constantexpr"))
return true;
if (Opc == Instruction::GetElementPtr) {
if (Elts.size() == 0 ||
!Elts[0]->getType()->getScalarType()->isPointerTy())
- return Error(ID.Loc, "getelementptr requires pointer operand");
+ return Error(ID.Loc, "base of getelementptr must be a pointer");
+
+ Type *BaseType = Elts[0]->getType();
+ auto *BasePointerType = cast<PointerType>(BaseType->getScalarType());
+ if (Ty != BasePointerType->getElementType())
+ return Error(
+ ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
- if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices))
- return Error(ID.Loc, "invalid indices for getelementptr");
- ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices,
- InBounds);
+ for (Constant *Val : Indices) {
+ Type *ValTy = Val->getType();
+ if (!ValTy->getScalarType()->isIntegerTy())
+ return Error(ID.Loc, "getelementptr index must be an integer");
+ if (ValTy->isVectorTy() != BaseType->isVectorTy())
+        return Error(ID.Loc, "getelementptr index type mismatch");
+ if (ValTy->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(ValTy)->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(BaseType)->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(
+ ID.Loc,
+ "getelementptr vector index has a wrong number of elements");
+ }
+ }
+
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!Indices.empty() && !Ty->isSized(&Visited))
+ return Error(ID.Loc, "base element of getelementptr must be sized");
+
+ if (!GetElementPtrInst::getIndexedType(Ty, Indices))
+ return Error(ID.Loc, "invalid getelementptr indices");
+ ID.ConstantVal =
+ ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to select");
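// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The surface
// syntax the getelementptr branch above now parses: the pointee type is
// spelled explicitly as the first argument and is checked against the base
// pointer's element type. Illustrative IR for a global array:
//   @arr = global [4 x i32] zeroinitializer
//   ; explicit leading [4 x i32] type, checked against @arr's pointee type:
//   getelementptr inbounds ([4 x i32], [4 x i32]* @arr, i64 0, i64 1)
// ---------------------------------------------------------------------------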
@@ -2905,7 +2990,7 @@ bool LLParser::ParseMDTuple(MDNode *&MD, bool IsDistinct) {
/// MDNode:
/// ::= !{ ... }
/// ::= !7
-/// ::= !MDLocation(...)
+/// ::= !DILocation(...)
bool LLParser::ParseMDNode(MDNode *&N) {
if (Lex.getKind() == lltok::MetadataVar)
return ParseSpecializedMDNode(N);
@@ -2923,28 +3008,269 @@ bool LLParser::ParseMDNodeTail(MDNode *&N) {
return ParseMDNodeID(N);
}
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
- MDUnsignedField<uint32_t> &Result) {
- if (Result.Seen)
- return Error(Loc,
- "field '" + Name + "' cannot be specified more than once");
+namespace {
+
+/// Structure to represent an optional metadata field.
+template <class FieldTy> struct MDFieldImpl {
+ typedef MDFieldImpl ImplTy;
+ FieldTy Val;
+ bool Seen;
+
+ void assign(FieldTy Val) {
+ Seen = true;
+ this->Val = std::move(Val);
+ }
+ explicit MDFieldImpl(FieldTy Default)
+ : Val(std::move(Default)), Seen(false) {}
+};
+
+struct MDUnsignedField : public MDFieldImpl<uint64_t> {
+ uint64_t Max;
+
+ MDUnsignedField(uint64_t Default = 0, uint64_t Max = UINT64_MAX)
+ : ImplTy(Default), Max(Max) {}
+};
+struct LineField : public MDUnsignedField {
+ LineField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+struct ColumnField : public MDUnsignedField {
+ ColumnField() : MDUnsignedField(0, UINT16_MAX) {}
+};
+struct DwarfTagField : public MDUnsignedField {
+ DwarfTagField() : MDUnsignedField(0, dwarf::DW_TAG_hi_user) {}
+ DwarfTagField(dwarf::Tag DefaultTag)
+ : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {}
+};
+struct DwarfAttEncodingField : public MDUnsignedField {
+ DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {}
+};
+struct DwarfVirtualityField : public MDUnsignedField {
+ DwarfVirtualityField() : MDUnsignedField(0, dwarf::DW_VIRTUALITY_max) {}
+};
+struct DwarfLangField : public MDUnsignedField {
+ DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
+};
+
+struct DIFlagField : public MDUnsignedField {
+ DIFlagField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
+struct MDSignedField : public MDFieldImpl<int64_t> {
+ int64_t Min;
+ int64_t Max;
+
+ MDSignedField(int64_t Default = 0)
+ : ImplTy(Default), Min(INT64_MIN), Max(INT64_MAX) {}
+ MDSignedField(int64_t Default, int64_t Min, int64_t Max)
+ : ImplTy(Default), Min(Min), Max(Max) {}
+};
+
+struct MDBoolField : public MDFieldImpl<bool> {
+ MDBoolField(bool Default = false) : ImplTy(Default) {}
+};
+struct MDField : public MDFieldImpl<Metadata *> {
+ bool AllowNull;
+
+ MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {}
+};
+struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> {
+ MDConstant() : ImplTy(nullptr) {}
+};
+struct MDStringField : public MDFieldImpl<MDString *> {
+ bool AllowEmpty;
+ MDStringField(bool AllowEmpty = true)
+ : ImplTy(nullptr), AllowEmpty(AllowEmpty) {}
+};
+struct MDFieldList : public MDFieldImpl<SmallVector<Metadata *, 4>> {
+ MDFieldList() : ImplTy(SmallVector<Metadata *, 4>()) {}
+};
+
+} // end namespace
+
+namespace llvm {
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ MDUnsignedField &Result) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
return TokError("expected unsigned integer");
- uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(Result.Max + 1ull);
- if (Val64 > Result.Max)
+ auto &U = Lex.getAPSIntVal();
+ if (U.ugt(Result.Max))
return TokError("value for '" + Name + "' too large, limit is " +
Twine(Result.Max));
- Result.assign(Val64);
+ Result.assign(U.getZExtValue());
+ assert(Result.Val <= Result.Max && "Expected value in range");
Lex.Lex();
return false;
}
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, LineField &Result) {
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+}
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, ColumnField &Result) {
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfTag)
+ return TokError("expected DWARF tag");
+
+ unsigned Tag = dwarf::getTag(Lex.getStrVal());
+ if (Tag == dwarf::DW_TAG_invalid)
+ return TokError("invalid DWARF tag" + Twine(" '") + Lex.getStrVal() + "'");
+ assert(Tag <= Result.Max && "Expected valid DWARF tag");
+
+ Result.assign(Tag);
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ DwarfVirtualityField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfVirtuality)
+ return TokError("expected DWARF virtuality code");
+
+ unsigned Virtuality = dwarf::getVirtuality(Lex.getStrVal());
+ if (!Virtuality)
+ return TokError("invalid DWARF virtuality code" + Twine(" '") +
+ Lex.getStrVal() + "'");
+ assert(Virtuality <= Result.Max && "Expected valid DWARF virtuality code");
+ Result.assign(Virtuality);
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfLang)
+ return TokError("expected DWARF language");
+
+ unsigned Lang = dwarf::getLanguage(Lex.getStrVal());
+ if (!Lang)
+ return TokError("invalid DWARF language" + Twine(" '") + Lex.getStrVal() +
+ "'");
+ assert(Lang <= Result.Max && "Expected valid DWARF language");
+ Result.assign(Lang);
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ DwarfAttEncodingField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfAttEncoding)
+ return TokError("expected DWARF type attribute encoding");
+
+ unsigned Encoding = dwarf::getAttributeEncoding(Lex.getStrVal());
+ if (!Encoding)
+ return TokError("invalid DWARF type attribute encoding" + Twine(" '") +
+ Lex.getStrVal() + "'");
+  assert(Encoding <= Result.Max && "Expected valid DWARF attribute encoding");
+ Result.assign(Encoding);
+ Lex.Lex();
+ return false;
+}
+
+/// DIFlagField
+/// ::= uint32
+/// ::= DIFlagVector
+/// ::= DIFlagVector '|' DIFlagFwdDecl '|' uint32 '|' DIFlagPublic
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
+ assert(Result.Max == UINT32_MAX && "Expected only 32-bits");
+
+ // Parser for a single flag.
+ auto parseFlag = [&](unsigned &Val) {
+ if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned())
+ return ParseUInt32(Val);
+
+ if (Lex.getKind() != lltok::DIFlag)
+ return TokError("expected debug info flag");
+
+ Val = DINode::getFlag(Lex.getStrVal());
+ if (!Val)
+      return TokError(Twine("invalid debug info flag '") +
+ Lex.getStrVal() + "'");
+ Lex.Lex();
+ return false;
+ };
+
+ // Parse the flags and combine them together.
+ unsigned Combined = 0;
+ do {
+ unsigned Val;
+ if (parseFlag(Val))
+ return true;
+ Combined |= Val;
+ } while (EatIfPresent(lltok::bar));
+
+ Result.assign(Combined);
+ return false;
+}
+
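// ---------------------------------------------------------------------------
// [Editor's aside - illustrative only, not part of this patch.] The
// accumulate-over-'|' loop above, reduced to its essence: each flag token
// contributes one bitmask and the field value is their bitwise OR. The
// helper name is hypothetical.
#include <cstdint>

uint32_t combineFlags(const uint32_t *Flags, unsigned N) {
  uint32_t Combined = 0;
  for (unsigned I = 0; I != N; ++I)
    Combined |= Flags[I]; // mirrors: Combined |= Val; ... while ('|')
  return Combined;
}
// ---------------------------------------------------------------------------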
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ MDSignedField &Result) {
+ if (Lex.getKind() != lltok::APSInt)
+ return TokError("expected signed integer");
+
+ auto &S = Lex.getAPSIntVal();
+ if (S < Result.Min)
+ return TokError("value for '" + Name + "' too small, limit is " +
+ Twine(Result.Min));
+ if (S > Result.Max)
+ return TokError("value for '" + Name + "' too large, limit is " +
+ Twine(Result.Max));
+ Result.assign(S.getExtValue());
+ assert(Result.Val >= Result.Min && "Expected value in range");
+ assert(Result.Val <= Result.Max && "Expected value in range");
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDBoolField &Result) {
+ switch (Lex.getKind()) {
+ default:
+ return TokError("expected 'true' or 'false'");
+ case lltok::kw_true:
+ Result.assign(true);
+ break;
+ case lltok::kw_false:
+ Result.assign(false);
+ break;
+ }
+ Lex.Lex();
+ return false;
+}
+
+template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
- if (Result.Seen)
- return Error(Loc,
- "field '" + Name + "' cannot be specified more than once");
+ if (Lex.getKind() == lltok::kw_null) {
+ if (!Result.AllowNull)
+ return TokError("'" + Name + "' cannot be null");
+ Lex.Lex();
+ Result.assign(nullptr);
+ return false;
+ }
Metadata *MD;
if (ParseMetadata(MD, nullptr))
@@ -2954,16 +3280,44 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
return false;
}
-template <class ParserTy>
-bool LLParser::ParseMDFieldsImpl(ParserTy parseField) {
- assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
- Lex.Lex();
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDConstant &Result) {
+ Metadata *MD;
+ if (ParseValueAsMetadata(MD, "expected constant", nullptr))
+ return true;
- if (ParseToken(lltok::lparen, "expected '(' here"))
+ Result.assign(cast<ConstantAsMetadata>(MD));
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) {
+ LocTy ValueLoc = Lex.getLoc();
+ std::string S;
+ if (ParseStringConstant(S))
return true;
- if (EatIfPresent(lltok::rparen))
- return false;
+ if (!Result.AllowEmpty && S.empty())
+ return Error(ValueLoc, "'" + Name + "' cannot be empty");
+
+ Result.assign(S.empty() ? nullptr : MDString::get(Context, S));
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDFieldList &Result) {
+ SmallVector<Metadata *, 4> MDs;
+ if (ParseMDNodeVector(MDs))
+ return true;
+
+ Result.assign(std::move(MDs));
+ return false;
+}
+
+} // end namespace llvm
+
+template <class ParserTy>
+bool LLParser::ParseMDFieldsImplBody(ParserTy parseField) {
do {
if (Lex.getKind() != lltok::LabelStr)
return TokError("expected field label here");
@@ -2972,56 +3326,502 @@ bool LLParser::ParseMDFieldsImpl(ParserTy parseField) {
return true;
} while (EatIfPresent(lltok::comma));
+ return false;
+}
+
+template <class ParserTy>
+bool LLParser::ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc) {
+ assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
+ Lex.Lex();
+
+ if (ParseToken(lltok::lparen, "expected '(' here"))
+ return true;
+ if (Lex.getKind() != lltok::rparen)
+ if (ParseMDFieldsImplBody(parseField))
+ return true;
+
+ ClosingLoc = Lex.getLoc();
return ParseToken(lltok::rparen, "expected ')' here");
}
+template <class FieldTy>
+bool LLParser::ParseMDField(StringRef Name, FieldTy &Result) {
+ if (Result.Seen)
+ return TokError("field '" + Name + "' cannot be specified more than once");
+
+ LocTy Loc = Lex.getLoc();
+ Lex.Lex();
+ return ParseMDField(Loc, Name, Result);
+}
+
bool LLParser::ParseSpecializedMDNode(MDNode *&N, bool IsDistinct) {
assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
-#define DISPATCH_TO_PARSER(CLASS) \
+
+#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \
if (Lex.getStrVal() == #CLASS) \
return Parse##CLASS(N, IsDistinct);
-
- DISPATCH_TO_PARSER(MDLocation);
-#undef DISPATCH_TO_PARSER
+#include "llvm/IR/Metadata.def"
return TokError("expected metadata type");
}
-#define PARSE_MD_FIELD(NAME) \
+#define DECLARE_FIELD(NAME, TYPE, INIT) TYPE NAME INIT
+#define NOP_FIELD(NAME, TYPE, INIT)
+#define REQUIRE_FIELD(NAME, TYPE, INIT) \
+ if (!NAME.Seen) \
+ return Error(ClosingLoc, "missing required field '" #NAME "'");
+#define PARSE_MD_FIELD(NAME, TYPE, DEFAULT) \
+ if (Lex.getStrVal() == #NAME) \
+ return ParseMDField(#NAME, NAME);
+#define PARSE_MD_FIELDS() \
+ VISIT_MD_FIELDS(DECLARE_FIELD, DECLARE_FIELD) \
do { \
- if (Lex.getStrVal() == #NAME) { \
- LocTy Loc = Lex.getLoc(); \
- Lex.Lex(); \
- if (ParseMDField(Loc, #NAME, NAME)) \
- return true; \
- return false; \
- } \
- } while (0)
-
-/// ParseMDLocationFields:
-/// ::= !MDLocation(line: 43, column: 8, scope: !5, inlinedAt: !6)
-bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) {
- MDUnsignedField<uint32_t> line(0, ~0u >> 8);
- MDUnsignedField<uint32_t> column(0, ~0u >> 24);
- MDField scope;
- MDField inlinedAt;
- if (ParseMDFieldsImpl([&]() -> bool {
- PARSE_MD_FIELD(line);
- PARSE_MD_FIELD(column);
- PARSE_MD_FIELD(scope);
- PARSE_MD_FIELD(inlinedAt);
- return TokError(Twine("invalid field '") + Lex.getStrVal() + "'");
- }))
+ LocTy ClosingLoc; \
+ if (ParseMDFieldsImpl([&]() -> bool { \
+ VISIT_MD_FIELDS(PARSE_MD_FIELD, PARSE_MD_FIELD) \
+ return TokError(Twine("invalid field '") + Lex.getStrVal() + "'"); \
+ }, ClosingLoc)) \
+ return true; \
+ VISIT_MD_FIELDS(NOP_FIELD, REQUIRE_FIELD) \
+ } while (false)
+#define GET_OR_DISTINCT(CLASS, ARGS) \
+ (IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
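+
+// A rough sketch of what PARSE_MD_FIELDS() expands to for a node declaring,
+// say, an optional 'line' and a required 'scope' via VISIT_MD_FIELDS:
+//
+//   LineField line;
+//   MDField scope(/* AllowNull */ false);
+//   LocTy ClosingLoc;
+//   if (ParseMDFieldsImpl([&]() -> bool {
+//         if (Lex.getStrVal() == "line") return ParseMDField("line", line);
+//         if (Lex.getStrVal() == "scope") return ParseMDField("scope", scope);
+//         return TokError(Twine("invalid field '") + Lex.getStrVal() + "'");
+//       }, ClosingLoc))
+//     return true;
+//   if (!scope.Seen)
+//     return Error(ClosingLoc, "missing required field 'scope'");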
+
+/// ParseDILocation:
+/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6)
+bool LLParser::ParseDILocation(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(column, ColumnField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(inlinedAt, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(
+ DILocation, (Context, line.Val, column.Val, scope.Val, inlinedAt.Val));
+ return false;
+}
+
+/// ParseGenericDINode:
+/// ::= !GenericDINode(tag: 15, header: "...", operands: {...})
+bool LLParser::ParseGenericDINode(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(tag, DwarfTagField, ); \
+ OPTIONAL(header, MDStringField, ); \
+ OPTIONAL(operands, MDFieldList, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(GenericDINode,
+ (Context, tag.Val, header.Val, operands.Val));
+ return false;
+}
+
+/// ParseDISubrange:
+/// ::= !DISubrange(count: 30, lowerBound: 2)
+bool LLParser::ParseDISubrange(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(count, MDSignedField, (-1, -1, INT64_MAX)); \
+ OPTIONAL(lowerBound, MDSignedField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DISubrange, (Context, count.Val, lowerBound.Val));
+ return false;
+}
+
+/// ParseDIEnumerator:
+/// ::= !DIEnumerator(value: 30, name: "SomeKind")
+bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(name, MDStringField, ); \
+ REQUIRED(value, MDSignedField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIEnumerator, (Context, value.Val, name.Val));
+ return false;
+}
+
+/// ParseDIBasicType:
+/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32)
+bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(encoding, DwarfAttEncodingField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIBasicType, (Context, tag.Val, name.Val, size.Val,
+ align.Val, encoding.Val));
+ return false;
+}
+
+/// ParseDIDerivedType:
+/// ::= !DIDerivedType(tag: DW_TAG_pointer_type, name: "int", file: !0,
+/// line: 7, scope: !1, baseType: !2, size: 32,
+/// align: 32, offset: 0, flags: 0, extraData: !3)
+bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(tag, DwarfTagField, ); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(scope, MDField, ); \
+ REQUIRED(baseType, MDField, ); \
+ OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(extraData, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIDerivedType,
+ (Context, tag.Val, name.Val, file.Val, line.Val,
+ scope.Val, baseType.Val, size.Val, align.Val,
+ offset.Val, flags.Val, extraData.Val));
+ return false;
+}
+
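+/// ParseDICompositeType:
+///   ::= !DICompositeType(tag: DW_TAG_structure_type, name: "MyType",
+///                        file: !0, line: 2, size: 64, align: 32,
+///                        elements: !3, identifier: "_M4Type")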
+bool LLParser::ParseDICompositeType(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(tag, DwarfTagField, ); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(scope, MDField, ); \
+ OPTIONAL(baseType, MDField, ); \
+ OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(elements, MDField, ); \
+ OPTIONAL(runtimeLang, DwarfLangField, ); \
+ OPTIONAL(vtableHolder, MDField, ); \
+ OPTIONAL(templateParams, MDField, ); \
+ OPTIONAL(identifier, MDStringField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(
+ DICompositeType,
+ (Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, baseType.Val,
+ size.Val, align.Val, offset.Val, flags.Val, elements.Val,
+ runtimeLang.Val, vtableHolder.Val, templateParams.Val, identifier.Val));
+ return false;
+}
+
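+/// ParseDISubroutineType:
+///   ::= !DISubroutineType(flags: 0, types: !0)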
+bool LLParser::ParseDISubroutineType(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(flags, DIFlagField, ); \
+ REQUIRED(types, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DISubroutineType, (Context, flags.Val, types.Val));
+ return false;
+}
+
+/// ParseDIFile:
+/// ::= !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(filename, MDStringField, ); \
+ REQUIRED(directory, MDStringField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIFile, (Context, filename.Val, directory.Val));
+ return false;
+}
+
+/// ParseDICompileUnit:
+/// ::= !DICompileUnit(language: DW_LANG_C99, file: !0, producer: "clang",
+/// isOptimized: true, flags: "-O2", runtimeVersion: 1,
+/// splitDebugFilename: "abc.debug", emissionKind: 1,
+/// enums: !1, retainedTypes: !2, subprograms: !3,
+/// globals: !4, imports: !5, dwoId: 0x0abcd)
+bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(language, DwarfLangField, ); \
+ REQUIRED(file, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(producer, MDStringField, ); \
+ OPTIONAL(isOptimized, MDBoolField, ); \
+ OPTIONAL(flags, MDStringField, ); \
+ OPTIONAL(runtimeVersion, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(splitDebugFilename, MDStringField, ); \
+ OPTIONAL(emissionKind, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(enums, MDField, ); \
+ OPTIONAL(retainedTypes, MDField, ); \
+ OPTIONAL(subprograms, MDField, ); \
+ OPTIONAL(globals, MDField, ); \
+ OPTIONAL(imports, MDField, ); \
+ OPTIONAL(dwoId, MDUnsignedField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DICompileUnit,
+ (Context, language.Val, file.Val, producer.Val,
+ isOptimized.Val, flags.Val, runtimeVersion.Val,
+ splitDebugFilename.Val, emissionKind.Val, enums.Val,
+ retainedTypes.Val, subprograms.Val, globals.Val,
+ imports.Val, dwoId.Val));
+ return false;
+}
+
+/// ParseDISubprogram:
+/// ::= !DISubprogram(scope: !0, name: "foo", linkageName: "_Zfoo",
+/// file: !1, line: 7, type: !2, isLocal: false,
+/// isDefinition: true, scopeLine: 8, containingType: !3,
+/// virtuality: DW_VIRTUALITY_pure_virtual,
+/// virtualIndex: 10, flags: 11,
+/// isOptimized: false, function: void ()* @_Z3foov,
+/// templateParams: !4, declaration: !5, variables: !6)
+bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(scope, MDField, ); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(linkageName, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(type, MDField, ); \
+ OPTIONAL(isLocal, MDBoolField, ); \
+ OPTIONAL(isDefinition, MDBoolField, (true)); \
+ OPTIONAL(scopeLine, LineField, ); \
+ OPTIONAL(containingType, MDField, ); \
+ OPTIONAL(virtuality, DwarfVirtualityField, ); \
+ OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(isOptimized, MDBoolField, ); \
+ OPTIONAL(function, MDConstant, ); \
+ OPTIONAL(templateParams, MDField, ); \
+ OPTIONAL(declaration, MDField, ); \
+ OPTIONAL(variables, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(
+ DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val,
+ line.Val, type.Val, isLocal.Val, isDefinition.Val,
+ scopeLine.Val, containingType.Val, virtuality.Val,
+ virtualIndex.Val, flags.Val, isOptimized.Val, function.Val,
+ templateParams.Val, declaration.Val, variables.Val));
+ return false;
+}
+
+/// ParseDILexicalBlock:
+/// ::= !DILexicalBlock(scope: !0, file: !2, line: 7, column: 9)
+bool LLParser::ParseDILexicalBlock(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(column, ColumnField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(
+ DILexicalBlock, (Context, scope.Val, file.Val, line.Val, column.Val));
+ return false;
+}
+
+/// ParseDILexicalBlockFile:
+/// ::= !DILexicalBlockFile(scope: !0, file: !2, discriminator: 9)
+bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(file, MDField, ); \
+ REQUIRED(discriminator, MDUnsignedField, (0, UINT32_MAX));
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DILexicalBlockFile,
+ (Context, scope.Val, file.Val, discriminator.Val));
+ return false;
+}
+
+/// ParseDINamespace:
+/// ::= !DINamespace(scope: !0, file: !2, name: "SomeNamespace", line: 9)
+bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(scope, MDField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(line, LineField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DINamespace,
+ (Context, scope.Val, file.Val, name.Val, line.Val));
+ return false;
+}
+
+/// ParseDITemplateTypeParameter:
+/// ::= !DITemplateTypeParameter(name: "Ty", type: !1)
+bool LLParser::ParseDITemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(name, MDStringField, ); \
+ REQUIRED(type, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result =
+ GET_OR_DISTINCT(DITemplateTypeParameter, (Context, name.Val, type.Val));
+ return false;
+}
+
+/// ParseDITemplateValueParameter:
+/// ::= !DITemplateValueParameter(tag: DW_TAG_template_value_parameter,
+/// name: "V", type: !1, value: i32 7)
+bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_template_value_parameter)); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(type, MDField, ); \
+ REQUIRED(value, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DITemplateValueParameter,
+ (Context, tag.Val, name.Val, type.Val, value.Val));
+ return false;
+}
+
+/// ParseDIGlobalVariable:
+/// ::= !DIGlobalVariable(scope: !0, name: "foo", linkageName: "foo",
+/// file: !1, line: 7, type: !2, isLocal: false,
+/// isDefinition: true, variable: i32* @foo,
+/// declaration: !3)
+bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
+ OPTIONAL(scope, MDField, ); \
+ OPTIONAL(linkageName, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(type, MDField, ); \
+ OPTIONAL(isLocal, MDBoolField, ); \
+ OPTIONAL(isDefinition, MDBoolField, (true)); \
+ OPTIONAL(variable, MDConstant, ); \
+ OPTIONAL(declaration, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, scope.Val, name.Val, linkageName.Val,
+ file.Val, line.Val, type.Val, isLocal.Val,
+ isDefinition.Val, variable.Val, declaration.Val));
+ return false;
+}
+
+/// ParseDILocalVariable:
+/// ::= !DILocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo",
+/// file: !1, line: 7, type: !2, arg: 2, flags: 7)
+bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(tag, DwarfTagField, ); \
+ REQUIRED(scope, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(type, MDField, ); \
+ OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \
+ OPTIONAL(flags, DIFlagField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DILocalVariable,
+ (Context, tag.Val, scope.Val, name.Val, file.Val,
+ line.Val, type.Val, arg.Val, flags.Val));
+ return false;
+}
+
+/// ParseDIExpression:
+/// ::= !DIExpression(DW_OP_deref, DW_OP_plus, 8)
+bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
+ assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name");
+ Lex.Lex();
+
+ if (ParseToken(lltok::lparen, "expected '(' here"))
+ return true;
+
+ SmallVector<uint64_t, 8> Elements;
+ if (Lex.getKind() != lltok::rparen)
+ do {
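+      // Each element is either a DW_OP_* operation name or an unsigned
+      // integer operand, e.g. !DIExpression(DW_OP_plus, 8, DW_OP_deref).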
+ if (Lex.getKind() == lltok::DwarfOp) {
+ if (unsigned Op = dwarf::getOperationEncoding(Lex.getStrVal())) {
+ Lex.Lex();
+ Elements.push_back(Op);
+ continue;
+ }
+ return TokError(Twine("invalid DWARF op '") + Lex.getStrVal() + "'");
+ }
+
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected unsigned integer");
+
+ auto &U = Lex.getAPSIntVal();
+ if (U.ugt(UINT64_MAX))
+ return TokError("element too large, limit is " + Twine(UINT64_MAX));
+ Elements.push_back(U.getZExtValue());
+ Lex.Lex();
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
- if (!scope.Seen)
- return TokError("missing required field 'scope'");
+ Result = GET_OR_DISTINCT(DIExpression, (Context, Elements));
+ return false;
+}
- auto get = (IsDistinct ? MDLocation::getDistinct : MDLocation::get);
- Result = get(Context, line.Val, column.Val, scope.Val, inlinedAt.Val);
+/// ParseDIObjCProperty:
+/// ::= !DIObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo",
+/// getter: "getFoo", attributes: 7, type: !2)
+bool LLParser::ParseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(setter, MDStringField, ); \
+ OPTIONAL(getter, MDStringField, ); \
+ OPTIONAL(attributes, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(type, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIObjCProperty,
+ (Context, name.Val, file.Val, line.Val, setter.Val,
+ getter.Val, attributes.Val, type.Val));
return false;
}
+
+/// ParseDIImportedEntity:
+/// ::= !DIImportedEntity(tag: DW_TAG_imported_module, scope: !0, entity: !1,
+/// line: 7, name: "foo")
+bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(tag, DwarfTagField, ); \
+ REQUIRED(scope, MDField, ); \
+ OPTIONAL(entity, MDField, ); \
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(name, MDStringField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DIImportedEntity, (Context, tag.Val, scope.Val,
+ entity.Val, line.Val, name.Val));
+ return false;
+}
+
#undef PARSE_MD_FIELD
+#undef NOP_FIELD
+#undef REQUIRE_FIELD
+#undef DECLARE_FIELD
/// ParseMetadataAsValue
/// ::= metadata i32 %local
@@ -3044,10 +3844,11 @@ bool LLParser::ParseMetadataAsValue(Value *&V, PerFunctionState &PFS) {
/// ::= i32 %local
/// ::= i32 @global
/// ::= i32 7
-bool LLParser::ParseValueAsMetadata(Metadata *&MD, PerFunctionState *PFS) {
+bool LLParser::ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
+ PerFunctionState *PFS) {
Type *Ty;
LocTy Loc;
- if (ParseType(Ty, "expected metadata operand", Loc))
+ if (ParseType(Ty, TypeMsg, Loc))
return true;
if (Ty->isMetadataTy())
return Error(Loc, "invalid metadata-value-metadata roundtrip");
@@ -3067,7 +3868,7 @@ bool LLParser::ParseValueAsMetadata(Metadata *&MD, PerFunctionState *PFS) {
/// ::= !42
/// ::= !{...}
/// ::= !"string"
-/// ::= !MDLocation(...)
+/// ::= !DILocation(...)
bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
if (Lex.getKind() == lltok::MetadataVar) {
MDNode *N;
@@ -3080,7 +3881,7 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
// ValueAsMetadata:
// <type> <value>
if (Lex.getKind() != lltok::exclaim)
- return ParseValueAsMetadata(MD, PFS);
+ return ParseValueAsMetadata(MD, "expected metadata operand", PFS);
// '!'.
assert(Lex.getKind() == lltok::exclaim && "Expected '!' here");
@@ -3570,7 +4371,9 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
}
BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
- if (!BB) return true;
+ if (!BB)
+ return Error(NameLoc,
+ "unable to create block named '" + Name + "'");
std::string NameStr;
@@ -3604,7 +4407,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
- if (ParseInstructionMetadata(Inst, &PFS))
+ if (ParseInstructionMetadata(*Inst))
return true;
break;
case InstExtraComma:
@@ -3612,7 +4415,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
- if (ParseInstructionMetadata(Inst, &PFS))
+ if (ParseInstructionMetadata(*Inst))
return true;
break;
}
@@ -3958,10 +4761,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- PointerType *PFTy = nullptr;
- FunctionType *Ty = nullptr;
- if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
- !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ FunctionType *Ty = dyn_cast<FunctionType>(RetType);
+ if (!Ty) {
// Pull out the types of all of the arguments...
std::vector<Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
@@ -3971,12 +4772,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return Error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
- PFTy = PointerType::getUnqual(Ty);
}
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+ if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
+ return true;
// Set up the Attribute for the function.
SmallVector<AttributeSet, 8> Attrs;
@@ -4012,15 +4813,19 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
- if (FnAttrs.hasAttributes())
+ if (FnAttrs.hasAttributes()) {
+ if (FnAttrs.hasAlignmentAttr())
+ return Error(CallLoc, "invoke instructions may not have an alignment");
+
Attrs.push_back(AttributeSet::get(RetType->getContext(),
AttributeSet::FunctionIndex,
FnAttrs));
+ }
  // Finish off the Attributes and check them
AttributeSet PAL = AttributeSet::get(Context, Attrs);
- InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
+ InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args);
II->setCallingConv(CC);
II->setAttributes(PAL);
ForwardRefAttrGroups[II] = FwdRefAttrGrps;
@@ -4301,7 +5106,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(PersFn, PersFnLoc, PFS))
return true;
- LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0);
+ std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, PersFn, 0));
LP->setCleanup(EatIfPresent(lltok::kw_cleanup));
while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){
@@ -4315,10 +5120,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Value *V;
LocTy VLoc;
- if (ParseTypeAndValue(V, VLoc, PFS)) {
- delete LP;
+ if (ParseTypeAndValue(V, VLoc, PFS))
return true;
- }
// A 'catch' type expects a non-array constant. A filter clause expects an
// array constant.
@@ -4330,10 +5133,13 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Error(VLoc, "'filter' clause has an invalid type");
}
- LP->addClause(cast<Constant>(V));
+ Constant *CV = dyn_cast<Constant>(V);
+ if (!CV)
+ return Error(VLoc, "clause argument must be a constant");
+ LP->addClause(CV);
}
- Inst = LP;
+ Inst = LP.release();
return false;
}
@@ -4371,10 +5177,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- PointerType *PFTy = nullptr;
- FunctionType *Ty = nullptr;
- if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
- !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+ FunctionType *Ty = dyn_cast<FunctionType>(RetType);
+ if (!Ty) {
// Pull out the types of all of the arguments...
std::vector<Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
@@ -4384,12 +5188,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(RetTypeLoc, "Invalid result type for LLVM function");
Ty = FunctionType::get(RetType, ParamTypes, false);
- PFTy = PointerType::getUnqual(Ty);
}
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+ if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
+ return true;
// Set up the Attribute for the function.
SmallVector<AttributeSet, 8> Attrs;
@@ -4425,15 +5229,19 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
- if (FnAttrs.hasAttributes())
+ if (FnAttrs.hasAttributes()) {
+ if (FnAttrs.hasAlignmentAttr())
+ return Error(CallLoc, "call instructions may not have an alignment");
+
Attrs.push_back(AttributeSet::get(RetType->getContext(),
AttributeSet::FunctionIndex,
FnAttrs));
+ }
  // Finish off the Attributes and check them
AttributeSet PAL = AttributeSet::get(Context, Attrs);
- CallInst *CI = CallInst::Create(Callee, Args);
+ CallInst *CI = CallInst::Create(Ty, Callee, Args);
CI->setTailCallKind(TCK);
CI->setCallingConv(CC);
CI->setAttributes(PAL);
@@ -4450,13 +5258,16 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
/// ::= 'alloca' 'inalloca'? Type (',' TypeAndValue)? (',' 'align' i32)?
int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
Value *Size = nullptr;
- LocTy SizeLoc;
+ LocTy SizeLoc, TyLoc;
unsigned Alignment = 0;
Type *Ty = nullptr;
bool IsInAlloca = EatIfPresent(lltok::kw_inalloca);
- if (ParseType(Ty)) return true;
+ if (ParseType(Ty, TyLoc)) return true;
+
+ if (Ty->isFunctionTy() || !PointerType::isValidElementType(Ty))
+ return Error(TyLoc, "invalid type for alloca");
bool AteExtraComma = false;
if (EatIfPresent(lltok::comma)) {
@@ -4503,20 +5314,27 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Lex.Lex();
}
- if (ParseTypeAndValue(Val, Loc, PFS) ||
+ Type *Ty;
+ LocTy ExplicitTypeLoc = Lex.getLoc();
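+  // The pointee type is now spelled out explicitly and verified against the
+  // pointer operand below, e.g.:
+  //   %x = load i32, i32* %ptr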
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after load's type") ||
+ ParseTypeAndValue(Val, Loc, PFS) ||
ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!Val->getType()->isPointerTy() ||
- !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+ if (!Val->getType()->isPointerTy() || !Ty->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
if (isAtomic && !Alignment)
return Error(Loc, "atomic load must have explicit non-zero alignment");
if (Ordering == Release || Ordering == AcquireRelease)
return Error(Loc, "atomic load cannot use Release ordering");
- Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
+ if (Ty != cast<PointerType>(Val->getType())->getElementType())
+ return Error(ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
+
+ Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -4702,13 +5520,22 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
bool InBounds = EatIfPresent(lltok::kw_inbounds);
- if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+ Type *Ty = nullptr;
+ LocTy ExplicitTypeLoc = Lex.getLoc();
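+  // As with load, the source element type is now explicit, e.g.:
+  //   %p = getelementptr inbounds i32, i32* %base, i64 1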
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after getelementptr's type") ||
+ ParseTypeAndValue(Ptr, Loc, PFS))
+ return true;
Type *BaseType = Ptr->getType();
PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType());
if (!BasePointerType)
return Error(Loc, "base of getelementptr must be a pointer");
+ if (Ty != BasePointerType->getElementType())
+ return Error(ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
+
SmallVector<Value*, 16> Indices;
bool AteExtraComma = false;
while (EatIfPresent(lltok::comma)) {
@@ -4731,12 +5558,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (!Indices.empty() && !BasePointerType->getElementType()->isSized())
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!Indices.empty() && !Ty->isSized(&Visited))
return Error(Loc, "base element of getelementptr must be sized");
- if (!GetElementPtrInst::getIndexedType(BaseType, Indices))
+ if (!GetElementPtrInst::getIndexedType(Ty, Indices))
return Error(Loc, "invalid getelementptr indices");
- Inst = GetElementPtrInst::Create(Ptr, Indices);
+ Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
if (InBounds)
cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
return AteExtraComma ? InstExtraComma : InstNormal;
@@ -4776,8 +5604,13 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
if (!Val0->getType()->isAggregateType())
return Error(Loc0, "insertvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
+ Type *IndexedType = ExtractValueInst::getIndexedType(Val0->getType(), Indices);
+ if (!IndexedType)
return Error(Loc0, "invalid indices for insertvalue");
+ if (IndexedType != Val1->getType())
+ return Error(Loc1, "insertvalue operand and field disagree in type: '" +
+ getTypeString(Val1->getType()) + "' instead of '" +
+ getTypeString(IndexedType) + "'");
Inst = InsertValueInst::Create(Val0, Val1, Indices);
return AteExtraComma ? InstExtraComma : InstNormal;
}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 220562d..a43a4b0 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -80,31 +80,6 @@ namespace llvm {
}
};
- /// Structure to represent an optional metadata field.
- template <class FieldTy> struct MDFieldImpl {
- typedef MDFieldImpl ImplTy;
- FieldTy Val;
- bool Seen;
-
- void assign(FieldTy Val) {
- Seen = true;
- this->Val = Val;
- }
-
- explicit MDFieldImpl(FieldTy Default) : Val(Default), Seen(false) {}
- };
- template <class NumTy> struct MDUnsignedField : public MDFieldImpl<NumTy> {
- typedef typename MDUnsignedField::ImplTy ImplTy;
- NumTy Max;
-
- MDUnsignedField(NumTy Default = 0,
- NumTy Max = std::numeric_limits<NumTy>::max())
- : ImplTy(Default), Max(Max) {}
- };
- struct MDField : public MDFieldImpl<Metadata *> {
- MDField() : ImplTy(nullptr) {}
- };
-
class LLParser {
public:
typedef LLLexer::LocTy LocTy;
@@ -133,10 +108,10 @@ namespace llvm {
// Type resolution handling data structures. The location is set when we
// have processed a use of the type but not a definition yet.
StringMap<std::pair<Type*, LocTy> > NamedTypes;
- std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+ std::map<unsigned, std::pair<Type*, LocTy> > NumberedTypes;
- std::vector<TrackingMDNodeRef> NumberedMetadata;
- std::map<unsigned, std::pair<MDNodeFwdDecl *, LocTy>> ForwardRefMDNodes;
+ std::map<unsigned, TrackingMDNodeRef> NumberedMetadata;
+ std::map<unsigned, std::pair<TempMDTuple, LocTy>> ForwardRefMDNodes;
// Global Value reference information.
std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
@@ -248,7 +223,7 @@ namespace llvm {
bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
bool ParseOptionalCallingConv(unsigned &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
- bool ParseOptionalDereferenceableBytes(uint64_t &Bytes);
+ bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
AtomicOrdering &Ordering);
bool ParseOrdering(AtomicOrdering &Ordering);
@@ -410,20 +385,29 @@ namespace llvm {
bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
- bool ParseValueAsMetadata(Metadata *&MD, PerFunctionState *PFS);
+ bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
+ PerFunctionState *PFS);
bool ParseMetadata(Metadata *&MD, PerFunctionState *PFS);
bool ParseMDTuple(MDNode *&MD, bool IsDistinct = false);
bool ParseMDNode(MDNode *&MD);
bool ParseMDNodeTail(MDNode *&MD);
bool ParseMDNodeVector(SmallVectorImpl<Metadata *> &MDs);
- bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
-
- bool ParseMDField(LocTy Loc, StringRef Name,
- MDUnsignedField<uint32_t> &Result);
- bool ParseMDField(LocTy Loc, StringRef Name, MDField &Result);
- template <class ParserTy> bool ParseMDFieldsImpl(ParserTy parseField);
+ bool ParseMetadataAttachment(unsigned &Kind, MDNode *&MD);
+ bool ParseInstructionMetadata(Instruction &Inst);
+ bool ParseOptionalFunctionMetadata(Function &F);
+
+ template <class FieldTy>
+ bool ParseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
+ template <class FieldTy> bool ParseMDField(StringRef Name, FieldTy &Result);
+ template <class ParserTy>
+ bool ParseMDFieldsImplBody(ParserTy parseField);
+ template <class ParserTy>
+ bool ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc);
bool ParseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
- bool ParseMDLocation(MDNode *&Result, bool IsDistinct);
+
+#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \
+ bool Parse##CLASS(MDNode *&Result, bool IsDistinct);
+#include "llvm/IR/Metadata.def"
// Function Parsing.
struct ArgInfo {
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 4fb0182..c47f5e1 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -28,9 +28,8 @@ namespace lltok {
lbrace, rbrace, // { }
less, greater, // < >
lparen, rparen, // ( )
- backslash, // \ (not /)
exclaim, // !
- hash, // #
+ bar, // |
kw_x,
kw_true, kw_false,
@@ -106,7 +105,9 @@ namespace lltok {
kw_byval,
kw_inalloca,
kw_cold,
+ kw_convergent,
kw_dereferenceable,
+ kw_dereferenceable_or_null,
kw_inlinehint,
kw_inreg,
kw_jumptable,
@@ -200,6 +201,12 @@ namespace lltok {
LocalVar, // %foo %"foo"
MetadataVar, // !foo
StringConstant, // "foo"
+ DwarfTag, // DW_TAG_foo
+ DwarfAttEncoding, // DW_ATE_foo
+ DwarfVirtuality, // DW_VIRTUALITY_foo
+ DwarfLang, // DW_LANG_foo
+ DwarfOp, // DW_OP_foo
+ DIFlag, // DIFlagFoo
// Type valued tokens (TyVal).
Type,
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
index 0815907..c55a6a1 100644
--- a/contrib/llvm/lib/AsmParser/Parser.cpp
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -13,6 +13,7 @@
#include "llvm/AsmParser/Parser.h"
#include "LLParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -23,7 +24,7 @@ using namespace llvm;
bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err) {
SourceMgr SM;
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F, false);
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F);
SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
return LLParser(F.getBuffer(), SM, Err, &M).Run();
@@ -38,7 +39,7 @@ std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F,
if (parseAssemblyInto(F, *M, Err))
return nullptr;
- return std::move(M);
+ return M;
}
std::unique_ptr<Module> llvm::parseAssemblyFile(StringRef Filename,
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 3d9546f..f6d5ccc 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8,32 +8,380 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/ReaderWriter.h"
-#include "BitcodeReader.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-
+#include <deque>
using namespace llvm;
+namespace {
enum {
  SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 decimal => 0x4B5 hex
};
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+ /// bulk instead of resolving them one reference at a time. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// Each entry pairs a placeholder constant with the slot number that holds
+ /// its resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+class BitcodeReaderMDValueList {
+ unsigned NumFwdRefs;
+ bool AnyFwdRefs;
+ unsigned MinFwdRef;
+ unsigned MaxFwdRef;
+ std::vector<TrackingMDRef> MDValuePtrs;
+
+ LLVMContext &Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext &C)
+ : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); }
+ void clear() { MDValuePtrs.clear(); }
+ Metadata *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Metadata *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Metadata *getValueFwdRef(unsigned Idx);
+ void AssignValue(Metadata *MD, unsigned Idx);
+ void tryToResolveCycles();
+};
+
+class BitcodeReader : public GVMaterializer {
+ LLVMContext &Context;
+ DiagnosticHandlerFunction DiagnosticHandler;
+ Module *TheModule;
+ std::unique_ptr<MemoryBuffer> Buffer;
+ std::unique_ptr<BitstreamReader> StreamFile;
+ BitstreamCursor Stream;
+ DataStreamer *LazyStreamer;
+ uint64_t NextUnreadBit;
+ bool SeenValueSymbolTable;
+
+ std::vector<Type*> TypeList;
+ BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ std::vector<Comdat *> ComdatList;
+ SmallVector<Instruction *, 64> InstructionList;
+
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
+
+ SmallVector<Instruction*, 64> InstsWithTBAATag;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttributeSet> MAttributes;
+
+ /// \brief The set of attribute groups.
+ std::map<unsigned, AttributeSet> MAttributeGroups;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ // Several operations happen after the module header has been read, but
+ // before function bodies are processed. This keeps track of whether
+ // we've done this yet.
+ bool SeenFirstFunctionBody;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+ /// map contains info about where to find each deferred function body in the
+ /// stream.
+ DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+ /// When Metadata block is initially scanned when parsing the module, we may
+ /// choose to defer parsing of the metadata. This vector contains info about
+ /// which Metadata blocks are deferred.
+ std::vector<uint64_t> DeferredMetadataInfo;
+
+ /// These are basic blocks forward-referenced by block addresses. They are
+ /// inserted lazily into functions when they're loaded. The basic block ID is
+ /// its index into the vector.
+ DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
+ std::deque<Function *> BasicBlockFwdRefQueue;
+
+ /// UseRelativeIDs - Indicates that we are using a new encoding for
+ /// instruction operands where most operands in the current
+ /// FUNCTION_BLOCK are encoded relative to the instruction number,
+ /// for a more compact encoding. Some instruction operands are not
+ /// relative to the instruction ID: basic block numbers, and types.
+ /// Once the old style function blocks have been phased out, this flag
+ /// will no longer be needed.
+ bool UseRelativeIDs;
+
+ /// True if all functions will be materialized, negating the need to process
+ /// (e.g.) blockaddress forward references.
+ bool WillMaterializeAllForwardRefs;
+
+ /// Functions that have block addresses taken. This is usually empty.
+ SmallPtrSet<const Function *, 4> BlockAddressesTaken;
+
+ /// True if any Metadata block has been materialized.
+ bool IsMetadataMaterialized;
+
+ bool StripDebugInfo = false;
+
+public:
+ std::error_code Error(BitcodeError E, const Twine &Message);
+ std::error_code Error(BitcodeError E);
+ std::error_code Error(const Twine &Message);
+
+ explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ ~BitcodeReader() override { FreeState(); }
+
+ std::error_code materializeForwardReferencedFunctions();
+
+ void FreeState();
+
+ void releaseBuffer();
+
+ bool isDematerializable(const GlobalValue *GV) const override;
+ std::error_code materialize(GlobalValue *GV) override;
+ std::error_code materializeModule(Module *M) override;
+ std::vector<StructType *> getIdentifiedStructTypes() const override;
+ void dematerialize(GlobalValue *GV) override;
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ std::error_code ParseBitcodeInto(Module *M,
+ bool ShouldLazyLoadMetadata = false);
+
+ /// @brief Cheap mechanism to just extract module triple
+ /// @returns true if an error occurred.
+ ErrorOr<std::string> parseTriple();
+
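+  /// Decode the sign-rotated VBR encoding used for signed integers: even
+  /// values decode to V >> 1 and odd values to -(V >> 1), i.e. the low bit
+  /// carries the sign.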
+ static uint64_t decodeSignRotatedValue(uint64_t V);
+
+ /// Materialize any deferred Metadata block.
+ std::error_code materializeMetadata() override;
+
+ void setStripDebugInfo() override;
+
+private:
+ std::vector<StructType *> IdentifiedStructTypes;
+ StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
+ StructType *createIdentifiedStructType(LLVMContext &Context);
+
+ Type *getTypeByID(unsigned ID);
+ Value *getFnValueByID(unsigned ID, Type *Ty) {
+ if (Ty && Ty->isMetadataTy())
+ return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ Metadata *getFnMetadataByID(unsigned ID) {
+ return MDValueList.getValueFwdRef(ID);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttributeSet getAttributes(unsigned i) const {
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttributeSet();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, nullptr);
+ return ResVal == nullptr;
+ }
+ if (Slot == Record.size())
+ return true;
+
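+    // A forward reference is followed by an explicit type in the record, so
+    // a typed placeholder can be created before the value itself is read.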
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == nullptr;
+ }
+
+ /// popValue - Read a value out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used by the value in the record.
+ /// Return true if there is an error.
+ bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ return true;
+ // All values currently take a single record slot.
+ ++Slot;
+ return false;
+ }
+
+ /// getValue -- Like popValue, but does not increment the Slot number.
+ bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ ResVal = getValue(Record, Slot, InstNum, Ty);
+ return ResVal == nullptr;
+ }
+
+ /// getValue -- Version of getValue that returns ResVal directly,
+ /// or 0 if there is an error.
+ Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return nullptr;
+ unsigned ValNo = (unsigned)Record[Slot];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// getValueSigned -- Like getValue, but decodes signed VBRs.
+ Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return nullptr;
+ unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// Converts the alignment exponent stored in bitcode (log2 of the alignment
+ /// plus one, or zero for default alignment) to the corresponding alignment.
+ /// Returns an error code if the alignment is too large.
+ std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
+ std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
+ std::error_code ParseAttributeBlock();
+ std::error_code ParseAttributeGroupBlock();
+ std::error_code ParseTypeTable();
+ std::error_code ParseTypeTableBody();
+
+ std::error_code ParseValueSymbolTable();
+ std::error_code ParseConstants();
+ std::error_code RememberAndSkipFunctionBody();
+ /// Save the positions of the Metadata blocks and skip parsing the blocks.
+ std::error_code rememberAndSkipMetadata();
+ std::error_code ParseFunctionBody(Function *F);
+ std::error_code GlobalCleanup();
+ std::error_code ResolveGlobalAndAliasInits();
+ std::error_code ParseMetadata();
+ std::error_code ParseMetadataAttachment(Function &F);
+ ErrorOr<std::string> parseModuleTriple();
+ std::error_code ParseUseLists();
+ std::error_code InitStream();
+ std::error_code InitStreamFromBuffer();
+ std::error_code InitLazyStream();
+ std::error_code FindFunctionInStream(
+ Function *F,
+ DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
+};
+} // namespace
+
BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
DiagnosticSeverity Severity,
const Twine &Msg)
@@ -53,6 +401,12 @@ static std::error_code Error(DiagnosticHandlerFunction DiagnosticHandler,
return Error(DiagnosticHandler, EC, EC.message());
}
+static std::error_code Error(DiagnosticHandlerFunction DiagnosticHandler,
+ const Twine &Message) {
+ return Error(DiagnosticHandler,
+ make_error_code(BitcodeError::CorruptedBitcode), Message);
+}
+
std::error_code BitcodeReader::Error(BitcodeError E, const Twine &Message) {
return ::Error(DiagnosticHandler, make_error_code(E), Message);
}
@@ -79,7 +433,7 @@ BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false) {}
+ WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C,
DiagnosticHandlerFunction DiagnosticHandler)
@@ -87,7 +441,7 @@ BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C,
TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false) {}
+ WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
@@ -133,6 +487,7 @@ void BitcodeReader::FreeState() {
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
+ DeferredMetadataInfo.clear();
MDKindMap.clear();
assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references");
@@ -156,19 +511,27 @@ static bool ConvertToString(ArrayRef<uint64_t> Record, unsigned Idx,
return false;
}
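+
+// Old bitcode encoded some linkage types with an implicit comdat (values 1,
+// 4, 10 and 11); newer bitcode spells the comdat out and uses values 16-19.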
+static bool hasImplicitComdat(size_t Val) {
+ switch (Val) {
+ default:
+ return false;
+ case 1: // Old WeakAnyLinkage
+ case 4: // Old LinkOnceAnyLinkage
+ case 10: // Old WeakODRLinkage
+ case 11: // Old LinkOnceODRLinkage
+ return true;
+ }
+}
+
static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
switch (Val) {
default: // Map unknown/new linkages to external
case 0:
return GlobalValue::ExternalLinkage;
- case 1:
- return GlobalValue::WeakAnyLinkage;
case 2:
return GlobalValue::AppendingLinkage;
case 3:
return GlobalValue::InternalLinkage;
- case 4:
- return GlobalValue::LinkOnceAnyLinkage;
case 5:
return GlobalValue::ExternalLinkage; // Obsolete DLLImportLinkage
case 6:
@@ -179,10 +542,6 @@ static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
return GlobalValue::CommonLinkage;
case 9:
return GlobalValue::PrivateLinkage;
- case 10:
- return GlobalValue::WeakODRLinkage;
- case 11:
- return GlobalValue::LinkOnceODRLinkage;
case 12:
return GlobalValue::AvailableExternallyLinkage;
case 13:
@@ -191,6 +550,18 @@ static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateWeakLinkage
case 15:
return GlobalValue::ExternalLinkage; // Obsolete LinkOnceODRAutoHideLinkage
+ case 1: // Old value with implicit comdat.
+ case 16:
+ return GlobalValue::WeakAnyLinkage;
+ case 10: // Old value with implicit comdat.
+ case 17:
+ return GlobalValue::WeakODRLinkage;
+ case 4: // Old value with implicit comdat.
+ case 18:
+ return GlobalValue::LinkOnceAnyLinkage;
+ case 11: // Old value with implicit comdat.
+ case 19:
+ return GlobalValue::LinkOnceODRLinkage;
}
}
@@ -242,27 +613,42 @@ static int GetDecodedCastOpcode(unsigned Val) {
case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast;
}
}
+
static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
+ bool IsFP = Ty->isFPOrFPVectorTy();
+ // BinOps are only valid for int/fp or vector of int/fp types
+ if (!IsFP && !Ty->isIntOrIntVectorTy())
+ return -1;
+
switch (Val) {
- default: return -1;
+ default:
+ return -1;
case bitc::BINOP_ADD:
- return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add;
+ return IsFP ? Instruction::FAdd : Instruction::Add;
case bitc::BINOP_SUB:
- return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub;
+ return IsFP ? Instruction::FSub : Instruction::Sub;
case bitc::BINOP_MUL:
- return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul;
- case bitc::BINOP_UDIV: return Instruction::UDiv;
+ return IsFP ? Instruction::FMul : Instruction::Mul;
+ case bitc::BINOP_UDIV:
+ return IsFP ? -1 : Instruction::UDiv;
case bitc::BINOP_SDIV:
- return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv;
- case bitc::BINOP_UREM: return Instruction::URem;
+ return IsFP ? Instruction::FDiv : Instruction::SDiv;
+ case bitc::BINOP_UREM:
+ return IsFP ? -1 : Instruction::URem;
case bitc::BINOP_SREM:
- return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem;
- case bitc::BINOP_SHL: return Instruction::Shl;
- case bitc::BINOP_LSHR: return Instruction::LShr;
- case bitc::BINOP_ASHR: return Instruction::AShr;
- case bitc::BINOP_AND: return Instruction::And;
- case bitc::BINOP_OR: return Instruction::Or;
- case bitc::BINOP_XOR: return Instruction::Xor;
+ return IsFP ? Instruction::FRem : Instruction::SRem;
+ case bitc::BINOP_SHL:
+ return IsFP ? -1 : Instruction::Shl;
+ case bitc::BINOP_LSHR:
+ return IsFP ? -1 : Instruction::LShr;
+ case bitc::BINOP_ASHR:
+ return IsFP ? -1 : Instruction::AShr;
+ case bitc::BINOP_AND:
+ return IsFP ? -1 : Instruction::And;
+ case bitc::BINOP_OR:
+ return IsFP ? -1 : Instruction::Or;
+ case bitc::BINOP_XOR:
+ return IsFP ? -1 : Instruction::Xor;
}
}
@@ -332,7 +718,7 @@ namespace {
/// @brief A class for maintaining the slot number definition
/// as a placeholder for the actual definition for forward constants defs.
class ConstantPlaceHolder : public ConstantExpr {
- void operator=(const ConstantPlaceHolder &) LLVM_DELETED_FUNCTION;
+ void operator=(const ConstantPlaceHolder &) = delete;
public:
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -399,7 +785,8 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
resize(Idx + 1);
if (Value *V = ValuePtrs[Idx]) {
- assert(Ty == V->getType() && "Type mismatch in constant table!");
+ if (Ty != V->getType())
+ report_fatal_error("Type mismatch in constant table!");
return cast<Constant>(V);
}
@@ -410,11 +797,17 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
}
Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
+  // Bail out for a clearly invalid value: Idx + 1 would overflow, making us
+  // call resize(0) below.
+ if (Idx == UINT_MAX)
+ return nullptr;
+
if (Idx >= size())
resize(Idx + 1);
if (Value *V = ValuePtrs[Idx]) {
- assert((!Ty || Ty == V->getType()) && "Type mismatch in value table!");
+ // If the types don't match, it's invalid.
+ if (Ty && Ty != V->getType())
+ return nullptr;
return V;
}
@@ -525,9 +918,8 @@ void BitcodeReaderMDValueList::AssignValue(Metadata *MD, unsigned Idx) {
}
// If there was a forward reference to this value, replace it.
- MDNodeFwdDecl *PrevMD = cast<MDNodeFwdDecl>(OldMD.get());
+ TempMDTuple PrevMD(cast<MDTuple>(OldMD.get()));
PrevMD->replaceAllUsesWith(MD);
- MDNode::deleteTemporary(PrevMD);
--NumFwdRefs;
}
@@ -549,7 +941,7 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
++NumFwdRefs;
// Create and return a placeholder, which will later be RAUW'd.
- Metadata *MD = MDNode::getTemporary(Context, None);
+ Metadata *MD = MDNode::getTemporary(Context, None).release();
MDValuePtrs[Idx].reset(MD);
return MD;
}
@@ -566,9 +958,12 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
// Resolve any cycles.
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
auto &MD = MDValuePtrs[I];
- assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Unexpected forward reference");
- if (auto *N = dyn_cast_or_null<UniquableMDNode>(MD))
- N->resolveCycles();
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ if (!N)
+ continue;
+
+ assert(!N->isTemporary() && "Unexpected forward reference");
+ N->resolveCycles();
}
// Make sure we return early again until there's another forward ref.
@@ -701,6 +1096,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::InAlloca;
case bitc::ATTR_KIND_COLD:
return Attribute::Cold;
+ case bitc::ATTR_KIND_CONVERGENT:
+ return Attribute::Convergent;
case bitc::ATTR_KIND_INLINE_HINT:
return Attribute::InlineHint;
case bitc::ATTR_KIND_IN_REG:
@@ -731,6 +1128,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::NonNull;
case bitc::ATTR_KIND_DEREFERENCEABLE:
return Attribute::Dereferenceable;
+ case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL:
+ return Attribute::DereferenceableOrNull;
case bitc::ATTR_KIND_NO_RED_ZONE:
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
@@ -774,6 +1173,16 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
}
}
+std::error_code BitcodeReader::parseAlignmentValue(uint64_t Exponent,
+ unsigned &Alignment) {
+ // Note: Alignment in bitcode files is incremented by 1, so that zero
+ // can be used for default alignment.
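+  // For example, a stored exponent of 4 decodes to an 8-byte alignment:
+  // (1 << 4) >> 1 == 8.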
+ if (Exponent > Value::MaxAlignmentExponent + 1)
+ return Error("Invalid alignment value");
+ Alignment = (1 << static_cast<unsigned>(Exponent)) >> 1;
+ return std::error_code();
+}
+
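parseAlignmentValue() decodes an exponent-plus-one encoding: a raw value of zero means default alignment, and any other value N stands for 1 << (N - 1). A minimal sketch of just the decode; the helper name is mine, not the reader's:

    #include <cassert>
    #include <cstdint>

    // Decode bitcode's alignment field: 0 => default (reported as 0),
    // N => 1 << (N - 1). Mirrors the (1 << E) >> 1 expression above; the
    // reader rejects exponents above Value::MaxAlignmentExponent + 1 first.
    static unsigned decodeAlignment(uint64_t Exponent) {
      return (1u << static_cast<unsigned>(Exponent)) >> 1;
    }

    int main() {
      assert(decodeAlignment(0) == 0);  // default alignment
      assert(decodeAlignment(1) == 1);  // 1-byte alignment
      assert(decodeAlignment(5) == 16); // 16-byte alignment
      return 0;
    }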
std::error_code BitcodeReader::ParseAttrKind(uint64_t Code,
Attribute::AttrKind *Kind) {
*Kind = GetAttrFromCode(Code);
@@ -837,6 +1246,8 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
B.addStackAlignmentAttr(Record[++i]);
else if (Kind == Attribute::Dereferenceable)
B.addDereferenceableAttr(Record[++i]);
+ else if (Kind == Attribute::DereferenceableOrNull)
+ B.addDereferenceableOrNullAttr(Record[++i]);
} else { // String attribute
assert((Record[i] == 3 || Record[i] == 4) &&
"Invalid attribute group entry");
@@ -943,12 +1354,17 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_X86_MMX: // X86_MMX
ResultTy = Type::getX86_MMXTy(Context);
break;
- case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width]
if (Record.size() < 1)
return Error("Invalid record");
- ResultTy = IntegerType::get(Context, Record[0]);
+ uint64_t NumBits = Record[0];
+ if (NumBits < IntegerType::MIN_INT_BITS ||
+ NumBits > IntegerType::MAX_INT_BITS)
+ return Error("Bitwidth for integer type out of range");
+ ResultTy = IntegerType::get(Context, NumBits);
break;
+ }
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.size() < 1)
@@ -957,7 +1373,8 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
if (Record.size() == 2)
AddressSpace = Record[1];
ResultTy = getTypeByID(Record[0]);
- if (!ResultTy)
+ if (!ResultTy ||
+ !PointerType::isValidElementType(ResultTy))
return Error("Invalid type");
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
@@ -988,8 +1405,11 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
return Error("Invalid record");
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
- if (Type *T = getTypeByID(Record[i]))
+ if (Type *T = getTypeByID(Record[i])) {
+ if (!FunctionType::isValidArgumentType(T))
+ return Error("Invalid function argument type");
ArgTys.push_back(T);
+ }
else
break;
}
@@ -1071,25 +1491,27 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid record");
- if ((ResultTy = getTypeByID(Record[1])))
- ResultTy = ArrayType::get(ResultTy, Record[0]);
- else
+ ResultTy = getTypeByID(Record[1]);
+ if (!ResultTy || !ArrayType::isValidElementType(ResultTy))
return Error("Invalid type");
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid record");
- if ((ResultTy = getTypeByID(Record[1])))
- ResultTy = VectorType::get(ResultTy, Record[0]);
- else
+ ResultTy = getTypeByID(Record[1]);
+ if (!ResultTy || !StructType::isValidElementType(ResultTy))
return Error("Invalid type");
+ ResultTy = VectorType::get(ResultTy, Record[0]);
break;
}
if (NumRecords >= TypeList.size())
return Error("Invalid TYPE table");
+ if (TypeList[NumRecords])
+ return Error(
+ "Invalid TYPE table: Only named structs can be forward referenced");
assert(ResultTy && "Didn't read a type?");
- assert(!TypeList[NumRecords] && "Already read type?");
TypeList[NumRecords++] = ResultTy;
}
}
@@ -1100,6 +1522,8 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
SmallVector<uint64_t, 64> Record;
+ Triple TT(TheModule->getTargetTriple());
+
// Read all the records for this value table.
SmallString<128> ValueName;
while (1) {
@@ -1130,6 +1554,14 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
Value *V = ValueList[ValueID];
V->setName(StringRef(ValueName.data(), ValueName.size()));
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
+ if (TT.isOSBinFormatMachO())
+ GO->setComdat(nullptr);
+ else
+ GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
+ }
+ }
ValueName.clear();
break;
}
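The comdat fixup above is the second half of a pattern set up later in ParseModule: globals whose linkage implies a comdat are tagged with the sentinel reinterpret_cast<Comdat *>(1), then patched once the value symbol table supplies a name (or cleared on Mach-O, which has no comdats). A toy version of the tag-then-patch idea, with hypothetical names:

    #include <cassert>
    #include <string>

    struct Comdat { std::string Name; };

    // Sentinel: "needs a comdat named after the symbol, name not yet read".
    static Comdat *const PendingComdat = reinterpret_cast<Comdat *>(1);

    struct Global { Comdat *C = nullptr; };

    // Once the symbol table supplies the name, replace the sentinel.
    // Mach-O has no comdats, so the sentinel is simply dropped there.
    static void resolveComdat(Global &G, const std::string &Name, bool IsMachO) {
      if (G.C != PendingComdat)
        return;
      G.C = IsMachO ? nullptr : new Comdat{Name};
    }

    int main() {
      Global G;
      G.C = PendingComdat;            // set while parsing the global's record
      resolveComdat(G, "foo", false); // patched in the symbol-table pass
      assert(G.C && G.C->Name == "foo");
      delete G.C;
      return 0;
    }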
@@ -1148,7 +1580,10 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
}
}
+static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
+
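unrotateSign() undoes the writer's sign rotation, which moves the sign into the low bit so that small magnitudes stay small when emitted as VBRs: n >= 0 becomes 2n, and n < 0 becomes 2 * ~n + 1 (so 5 <-> 10 and -3 <-> 5). A round-trip check; the encoder shown is my reconstruction of the writer's counterpart:

    #include <cassert>
    #include <cstdint>

    // Writer-side counterpart (reconstructed): move the sign to the low bit.
    static uint64_t rotateSign(int64_t I) {
      uint64_t U = I;
      return I < 0 ? ~(U << 1) : U << 1;
    }

    // Reader side, identical to unrotateSign() above. The unsigned-to-signed
    // conversion assumes two's complement, as the reader itself does.
    static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }

    int main() {
      assert(rotateSign(5) == 10 && unrotateSign(10) == 5);
      assert(rotateSign(-3) == 5 && unrotateSign(5) == -3);
      assert(unrotateSign(rotateSign(-1)) == -1); // -1 <-> 1
      return 0;
    }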
std::error_code BitcodeReader::ParseMetadata() {
+ IsMetadataMaterialized = true;
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
@@ -1156,6 +1591,22 @@ std::error_code BitcodeReader::ParseMetadata() {
SmallVector<uint64_t, 64> Record;
+ auto getMD =
+ [&](unsigned ID) -> Metadata *{ return MDValueList.getValueFwdRef(ID); };
+ auto getMDOrNull = [&](unsigned ID) -> Metadata *{
+ if (ID)
+ return getMD(ID - 1);
+ return nullptr;
+ };
+ auto getMDString = [&](unsigned ID) -> MDString *{
+ // This requires that the ID is not really a forward reference. In
+ // particular, the MDString must already have been resolved.
+ return cast_or_null<MDString>(getMDOrNull(ID));
+ };
+
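The getMDOrNull lambda encodes a one-based convention used throughout these metadata records: operand ID 0 means null and ID N > 0 refers to table slot N - 1. A toy decode under that convention (the types are stand-ins):

    #include <cassert>
    #include <vector>

    struct Metadata {};

    // 0 is the explicit "no operand" encoding; N > 0 is slot N - 1.
    static Metadata *getMDOrNull(const std::vector<Metadata *> &Table,
                                 unsigned ID) {
      if (ID == 0)
        return nullptr;
      return Table[ID - 1]; // shift back to a 0-based table index
    }

    int main() {
      Metadata A, B;
      std::vector<Metadata *> Table = {&A, &B};
      assert(getMDOrNull(Table, 0) == nullptr);
      assert(getMDOrNull(Table, 1) == &A);
      assert(getMDOrNull(Table, 2) == &B);
      return 0;
    }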
+#define GET_OR_DISTINCT(CLASS, DISTINCT, ARGS) \
+ (DISTINCT ? CLASS::getDistinct ARGS : CLASS::get ARGS)
+
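GET_OR_DISTINCT picks the factory from the record's first field: distinct nodes keep their own identity via CLASS::getDistinct, uniqued ones go through CLASS::get. The same dispatch on a stand-in class (illustrative only, not the LLVM metadata API):

    #include <cstdio>

    // Stand-in for a metadata class exposing uniqued vs. distinct factories.
    // The real DI* factories take many more operands.
    struct Node {
      static Node get(int V) { std::printf("uniqued %d\n", V); return {}; }
      static Node getDistinct(int V) { std::printf("distinct %d\n", V); return {}; }
    };

    // Same shape as the reader's helper: Record[0] selects the factory.
    #define GET_OR_DISTINCT(CLASS, DISTINCT, ARGS)                             \
      (DISTINCT ? CLASS::getDistinct ARGS : CLASS::get ARGS)

    int main() {
      Node N = GET_OR_DISTINCT(Node, /*Record[0]=*/1, (42)); // "distinct 42"
      (void)N;
      return 0;
    }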
// Read all the records.
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -1283,16 +1734,270 @@ std::error_code BitcodeReader::ParseMetadata() {
if (Record.size() != 5)
return Error("Invalid record");
- auto get = Record[0] ? MDLocation::getDistinct : MDLocation::get;
unsigned Line = Record[1];
unsigned Column = Record[2];
MDNode *Scope = cast<MDNode>(MDValueList.getValueFwdRef(Record[3]));
Metadata *InlinedAt =
Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr;
- MDValueList.AssignValue(get(Context, Line, Column, Scope, InlinedAt),
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DILocation, Record[0],
+ (Context, Line, Column, Scope, InlinedAt)),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_GENERIC_DEBUG: {
+ if (Record.size() < 4)
+ return Error("Invalid record");
+
+ unsigned Tag = Record[1];
+ unsigned Version = Record[2];
+
+ if (Tag >= 1u << 16 || Version != 0)
+ return Error("Invalid record");
+
+ auto *Header = getMDString(Record[3]);
+ SmallVector<Metadata *, 8> DwarfOps;
+ for (unsigned I = 4, E = Record.size(); I != E; ++I)
+ DwarfOps.push_back(Record[I] ? MDValueList.getValueFwdRef(Record[I] - 1)
+ : nullptr);
+ MDValueList.AssignValue(GET_OR_DISTINCT(GenericDINode, Record[0],
+ (Context, Tag, Header, DwarfOps)),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_SUBRANGE: {
+ if (Record.size() != 3)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DISubrange, Record[0],
+ (Context, Record[1], unrotateSign(Record[2]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_ENUMERATOR: {
+ if (Record.size() != 3)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(GET_OR_DISTINCT(DIEnumerator, Record[0],
+ (Context, unrotateSign(Record[1]),
+ getMDString(Record[2]))),
NextMDValueNo++);
break;
}
+ case bitc::METADATA_BASIC_TYPE: {
+ if (Record.size() != 6)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIBasicType, Record[0],
+ (Context, Record[1], getMDString(Record[2]),
+ Record[3], Record[4], Record[5])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_DERIVED_TYPE: {
+ if (Record.size() != 12)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIDerivedType, Record[0],
+ (Context, Record[1], getMDString(Record[2]),
+ getMDOrNull(Record[3]), Record[4],
+ getMDOrNull(Record[5]), getMDOrNull(Record[6]),
+ Record[7], Record[8], Record[9], Record[10],
+ getMDOrNull(Record[11]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_COMPOSITE_TYPE: {
+ if (Record.size() != 16)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DICompositeType, Record[0],
+ (Context, Record[1], getMDString(Record[2]),
+ getMDOrNull(Record[3]), Record[4],
+ getMDOrNull(Record[5]), getMDOrNull(Record[6]),
+ Record[7], Record[8], Record[9], Record[10],
+ getMDOrNull(Record[11]), Record[12],
+ getMDOrNull(Record[13]), getMDOrNull(Record[14]),
+ getMDString(Record[15]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_SUBROUTINE_TYPE: {
+ if (Record.size() != 3)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DISubroutineType, Record[0],
+ (Context, Record[1], getMDOrNull(Record[2]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_FILE: {
+ if (Record.size() != 3)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]),
+ getMDString(Record[2]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_COMPILE_UNIT: {
+ if (Record.size() < 14 || Record.size() > 15)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DICompileUnit, Record[0],
+ (Context, Record[1], getMDOrNull(Record[2]),
+ getMDString(Record[3]), Record[4],
+ getMDString(Record[5]), Record[6],
+ getMDString(Record[7]), Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[11]), getMDOrNull(Record[12]),
+ getMDOrNull(Record[13]),
+ Record.size() == 14 ? 0 : Record[14])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_SUBPROGRAM: {
+ if (Record.size() != 19)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(
+ DISubprogram, Record[0],
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getMDOrNull(Record[6]), Record[7], Record[8], Record[9],
+ getMDOrNull(Record[10]), Record[11], Record[12], Record[13],
+ Record[14], getMDOrNull(Record[15]), getMDOrNull(Record[16]),
+ getMDOrNull(Record[17]), getMDOrNull(Record[18]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_LEXICAL_BLOCK: {
+ if (Record.size() != 5)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DILexicalBlock, Record[0],
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), Record[3], Record[4])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_LEXICAL_BLOCK_FILE: {
+ if (Record.size() != 4)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DILexicalBlockFile, Record[0],
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), Record[3])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_NAMESPACE: {
+ if (Record.size() != 5)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DINamespace, Record[0],
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), getMDString(Record[3]),
+ Record[4])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_TEMPLATE_TYPE: {
+ if (Record.size() != 3)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
+ Record[0],
+ (Context, getMDString(Record[1]),
+ getMDOrNull(Record[2]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_TEMPLATE_VALUE: {
+ if (Record.size() != 5)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DITemplateValueParameter, Record[0],
+ (Context, Record[1], getMDString(Record[2]),
+ getMDOrNull(Record[3]), getMDOrNull(Record[4]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_GLOBAL_VAR: {
+ if (Record.size() != 11)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIGlobalVariable, Record[0],
+ (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDOrNull(Record[4]), Record[5],
+ getMDOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_LOCAL_VAR: {
+ // The 10th field is for the obsoleted 'inlinedAt:' field.
+ if (Record.size() != 9 && Record.size() != 10)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DILocalVariable, Record[0],
+ (Context, Record[1], getMDOrNull(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]),
+ Record[5], getMDOrNull(Record[6]), Record[7],
+ Record[8])),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_EXPRESSION: {
+ if (Record.size() < 1)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIExpression, Record[0],
+ (Context, makeArrayRef(Record).slice(1))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_OBJC_PROPERTY: {
+ if (Record.size() != 8)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIObjCProperty, Record[0],
+ (Context, getMDString(Record[1]),
+ getMDOrNull(Record[2]), Record[3],
+ getMDString(Record[4]), getMDString(Record[5]),
+ Record[6], getMDOrNull(Record[7]))),
+ NextMDValueNo++);
+ break;
+ }
+ case bitc::METADATA_IMPORTED_ENTITY: {
+ if (Record.size() != 6)
+ return Error("Invalid record");
+
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(DIImportedEntity, Record[0],
+ (Context, Record[1], getMDOrNull(Record[2]),
+ getMDOrNull(Record[3]), Record[4],
+ getMDString(Record[5]))),
+ NextMDValueNo++);
+ break;
+ }
case bitc::METADATA_STRING: {
std::string String(Record.begin(), Record.end());
llvm::UpgradeMDStringConstant(String);
@@ -1314,6 +2019,7 @@ std::error_code BitcodeReader::ParseMetadata() {
}
}
}
+#undef GET_OR_DISTINCT
}
/// decodeSignRotatedValue - Decode a signed value stored with the sign bit in
@@ -1635,19 +2341,29 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_INBOUNDS_GEP:
case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
- if (Record.size() & 1)
- return Error("Invalid record");
+ unsigned OpNum = 0;
+ Type *PointeeType = nullptr;
+ if (Record.size() % 2)
+ PointeeType = getTypeByID(Record[OpNum++]);
SmallVector<Constant*, 16> Elts;
- for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- Type *ElTy = getTypeByID(Record[i]);
+ while (OpNum != Record.size()) {
+ Type *ElTy = getTypeByID(Record[OpNum++]);
if (!ElTy)
return Error("Invalid record");
- Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
}
+
+ if (PointeeType &&
+ PointeeType !=
+ cast<SequentialType>(Elts[0]->getType()->getScalarType())
+ ->getElementType())
+ return Error("Explicit gep operator type does not match pointee type "
+ "of pointer operand");
+
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
- V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+ V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
BitCode ==
- bitc::CST_CODE_CE_INBOUNDS_GEP);
+ bitc::CST_CODE_CE_INBOUNDS_GEP);
break;
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
@@ -1917,6 +2633,32 @@ std::error_code BitcodeReader::ParseUseLists() {
}
}
+/// When we see the block for metadata, remember where it is and then skip it.
+/// This lets us lazily deserialize the metadata.
+std::error_code BitcodeReader::rememberAndSkipMetadata() {
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredMetadataInfo.push_back(CurBit);
+
+ // Skip over the block for now.
+ if (Stream.SkipBlock())
+ return Error("Invalid record");
+ return std::error_code();
+}
+
+std::error_code BitcodeReader::materializeMetadata() {
+ for (uint64_t BitPos : DeferredMetadataInfo) {
+ // Move the bit stream to the saved position.
+ Stream.JumpToBit(BitPos);
+ if (std::error_code EC = ParseMetadata())
+ return EC;
+ }
+ DeferredMetadataInfo.clear();
+ return std::error_code();
+}
+
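rememberAndSkipMetadata() and materializeMetadata() form a record-position-and-skip pair: the first pass saves the metadata block's bit offset and skips it, and materialization later jumps back and parses it exactly once. A toy model of the pattern; the Stream type is a stand-in, not the real BitstreamCursor:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Stream {
      uint64_t Pos = 0;
      uint64_t tell() const { return Pos; }
      void skipBlock() { Pos += 100; } // pretend the block spans 100 units
      void jumpTo(uint64_t P) { Pos = P; }
    };

    struct LazyReader {
      Stream S;
      std::vector<uint64_t> Deferred;

      void rememberAndSkip() {
        Deferred.push_back(S.tell()); // save where the block starts
        S.skipBlock();                // parse nothing now
      }

      void materialize() {
        for (uint64_t P : Deferred) {
          S.jumpTo(P);
          std::printf("parsing deferred block at %llu\n", (unsigned long long)P);
        }
        Deferred.clear();             // parse at most once
      }
    };

    int main() {
      LazyReader R;
      R.rememberAndSkip();
      R.materialize();
      return 0;
    }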
+void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; }
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -1967,7 +2709,8 @@ std::error_code BitcodeReader::GlobalCleanup() {
return std::error_code();
}
-std::error_code BitcodeReader::ParseModule(bool Resume) {
+std::error_code BitcodeReader::ParseModule(bool Resume,
+ bool ShouldLazyLoadMetadata) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
@@ -2021,6 +2764,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
return EC;
break;
case bitc::METADATA_BLOCK_ID:
+ if (ShouldLazyLoadMetadata && !IsMetadataMaterialized) {
+ if (std::error_code EC = rememberAndSkipMetadata())
+ return EC;
+ break;
+ }
+ assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata");
if (std::error_code EC = ParseMetadata())
return EC;
break;
@@ -2139,21 +2888,31 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
}
// GLOBALVAR: [pointer type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
- // unnamed_addr, dllstorageclass]
+ // unnamed_addr, externally_initialized, dllstorageclass,
+ // comdat]
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
return Error("Invalid record");
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
return Error("Invalid record");
- if (!Ty->isPointerTy())
- return Error("Invalid type for value");
- unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
- Ty = cast<PointerType>(Ty)->getElementType();
+ bool isConstant = Record[1] & 1;
+ bool explicitType = Record[1] & 2;
+ unsigned AddressSpace;
+ if (explicitType) {
+ AddressSpace = Record[1] >> 2;
+ } else {
+ if (!Ty->isPointerTy())
+ return Error("Invalid type for value");
+ AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+ }
- bool isConstant = Record[1];
- GlobalValue::LinkageTypes Linkage = getDecodedLinkage(Record[3]);
- unsigned Alignment = (1 << Record[4]) >> 1;
+ uint64_t RawLinkage = Record[3];
+ GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
+ unsigned Alignment;
+ if (std::error_code EC = parseAlignmentValue(Record[4], Alignment))
+ return EC;
std::string Section;
if (Record[5]) {
if (Record[5]-1 >= SectionTable.size())
@@ -2190,7 +2949,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
if (Record.size() > 10)
NewGV->setDLLStorageClass(GetDecodedDLLStorageClass(Record[10]));
else
- UpgradeDLLImportExportLinkage(NewGV, Record[3]);
+ UpgradeDLLImportExportLinkage(NewGV, RawLinkage);
ValueList.push_back(NewGV);
@@ -2198,11 +2957,15 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
if (unsigned InitID = Record[2])
GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
- if (Record.size() > 11)
+ if (Record.size() > 11) {
if (unsigned ComdatID = Record[11]) {
- assert(ComdatID <= ComdatList.size());
+ if (ComdatID > ComdatList.size())
+ return Error("Invalid global variable comdat ID");
NewGV->setComdat(ComdatList[ComdatID - 1]);
}
+ } else if (hasImplicitComdat(RawLinkage)) {
+ NewGV->setComdat(reinterpret_cast<Comdat *>(1));
+ }
break;
}
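The new GLOBALVAR encoding packs three values into Record[1]: bit 0 is isconst, bit 1 marks an explicit (non-pointer) type, and the remaining high bits carry the address space. A small decode matching the reads above (struct and helper names are mine):

    #include <cassert>
    #include <cstdint>

    struct GlobalVarFlags {
      bool IsConstant;
      bool ExplicitType;
      unsigned AddressSpace;
    };

    // bit 0: isconst, bit 1: explicit-type marker, bits 2+: address space.
    static GlobalVarFlags decodeGlobalVarField(uint64_t Field) {
      return {(Field & 1) != 0, (Field & 2) != 0,
              static_cast<unsigned>(Field >> 2)};
    }

    int main() {
      // 0b1011: constant, explicit type, address space 2.
      GlobalVarFlags F = decodeGlobalVarField(0xB);
      assert(F.IsConstant && F.ExplicitType && F.AddressSpace == 2);
      return 0;
    }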
// FUNCTION: [type, callingconv, isproto, linkage, paramattr,
@@ -2214,10 +2977,9 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
return Error("Invalid record");
- if (!Ty->isPointerTy())
- return Error("Invalid type for value");
- FunctionType *FTy =
- dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
+ Ty = PTy->getElementType();
+ auto *FTy = dyn_cast<FunctionType>(Ty);
if (!FTy)
return Error("Invalid type for value");
@@ -2226,10 +2988,14 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
Func->setCallingConv(static_cast<CallingConv::ID>(Record[1]));
bool isProto = Record[2];
- Func->setLinkage(getDecodedLinkage(Record[3]));
+ uint64_t RawLinkage = Record[3];
+ Func->setLinkage(getDecodedLinkage(RawLinkage));
Func->setAttributes(getAttributes(Record[4]));
- Func->setAlignment((1 << Record[5]) >> 1);
+ unsigned Alignment;
+ if (std::error_code EC = parseAlignmentValue(Record[5], Alignment))
+ return EC;
+ Func->setAlignment(Alignment);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
return Error("Invalid ID");
@@ -2240,7 +3006,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// FIXME: Change to an error if non-default in 4.0.
Func->setVisibility(GetDecodedVisibility(Record[7]));
if (Record.size() > 8 && Record[8]) {
- if (Record[8]-1 > GCTable.size())
+ if (Record[8]-1 >= GCTable.size())
return Error("Invalid ID");
Func->setGC(GCTable[Record[8]-1].c_str());
}
@@ -2254,13 +3020,17 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
if (Record.size() > 11)
Func->setDLLStorageClass(GetDecodedDLLStorageClass(Record[11]));
else
- UpgradeDLLImportExportLinkage(Func, Record[3]);
+ UpgradeDLLImportExportLinkage(Func, RawLinkage);
- if (Record.size() > 12)
+ if (Record.size() > 12) {
if (unsigned ComdatID = Record[12]) {
- assert(ComdatID <= ComdatList.size());
+ if (ComdatID > ComdatList.size())
+ return Error("Invalid function comdat ID");
Func->setComdat(ComdatList[ComdatID - 1]);
}
+ } else if (hasImplicitComdat(RawLinkage)) {
+ Func->setComdat(reinterpret_cast<Comdat *>(1));
+ }
if (Record.size() > 13 && Record[13] != 0)
FunctionPrefixes.push_back(std::make_pair(Func, Record[13]-1));
@@ -2290,8 +3060,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
return Error("Invalid type for value");
auto *NewGA =
- GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- getDecodedLinkage(Record[2]), "", TheModule);
+ GlobalAlias::create(PTy, getDecodedLinkage(Record[2]), "", TheModule);
// Old bitcode files didn't have visibility field.
// Local linkage must have default visibility.
if (Record.size() > 3 && !NewGA->hasLocalLinkage())
@@ -2321,7 +3090,8 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
}
}
-std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
+std::error_code BitcodeReader::ParseBitcodeInto(Module *M,
+ bool ShouldLazyLoadMetadata) {
TheModule = nullptr;
if (std::error_code EC = InitStream())
@@ -2339,8 +3109,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (1) {
- if (Stream.AtEndOfStream())
- return std::error_code();
+ if (Stream.AtEndOfStream()) {
+ if (TheModule)
+ return std::error_code();
+ // We didn't really read a proper Module.
+ return Error("Malformed IR file");
+ }
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
@@ -2362,7 +3136,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
if (TheModule)
return Error("Invalid multiple blocks");
TheModule = M;
- if (std::error_code EC = ParseModule(false))
+ if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata))
return EC;
if (LazyStreamer)
return std::error_code();
@@ -2468,7 +3242,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
}
/// ParseMetadataAttachment - Parse metadata attachments.
-std::error_code BitcodeReader::ParseMetadataAttachment() {
+std::error_code BitcodeReader::ParseMetadataAttachment(Function &F) {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
return Error("Invalid record");
@@ -2494,8 +3268,21 @@ std::error_code BitcodeReader::ParseMetadataAttachment() {
break;
case bitc::METADATA_ATTACHMENT: {
unsigned RecordLength = Record.size();
- if (Record.empty() || (RecordLength - 1) % 2 == 1)
+ if (Record.empty())
return Error("Invalid record");
+ if (RecordLength % 2 == 0) {
+ // A function attachment.
+ for (unsigned I = 0; I != RecordLength; I += 2) {
+ auto K = MDKindMap.find(Record[I]);
+ if (K == MDKindMap.end())
+ return Error("Invalid ID");
+ Metadata *MD = MDValueList.getValueFwdRef(Record[I + 1]);
+ F.setMetadata(K->second, cast<MDNode>(MD));
+ }
+ continue;
+ }
+
+ // An instruction attachment.
Instruction *Inst = InstructionList[Record[0]];
for (unsigned i = 1; i != RecordLength; i = i+2) {
unsigned Kind = Record[i];
@@ -2518,6 +3305,20 @@ std::error_code BitcodeReader::ParseMetadataAttachment() {
}
}
+static std::error_code TypeCheckLoadStoreInst(DiagnosticHandlerFunction DH,
+ Type *ValType, Type *PtrType) {
+ if (!isa<PointerType>(PtrType))
+ return Error(DH, "Load/Store operand is not a pointer type");
+ Type *ElemType = cast<PointerType>(PtrType)->getElementType();
+
+ if (ValType && ValType != ElemType)
+ return Error(DH, "Explicit load/store type does not match pointee type of "
+ "pointer operand");
+ if (!PointerType::isLoadableOrStorableType(ElemType))
+ return Error(DH, "Cannot load/store from pointer");
+ return std::error_code();
+}
+
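TypeCheckLoadStoreInst() captures the invariant both the load and store paths need: the pointer operand must really be a pointer, and an explicit value type, when the record carries one, must equal its pointee. The same check over a toy type model (stand-in types, not LLVM's):

    #include <cassert>
    #include <string>

    struct Type {
      bool IsPointer;
      const Type *Pointee;
    };

    // Old-style records omit the explicit type, so ValType may be null.
    static bool typeCheckLoadStore(const Type *ValType, const Type *PtrType,
                                   std::string &Err) {
      if (!PtrType->IsPointer) {
        Err = "Load/Store operand is not a pointer type";
        return false;
      }
      if (ValType && ValType != PtrType->Pointee) {
        Err = "Explicit load/store type does not match pointee type";
        return false;
      }
      return true;
    }

    int main() {
      Type I32 = {false, nullptr};
      Type PtrI32 = {true, &I32};
      std::string Err;
      assert(typeCheckLoadStore(&I32, &PtrI32, Err));    // explicit type matches
      assert(typeCheckLoadStore(nullptr, &PtrI32, Err)); // old-style record
      assert(!typeCheckLoadStore(&I32, &I32, Err));      // not a pointer
      return 0;
    }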
/// ParseFunctionBody - Lazily parse the specified function body block.
std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
@@ -2572,7 +3373,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
return EC;
break;
case bitc::METADATA_ATTACHMENT_ID:
- if (std::error_code EC = ParseMetadataAttachment())
+ if (std::error_code EC = ParseMetadataAttachment(*F))
return EC;
break;
case bitc::METADATA_BLOCK_ID:
@@ -2731,13 +3532,35 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_INBOUNDS_GEP:
- case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+ case bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD:
+ case bitc::FUNC_CODE_INST_GEP_OLD:
+ case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands]
unsigned OpNum = 0;
+
+ Type *Ty;
+ bool InBounds;
+
+ if (BitCode == bitc::FUNC_CODE_INST_GEP) {
+ InBounds = Record[OpNum++];
+ Ty = getTypeByID(Record[OpNum++]);
+ } else {
+ InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD;
+ Ty = nullptr;
+ }
+
Value *BasePtr;
if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
return Error("Invalid record");
+ if (!Ty)
+ Ty = cast<SequentialType>(BasePtr->getType()->getScalarType())
+ ->getElementType();
+ else if (Ty !=
+ cast<SequentialType>(BasePtr->getType()->getScalarType())
+ ->getElementType())
+ return Error(
+ "Explicit gep type does not match pointee type of pointer operand");
+
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
@@ -2746,9 +3569,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
GEPIdx.push_back(Op);
}
- I = GetElementPtrInst::Create(BasePtr, GEPIdx);
+ I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx);
+
InstructionList.push_back(I);
- if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP)
+ if (InBounds)
cast<GetElementPtrInst>(I)->setIsInBounds(true);
break;
}
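The GEP cases now accept three encodings: the two legacy opcodes carry inbounds in the opcode itself and no type, while the new record leads with [inbounds, explicit type]. A toy header decode mirroring the OpNum bookkeeping above (names are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    enum Code { GEP_OLD, INBOUNDS_GEP_OLD, GEP };

    struct GEPHeader {
      bool InBounds;
      bool HasExplicitType;
      unsigned OpNum; // first operand slot after the header fields
    };

    static GEPHeader decodeGEPHeader(Code C, const std::vector<uint64_t> &Record) {
      if (C == GEP)
        return {Record[0] != 0, true, 2}; // [inbounds, type-ID, operands...]
      return {C == INBOUNDS_GEP_OLD, false, 0};
    }

    int main() {
      GEPHeader H = decodeGEPHeader(GEP, {1, /*type id*/ 7});
      assert(H.InBounds && H.HasExplicitType && H.OpNum == 2);
      GEPHeader Old = decodeGEPHeader(INBOUNDS_GEP_OLD, {});
      assert(Old.InBounds && !Old.HasExplicitType && Old.OpNum == 0);
      return 0;
    }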
@@ -2760,13 +3584,31 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
return Error("Invalid record");
+ unsigned RecSize = Record.size();
+ if (OpNum == RecSize)
+ return Error("EXTRACTVAL: Invalid instruction with 0 indices");
+
SmallVector<unsigned, 4> EXTRACTVALIdx;
- for (unsigned RecSize = Record.size();
- OpNum != RecSize; ++OpNum) {
+ Type *CurTy = Agg->getType();
+ for (; OpNum != RecSize; ++OpNum) {
+ bool IsArray = CurTy->isArrayTy();
+ bool IsStruct = CurTy->isStructTy();
uint64_t Index = Record[OpNum];
+
+ if (!IsStruct && !IsArray)
+ return Error("EXTRACTVAL: Invalid type");
if ((unsigned)Index != Index)
return Error("Invalid value");
+ if (IsStruct && Index >= CurTy->subtypes().size())
+ return Error("EXTRACTVAL: Invalid struct index");
+ if (IsArray && Index >= CurTy->getArrayNumElements())
+ return Error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
+
+ if (IsStruct)
+ CurTy = CurTy->subtypes()[Index];
+ else
+ CurTy = CurTy->subtypes()[0];
}
I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
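The EXTRACTVAL (and matching INSERTVAL) validation walks the aggregate type one index at a time: struct indices must stay within the field list, array indices within the element count, and arrays always step to their single element type. The same walk in miniature (stand-in types, not LLVM's Type hierarchy):

    #include <cassert>
    #include <vector>

    struct Ty {
      enum Kind { Int, Struct, Array } K;
      std::vector<const Ty *> Elems; // Struct: fields; Array: one element type
      unsigned NumElts;              // Array only
    };

    static const Ty *walkIndices(const Ty *Cur, const std::vector<unsigned> &Idx) {
      for (unsigned I : Idx) {
        if (Cur->K == Ty::Struct) {
          if (I >= Cur->Elems.size())
            return nullptr;    // invalid struct index
          Cur = Cur->Elems[I];
        } else if (Cur->K == Ty::Array) {
          if (I >= Cur->NumElts)
            return nullptr;    // invalid array index
          Cur = Cur->Elems[0]; // arrays collapse to their element type
        } else {
          return nullptr;      // can't index into a scalar
        }
      }
      return Cur;
    }

    int main() {
      Ty I32 = {Ty::Int, {}, 0};
      Ty Arr = {Ty::Array, {&I32}, 4};
      Ty S = {Ty::Struct, {&I32, &Arr}, 0};
      assert(walkIndices(&S, {1u, 3u}) == &I32);    // S.field1[3] -> i32
      assert(walkIndices(&S, {2u}) == nullptr);     // struct index out of range
      assert(walkIndices(&S, {1u, 9u}) == nullptr); // array index out of range
      return 0;
    }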
@@ -2784,15 +3626,36 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Val))
return Error("Invalid record");
+ unsigned RecSize = Record.size();
+ if (OpNum == RecSize)
+ return Error("INSERTVAL: Invalid instruction with 0 indices");
+
SmallVector<unsigned, 4> INSERTVALIdx;
- for (unsigned RecSize = Record.size();
- OpNum != RecSize; ++OpNum) {
+ Type *CurTy = Agg->getType();
+ for (; OpNum != RecSize; ++OpNum) {
+ bool IsArray = CurTy->isArrayTy();
+ bool IsStruct = CurTy->isStructTy();
uint64_t Index = Record[OpNum];
+
+ if (!IsStruct && !IsArray)
+ return Error("INSERTVAL: Invalid type");
if ((unsigned)Index != Index)
return Error("Invalid value");
+ if (IsStruct && Index >= CurTy->subtypes().size())
+ return Error("INSERTVAL: Invalid struct index");
+ if (IsArray && Index >= CurTy->getArrayNumElements())
+ return Error("INSERTVAL: Invalid array index");
+
INSERTVALIdx.push_back((unsigned)Index);
+ if (IsStruct)
+ CurTy = CurTy->subtypes()[Index];
+ else
+ CurTy = CurTy->subtypes()[0];
}
+ if (CurTy != Val->getType())
+ return Error("Inserted value type doesn't match aggregate type");
+
I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
InstructionList.push_back(I);
break;
@@ -2846,6 +3709,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
return Error("Invalid record");
+ if (!Vec->getType()->isVectorTy())
+ return Error("Invalid type for value");
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
break;
@@ -2854,8 +3719,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- popValue(Record, OpNum, NextValueNo,
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec))
+ return Error("Invalid record");
+ if (!Vec->getType()->isVectorTy())
+ return Error("Invalid type for value");
+ if (popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
return Error("Invalid record");
@@ -2873,6 +3741,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
return Error("Invalid record");
+ if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy())
+ return Error("Invalid type for value");
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
InstructionList.push_back(I);
break;
@@ -3056,24 +3926,33 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4)
return Error("Invalid record");
- AttributeSet PAL = getAttributes(Record[0]);
- unsigned CCInfo = Record[1];
- BasicBlock *NormalBB = getBasicBlock(Record[2]);
- BasicBlock *UnwindBB = getBasicBlock(Record[3]);
+ unsigned OpNum = 0;
+ AttributeSet PAL = getAttributes(Record[OpNum++]);
+ unsigned CCInfo = Record[OpNum++];
+ BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]);
+ BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]);
+
+ FunctionType *FTy = nullptr;
+ if (CCInfo >> 13 & 1 &&
+ !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
+ return Error("Explicit invoke type is not a function type");
- unsigned OpNum = 4;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid record");
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
- FunctionType *FTy = !CalleeTy ? nullptr :
- dyn_cast<FunctionType>(CalleeTy->getElementType());
-
- // Check that the right number of fixed parameters are here.
- if (!FTy || !NormalBB || !UnwindBB ||
- Record.size() < OpNum+FTy->getNumParams())
- return Error("Invalid record");
+ if (!CalleeTy)
+ return Error("Callee is not a pointer");
+ if (!FTy) {
+ FTy = dyn_cast<FunctionType>(CalleeTy->getElementType());
+ if (!FTy)
+ return Error("Callee is not of pointer to function type");
+ } else if (CalleeTy->getElementType() != FTy)
+ return Error("Explicit invoke type does not match pointee type of "
+ "callee operand");
+ if (Record.size() < FTy->getNumParams() + OpNum)
+ return Error("Insufficient operands to call");
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
@@ -3098,8 +3977,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
InstructionList.push_back(I);
- cast<InvokeInst>(I)->setCallingConv(
- static_cast<CallingConv::ID>(CCInfo));
+ cast<InvokeInst>(I)
+ ->setCallingConv(static_cast<CallingConv::ID>(~(1U << 13) & CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
break;
}
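For INVOKE, bit 13 of the CC operand now flags that an explicit function type follows; the calling convention itself is whatever remains once that bit is stripped (the CALL handler below plays the same game with different bit positions). A worked decode; the CC value is made up for illustration:

    #include <cassert>

    int main() {
      unsigned CCInfo = (1u << 13) | 9;   // flag set, CC = 9
      bool HasExplicitType = (CCInfo >> 13) & 1;
      unsigned CC = ~(1u << 13) & CCInfo; // strip the flag, as the code above does
      assert(HasExplicitType && CC == 9);
      return 0;
    }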
@@ -3187,16 +4066,28 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
return Error("Invalid record");
- PointerType *Ty =
- dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ uint64_t AlignRecord = Record[3];
+ const uint64_t InAllocaMask = uint64_t(1) << 5;
+ const uint64_t ExplicitTypeMask = uint64_t(1) << 6;
+ const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask;
+ bool InAlloca = AlignRecord & InAllocaMask;
+ Type *Ty = getTypeByID(Record[0]);
+ if ((AlignRecord & ExplicitTypeMask) == 0) {
+ auto *PTy = dyn_cast_or_null<PointerType>(Ty);
+ if (!PTy)
+ return Error("Old-style alloca with a non-pointer type");
+ Ty = PTy->getElementType();
+ }
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
- unsigned AlignRecord = Record[3];
- bool InAlloca = AlignRecord & (1 << 5);
- unsigned Align = AlignRecord & ((1 << 5) - 1);
+ unsigned Align;
+ if (std::error_code EC =
+ parseAlignmentValue(AlignRecord & ~FlagMask, Align)) {
+ return EC;
+ }
if (!Ty || !Size)
return Error("Invalid record");
- AllocaInst *AI = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ AllocaInst *AI = new AllocaInst(Ty, Size, Align);
AI->setUsedWithInAlloca(InAlloca);
I = AI;
InstructionList.push_back(I);
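The ALLOCA alignment operand now packs flags above the alignment exponent: bit 5 is inalloca, bit 6 marks an explicit allocated type, and the low bits hold the usual exponent-plus-one alignment. A worked decode using the same masks:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t InAllocaMask = uint64_t(1) << 5;
      const uint64_t ExplicitTypeMask = uint64_t(1) << 6;
      const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask;

      uint64_t AlignRecord = ExplicitTypeMask | 5; // explicit type, 2^4 = 16
      bool InAlloca = AlignRecord & InAllocaMask;
      bool ExplicitType = AlignRecord & ExplicitTypeMask;
      unsigned Align = (1u << unsigned(AlignRecord & ~FlagMask)) >> 1;

      assert(!InAlloca && ExplicitType && Align == 16);
      return 0;
    }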
@@ -3206,10 +4097,23 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
- OpNum+2 != Record.size())
+ (OpNum + 2 != Record.size() && OpNum + 3 != Record.size()))
return Error("Invalid record");
- I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ Type *Ty = nullptr;
+ if (OpNum + 3 == Record.size())
+ Ty = getTypeByID(Record[OpNum++]);
+ if (std::error_code EC =
+ TypeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ return EC;
+ if (!Ty)
+ Ty = cast<PointerType>(Op->getType())->getElementType();
+
+ unsigned Align;
+ if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
+ return EC;
+ I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align);
+
InstructionList.push_back(I);
break;
}
@@ -3218,9 +4122,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
- OpNum+4 != Record.size())
+ (OpNum + 4 != Record.size() && OpNum + 5 != Record.size()))
return Error("Invalid record");
+ Type *Ty = nullptr;
+ if (OpNum + 5 == Record.size())
+ Ty = getTypeByID(Record[OpNum++]);
+ if (std::error_code EC =
+ TypeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType()))
+ return EC;
+ if (!Ty)
+ Ty = cast<PointerType>(Op->getType())->getElementType();
+
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Release ||
Ordering == AcquireRelease)
@@ -3229,34 +4142,54 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid record");
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
- I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
- Ordering, SynchScope);
+ unsigned Align;
+ if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
+ return EC;
+ I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope);
+
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
+ case bitc::FUNC_CODE_INST_STORE:
+ case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
- OpNum+2 != Record.size())
+ (BitCode == bitc::FUNC_CODE_INST_STORE
+ ? getValueTypePair(Record, OpNum, NextValueNo, Val)
+ : popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(),
+ Val)) ||
+ OpNum + 2 != Record.size())
return Error("Invalid record");
- I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+ if (std::error_code EC = TypeCheckLoadStoreInst(
+ DiagnosticHandler, Val->getType(), Ptr->getType()))
+ return EC;
+ unsigned Align;
+ if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
+ return EC;
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], Align);
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_STOREATOMIC: {
+ case bitc::FUNC_CODE_INST_STOREATOMIC:
+ case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: {
// STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope]
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
- OpNum+4 != Record.size())
+ (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
+ ? getValueTypePair(Record, OpNum, NextValueNo, Val)
+ : popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(),
+ Val)) ||
+ OpNum + 4 != Record.size())
return Error("Invalid record");
+ if (std::error_code EC = TypeCheckLoadStoreInst(
+ DiagnosticHandler, Val->getType(), Ptr->getType()))
+ return EC;
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Acquire ||
Ordering == AcquireRelease)
@@ -3265,28 +4198,36 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Ordering != NotAtomic && Record[OpNum] == 0)
return Error("Invalid record");
- I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
- Ordering, SynchScope);
+ unsigned Align;
+ if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
+ return EC;
+ I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope);
InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_CMPXCHG_OLD:
case bitc::FUNC_CODE_INST_CMPXCHG: {
// CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope,
// failureordering?, isweak?]
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
- popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(), New) ||
- (Record.size() < OpNum + 3 || Record.size() > OpNum + 5))
+ (BitCode == bitc::FUNC_CODE_INST_CMPXCHG
+ ? getValueTypePair(Record, OpNum, NextValueNo, Cmp)
+ : popValue(Record, OpNum, NextValueNo,
+ cast<PointerType>(Ptr->getType())->getElementType(),
+ Cmp)) ||
+ popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
+ Record.size() < OpNum + 3 || Record.size() > OpNum + 5)
return Error("Invalid record");
AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]);
if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered)
return Error("Invalid record");
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
+ if (std::error_code EC = TypeCheckLoadStoreInst(
+ DiagnosticHandler, Cmp->getType(), Ptr->getType()))
+ return EC;
AtomicOrdering FailureOrdering;
if (Record.size() < 7)
FailureOrdering =
@@ -3350,19 +4291,31 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3)
return Error("Invalid record");
- AttributeSet PAL = getAttributes(Record[0]);
- unsigned CCInfo = Record[1];
+ unsigned OpNum = 0;
+ AttributeSet PAL = getAttributes(Record[OpNum++]);
+ unsigned CCInfo = Record[OpNum++];
+
+ FunctionType *FTy = nullptr;
+ if (CCInfo >> 15 & 1 &&
+ !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
+ return Error("Explicit call type is not a function type");
- unsigned OpNum = 2;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
return Error("Invalid record");
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
- FunctionType *FTy = nullptr;
- if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
- if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
- return Error("Invalid record");
+ if (!OpTy)
+ return Error("Callee is not a pointer type");
+ if (!FTy) {
+ FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+ if (!FTy)
+ return Error("Callee is not of pointer to function type");
+ } else if (OpTy->getElementType() != FTy)
+ return Error("Explicit call type does not match pointee type of "
+ "callee operand");
+ if (Record.size() < FTy->getNumParams() + OpNum)
+ return Error("Insufficient operands to call");
SmallVector<Value*, 16> Args;
// Read the fixed params.
@@ -3389,7 +4342,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
}
- I = CallInst::Create(Callee, Args);
+ I = CallInst::Create(FTy, Callee, Args);
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
static_cast<CallingConv::ID>((~(1U << 14) & CCInfo) >> 1));
@@ -3483,6 +4436,9 @@ std::error_code BitcodeReader::FindFunctionInStream(
void BitcodeReader::releaseBuffer() { Buffer.release(); }
std::error_code BitcodeReader::materialize(GlobalValue *GV) {
+ if (std::error_code EC = materializeMetadata())
+ return EC;
+
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
if (!F || !F->isMaterializable())
@@ -3503,6 +4459,9 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
return EC;
F->setIsMaterializable(false);
+ if (StripDebugInfo)
+ stripDebugInfo(*F);
+
// Upgrade any old intrinsic calls in the function.
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
E = UpgradedIntrinsics.end(); I != E; ++I) {
@@ -3533,7 +4492,7 @@ bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
return DeferredFunctionInfo.count(const_cast<Function*>(F));
}
-void BitcodeReader::Dematerialize(GlobalValue *GV) {
+void BitcodeReader::dematerialize(GlobalValue *GV) {
Function *F = dyn_cast<Function>(GV);
// If this function isn't dematerializable, this is a noop.
if (!F || !isDematerializable(F))
@@ -3546,10 +4505,13 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) {
F->setIsMaterializable(true);
}
-std::error_code BitcodeReader::MaterializeModule(Module *M) {
+std::error_code BitcodeReader::materializeModule(Module *M) {
assert(M == TheModule &&
"Can only Materialize the Module this BitcodeReader is attached to.");
+ if (std::error_code EC = materializeMetadata())
+ return EC;
+
// Promise to materialize all forward references.
WillMaterializeAllForwardRefs = true;
@@ -3690,7 +4652,8 @@ const std::error_category &llvm::BitcodeErrorCategory() {
static ErrorOr<Module *>
getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context, bool WillMaterializeAll,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool ShouldLazyLoadMetadata = false) {
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
BitcodeReader *R =
new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
@@ -3702,7 +4665,8 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
return EC;
};
- if (std::error_code EC = R->ParseBitcodeInto(M))
+ // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
+ if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata))
return cleanupOnError(EC);
if (!WillMaterializeAll)
@@ -3717,9 +4681,10 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
ErrorOr<Module *>
llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool ShouldLazyLoadMetadata) {
return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
- DiagnosticHandler);
+ DiagnosticHandler, ShouldLazyLoadMetadata);
}
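At the API surface, the new ShouldLazyLoadMetadata flag lets getLazyBitcodeModule defer the metadata block along with function bodies. A hedged usage sketch based on the signature shown in this diff; the include path is my assumption for this LLVM vintage:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    // Open a module lazily and defer its metadata too; the deferred block is
    // parsed on the first materialize()/materializeModule() call.
    llvm::ErrorOr<llvm::Module *>
    openLazily(std::unique_ptr<llvm::MemoryBuffer> Buf, llvm::LLVMContext &Ctx) {
      return llvm::getLazyBitcodeModule(std::move(Buf), Ctx,
                                        /*DiagnosticHandler=*/nullptr,
                                        /*ShouldLazyLoadMetadata=*/true);
    }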
ErrorOr<std::unique_ptr<Module>>
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
deleted file mode 100644
index 5090be4..0000000
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
+++ /dev/null
@@ -1,365 +0,0 @@
-//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header defines the BitcodeReader class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_BITCODE_READER_BITCODEREADER_H
-#define LLVM_LIB_BITCODE_READER_BITCODEREADER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Bitcode/BitstreamReader.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/TrackingMDRef.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
-#include <deque>
-#include <system_error>
-#include <vector>
-
-namespace llvm {
- class Comdat;
- class MemoryBuffer;
- class LLVMContext;
-
-//===----------------------------------------------------------------------===//
-// BitcodeReaderValueList Class
-//===----------------------------------------------------------------------===//
-
-class BitcodeReaderValueList {
- std::vector<WeakVH> ValuePtrs;
-
- /// ResolveConstants - As we resolve forward-referenced constants, we add
- /// information about them to this vector. This allows us to resolve them in
- /// bulk instead of resolving each reference at a time. See the code in
- /// ResolveConstantForwardRefs for more information about this.
- ///
- /// The key of this vector is the placeholder constant, the value is the slot
- /// number that holds the resolved value.
- typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
- ResolveConstantsTy ResolveConstants;
- LLVMContext &Context;
-public:
- BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
- ~BitcodeReaderValueList() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- }
-
- // vector compatibility methods
- unsigned size() const { return ValuePtrs.size(); }
- void resize(unsigned N) { ValuePtrs.resize(N); }
- void push_back(Value *V) {
- ValuePtrs.push_back(V);
- }
-
- void clear() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- ValuePtrs.clear();
- }
-
- Value *operator[](unsigned i) const {
- assert(i < ValuePtrs.size());
- return ValuePtrs[i];
- }
-
- Value *back() const { return ValuePtrs.back(); }
- void pop_back() { ValuePtrs.pop_back(); }
- bool empty() const { return ValuePtrs.empty(); }
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- ValuePtrs.resize(N);
- }
-
- Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
- Value *getValueFwdRef(unsigned Idx, Type *Ty);
-
- void AssignValue(Value *V, unsigned Idx);
-
- /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
- /// resolves any forward references.
- void ResolveConstantForwardRefs();
-};
-
-
-//===----------------------------------------------------------------------===//
-// BitcodeReaderMDValueList Class
-//===----------------------------------------------------------------------===//
-
-class BitcodeReaderMDValueList {
- unsigned NumFwdRefs;
- bool AnyFwdRefs;
- unsigned MinFwdRef;
- unsigned MaxFwdRef;
- std::vector<TrackingMDRef> MDValuePtrs;
-
- LLVMContext &Context;
-public:
- BitcodeReaderMDValueList(LLVMContext &C)
- : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
-
- // vector compatibility methods
- unsigned size() const { return MDValuePtrs.size(); }
- void resize(unsigned N) { MDValuePtrs.resize(N); }
- void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); }
- void clear() { MDValuePtrs.clear(); }
- Metadata *back() const { return MDValuePtrs.back(); }
- void pop_back() { MDValuePtrs.pop_back(); }
- bool empty() const { return MDValuePtrs.empty(); }
-
- Metadata *operator[](unsigned i) const {
- assert(i < MDValuePtrs.size());
- return MDValuePtrs[i];
- }
-
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- MDValuePtrs.resize(N);
- }
-
- Metadata *getValueFwdRef(unsigned Idx);
- void AssignValue(Metadata *MD, unsigned Idx);
- void tryToResolveCycles();
-};
-
-class BitcodeReader : public GVMaterializer {
- LLVMContext &Context;
- DiagnosticHandlerFunction DiagnosticHandler;
- Module *TheModule;
- std::unique_ptr<MemoryBuffer> Buffer;
- std::unique_ptr<BitstreamReader> StreamFile;
- BitstreamCursor Stream;
- DataStreamer *LazyStreamer;
- uint64_t NextUnreadBit;
- bool SeenValueSymbolTable;
-
- std::vector<Type*> TypeList;
- BitcodeReaderValueList ValueList;
- BitcodeReaderMDValueList MDValueList;
- std::vector<Comdat *> ComdatList;
- SmallVector<Instruction *, 64> InstructionList;
-
- std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
- std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
- std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
- std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
-
- SmallVector<Instruction*, 64> InstsWithTBAATag;
-
- /// MAttributes - The set of attributes by index. Index zero in the
- /// file is for null, and is thus not represented here. As such all indices
- /// are off by one.
- std::vector<AttributeSet> MAttributes;
-
- /// \brief The set of attribute groups.
- std::map<unsigned, AttributeSet> MAttributeGroups;
-
- /// FunctionBBs - While parsing a function body, this is a list of the basic
- /// blocks for the function.
- std::vector<BasicBlock*> FunctionBBs;
-
- // When reading the module header, this list is populated with functions that
- // have bodies later in the file.
- std::vector<Function*> FunctionsWithBodies;
-
- // When intrinsic functions are encountered which require upgrading they are
- // stored here with their replacement function.
- typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
- UpgradedIntrinsicMap UpgradedIntrinsics;
-
- // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
- DenseMap<unsigned, unsigned> MDKindMap;
-
- // Several operations happen after the module header has been read, but
- // before function bodies are processed. This keeps track of whether
- // we've done this yet.
- bool SeenFirstFunctionBody;
-
- /// DeferredFunctionInfo - When function bodies are initially scanned, this
- /// map contains info about where to find deferred function body in the
- /// stream.
- DenseMap<Function*, uint64_t> DeferredFunctionInfo;
-
- /// These are basic blocks forward-referenced by block addresses. They are
- /// inserted lazily into functions when they're loaded. The basic block ID is
- /// its index into the vector.
- DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
- std::deque<Function *> BasicBlockFwdRefQueue;
-
- /// UseRelativeIDs - Indicates that we are using a new encoding for
- /// instruction operands where most operands in the current
- /// FUNCTION_BLOCK are encoded relative to the instruction number,
- /// for a more compact encoding. Some instruction operands are not
- /// relative to the instruction ID: basic block numbers, and types.
- /// Once the old style function blocks have been phased out, we would
- /// not need this flag.
- bool UseRelativeIDs;
-
- /// True if all functions will be materialized, negating the need to process
- /// (e.g.) blockaddress forward references.
- bool WillMaterializeAllForwardRefs;
-
- /// Functions that have block addresses taken. This is usually empty.
- SmallPtrSet<const Function *, 4> BlockAddressesTaken;
-
-public:
- std::error_code Error(BitcodeError E, const Twine &Message);
- std::error_code Error(BitcodeError E);
- std::error_code Error(const Twine &Message);
-
- explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- ~BitcodeReader() { FreeState(); }
-
- std::error_code materializeForwardReferencedFunctions();
-
- void FreeState();
-
- void releaseBuffer();
-
- bool isDematerializable(const GlobalValue *GV) const override;
- std::error_code materialize(GlobalValue *GV) override;
- std::error_code MaterializeModule(Module *M) override;
- std::vector<StructType *> getIdentifiedStructTypes() const override;
- void Dematerialize(GlobalValue *GV) override;
-
- /// @brief Main interface to parsing a bitcode buffer.
- /// @returns true if an error occurred.
- std::error_code ParseBitcodeInto(Module *M);
-
- /// @brief Cheap mechanism to just extract module triple
- /// @returns true if an error occurred.
- ErrorOr<std::string> parseTriple();
-
- static uint64_t decodeSignRotatedValue(uint64_t V);
-
-private:
- std::vector<StructType *> IdentifiedStructTypes;
- StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
- StructType *createIdentifiedStructType(LLVMContext &Context);
-
- Type *getTypeByID(unsigned ID);
- Value *getFnValueByID(unsigned ID, Type *Ty) {
- if (Ty && Ty->isMetadataTy())
- return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
- return ValueList.getValueFwdRef(ID, Ty);
- }
- Metadata *getFnMetadataByID(unsigned ID) {
- return MDValueList.getValueFwdRef(ID);
- }
- BasicBlock *getBasicBlock(unsigned ID) const {
- if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
- return FunctionBBs[ID];
- }
- AttributeSet getAttributes(unsigned i) const {
- if (i-1 < MAttributes.size())
- return MAttributes[i-1];
- return AttributeSet();
- }
-
- /// getValueTypePair - Read a value/type pair out of the specified record from
- /// slot 'Slot'. Increment Slot past the number of slots used in the record.
- /// Return true on failure.
- bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Value *&ResVal) {
- if (Slot == Record.size()) return true;
- unsigned ValNo = (unsigned)Record[Slot++];
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- if (ValNo < InstNum) {
- // If this is not a forward reference, just return the value we already
- // have.
- ResVal = getFnValueByID(ValNo, nullptr);
- return ResVal == nullptr;
- } else if (Slot == Record.size()) {
- return true;
- }
-
- unsigned TypeNo = (unsigned)Record[Slot++];
- ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
- return ResVal == nullptr;
- }
-
- /// popValue - Read a value out of the specified record from slot 'Slot'.
- /// Increment Slot past the number of slots used by the value in the record.
- /// Return true if there is an error.
- bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- if (getValue(Record, Slot, InstNum, Ty, ResVal))
- return true;
- // All values currently take a single record slot.
- ++Slot;
- return false;
- }
-
- /// getValue -- Like popValue, but does not increment the Slot number.
- bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- ResVal = getValue(Record, Slot, InstNum, Ty);
- return ResVal == nullptr;
- }
-
- /// getValue -- Version of getValue that returns ResVal directly,
- /// or 0 if there is an error.
- Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return nullptr;
- unsigned ValNo = (unsigned)Record[Slot];
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
- }
-
- /// getValueSigned -- Like getValue, but decodes signed VBRs.
- Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return nullptr;
- unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
- }
-
- std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code ParseModule(bool Resume);
- std::error_code ParseAttributeBlock();
- std::error_code ParseAttributeGroupBlock();
- std::error_code ParseTypeTable();
- std::error_code ParseTypeTableBody();
-
- std::error_code ParseValueSymbolTable();
- std::error_code ParseConstants();
- std::error_code RememberAndSkipFunctionBody();
- std::error_code ParseFunctionBody(Function *F);
- std::error_code GlobalCleanup();
- std::error_code ResolveGlobalAndAliasInits();
- std::error_code ParseMetadata();
- std::error_code ParseMetadataAttachment();
- ErrorOr<std::string> parseModuleTriple();
- std::error_code ParseUseLists();
- std::error_code InitStream();
- std::error_code InitStreamFromBuffer();
- std::error_code InitLazyStream();
- std::error_code FindFunctionInStream(
- Function *F,
- DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
-};
-
-} // End llvm namespace
-
-#endif
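Two decoding conventions from the removed helpers deserve a note: operand IDs can be stored relative to the current instruction number (UseRelativeIDs), and signed immediates use a sign-rotated VBR. A minimal sketch, assuming 64-bit unsigned wraparound; decodeSignRotatedValue mirrors the reader's helper of the same name:

    // Sign-rotated VBR: the low bit carries the sign, the rest the magnitude.
    static uint64_t decodeSignRotatedValue(uint64_t V) {
      if ((V & 1) == 0)
        return V >> 1;        // non-negative value
      if (V != 1)
        return -(V >> 1);     // negative value
      return 1ULL << 63;      // "-0" really means INT64_MIN
    }

    // Relative IDs: a stored operand N means "the value defined N slots before
    // this instruction", so ValNo = InstNum - N recovers the absolute ID.
    // E.g. InstNum == 10 and a stored 1 refer back to value #9.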
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
index 5e3232e..a103fbd 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -39,15 +39,16 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
// Get the codesize of this block.
CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+ // We can't read more than MaxChunkSize bits at a time.
+ if (CurCodeSize > MaxChunkSize)
+ return true;
+
SkipToFourByteBoundary();
unsigned NumWords = Read(bitc::BlockSizeWidth);
if (NumWordsP) *NumWordsP = NumWords;
// Validate that this block is sane.
- if (CurCodeSize == 0 || AtEndOfStream())
- return true;
-
- return false;
+ return CurCodeSize == 0 || AtEndOfStream();
}
static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
@@ -60,8 +61,10 @@ static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
case BitCodeAbbrevOp::Blob:
llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
+ assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
return Cursor.Read((unsigned)Op.getEncodingData());
case BitCodeAbbrevOp::VBR:
+ assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
case BitCodeAbbrevOp::Char6:
return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6));
@@ -79,9 +82,11 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
case BitCodeAbbrevOp::Blob:
llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
+ assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
Cursor.Read((unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
+ assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
Cursor.ReadVBR64((unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::Char6:
@@ -170,8 +175,12 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
unsigned Code;
if (CodeOp.isLiteral())
Code = CodeOp.getLiteralValue();
- else
+ else {
+ if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+ CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+ report_fatal_error("Abbreviation starts with an Array or a Blob");
Code = readAbbreviatedField(*this, CodeOp);
+ }
for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
@@ -191,8 +200,15 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
unsigned NumElts = ReadVBR(6);
// Get the element encoding.
- assert(i+2 == e && "array op not second to last?");
+ if (i + 2 != e)
+ report_fatal_error("Array op not second to last");
const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+ if (!EltEnc.isEncoding())
+ report_fatal_error(
+ "Array element type has to be an encoding of a type");
+ if (EltEnc.getEncoding() == BitCodeAbbrevOp::Array ||
+ EltEnc.getEncoding() == BitCodeAbbrevOp::Blob)
+ report_fatal_error("Array element type can't be an Array or a Blob");
// Read all the elements.
for (; NumElts; --NumElts)
@@ -241,7 +257,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
unsigned NumOpInfo = ReadVBR(5);
for (unsigned i = 0; i != NumOpInfo; ++i) {
- bool IsLiteral = Read(1) ? true : false;
+ bool IsLiteral = Read(1);
if (IsLiteral) {
Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
continue;
@@ -249,7 +265,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
if (BitCodeAbbrevOp::hasEncodingData(E)) {
- unsigned Data = ReadVBR64(5);
+ uint64_t Data = ReadVBR64(5);
// As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
// and vbr(0) as a literal zero. This is decoded the same way, and avoids
@@ -260,10 +276,18 @@ void BitstreamCursor::ReadAbbrevRecord() {
continue;
}
+ if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
+ Data > MaxChunkSize)
+ report_fatal_error(
+ "Fixed or VBR abbrev record with size > MaxChunkData");
+
Abbv->Add(BitCodeAbbrevOp(E, Data));
} else
Abbv->Add(BitCodeAbbrevOp(E));
}
+
+ if (Abbv->getNumOperandInfos() == 0)
+ report_fatal_error("Abbrev record with no operands");
CurAbbrevs.push_back(Abbv);
}
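Net effect of the hunks above: the cursor now rejects malformed bitcode up front (oversized Fixed/VBR widths, abbrevs that start with an Array or Blob, arrays whose element is not a scalar encoding, abbrevs with no operands) instead of asserting or misbehaving later. A condensed sketch of the width rule; hasAcceptableWidth is a hypothetical name, the real checks are inlined above:

    // Fixed/VBR widths must fit one Read()/ReadVBR64() chunk.
    static bool hasAcceptableWidth(const BitCodeAbbrevOp &Op,
                                   uint64_t MaxChunkSize) {
      if (Op.isLiteral() || !BitCodeAbbrevOp::hasEncodingData(Op.getEncoding()))
        return true;  // literals and Char6/Array/Blob carry no bit width
      return Op.getEncodingData() <= MaxChunkSize;
    }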
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index a96e866..97caefb 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -55,7 +56,8 @@ enum {
FUNCTION_INST_CAST_ABBREV,
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
- FUNCTION_INST_UNREACHABLE_ABBREV
+ FUNCTION_INST_UNREACHABLE_ABBREV,
+ FUNCTION_INST_GEP_ABBREV,
};
static unsigned GetEncodedCastOpcode(unsigned Opcode) {
@@ -164,6 +166,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_BUILTIN;
case Attribute::ByVal:
return bitc::ATTR_KIND_BY_VAL;
+ case Attribute::Convergent:
+ return bitc::ATTR_KIND_CONVERGENT;
case Attribute::InAlloca:
return bitc::ATTR_KIND_IN_ALLOCA;
case Attribute::Cold:
@@ -198,6 +202,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NON_NULL;
case Attribute::Dereferenceable:
return bitc::ATTR_KIND_DEREFERENCEABLE;
+ case Attribute::DereferenceableOrNull:
+ return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL;
case Attribute::NoRedZone:
return bitc::ATTR_KIND_NO_RED_ZONE;
case Attribute::NoReturn:
@@ -322,7 +328,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
- uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+ uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -480,13 +486,13 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
case GlobalValue::ExternalLinkage:
return 0;
case GlobalValue::WeakAnyLinkage:
- return 1;
+ return 16;
case GlobalValue::AppendingLinkage:
return 2;
case GlobalValue::InternalLinkage:
return 3;
case GlobalValue::LinkOnceAnyLinkage:
- return 4;
+ return 18;
case GlobalValue::ExternalWeakLinkage:
return 7;
case GlobalValue::CommonLinkage:
@@ -494,9 +500,9 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
case GlobalValue::PrivateLinkage:
return 9;
case GlobalValue::WeakODRLinkage:
- return 10;
+ return 17;
case GlobalValue::LinkOnceODRLinkage:
- return 11;
+ return 19;
case GlobalValue::AvailableExternallyLinkage:
return 12;
}
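The renumbering (1 to 16, 4 to 18, 10 to 17, 11 to 19) frees the old code points so readers can keep accepting bitcode produced before this change; the old values apparently carry legacy implicit-comdat semantics on upgrade. A sketch of the decoder side this implies; the actual mapping lives in BitcodeReader.cpp:

    // Old and new code points decode to the same linkage (sketch):
    case 1:  case 16: return GlobalValue::WeakAnyLinkage;
    case 4:  case 18: return GlobalValue::LinkOnceAnyLinkage;
    case 10: case 17: return GlobalValue::WeakODRLinkage;
    case 11: case 19: return GlobalValue::LinkOnceODRLinkage;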
@@ -549,11 +555,13 @@ static unsigned getEncodedComdatSelectionKind(const Comdat &C) {
}
static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) {
- SmallVector<uint8_t, 64> Vals;
+ SmallVector<uint16_t, 64> Vals;
for (const Comdat *C : VE.getComdats()) {
// COMDAT: [selection_kind, name]
Vals.push_back(getEncodedComdatSelectionKind(*C));
- Vals.push_back(C->getName().size());
+ size_t Size = C->getName().size();
+ assert(isUInt<16>(Size));
+ Vals.push_back(Size);
for (char Chr : C->getName())
Vals.push_back((unsigned char)Chr);
Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
@@ -584,7 +592,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
unsigned MaxGlobalType = 0;
for (const GlobalValue &GV : M->globals()) {
MaxAlignment = std::max(MaxAlignment, GV.getAlignment());
- MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getType()));
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getValueType()));
if (GV.hasSection()) {
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV.getSection()];
@@ -625,10 +633,12 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(MaxGlobalType+1)));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constant.
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Linkage.
- if (MaxAlignment == 0) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2
+                                                      // | explicitType << 1
+                                                      // | constant
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
+ if (MaxAlignment == 0) // Alignment.
Abbv->Add(BitCodeAbbrevOp(0));
else {
unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
@@ -651,9 +661,10 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// GLOBALVAR: [type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
- // unnamed_addr, externally_initialized, dllstorageclass]
- Vals.push_back(VE.getTypeID(GV.getType()));
- Vals.push_back(GV.isConstant());
+ // unnamed_addr, externally_initialized, dllstorageclass,
+ // comdat]
+ Vals.push_back(VE.getTypeID(GV.getValueType()));
+ Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant());
Vals.push_back(GV.isDeclaration() ? 0 :
(VE.getValueID(GV.getInitializer()) + 1));
Vals.push_back(getEncodedLinkage(GV));
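The second GLOBALVAR operand is now a packed field rather than a bare isconst bool. Unpacking it, with illustrative names:

    uint64_t Packed = Record[1];
    bool IsConstant      = Packed & 1;   // bit 0: isconst
    bool HasExplicitType = Packed & 2;   // bit 1: always set by this writer
    unsigned AddrSpace   = Packed >> 2;  // bits 2+: pointer address space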
@@ -683,7 +694,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
// section, visibility, gc, unnamed_addr, prologuedata,
// dllstorageclass, comdat, prefixdata]
- Vals.push_back(VE.getTypeID(F.getType()));
+ Vals.push_back(VE.getTypeID(F.getFunctionType()));
Vals.push_back(F.getCallingConv());
Vals.push_back(F.isDeclaration());
Vals.push_back(getEncodedLinkage(F));
@@ -760,26 +771,22 @@ static void WriteValueAsMetadata(const ValueAsMetadata *MD,
Record.clear();
}
-static void WriteMDNode(const MDNode *N,
- const ValueEnumerator &VE,
- BitstreamWriter &Stream,
- SmallVectorImpl<uint64_t> &Record) {
+static void WriteMDTuple(const MDTuple *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
Metadata *MD = N->getOperand(i);
- if (!MD) {
- Record.push_back(0);
- continue;
- }
- assert(!isa<LocalAsMetadata>(MD) && "Unexpected function-local metadata");
- Record.push_back(VE.getMetadataID(MD) + 1);
+ assert(!(MD && isa<LocalAsMetadata>(MD)) &&
+ "Unexpected function-local metadata");
+ Record.push_back(VE.getMetadataOrNullID(MD));
}
Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE
: bitc::METADATA_NODE,
- Record);
+ Record, Abbrev);
Record.clear();
}
-static void WriteMDLocation(const MDLocation *N, const ValueEnumerator &VE,
+static void WriteDILocation(const DILocation *N, const ValueEnumerator &VE,
BitstreamWriter &Stream,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
@@ -787,17 +794,355 @@ static void WriteMDLocation(const MDLocation *N, const ValueEnumerator &VE,
Record.push_back(N->getLine());
Record.push_back(N->getColumn());
Record.push_back(VE.getMetadataID(N->getScope()));
-
- // Always emit the inlined-at location, even though it's optional.
- if (Metadata *InlinedAt = N->getInlinedAt())
- Record.push_back(VE.getMetadataID(InlinedAt) + 1);
- else
- Record.push_back(0);
+ Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev);
Record.clear();
}
+static void WriteGenericDINode(const GenericDINode *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(0); // Per-tag version field; unused for now.
+
+ for (auto &I : N->operands())
+ Record.push_back(VE.getMetadataOrNullID(I));
+
+ Stream.EmitRecord(bitc::METADATA_GENERIC_DEBUG, Record, Abbrev);
+ Record.clear();
+}
+
+static uint64_t rotateSign(int64_t I) {
+ uint64_t U = I;
+ return I < 0 ? ~(U << 1) : U << 1;
+}
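    // A few worked values for rotateSign (a zigzag code; assumes 64-bit
    // unsigned wraparound):
    //   rotateSign(0)  == 0   rotateSign(1)  == 2   rotateSign(2)  == 4
    //   rotateSign(-1) == 1   rotateSign(-2) == 3   rotateSign(-5) == 9
    // The reader can invert it with: U & 1 ? ~(U >> 1) : U >> 1.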
+
+static void WriteDISubrange(const DISubrange *N, const ValueEnumerator &,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getCount());
+ Record.push_back(rotateSign(N->getLowerBound()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIEnumerator(const DIEnumerator *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(rotateSign(N->getValue()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+ Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIBasicType(const DIBasicType *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getEncoding());
+
+ Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIDerivedType(const DIDerivedType *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getOffsetInBits());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getExtraData()));
+
+ Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDICompositeType(const DICompositeType *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getOffsetInBits());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
+ Record.push_back(N->getRuntimeLang());
+ Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier()));
+
+ Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDISubroutineType(const DISubroutineType *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIFile(const DIFile *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFilename()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory()));
+
+ Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDICompileUnit(const DICompileUnit *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getSourceLanguage());
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
+ Record.push_back(N->isOptimized());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFlags()));
+ Record.push_back(N->getRuntimeVersion());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename()));
+ Record.push_back(N->getEmissionKind());
+ Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getSubprograms().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
+ Record.push_back(N->getDWOId());
+
+ Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDISubprogram(const DISubprogram *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->isLocalToUnit());
+ Record.push_back(N->isDefinition());
+ Record.push_back(N->getScopeLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getContainingType()));
+ Record.push_back(N->getVirtuality());
+ Record.push_back(N->getVirtualIndex());
+ Record.push_back(N->getFlags());
+ Record.push_back(N->isOptimized());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFunction()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
+ Record.push_back(VE.getMetadataOrNullID(N->getVariables().get()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDILexicalBlock(const DILexicalBlock *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(N->getColumn());
+
+ Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDILexicalBlockFile(const DILexicalBlockFile *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getDiscriminator());
+
+ Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(N->getLine());
+
+ Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDITemplateTypeParameter(const DITemplateTypeParameter *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+
+ Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDITemplateValueParameter(const DITemplateValueParameter *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(VE.getMetadataOrNullID(N->getValue()));
+
+ Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIGlobalVariable(const DIGlobalVariable *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->isLocalToUnit());
+ Record.push_back(N->isDefinition());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawVariable()));
+ Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+
+ Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDILocalVariable(const DILocalVariable *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->getArg());
+ Record.push_back(N->getFlags());
+
+ Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIExpression(const DIExpression *N, const ValueEnumerator &,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.reserve(N->getElements().size() + 1);
+
+ Record.push_back(N->isDistinct());
+ Record.append(N->elements_begin(), N->elements_end());
+
+ Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIObjCProperty(const DIObjCProperty *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawSetterName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawGetterName()));
+ Record.push_back(N->getAttributes());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+
+ Stream.EmitRecord(bitc::METADATA_OBJC_PROPERTY, Record, Abbrev);
+ Record.clear();
+}
+
+static void WriteDIImportedEntity(const DIImportedEntity *N,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getEntity()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+ Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
+ Record.clear();
+}
+
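All of these writers lean on a single convention: stored metadata IDs are off by one, so that 0 can mean "no operand". The ValueEnumerator hunk further below makes it explicit; in effect:

    unsigned getMetadataOrNullID(const Metadata *MD) const {
      // DenseMap::lookup returns 0 for null/unknown keys; otherwise the
      // stored value, which is the real ID + 1.
      return MDValueMap.lookup(MD);
    }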
static void WriteModuleMetadata(const Module *M,
const ValueEnumerator &VE,
BitstreamWriter &Stream) {
@@ -817,8 +1162,11 @@ static void WriteModuleMetadata(const Module *M,
MDSAbbrev = Stream.EmitAbbrev(Abbv);
}
- unsigned LocAbbrev = 0;
- if (VE.hasMDLocation()) {
+ // Initialize MDNode abbreviations.
+#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
+#include "llvm/IR/Metadata.def"
+
+ if (VE.hasDILocation()) {
// Abbrev for METADATA_LOCATION.
//
// Assume the column is usually under 128, and always output the inlined-at
@@ -830,7 +1178,23 @@ static void WriteModuleMetadata(const Module *M,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- LocAbbrev = Stream.EmitAbbrev(Abbv);
+ DILocationAbbrev = Stream.EmitAbbrev(Abbv);
+ }
+
+ if (VE.hasGenericDINode()) {
+ // Abbrev for METADATA_GENERIC_DEBUG.
+ //
+ // Assume the tag fits in a VBR6 and the header fields are small; the
+ // trailing operands are emitted as an array of metadata IDs.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ GenericDINodeAbbrev = Stream.EmitAbbrev(Abbv);
}
unsigned NameAbbrev = 0;
@@ -845,13 +1209,18 @@ static void WriteModuleMetadata(const Module *M,
SmallVector<uint64_t, 64> Record;
for (const Metadata *MD : MDs) {
- if (const MDLocation *Loc = dyn_cast<MDLocation>(MD)) {
- WriteMDLocation(Loc, VE, Stream, Record, LocAbbrev);
- continue;
- }
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
- WriteMDNode(N, VE, Stream, Record);
- continue;
+ assert(N->isResolved() && "Expected forward references to be resolved");
+
+ switch (N->getMetadataID()) {
+ default:
+ llvm_unreachable("Invalid MDNode subclass");
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ case Metadata::CLASS##Kind: \
+ Write##CLASS(cast<CLASS>(N), VE, Stream, Record, CLASS##Abbrev); \
+ continue;
+#include "llvm/IR/Metadata.def"
+ }
}
if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) {
WriteValueAsMetadata(MDC, VE, Stream, Record);
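For reference, each HANDLE_MDNODE_LEAF line in llvm/IR/Metadata.def expands to one case of this switch; a single expansion looks like:

    case Metadata::DILocationKind:
      WriteDILocation(cast<DILocation>(N), VE, Stream, Record, DILocationAbbrev);
      continue;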
@@ -914,6 +1283,15 @@ static void WriteMetadataAttachment(const Function &F,
// Write metadata attachments
// METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ F.getAllMetadata(MDs);
+ if (!MDs.empty()) {
+ for (const auto &I : MDs) {
+ Record.push_back(I.first);
+ Record.push_back(VE.getMetadataID(I.second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
@@ -1031,14 +1409,12 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
// Add the asm string.
const std::string &AsmStr = IA->getAsmString();
Record.push_back(AsmStr.size());
- for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
- Record.push_back(AsmStr[i]);
+ Record.append(AsmStr.begin(), AsmStr.end());
// Add the constraint string.
const std::string &ConstraintStr = IA->getConstraintString();
Record.push_back(ConstraintStr.size());
- for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
- Record.push_back(ConstraintStr[i]);
+ Record.append(ConstraintStr.begin(), ConstraintStr.end());
Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
Record.clear();
continue;
@@ -1162,15 +1538,18 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(Flags);
}
break;
- case Instruction::GetElementPtr:
+ case Instruction::GetElementPtr: {
Code = bitc::CST_CODE_CE_GEP;
- if (cast<GEPOperator>(C)->isInBounds())
+ const auto *GO = cast<GEPOperator>(C);
+ if (GO->isInBounds())
Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ Record.push_back(VE.getTypeID(GO->getSourceElementType()));
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
Record.push_back(VE.getValueID(C->getOperand(i)));
}
break;
+ }
case Instruction::Select:
Code = bitc::CST_CODE_CE_SELECT;
Record.push_back(VE.getValueID(C->getOperand(0)));
@@ -1316,19 +1695,21 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
break;
- case Instruction::GetElementPtr:
+ case Instruction::GetElementPtr: {
Code = bitc::FUNC_CODE_INST_GEP;
- if (cast<GEPOperator>(&I)->isInBounds())
- Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
+ AbbrevToUse = FUNCTION_INST_GEP_ABBREV;
+ auto &GEPInst = cast<GetElementPtrInst>(I);
+ Vals.push_back(GEPInst.isInBounds());
+ Vals.push_back(VE.getTypeID(GEPInst.getSourceElementType()));
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
PushValueAndType(I.getOperand(i), InstID, Vals, VE);
break;
+ }
case Instruction::ExtractValue: {
Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
- for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
- Vals.push_back(*i);
+ Vals.append(EVI->idx_begin(), EVI->idx_end());
break;
}
case Instruction::InsertValue: {
@@ -1336,8 +1717,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
PushValueAndType(I.getOperand(1), InstID, Vals, VE);
const InsertValueInst *IVI = cast<InsertValueInst>(&I);
- for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
- Vals.push_back(*i);
+ Vals.append(IVI->idx_begin(), IVI->idx_end());
break;
}
case Instruction::Select:
@@ -1423,15 +1803,15 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Invoke: {
const InvokeInst *II = cast<InvokeInst>(&I);
- const Value *Callee(II->getCalledValue());
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const Value *Callee = II->getCalledValue();
+ FunctionType *FTy = II->getFunctionType();
Code = bitc::FUNC_CODE_INST_INVOKE;
Vals.push_back(VE.getAttributeID(II->getAttributes()));
- Vals.push_back(II->getCallingConv());
+ Vals.push_back(II->getCallingConv() | 1 << 13);
Vals.push_back(VE.getValueID(II->getNormalDest()));
Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ Vals.push_back(VE.getTypeID(FTy));
PushValueAndType(Callee, InstID, Vals, VE);
// Emit value #'s for the fixed parameters.
@@ -1492,15 +1872,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Alloca: {
Code = bitc::FUNC_CODE_INST_ALLOCA;
- Vals.push_back(VE.getTypeID(I.getType()));
+ const AllocaInst &AI = cast<AllocaInst>(I);
+ Vals.push_back(VE.getTypeID(AI.getAllocatedType()));
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
- const AllocaInst &AI = cast<AllocaInst>(I);
unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1;
assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 &&
"not enough bits for maximum alignment");
assert(AlignRecord < 1 << 5 && "alignment record does not fit in 5 bits");
AlignRecord |= AI.isUsedWithInAlloca() << 5;
+ AlignRecord |= 1 << 6;
Vals.push_back(AlignRecord);
break;
}
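The alloca alignment operand has likewise become a small bitfield; the layout used above is:

    // bits 0-4: Log2_32(alignment) + 1 (0 means no alignment recorded)
    // bit 5:    used-with-inalloca
    // bit 6:    explicit allocated type present (always set by this writer)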
@@ -1514,6 +1895,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr
AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
}
+ Vals.push_back(VE.getTypeID(I.getType()));
Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
Vals.push_back(cast<LoadInst>(I).isVolatile());
if (cast<LoadInst>(I).isAtomic()) {
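With the extra type operand, a plain (non-atomic) load record implied by this writer now reads:

    // [FUNC_CODE_INST_LOAD, ptr, loaded-type, Log2_32(align) + 1, volatile]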
@@ -1527,7 +1909,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
else
Code = bitc::FUNC_CODE_INST_STORE;
PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
- pushValue(I.getOperand(0), InstID, Vals, VE); // val.
+ PushValueAndType(I.getOperand(0), InstID, Vals, VE); // valty + val
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
Vals.push_back(cast<StoreInst>(I).isVolatile());
if (cast<StoreInst>(I).isAtomic()) {
@@ -1538,7 +1920,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::AtomicCmpXchg:
Code = bitc::FUNC_CODE_INST_CMPXCHG;
PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr
- pushValue(I.getOperand(1), InstID, Vals, VE); // cmp.
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE); // cmp.
pushValue(I.getOperand(2), InstID, Vals, VE); // newval.
Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
Vals.push_back(GetEncodedOrdering(
@@ -1567,14 +1949,14 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
case Instruction::Call: {
const CallInst &CI = cast<CallInst>(I);
- PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ FunctionType *FTy = CI.getFunctionType();
Code = bitc::FUNC_CODE_INST_CALL;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) |
- unsigned(CI.isMustTailCall()) << 14);
+ unsigned(CI.isMustTailCall()) << 14 | 1 << 15);
+ Vals.push_back(VE.getTypeID(FTy));
PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
// Emit value #'s for the fixed parameters.
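The calling-convention operands of invoke and call double as flag words; judging from the writer code, the extra high bit in each marks that an explicit FunctionType operand follows:

    // INVOKE: Vals = ..., CC | (1 << 13), ...             // bit 13: explicit FTy
    // CALL:   Vals = ..., (CC << 1) | IsTailCall
    //                   | (IsMustTail << 14) | (1 << 15)  // bit 15: explicit FTy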
@@ -1673,15 +2055,16 @@ static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
else
Code = bitc::USELIST_CODE_DEFAULT;
- SmallVector<uint64_t, 64> Record;
- for (unsigned I : Order.Shuffle)
- Record.push_back(I);
+ SmallVector<uint64_t, 64> Record(Order.Shuffle.begin(), Order.Shuffle.end());
Record.push_back(VE.getValueID(Order.V));
Stream.EmitRecord(Code, Record);
}
static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
+ assert(VE.shouldPreserveUseListOrder() &&
+ "Expected to be preserving use-list order");
+
auto hasMore = [&]() {
return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
};
@@ -1722,9 +2105,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
// Keep a running idea of what the instruction ID is.
unsigned InstID = CstEnd;
- bool NeedsMetadataAttachment = false;
+ bool NeedsMetadataAttachment = F.hasMetadata();
- DebugLoc LastDL;
+ DILocation *LastDL = nullptr;
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -1739,26 +2122,24 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
// If the instruction has a debug location, emit it.
- DebugLoc DL = I->getDebugLoc();
- if (DL.isUnknown()) {
- // nothing todo.
- } else if (DL == LastDL) {
+ DILocation *DL = I->getDebugLoc();
+ if (!DL)
+ continue;
+
+ if (DL == LastDL) {
// Just repeat the same debug loc as last time.
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
- } else {
- MDNode *Scope, *IA;
- DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
- assert(Scope && "Expected valid scope");
-
- Vals.push_back(DL.getLine());
- Vals.push_back(DL.getCol());
- Vals.push_back(Scope ? VE.getMetadataID(Scope) + 1 : 0);
- Vals.push_back(IA ? VE.getMetadataID(IA) + 1 : 0);
- Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
- Vals.clear();
-
- LastDL = DL;
+ continue;
}
+
+ Vals.push_back(DL->getLine());
+ Vals.push_back(DL->getColumn());
+ Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
+ Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
+
+ LastDL = DL;
}
// Emit names for all the instructions etc.
@@ -1766,7 +2147,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
if (NeedsMetadataAttachment)
WriteMetadataAttachment(F, VE, Stream);
- if (shouldPreserveBitcodeUseListOrder())
+ if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(&F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
@@ -1827,7 +2208,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ VE.computeBitsRequiredForTypeIndicies()));
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
Abbv) != CONSTANTS_SETTYPE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
@@ -1847,7 +2228,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ VE.computeBitsRequiredForTypeIndicies()));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
@@ -1868,6 +2249,8 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ VE.computeBitsRequiredForTypeIndicies()));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
@@ -1900,7 +2283,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ VE.computeBitsRequiredForTypeIndicies()));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID,
Abbv) != FUNCTION_INST_CAST_ABBREV)
@@ -1929,12 +2312,25 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
+ {
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size() + 1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_GEP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
Stream.ExitBlock();
}
/// WriteModule - Emit the specified module to the bitstream.
-static void WriteModule(const Module *M, BitstreamWriter &Stream) {
+static void WriteModule(const Module *M, BitstreamWriter &Stream,
+ bool ShouldPreserveUseListOrder) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -1943,7 +2339,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// Analyze the module, enumerating globals, functions, etc.
- ValueEnumerator VE(*M);
+ ValueEnumerator VE(*M, ShouldPreserveUseListOrder);
// Emit blockinfo, which defines the standard abbreviations etc.
WriteBlockInfo(VE, Stream);
@@ -1976,7 +2372,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
// Emit module-level use-lists.
- if (shouldPreserveBitcodeUseListOrder())
+ if (VE.shouldPreserveUseListOrder())
WriteUseListBlock(nullptr, VE, Stream);
// Emit function bodies.
@@ -2062,7 +2458,8 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
-void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
+ bool ShouldPreserveUseListOrder) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@@ -2085,7 +2482,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
Stream.Emit(0xD, 4);
// Emit the module.
- WriteModule(M, Stream);
+ WriteModule(M, Stream, ShouldPreserveUseListOrder);
}
if (TT.isOSDarwin())
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 25456a4..3165743 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -19,22 +19,25 @@
using namespace llvm;
PreservedAnalyses BitcodeWriterPass::run(Module &M) {
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
return PreservedAnalyses::all();
}
namespace {
class WriteBitcodePass : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
+ bool ShouldPreserveUseListOrder;
+
public:
static char ID; // Pass identification, replacement for typeid
- explicit WriteBitcodePass(raw_ostream &o)
- : ModulePass(ID), OS(o) {}
+ explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder)
+ : ModulePass(ID), OS(o),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
const char *getPassName() const override { return "Bitcode Writer"; }
bool runOnModule(Module &M) override {
- WriteBitcodeToFile(&M, OS);
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder);
return false;
}
};
@@ -42,6 +45,7 @@ namespace {
char WriteBitcodePass::ID = 0;
-ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
- return new WriteBitcodePass(Str);
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str,
+ bool ShouldPreserveUseListOrder) {
+ return new WriteBitcodePass(Str, ShouldPreserveUseListOrder);
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 27a63d8..6c517f5 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -282,9 +283,11 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
return V.first->getType()->isIntOrIntVectorTy();
}
-ValueEnumerator::ValueEnumerator(const Module &M)
- : HasMDString(false), HasMDLocation(false) {
- if (shouldPreserveBitcodeUseListOrder())
+ValueEnumerator::ValueEnumerator(const Module &M,
+ bool ShouldPreserveUseListOrder)
+ : HasMDString(false), HasDILocation(false), HasGenericDINode(false),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+ if (ShouldPreserveUseListOrder)
UseListOrders = predictUseListOrder(M);
// Enumerate the global variables.
@@ -345,6 +348,11 @@ ValueEnumerator::ValueEnumerator(const Module &M)
for (const Argument &A : F.args())
EnumerateType(A.getType());
+ // Enumerate metadata attached to this function.
+ F.getAllMetadata(MDs);
+ for (const auto &I : MDs)
+ EnumerateMetadata(I.second);
+
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
for (const Use &Op : I.operands()) {
@@ -372,12 +380,10 @@ ValueEnumerator::ValueEnumerator(const Module &M)
for (unsigned i = 0, e = MDs.size(); i != e; ++i)
EnumerateMetadata(MDs[i].second);
- if (!I.getDebugLoc().isUnknown()) {
- MDNode *Scope, *IA;
- I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext());
- if (Scope) EnumerateMetadata(Scope);
- if (IA) EnumerateMetadata(IA);
- }
+ // Don't enumerate the location directly -- it has a special record
+ // type -- but enumerate its operands.
+ if (DILocation *L = I.getDebugLoc())
+ EnumerateMDNodeOperands(L);
}
}
@@ -410,12 +416,6 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
-unsigned ValueEnumerator::getMetadataID(const Metadata *MD) const {
- auto I = MDValueMap.find(MD);
- assert(I != MDValueMap.end() && "Metadata not in slotcalculator!");
- return I->second - 1;
-}
-
void ValueEnumerator::dump() const {
print(dbgs(), ValueMap, "Default");
dbgs() << '\n';
@@ -468,7 +468,7 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
- if (shouldPreserveBitcodeUseListOrder())
+ if (ShouldPreserveUseListOrder)
// Optimizing constants makes the use-list order difficult to predict.
// Disable it for now when trying to preserve the order.
return;
@@ -548,7 +548,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
EnumerateValue(C->getValue());
HasMDString |= isa<MDString>(MD);
- HasMDLocation |= isa<MDLocation>(MD);
+ HasDILocation |= isa<DILocation>(MD);
+ HasGenericDINode |= isa<GenericDINode>(MD);
// Replace the dummy ID inserted above with the correct one. MDValueMap may
// have changed by inserting operands, so we need a fresh lookup here.
@@ -807,3 +808,7 @@ unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
return getGlobalBasicBlockID(BB);
}
+
+uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
+ return Log2_32_Ceil(getTypes().size() + 1);
+}
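The helper centralizes the Log2 computation that was previously repeated at each abbrev site; for example:

    // getTypes().size() == 200  =>  Log2_32_Ceil(200 + 1) == 8 bits per index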
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
index d363c1b..92d166e 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -65,7 +65,9 @@ private:
typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
MetadataMapType MDValueMap;
bool HasMDString;
- bool HasMDLocation;
+ bool HasDILocation;
+ bool HasGenericDINode;
+ bool ShouldPreserveUseListOrder;
typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
AttributeGroupMapType AttributeGroupMap;
@@ -98,10 +100,10 @@ private:
unsigned FirstFuncConstantID;
unsigned FirstInstID;
- ValueEnumerator(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
- void operator=(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
+ ValueEnumerator(const ValueEnumerator &) = delete;
+ void operator=(const ValueEnumerator &) = delete;
public:
- ValueEnumerator(const Module &M);
+ ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder);
void dump() const;
void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
@@ -109,10 +111,20 @@ public:
const char *Name) const;
unsigned getValueID(const Value *V) const;
- unsigned getMetadataID(const Metadata *V) const;
+ unsigned getMetadataID(const Metadata *MD) const {
+ auto ID = getMetadataOrNullID(MD);
+ assert(ID != 0 && "Metadata not in slotcalculator!");
+ return ID - 1;
+ }
+ unsigned getMetadataOrNullID(const Metadata *MD) const {
+ return MDValueMap.lookup(MD);
+ }
bool hasMDString() const { return HasMDString; }
- bool hasMDLocation() const { return HasMDLocation; }
+ bool hasDILocation() const { return HasDILocation; }
+ bool hasGenericDINode() const { return HasGenericDINode; }
+
+ bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
unsigned getTypeID(Type *T) const {
TypeMapType::const_iterator I = TypeMap.find(T);
@@ -173,6 +185,7 @@ public:
///
void incorporateFunction(const Function &F);
void purgeFunction();
+ uint64_t computeBitsRequiredForTypeIndicies() const;
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 69c3685..58b87e1 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -296,6 +296,16 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
+ // FIXME: We must leave subregisters of live super registers as live, so that
+ // we don't clear out the register tracking information for subregisters of
+ // super registers we're still tracking (and with which we're unioning
+ // subregister definitions).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) {
+ DEBUG(if (!header && footer) dbgs() << footer);
+ return;
+ }
+
if (!State->IsLive(Reg)) {
KillIndices[Reg] = KillIdx;
DefIndices[Reg] = ~0u;
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 12cf95b..18c8bb5 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -63,11 +63,11 @@ class RegisterClassInfo;
/// Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
- /// The index of the most recent kill (proceding bottom-up),
+ /// The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// The index of the most recent complete def (proceding bottom
+ /// The index of the most recent complete def (proceeding bottom
/// up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
@@ -127,7 +127,7 @@ class RegisterClassInfo;
AggressiveAntiDepBreaker(MachineFunction& MFi,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
- ~AggressiveAntiDepBreaker();
+ ~AggressiveAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 2e8af9e..3224fac 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -295,8 +295,8 @@ static const Value *getNoopInput(const Value *V,
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
// Value may come from either the aggregate or the scalar
ArrayRef<unsigned> InsertLoc = IVI->getIndices();
- if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(),
- ValLoc.rbegin())) {
+ if (ValLoc.size() >= InsertLoc.size() &&
+ std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
// The type being inserted is a nested sub-type of the aggregate; we
// have to remove those initial indices to get the location we're
// interested in for the operand.
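The old reverse-iterator comparison could march past the end of ValLoc whenever InsertLoc was the longer sequence; the new size guard makes the prefix test safe. A self-contained illustration of the pattern:

    #include <algorithm>
    #include <vector>

    static bool isReversedPrefix(const std::vector<unsigned> &Needle,
                                 const std::vector<unsigned> &Hay) {
      // Without the size check, std::equal would read past Hay.rend().
      return Hay.size() >= Needle.size() &&
             std::equal(Needle.begin(), Needle.end(), Hay.rbegin());
    }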
@@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V,
// previous aggregate. Combine the two paths to obtain the true address of
// our element.
ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
- std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
- std::back_inserter(ValLoc));
+ ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
NoopInput = Op;
}
// Terminate if we couldn't find anything to look through.
@@ -518,8 +517,9 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
return false;
}
+ const Function *F = ExitBB->getParent();
return returnTypeIsEligibleForTailCall(
- ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering());
+ F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
@@ -600,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// The manipulations performed when we're looking through an insertvalue or
// an extractvalue would happen at the front of the RetPath list, so since
// we have to copy it anyway it's more efficient to create a reversed copy.
- using std::copy;
- SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
- copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
- copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+ SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
+ SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 66c6c63..4cb460a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -36,13 +36,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-ARMException::ARMException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false) {}
+ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
ARMException::~ARMException() {}
ARMTargetStreamer &ARMException::getTargetStreamer() {
- MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer();
+ MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer();
return static_cast<ARMTargetStreamer &>(TS);
}
@@ -50,48 +49,35 @@ ARMTargetStreamer &ARMException::getTargetStreamer() {
/// content.
void ARMException::endModule() {
if (shouldEmitCFI)
- Asm->OutStreamer.EmitCFISections(false, true);
+ Asm->OutStreamer->EmitCFISections(false, true);
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
// See if we need call frame info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
"non-EH CFI not yet supported in prologue with EHABI lowering");
if (MoveType == AsmPrinter::CFI_M_Debug) {
shouldEmitCFI = true;
- Asm->OutStreamer.EmitCFIStartProc(false);
+ Asm->OutStreamer->EmitCFIStartProc(false);
}
}
/// endFunction - Gather and emit post-function exception information.
///
-void ARMException::endFunction(const MachineFunction *) {
- if (shouldEmitCFI)
- Asm->OutStreamer.EmitCFIEndProc();
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
+void ARMException::endFunction(const MachineFunction *MF) {
ARMTargetStreamer &ATS = getTargetStreamer();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
MMI->getLandingPads().empty())
ATS.emitCantUnwind();
else {
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
if (!MMI->getLandingPads().empty()) {
// Emit references to personality.
- if (const Function * Personality =
- MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ if (const Function *Personality = MMI->getPersonality()) {
MCSymbol *PerSym = Asm->getSymbol(Personality);
- Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
}
@@ -111,13 +97,13 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
int Entry = 0;
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
- Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer.AddBlankLine();
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
Entry = TypeInfos.size();
}
@@ -125,14 +111,14 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalValue *GV = *I;
if (VerboseAsm)
- Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
}
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
- Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer.AddBlankLine();
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
@@ -141,7 +127,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
if (VerboseAsm) {
--Entry;
if (TypeID != 0)
- Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
}
Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 8dab5e5..2487aba 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -24,12 +24,12 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
}
// Emit addresses into the section given.
-void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) {
+void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
if (Pool.empty())
return;
// Start the dwarf addr section.
- Asm.OutStreamer.SwitchSection(AddrSection);
+ Asm.OutStreamer->SwitchSection(AddrSection);
// Order the address pool entries by ID
SmallVector<const MCExpr *, 64> Entries(Pool.size());
@@ -41,5 +41,5 @@ void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) {
: MCSymbolRefExpr::Create(I.first, Asm.OutContext);
for (const MCExpr *Entry : Entries)
- Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+ Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index 802e050..211fc98 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -40,7 +40,7 @@ public:
/// label/symbol.
unsigned getIndex(const MCSymbol *Sym, bool TLS = false);
- void emit(AsmPrinter &Asm, const MCSection *AddrSection);
+ void emit(AsmPrinter &Asm, MCSection *AddrSection);
bool isEmpty() { return Pool.empty(); }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bbed808..206be70 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Mangler.h"
@@ -41,9 +41,11 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -75,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- NumBits = TD.getPreferredAlignmentLog(GVar);
+ NumBits = DL.getPreferredAlignmentLog(GVar);
// If InBits is specified, round it to it.
if (InBits > NumBits)
@@ -98,15 +100,19 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
return NumBits;
}
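A worked case for the rounding above, under assumed DataLayout numbers:

unsigned NumBits = 4; // getPreferredAlignmentLog: log2(16), 16-byte preferred
unsigned InBits = 5;  // caller-requested minimum: log2(32)
if (InBits > NumBits)
  NumBits = InBits;   // result is 5, so EmitAlignment emits 1u << 5 = 32 bytes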
-AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- MII(tm.getSubtargetImpl()->getInstrInfo()),
- OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr),
- LastFn(0), Counter(~0U), SetCounter(0) {
- DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
- CurrentFnSym = CurrentFnSymForSize = nullptr;
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
+ LastMI(nullptr), LastFn(0), Counter(~0U) {
+ DD = nullptr;
+ MMI = nullptr;
+ LI = nullptr;
+ MF = nullptr;
+ CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurrentFnBegin = nullptr;
+ CurrentFnEnd = nullptr;
GCMetadataPrinters = nullptr;
- VerboseAsm = Streamer.isVerboseAsm();
+ VerboseAsm = OutStreamer->isVerboseAsm();
}
AsmPrinter::~AsmPrinter() {
@@ -118,8 +124,6 @@ AsmPrinter::~AsmPrinter() {
delete &GCMap;
GCMetadataPrinters = nullptr;
}
-
- delete &OutStreamer;
}
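These constructor/destructor hunks are the core of a change that echoes through every file in this merge: AsmPrinter now owns its MCStreamer through std::unique_ptr, so call sites switch from OutStreamer. to OutStreamer-> and the manual delete disappears. A minimal sketch of the before/after ownership pattern, using stand-in types rather than the real LLVM classes:

#include <memory>
#include <utility>

struct Streamer {}; // stands in for llvm::MCStreamer

// Before: the printer held a reference and freed the object by hand.
struct PrinterOld {
  Streamer &Out;
  explicit PrinterOld(Streamer &S) : Out(S) {}
  ~PrinterOld() { delete &Out; } // only safe if Out was heap-allocated
};

// After: the printer owns the streamer outright; no destructor needed.
struct PrinterNew {
  std::unique_ptr<Streamer> Out;
  explicit PrinterNew(std::unique_ptr<Streamer> S) : Out(std::move(S)) {}
};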
/// getFunctionNumber - Return a unique ID for the current function.
@@ -129,16 +133,17 @@ unsigned AsmPrinter::getFunctionNumber() const {
}
const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
- return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
+ return *TM.getObjFileLowering();
}
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getSubtargetImpl()->getDataLayout();
+ return *TM.getDataLayout();
}
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
- return TM.getSubtarget<MCSubtargetInfo>();
+ assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
+ return MF->getSubtarget<MCSubtargetInfo>();
}
void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
@@ -151,7 +156,7 @@ StringRef AsmPrinter::getTargetTriple() const {
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
- return OutStreamer.getCurrentSection().first;
+ return OutStreamer->getCurrentSection().first;
}
@@ -173,9 +178,9 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
- OutStreamer.InitSections(false);
+ OutStreamer->InitSections(false);
- Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+ Mang = new Mangler(TM.getDataLayout());
// Emit the version-min deployment target directive if needed.
//
@@ -191,9 +196,9 @@ bool AsmPrinter::doInitialization(Module &M) {
TT.getOSVersion(Major, Minor, Update);
// If there is a version specified, Major will be non-zero.
if (Major)
- OutStreamer.EmitVersionMin((TT.isMacOSX() ?
- MCVM_OSXVersionMin : MCVM_IOSVersionMin),
- Major, Minor, Update);
+ OutStreamer->EmitVersionMin((TT.isMacOSX() ?
+ MCVM_OSXVersionMin : MCVM_IOSVersionMin),
+ Major, Minor, Update);
}
// Allow the target to emit any magic that it wants at the start of the file.
@@ -203,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+ OutStreamer->EmitFileDirective(M.getModuleIdentifier());
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -214,11 +219,15 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- OutStreamer.AddComment("Start of file scope inline assembly");
- OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n");
- OutStreamer.AddComment("End of file scope inline assembly");
- OutStreamer.AddBlankLine();
+ // We're at the module level. Construct an MCSubtargetInfo from the default
+ // CPU and target triple.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ OutStreamer->AddComment("Start of file scope inline assembly");
+ OutStreamer->AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions);
+ OutStreamer->AddComment("End of file scope inline assembly");
+ OutStreamer->AddBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
@@ -254,10 +263,11 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::ARM:
ES = new ARMException(this);
break;
- case ExceptionHandling::ItaniumWinEH:
- case ExceptionHandling::MSVC:
+ case ExceptionHandling::WinEH:
switch (MAI->getWinEHEncodingType()) {
default: llvm_unreachable("unsupported unwinding information encoding");
+ case WinEH::EncodingType::Invalid:
+ break;
case WinEH::EncodingType::Itanium:
ES = new Win64Exception(this);
break;
@@ -286,20 +296,20 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
case GlobalValue::WeakODRLinkage:
if (MAI->hasWeakDefDirective()) {
// .globl _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
if (!canBeHidden(GV, *MAI))
// .weak_definition _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
} else if (MAI->hasLinkOnceDirective()) {
// .globl _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
// NOTE: linkonce is handled by the section the symbol was assigned to.
} else {
// .weak _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
}
return;
case GlobalValue::AppendingLinkage:
@@ -308,7 +318,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
case GlobalValue::ExternalLinkage:
// If external or appending, declare as a global symbol.
// .globl _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
@@ -337,10 +347,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (EmitSpecialLLVMGlobal(GV))
return;
+ // Skip the emission of global equivalents. The symbol can be emitted later
+ // on by emitGlobalGOTEquivs in case it turns out to be needed.
+ if (GlobalGOTEquivs.count(getSymbol(GV)))
+ return;
+
if (isVerbose()) {
- GV->printAsOperand(OutStreamer.GetCommentOS(),
+ GV->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, GV->getParent());
- OutStreamer.GetCommentOS() << '\n';
+ OutStreamer->GetCommentOS() << '\n';
}
}
@@ -356,11 +371,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
"' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -384,16 +399,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
Align = 0;
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
return;
}
// Handle local BSS symbols.
if (MAI->hasMachoZeroFillDirective()) {
- const MCSection *TheSection =
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
+ MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
return;
}
@@ -405,7 +420,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
- OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align);
return;
}
@@ -413,14 +428,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
Align = 0;
// .local _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
return;
}
- const MCSection *TheSection =
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
+ MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
// Handle the zerofill directive on darwin, which is a special form of BSS
// emission.
@@ -428,9 +443,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
// .globl _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
// .zerofill __DATA, __common, _foo, 400, 5
- OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
return;
}
@@ -447,59 +462,58 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
- OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+ OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
- OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
} else if (GVKind.isThreadData()) {
- OutStreamer.SwitchSection(TheSection);
+ OutStreamer->SwitchSection(TheSection);
EmitAlignment(AlignLog, GV);
- OutStreamer.EmitLabel(MangSym);
+ OutStreamer->EmitLabel(MangSym);
EmitGlobalConstant(GV->getInitializer());
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
// Emit the variable struct for the runtime.
- const MCSection *TLVSect
- = getObjFileLowering().getTLSExtraDataSection();
+ MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection();
- OutStreamer.SwitchSection(TLVSect);
+ OutStreamer->SwitchSection(TLVSect);
// Emit the linkage here.
EmitLinkage(GV, GVSym);
- OutStreamer.EmitLabel(GVSym);
+ OutStreamer->EmitLabel(GVSym);
// Three pointers in size:
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
- OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
- OutStreamer.EmitIntValue(0, PtrSize);
- OutStreamer.EmitSymbolValue(MangSym, PtrSize);
+ OutStreamer->EmitIntValue(0, PtrSize);
+ OutStreamer->EmitSymbolValue(MangSym, PtrSize);
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
return;
}
- OutStreamer.SwitchSection(TheSection);
+ OutStreamer->SwitchSection(TheSection);
EmitLinkage(GV, GVSym);
EmitAlignment(AlignLog, GV);
- OutStreamer.EmitLabel(GVSym);
+ OutStreamer->EmitLabel(GVSym);
EmitGlobalConstant(GV->getInitializer());
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+ OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
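The thread-local branch above writes three pointer-sized fields per variable; viewed as a struct, the Darwin TLV descriptor it lays out looks roughly like this (field names are illustrative, not from the tree):

struct TLVDescriptor {
  void *(*Thunk)(struct TLVDescriptor *); // _tlv_bootstrap: checks TLV support
  void *Spare;                            // filled in by the runtime when mapped
  void *Init;                             // the "$tlv$init" initial-value image
};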
/// EmitFunctionHeader - This method emits the header for the current
@@ -511,20 +525,21 @@ void AsmPrinter::EmitFunctionHeader() {
// Print the 'header' of function.
const Function *F = MF->getFunction();
- OutStreamer.SwitchSection(
+ OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(F, *Mang, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
- EmitAlignment(MF->getAlignment(), F);
+ if (MAI->hasFunctionAlignment())
+ EmitAlignment(MF->getAlignment(), F);
if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
if (isVerbose()) {
- F->printAsOperand(OutStreamer.GetCommentOS(),
+ F->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, F->getParent());
- OutStreamer.GetCommentOS() << '\n';
+ OutStreamer->GetCommentOS() << '\n';
}
// Emit the prefix data.
@@ -541,8 +556,19 @@ void AsmPrinter::EmitFunctionHeader() {
std::vector<MCSymbol*> DeadBlockSyms;
MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
- OutStreamer.AddComment("Address taken block that was later removed");
- OutStreamer.EmitLabel(DeadBlockSyms[i]);
+ OutStreamer->AddComment("Address taken block that was later removed");
+ OutStreamer->EmitLabel(DeadBlockSyms[i]);
+ }
+
+ if (CurrentFnBegin) {
+ if (MAI->useAssignmentForEHBegin()) {
+ MCSymbol *CurPos = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(CurPos);
+ OutStreamer->EmitAssignment(CurrentFnBegin,
+ MCSymbolRefExpr::Create(CurPos, OutContext));
+ } else {
+ OutStreamer->EmitLabel(CurrentFnBegin);
+ }
}
// Emit pre-function debug and/or EH information.
@@ -570,13 +596,13 @@ void AsmPrinter::EmitFunctionEntryLabel() {
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
- return OutStreamer.EmitLabel(CurrentFnSym);
+ return OutStreamer->EmitLabel(CurrentFnSym);
}
/// emitComments - Pretty-print comments for instructions.
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getParent()->getParent();
- const TargetMachine &TM = MF->getTarget();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Check for spills and reloads
int FI;
@@ -586,24 +612,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
- if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI,
- FI)) {
+ if (TII->isLoadFromStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
- } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot(
- &MI, MMO, FI)) {
+ } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
- } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE(
- &MI, FI)) {
+ } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
- } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot(
- &MI, MMO, FI)) {
+ } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -617,10 +639,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer.AddComment(
- Twine("implicit-def: ") +
- TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
- OutStreamer.AddBlankLine();
+ OutStreamer->AddComment(Twine("implicit-def: ") +
+ MMI->getContext().getRegisterInfo()->getName(RegNo));
+ OutStreamer->AddBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
@@ -629,11 +650,11 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
Str += ' ';
- Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg());
+ Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg());
Str += (Op.isDef() ? "<def>" : "<kill>");
}
- AP.OutStreamer.AddComment(Str);
- AP.OutStreamer.AddBlankLine();
+ AP.OutStreamer->AddComment(Str);
+ AP.OutStreamer->AddBlankLine();
}
/// emitDebugValueComment - This method handles the target-independent form
@@ -648,18 +669,18 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
raw_svector_ostream OS(Str);
OS << "DEBUG_VALUE: ";
- DIVariable V = MI->getDebugVariable();
- if (V.getContext().isSubprogram()) {
- StringRef Name = DISubprogram(V.getContext()).getDisplayName();
+ const DILocalVariable *V = MI->getDebugVariable();
+ if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ StringRef Name = SP->getDisplayName();
if (!Name.empty())
OS << Name << ":";
}
- OS << V.getName();
+ OS << V->getName();
- DIExpression Expr = MI->getDebugExpression();
- if (Expr.isVariablePiece())
- OS << " [piece offset=" << Expr.getPieceOffset()
- << " size=" << Expr.getPieceSize() << "]";
+ const DIExpression *Expr = MI->getDebugExpression();
+ if (Expr->isBitPiece())
+ OS << " [bit_piece offset=" << Expr->getBitPieceOffset()
+ << " size=" << Expr->getBitPieceSize() << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
@@ -691,8 +712,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
Reg = MI->getOperand(0).getReg();
} else {
assert(MI->getOperand(0).isFI() && "Unknown operand type");
- const TargetFrameLowering *TFI =
- AP.TM.getSubtargetImpl()->getFrameLowering();
+ const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
Deref = true;
@@ -701,19 +721,19 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// Suppress offset, it is not meaningful here.
OS << "undef";
// NOTE: Want this comment at start of line, don't emit with AddComment.
- AP.OutStreamer.emitRawComment(OS.str());
+ AP.OutStreamer->emitRawComment(OS.str());
return true;
}
if (Deref)
OS << '[';
- OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg);
+ OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg);
}
if (Deref)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
- AP.OutStreamer.emitRawComment(OS.str());
+ AP.OutStreamer->emitRawComment(OS.str());
return true;
}
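Assembled, the raw comments this routine produces look like the following in verbose output (function, variable, and register names are hypothetical):

// #DEBUG_VALUE: myfunc:val <- EAX        (register location)
// #DEBUG_VALUE: myfunc:val <- [RSP+8]    (frame-index location, Deref case)
// #DEBUG_VALUE: myfunc:val <- undef      (location unknown; offset suppressed)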
@@ -754,13 +774,15 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
int FrameOffset = MI.getOperand(1).getImm();
// Emit a symbol assignment.
- OutStreamer.EmitAssignment(FrameAllocSym,
+ OutStreamer->EmitAssignment(FrameAllocSym,
MCConstantExpr::Create(FrameOffset, OutContext));
}
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
+ EmitFunctionHeader();
+
// Emit target-specific gunk before the function body.
EmitFunctionBodyStart();
@@ -789,7 +811,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer.GetCommentOS());
+ emitComments(MI, OutStreamer->GetCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -802,7 +824,7 @@ void AsmPrinter::EmitFunctionBody() {
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
- OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol());
+ OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
EmitInlineAsm(&MI);
@@ -841,13 +863,13 @@ void AsmPrinter::EmitFunctionBody() {
// labels from collapsing together. Just emit a noop.
if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
MCInst Noop;
- TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop);
- OutStreamer.AddComment("avoids zero-length function");
+ MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop);
+ OutStreamer->AddComment("avoids zero-length function");
// Targets can opt-out of emitting the noop here by leaving the opcode
// unspecified.
if (Noop.getOpcode())
- OutStreamer.EmitInstruction(Noop, getSubtargetInfo());
+ OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
}
const Function *F = MF->getFunction();
@@ -857,29 +879,41 @@ void AsmPrinter::EmitFunctionBody() {
MCSymbol *Sym = GetBlockAddressSymbol(&BB);
if (Sym->isDefined())
continue;
- OutStreamer.AddComment("Address of block that was removed by CodeGen");
- OutStreamer.EmitLabel(Sym);
+ OutStreamer->AddComment("Address of block that was removed by CodeGen");
+ OutStreamer->EmitLabel(Sym);
}
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function.
+ CurrentFnEnd = createTempSymbol("func_end");
+ OutStreamer->EmitLabel(CurrentFnEnd);
+ }
+
// If the target wants a .size directive for the size of the function, emit
// it.
if (MAI->hasDotTypeDotSizeDirective()) {
- // Create a symbol for the end of function, so we can get the size as
- // difference between the function label and the temp label.
- MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(FnEndLabel);
-
+ // We can get the size as the difference between the function label and
+ // the temp label.
const MCExpr *SizeExp =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::Create(CurrentFnSymForSize,
OutContext),
OutContext);
- OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+ OutStreamer->EmitELFSize(CurrentFnSym, SizeExp);
}
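With CurrentFnEnd now emitted unconditionally above, the ELF size directive reduces to a label subtraction; roughly, for a hypothetical function:

// foo:                    // CurrentFnSymForSize
//         ...
// .Lfunc_end0:            // CurrentFnEnd, the "func_end" temp symbol
//         .size foo, .Lfunc_end0-foo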
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->markFunctionEnd();
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
// Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -887,17 +921,106 @@ void AsmPrinter::EmitFunctionBody() {
}
MMI->EndFunction();
- // Print out jump tables referenced by the function.
- EmitJumpTableInfo();
+ OutStreamer->AddBlankLine();
+}
+
+/// \brief Compute the number of global variables that use a Constant.
+static unsigned getNumGlobalVariableUses(const Constant *C) {
+ if (!C)
+ return 0;
+
+ if (isa<GlobalVariable>(C))
+ return 1;
+
+ unsigned NumUses = 0;
+ for (auto *CU : C->users())
+ NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
- OutStreamer.AddBlankLine();
+ return NumUses;
+}
+
+/// \brief Only consider global GOT equivalents if at least one user is a
+/// cstexpr inside an initializer of another global variable. Also, don't
+/// handle cstexpr inside instructions. During global variable emission,
+/// candidates are skipped and are emitted later in case at least one cstexpr
+/// isn't replaced by a PC relative GOT entry access.
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
+ unsigned &NumGOTEquivUsers) {
+ // Global GOT equivalents are unnamed private globals with a constant
+ // pointer initializer to another global symbol. They must point to a
+ // GlobalVariable or Function, i.e., a GlobalValue.
+ if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() ||
+ !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ return false;
+
+ // To be a GOT equivalent, at least one of its users needs to be a constant
+ // expression used by another global variable.
+ for (auto *U : GV->users())
+ NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
+
+ return NumGOTEquivUsers > 0;
+}
+
+/// \brief An unnamed constant global variable solely containing a pointer to
+/// another global variable is equivalent to a GOT table entry; it contains
+/// the address of another symbol. Optimize it and replace accesses to these
+/// "GOT equivalents" by using the GOT entry for the final global instead.
+/// Compute GOT equivalent candidates among all global variables to avoid
+/// emitting them if possible later on, after their use is replaced by a GOT
+/// entry access.
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ for (const auto &G : M.globals()) {
+ unsigned NumGOTEquivUsers = 0;
+ if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+ continue;
+
+ const MCSymbol *GOTEquivSym = getSymbol(&G);
+ GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
+ }
+}
+
+/// \brief Constant expressions using GOT equivalent globals may not be
+/// eligible for PC-relative GOT entry conversion; in such cases we need to
+/// emit the globals we previously omitted in EmitGlobalVariable.
+void AsmPrinter::emitGlobalGOTEquivs() {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ SmallVector<const GlobalVariable *, 8> FailedCandidates;
+ for (auto &I : GlobalGOTEquivs) {
+ const GlobalVariable *GV = I.second.first;
+ unsigned Cnt = I.second.second;
+ if (Cnt)
+ FailedCandidates.push_back(GV);
+ }
+ GlobalGOTEquivs.clear();
+
+ for (auto *GV : FailedCandidates)
+ EmitGlobalVariable(GV);
}
bool AsmPrinter::doFinalization(Module &M) {
+ // Set the MachineFunction to nullptr so that we can catch attempted
+ // accesses to MF specific features at the module level and so that
+ // we can conditionalize accesses based on whether or not it is nullptr.
+ MF = nullptr;
+
+ // Gather all GOT equivalent globals in the module. We really need two
+ // passes over the globals: one to compute them and another to avoid their
+ // emission in EmitGlobalVariable; otherwise we would not be able to handle
+ // cases where the GOT equivalent shows up before its use.
+ computeGlobalGOTEquivs(M);
+
// Emit global variables.
for (const auto &G : M.globals())
EmitGlobalVariable(&G);
+ // Emit remaining GOT equivalent globals.
+ emitGlobalGOTEquivs();
+
// Emit visibility info for declarations
for (const Function &F : M) {
if (!F.isDeclaration())
@@ -910,63 +1033,34 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
- // Get information about jump-instruction tables to print.
- JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
-
- if (JITI && !JITI->getTables().empty()) {
- unsigned Arch = Triple(getTargetTriple()).getArch();
- bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
- MCInst TrapInst;
- TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst);
- unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
-
- // Emit the right section for these functions.
- OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
- for (const auto &KV : JITI->getTables()) {
- uint64_t Count = 0;
- for (const auto &FunPair : KV.second) {
- // Emit the function labels to make this be a function entry point.
- MCSymbol *FunSym =
- OutContext.GetOrCreateSymbol(FunPair.second->getName());
- EmitAlignment(LogAlignment);
- if (IsThumb)
- OutStreamer.EmitThumbFunc(FunSym);
- if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
- OutStreamer.EmitLabel(FunSym);
-
- // Emit the jump instruction to transfer control to the original
- // function.
- MCInst JumpToFun;
- MCSymbol *TargetSymbol =
- OutContext.GetOrCreateSymbol(FunPair.first->getName());
- const MCSymbolRefExpr *TargetSymRef =
- MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
- OutContext);
- TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch(
- JumpToFun, TargetSymRef);
- OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
- ++Count;
- }
-
- // Emit enough padding instructions to fill up to the next power of two.
- uint64_t Remaining = NextPowerOf2(Count) - Count;
- for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(LogAlignment);
- OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
- }
-
- }
- }
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
if (!ModuleFlags.empty())
- getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM);
+ TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM);
+
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatELF()) {
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDataRelSection());
+ const DataLayout *DL = TM.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ OutStreamer->EmitLabel(Stub.first);
+ OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ DL->getPointerSize());
+ }
+ }
+ }
// Make sure we wrote out everything we need.
- OutStreamer.Flush();
+ OutStreamer->Flush();
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
@@ -989,31 +1083,31 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &G : M.globals()) {
if (!G.hasExternalWeakLinkage())
continue;
- OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference);
+ OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference);
}
for (const auto &F : M) {
if (!F.hasExternalWeakLinkage())
continue;
- OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference);
+ OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference);
}
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
for (const auto &Alias : M.aliases()) {
MCSymbol *Name = getSymbol(&Alias);
if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_Global);
else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference);
else
assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
EmitVisibility(Name, Alias.getVisibility());
// Emit the directives as assignments aka .set:
- OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
+ OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -1027,27 +1121,26 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
- const MCSection *ReadOnlySection =
+ MCSection *ReadOnlySection =
getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
/*C=*/nullptr);
- OutStreamer.SwitchSection(ReadOnlySection);
+ OutStreamer->SwitchSection(ReadOnlySection);
MCSymbol *AddrSymbol =
- OutContext.GetOrCreateSymbol(StringRef("__morestack_addr"));
- OutStreamer.EmitLabel(AddrSymbol);
+ OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
+ OutStreamer->EmitLabel(AddrSymbol);
- const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
- unsigned PtrSize = DL.getPointerSize(0);
- OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
- PtrSize);
+ unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+ PtrSize);
}
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
- if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
- OutStreamer.SwitchSection(S);
+ if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer->SwitchSection(S);
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
@@ -1056,29 +1149,44 @@ bool AsmPrinter::doFinalization(Module &M) {
delete Mang; Mang = nullptr;
MMI = nullptr;
- OutStreamer.Finish();
- OutStreamer.reset();
+ OutStreamer->Finish();
+ OutStreamer->reset();
return false;
}
+MCSymbol *AsmPrinter::getCurExceptionSym() {
+ if (!CurExceptionSym)
+ CurExceptionSym = createTempSymbol("exception");
+ return CurExceptionSym;
+}
+
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurExceptionSym = nullptr;
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ NeedsLocalForSize) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
}
namespace {
- // SectionCPs - Keep track the alignment, constpool entries per Section.
+// Keep track of the alignment and constant-pool entries per section.
struct SectionCPs {
- const MCSection *S;
+ MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
- SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+ SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
};
}
@@ -1100,13 +1208,13 @@ void AsmPrinter::EmitConstantPool() {
unsigned Align = CPE.getAlignment();
SectionKind Kind =
- CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+ CPE.getSectionKind(TM.getDataLayout());
const Constant *C = nullptr;
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
+ MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
// The number of sections is small, so just do a linear search from the
// last section to the first.
@@ -1139,7 +1247,7 @@ void AsmPrinter::EmitConstantPool() {
continue;
if (CurSection != CPSections[i].S) {
- OutStreamer.SwitchSection(CPSections[i].S);
+ OutStreamer->SwitchSection(CPSections[i].S);
EmitAlignment(Log2_32(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
@@ -1150,13 +1258,13 @@ void AsmPrinter::EmitConstantPool() {
// Emit inter-object padding for alignment.
unsigned AlignMask = CPE.getAlignment() - 1;
unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
- OutStreamer.EmitZeros(NewOffset - Offset);
+ OutStreamer->EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
Offset = NewOffset +
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty);
+ TM.getDataLayout()->getTypeAllocSize(Ty);
- OutStreamer.EmitLabel(Sym);
+ OutStreamer->EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
@@ -1169,7 +1277,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getSubtarget().getDataLayout();
+ const DataLayout *DL = MF->getTarget().getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1179,34 +1287,23 @@ void AsmPrinter::EmitJumpTableInfo() {
// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
const Function *F = MF->getFunction();
- bool JTInDiffSection = false;
- if (// In PIC mode, we need to emit the jump table to the same section as the
- // function body itself, otherwise the label differences won't make sense.
- // FIXME: Need a better predicate for this: what about custom entries?
- MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
- // We should also do if the section name is NULL or function is declared
- // in discardable section
- // FIXME: this isn't the right predicate, should be based on the MCSection
- // for the function.
- F->isWeakForLinker()) {
- OutStreamer.SwitchSection(
- getObjFileLowering().SectionForGlobal(F, *Mang, TM));
- } else {
- // Otherwise, drop it in the readonly section.
- const MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
- /*C=*/nullptr);
- OutStreamer.SwitchSection(ReadOnlySection);
- JTInDiffSection = true;
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ *F);
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM);
+ OutStreamer->SwitchSection(ReadOnlySection);
}
EmitAlignment(Log2_32(
- MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout())));
+ MJTI->getEntryAlignment(*TM.getDataLayout())));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
if (!JTInDiffSection)
- OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1219,7 +1316,7 @@ void AsmPrinter::EmitJumpTableInfo() {
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
MAI->doesSetDirectiveSuppressesReloc()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
const MachineBasicBlock *MBB = JTBBs[ii];
@@ -1229,8 +1326,9 @@ void AsmPrinter::EmitJumpTableInfo() {
// .set LJTSet, LBB32-base
const MCExpr *LHS =
MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
- OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
- MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+ OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::CreateSub(LHS, Base,
+ OutContext));
}
}
@@ -1241,15 +1339,15 @@ void AsmPrinter::EmitJumpTableInfo() {
if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix())
// FIXME: This doesn't have to have any specific name, just any randomly
// named and numbered 'l' label would work. Simplify GetJTISymbol.
- OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
- OutStreamer.EmitLabel(GetJTISymbol(JTI));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
if (!JTInDiffSection)
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
}
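For the EK_LabelDifference32 path with doesSetDirectiveSuppressesReloc(), the emission order above yields the familiar Darwin-style table (labels hypothetical):

//         .set  L0_0_set_1, LBB0_1-LJTI0_0
//         .set  L0_0_set_2, LBB0_2-LJTI0_0
// LJTI0_0:
//         .long L0_0_set_1
//         .long L0_0_set_2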
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
@@ -1263,9 +1361,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
- Value =
- TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry(
- MJTI, MBB, UID, OutContext);
+ Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
break;
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -1277,7 +1374,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// with a relocation as gp-relative, e.g.:
// .gprel32 LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
return;
}
@@ -1286,7 +1383,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// with a relocation as gp-relative, e.g.:
// .gpdword LBB123
MCSymbol *MBBSym = MBB->getSymbol();
- OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
return;
}
@@ -1304,7 +1401,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
break;
}
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
break;
@@ -1314,8 +1411,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
unsigned EntrySize =
- MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout());
- OutStreamer.EmitValue(Value, EntrySize);
+ MJTI->getEntrySize(*TM.getDataLayout());
+ OutStreamer->EmitValue(Value, EntrySize);
}
@@ -1344,8 +1441,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
StringRef Sym(".constructors_used");
- OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
- MCSA_Reference);
+ OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
+ MCSA_Reference);
}
return true;
}
@@ -1356,8 +1453,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
StringRef Sym(".destructors_used");
- OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
- MCSA_Reference);
+ OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
+ MCSA_Reference);
}
return true;
}
@@ -1374,7 +1471,7 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
if (GV)
- OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
+ OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
}
}
@@ -1424,7 +1521,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
@@ -1440,11 +1537,11 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
KeySym = getSymbol(GV);
}
- const MCSection *OutputSection =
+ MCSection *OutputSection =
(isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
: Obj.getStaticDtorSection(S.Priority, KeySym));
- OutStreamer.SwitchSection(OutputSection);
- if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ OutStreamer->SwitchSection(OutputSection);
+ if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
EmitAlignment(Align);
EmitXXStructor(S.Func);
}
@@ -1460,7 +1557,7 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
assert(N->getNumOperands() == 1 &&
"llvm.ident metadata entry can have only one operand");
const MDString *S = cast<MDString>(N->getOperand(0));
- OutStreamer.EmitIdent(S->getString());
+ OutStreamer->EmitIdent(S->getString());
}
}
}
@@ -1472,19 +1569,19 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
/// EmitInt8 - Emit a byte directive and value.
///
void AsmPrinter::EmitInt8(int Value) const {
- OutStreamer.EmitIntValue(Value, 1);
+ OutStreamer->EmitIntValue(Value, 1);
}
/// EmitInt16 - Emit a short directive and value.
///
void AsmPrinter::EmitInt16(int Value) const {
- OutStreamer.EmitIntValue(Value, 2);
+ OutStreamer->EmitIntValue(Value, 2);
}
/// EmitInt32 - Emit a long directive and value.
///
void AsmPrinter::EmitInt32(int Value) const {
- OutStreamer.EmitIntValue(Value, 4);
+ OutStreamer->EmitIntValue(Value, 4);
}
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
@@ -1492,6 +1589,10 @@ void AsmPrinter::EmitInt32(int Value) const {
/// .set if it avoids relocations.
void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
+ if (!MAI->doesDwarfUseRelocationsAcrossSections())
+ if (OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size))
+ return;
+
// Get the Hi-Lo expression.
const MCExpr *Diff =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
@@ -1499,14 +1600,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutContext);
if (!MAI->doesSetDirectiveSuppressesReloc()) {
- OutStreamer.EmitValue(Diff, Size);
+ OutStreamer->EmitValue(Diff, Size);
return;
}
// Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
- OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, Size);
+ MCSymbol *SetLabel = createTempSymbol("set");
+ OutStreamer->EmitAssignment(SetLabel, Diff);
+ OutStreamer->EmitSymbolValue(SetLabel, Size);
}
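The three strategies this function now chooses among, as emitted assembly (hypothetical labels, 4-byte Size):

//         .long Lhi-Llo          // generic form: may require a relocation
//
//         .set  Lset0, Lhi-Llo   // .set suppresses the relocation; the
//         .long Lset0            // difference is bound at assembly time
//
// emitAbsoluteSymbolDiff() is the new fast path: when the object streamer can
// already resolve Hi-Lo to a constant, it writes the bytes directly.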
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
@@ -1516,7 +1617,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
- OutStreamer.EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label);
return;
}
@@ -1526,7 +1627,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
Expr = MCBinaryExpr::CreateAdd(
Expr, MCConstantExpr::Create(Offset, OutContext), OutContext);
- OutStreamer.EmitValue(Expr, Size);
+ OutStreamer->EmitValue(Expr, Size);
}
//===----------------------------------------------------------------------===//
@@ -1539,7 +1640,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(),
+ NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(),
NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1548,9 +1649,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
"undefined behavior");
if (getCurrentSection()->getKind().isText())
- OutStreamer.EmitCodeAlignment(1u << NumBits);
+ OutStreamer->EmitCodeAlignment(1u << NumBits);
else
- OutStreamer.EmitValueToAlignment(1u << NumBits);
+ OutStreamer->EmitValueToAlignment(1u << NumBits);
}
//===----------------------------------------------------------------------===//
@@ -1586,8 +1687,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(
- CE, TM.getSubtargetImpl()->getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -1601,7 +1701,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
@@ -1625,7 +1726,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return lowerConstant(CE->getOperand(0));
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
@@ -1635,7 +1737,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *TM.getDataLayout();
+
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -1685,7 +1788,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
}
}
-static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
+static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP,
+ const Constant *BaseCV = nullptr,
+ uint64_t Offset = 0);
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
@@ -1709,7 +1814,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (CI->getBitWidth() > 64) return -1;
uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType());
+ TM.getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1754,26 +1859,26 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
uint64_t Bytes =
- AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ AP.TM.getDataLayout()->getTypeAllocSize(
CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
- return AP.OutStreamer.EmitFill(Bytes, Value);
+ return AP.OutStreamer->EmitFill(Bytes, Value);
}
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
- return AP.OutStreamer.EmitBytes(CDS->getAsString());
+ return AP.OutStreamer->EmitBytes(CDS->getAsString());
// Otherwise, emit the values in successive locations.
unsigned ElementByteSize = CDS->getElementByteSize();
if (isa<IntegerType>(CDS->getElementType())) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
- CDS->getElementAsInteger(i));
- AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
- ElementByteSize);
+ AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize);
}
} else if (ElementByteSize == 4) {
// FP Constants are printed as integer constants to avoid losing
@@ -1787,8 +1892,8 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
F = CDS->getElementAsFloat(i);
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
- AP.OutStreamer.EmitIntValue(I, 4);
+ AP.OutStreamer->GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer->EmitIntValue(I, 4);
}
} else {
assert(CDS->getElementType()->isDoubleTy());
@@ -1800,34 +1905,36 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
F = CDS->getElementAsDouble(i);
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
- AP.OutStreamer.EmitIntValue(I, 8);
+ AP.OutStreamer->GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer->EmitIntValue(I, 8);
}
}
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
if (unsigned Padding = Size - EmittedSize)
- AP.OutStreamer.EmitZeros(Padding);
+ AP.OutStreamer->EmitZeros(Padding);
}
-static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
+static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, AP.TM);
+ const DataLayout &DL = *AP.TM.getDataLayout();
if (Value != -1) {
- uint64_t Bytes =
- AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
- CA->getType());
- AP.OutStreamer.EmitFill(Bytes, Value);
+ uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
+ AP.OutStreamer->EmitFill(Bytes, Value);
}
else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- emitGlobalConstantImpl(CA->getOperand(i), AP);
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset);
+ Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ }
}
}
@@ -1835,36 +1942,37 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
- AP.OutStreamer.EmitZeros(Padding);
+ AP.OutStreamer->EmitZeros(Padding);
}
-static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
+static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = AP.TM.getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
+ // Print the actual field value.
+ emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar);
+
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
- // Now print the actual field value.
- emitGlobalConstantImpl(Field, AP);
-
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
// as padding to ensure that the next field starts at the right offset.
- AP.OutStreamer.EmitZeros(PadSize);
+ AP.OutStreamer->EmitZeros(PadSize);
}
assert(SizeSoFar == Layout->getSizeInBytes() &&
"Layout of constant struct may be incorrect!");
@@ -1880,10 +1988,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
CFP->getValueAPF().toString(StrVal);
if (CFP->getType())
- CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ CFP->getType()->print(AP.OutStreamer->GetCommentOS());
else
- AP.OutStreamer.GetCommentOS() << "Printing <null> Type";
- AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
+ AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
+ AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
}
// Now iterate through the APInt chunks, emitting them in endian-correct
@@ -1895,32 +2003,32 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() &&
+ if (AP.TM.getDataLayout()->isBigEndian() &&
!CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
- AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes);
+ AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes);
for (; Chunk >= 0; --Chunk)
- AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
+ AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
} else {
unsigned Chunk;
for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
- AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
+ AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
if (TrailingBytes)
- AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes);
+ AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes);
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
- AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
- DL.getTypeStoreSize(CFP->getType()));
+ const DataLayout &DL = *AP.TM.getDataLayout();
+ AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
+ DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -1959,28 +2067,123 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i];
- AP.OutStreamer.EmitIntValue(Val, 8);
+ AP.OutStreamer->EmitIntValue(Val, 8);
}
if (ExtraBitsSize) {
// Emit the extra bits after the 64-bits chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(
CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
== ExtraBits && "Directive too small for extra bits.");
- AP.OutStreamer.EmitIntValue(ExtraBits, Size);
+ AP.OutStreamer->EmitIntValue(ExtraBits, Size);
}
}
-static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
+/// \brief Transform a non-absolute MCExpr containing a reference to a GOT
+/// equivalent global into a target-specific GOT pc-relative access to the
+/// final symbol.
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
+ const Constant *BaseCst,
+ uint64_t Offset) {
+ // The global @foo below illustrates a global that uses a got equivalent.
+ //
+ // @bar = global i32 42
+ // @gotequiv = private unnamed_addr constant i32* @bar
+ // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64),
+ // i64 ptrtoint (i32* @foo to i64))
+ // to i32)
+ //
+ // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually
+ // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the
+ // form:
+ //
+ // foo = cstexpr, where
+ // cstexpr := <gotequiv> - "." + <cst>
+ // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
+ //
+ // After canonicalization by EvaluateAsRelocatable `ME` turns into:
+ //
+ // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ MCValue MV;
+ if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
+ return;
+
+ const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol();
+ if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
+ return;
+
+ const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst);
+ if (!BaseGV)
+ return;
+
+ const MCSymbol *BaseSym = AP.getSymbol(BaseGV);
+ if (BaseSym != &MV.getSymB()->getSymbol())
+ return;
+
+ // Make sure to match:
+ //
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ // If gotpcrelcst is positive, it means we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ //
+ int64_t GOTPCRelCst = Offset + MV.getConstant();
+ if (GOTPCRelCst < 0)
+ return;
+ if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+ return;
+
+ // Emit the GOT PC-relative reference that replaces the got equivalent
+ // global, i.e.:
+ //
+ // bar:
+ // .long 42
+ // gotequiv:
+ // .quad bar
+ // foo:
+ // .long gotequiv - "." + <cst>
+ //
+ // is replaced by the target specific equivalent to:
+ //
+ // bar:
+ // .long 42
+ // foo:
+ // .long bar@GOTPCREL+<gotpcrelcst>
+ //
+ AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
+ const GlobalVariable *GV = Result.first;
+ int NumUses = (int)Result.second;
+ const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
+ const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+ FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+
+ // Update GOT equivalent usage information
+ --NumUses;
+ if (NumUses >= 0)
+ AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
+ const DataLayout *DL = AP.TM.getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
+
+ // Globals with sub-elements such as combinations of arrays and structs
+ // are handled recursively by emitGlobalConstantImpl. Keep track of the
+ // constant symbol base and the current position with BaseCV and Offset.
+ if (!BaseCV && CV->hasOneUse())
+ BaseCV = dyn_cast<Constant>(CV->user_back());
+
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
- return AP.OutStreamer.EmitZeros(Size);
+ return AP.OutStreamer->EmitZeros(Size);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
switch (Size) {
@@ -1989,9 +2192,9 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
case 4:
case 8:
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
- CI->getZExtValue());
- AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size);
+ AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
+ AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size);
return;
default:
emitGlobalConstantLargeInt(CI, AP);
@@ -2003,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
return emitGlobalConstantFP(CFP, AP);
if (isa<ConstantPointerNull>(CV)) {
- AP.OutStreamer.EmitIntValue(0, Size);
+ AP.OutStreamer->EmitIntValue(0, Size);
return;
}
@@ -2011,10 +2214,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
return emitGlobalConstantDataSequential(CDS, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(CVA, AP);
+ return emitGlobalConstantArray(CVA, AP, BaseCV, Offset);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(CVS, AP);
+ return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -2026,7 +2229,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, DL);
+ Constant *New = ConstantFoldConstantExpression(CE, *DL);
if (New && New != CE)
return emitGlobalConstantImpl(New, AP);
}
@@ -2037,19 +2240,27 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// through the streamer with EmitValue.
- AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size);
+ const MCExpr *ME = AP.lowerConstant(CV);
+
+ // Since lowerConstant already folded and got rid of all IR pointer and
+ // integer casts, detect GOT equivalent accesses by looking into the MCExpr
+ // directly.
+ if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
+
+ AP.OutStreamer->EmitValue(ME, Size);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
+ TM.getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
- OutStreamer.EmitIntValue(0, 1);
+ OutStreamer->EmitIntValue(0, 1);
}
}
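
The BaseCV/Offset threading above is the heart of this hunk: every recursive
emitGlobalConstantImpl call now carries the enclosing constant and the byte
offset of the current sub-element, so a GOT-equivalent reference buried inside
an array or struct can still be rewritten relative to the base symbol. A
minimal sketch of that bookkeeping (Elem and emitElem are illustrative names,
not LLVM APIs):

#include <cstdint>
#include <vector>

struct Elem { uint64_t Size; };  // stand-in for a Constant operand

// Walk an aggregate, carrying the running byte offset from the base constant,
// mirroring what emitGlobalConstantArray/Struct now do.
static void emitAggregate(const std::vector<Elem> &Elems, uint64_t Offset) {
  for (const Elem &E : Elems) {
    // emitElem(E, Offset);  // hypothetical per-element emission at base+Offset
    Offset += E.Size;        // advance past the element just emitted
  }
}
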
@@ -2069,23 +2280,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
// Symbol Lowering Routines.
//===----------------------------------------------------------------------===//
-/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
-/// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- Name + Twine(ID));
-}
-
-/// GetTempSymbol - Return an assembler temporary label with the specified
-/// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Name);
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+ return OutContext.createTempSymbol(Name, true);
}
-
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
return MMI->getAddrLabelSymbol(BA->getBasicBlock());
}
@@ -2096,8 +2294,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- return OutContext.GetOrCreateSymbol
+ const DataLayout *DL = TM.getDataLayout();
+ return OutContext.getOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
}
@@ -2110,8 +2308,8 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- return OutContext.GetOrCreateSymbol
+ const DataLayout *DL = TM.getDataLayout();
+ return OutContext.getOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
}
@@ -2127,7 +2325,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
SmallString<60> NameStr;
Mang->getNameWithPrefix(NameStr, Sym);
- return OutContext.GetOrCreateSymbol(NameStr.str());
+ return OutContext.getOrCreateSymbol(NameStr);
}
@@ -2172,16 +2370,16 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
// If this block is not a loop header, just print out what is the loop header
// and return.
if (Header != &MBB) {
- AP.OutStreamer.AddComment(" in Loop: Header=BB" +
- Twine(AP.getFunctionNumber())+"_" +
- Twine(Loop->getHeader()->getNumber())+
- " Depth="+Twine(Loop->getLoopDepth()));
+ AP.OutStreamer->AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
return;
}
// Otherwise, it is a loop header. Print out information about child and
// parent loops.
- raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+ raw_ostream &OS = AP.OutStreamer->GetCommentOS();
PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
@@ -2212,18 +2410,18 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
if (MBB.hasAddressTaken()) {
const BasicBlock *BB = MBB.getBasicBlock();
if (isVerbose())
- OutStreamer.AddComment("Block address taken");
+ OutStreamer->AddComment("Block address taken");
std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB);
for (auto *Sym : Symbols)
- OutStreamer.EmitLabel(Sym);
+ OutStreamer->EmitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
if (const BasicBlock *BB = MBB.getBasicBlock())
if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
+ OutStreamer->AddComment("%" + BB->getName());
emitBasicBlockLoopComments(MBB, LI, *this);
}
@@ -2231,10 +2429,10 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
- OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
+ OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
}
} else {
- OutStreamer.EmitLabel(MBB.getSymbol());
+ OutStreamer->EmitLabel(MBB.getSymbol());
}
}
@@ -2256,7 +2454,7 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
}
if (Attr != MCSA_Invalid)
- OutStreamer.EmitSymbolAttribute(Sym, Attr);
+ OutStreamer->EmitSymbolAttribute(Sym, Attr);
}
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
@@ -2335,3 +2533,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() {}
+
+void AsmPrinterHandler::markFunctionEnd() {}
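
Nearly all of the remaining churn in this file is mechanical: every
OutStreamer. became OutStreamer->, reflecting upstream changing the member
from a streamer reference to std::unique_ptr ownership in this merge. The
shape of that change, reduced to a sketch (field layout abbreviated, not the
full class):

#include <memory>
class MCStreamer;

struct AsmPrinterBefore { MCStreamer &OutStreamer; };                 // reference member
struct AsmPrinterAfter  { std::unique_ptr<MCStreamer> OutStreamer; }; // owned pointer
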
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 00681f6..3258961 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,45 +12,29 @@
//===----------------------------------------------------------------------===//
#include "ByteStreamer.h"
+#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
- BS.EmitInt8(
- Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
- : dwarf::OperationEncodingString(Op));
-}
-
-void DebugLocDwarfExpression::EmitSigned(int Value) {
- BS.EmitSLEB128(Value, Twine(Value));
-}
-
-void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) {
- BS.EmitULEB128(Value, Twine(Value));
-}
-
-bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
- // This information is not available while emitting .debug_loc entries.
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
@@ -58,30 +42,30 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
/// EmitSLEB128 - emit the specified signed leb128 value.
void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
if (isVerbose() && Desc)
- OutStreamer.AddComment(Desc);
+ OutStreamer->AddComment(Desc);
- OutStreamer.EmitSLEB128IntValue(Value);
+ OutStreamer->EmitSLEB128IntValue(Value);
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
unsigned PadTo) const {
if (isVerbose() && Desc)
- OutStreamer.AddComment(Desc);
+ OutStreamer->AddComment(Desc);
- OutStreamer.EmitULEB128IntValue(Value, PadTo);
+ OutStreamer->EmitULEB128IntValue(Value, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
void AsmPrinter::EmitCFAByte(unsigned Val) const {
if (isVerbose()) {
if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64)
- OutStreamer.AddComment("DW_CFA_offset + Reg (" +
- Twine(Val - dwarf::DW_CFA_offset) + ")");
+ OutStreamer->AddComment("DW_CFA_offset + Reg (" +
+ Twine(Val - dwarf::DW_CFA_offset) + ")");
else
- OutStreamer.AddComment(dwarf::CallFrameString(Val));
+ OutStreamer->AddComment(dwarf::CallFrameString(Val));
}
- OutStreamer.EmitIntValue(Val, 1);
+ OutStreamer->EmitIntValue(Val, 1);
}
static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -132,13 +116,13 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
if (isVerbose()) {
if (Desc)
- OutStreamer.AddComment(Twine(Desc) + " Encoding = " +
- Twine(DecodeDWARFEncoding(Val)));
+ OutStreamer->AddComment(Twine(Desc) + " Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
else
- OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
+ OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
}
- OutStreamer.EmitIntValue(Val, 1);
+ OutStreamer->EmitIntValue(Val, 1);
}
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
@@ -150,7 +134,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ return TM.getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -166,10 +150,11 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const MCExpr *Exp =
- TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI,
+ *OutStreamer);
+ OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
} else
- OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
+ OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
}
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -179,63 +164,43 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
///
/// SectionLabel is a temporary label emitted at the start of the section that
/// Label lives in.
-void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
- const MCSymbol *SectionLabel) const {
+void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
- OutStreamer.EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label);
return;
}
- // Get the section that we're referring to, based on SectionLabel.
- const MCSection &Section = SectionLabel->getSection();
-
- // If Label has already been emitted, verify that it is in the same section as
- // section label for sanity.
- assert((!Label->isInSection() || &Label->getSection() == &Section) &&
- "Section offset using wrong section base for label");
-
- // If the section in question will end up with an address of 0 anyway, we can
- // just emit an absolute reference to save a relocation.
- if (Section.isBaseAddressKnownZero()) {
- OutStreamer.EmitSymbolValue(Label, 4);
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ OutStreamer->EmitSymbolValue(Label, 4);
return;
}
// Otherwise, emit it as a label difference from the start of the section.
- EmitLabelDifference(Label, SectionLabel, 4);
+ EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
-// Some targets do not provide a DWARF register number for every
-// register. This function attempts to emit a DWARF register by
-// emitting a piece of a super-register or by piecing together
-// multiple subregisters that alias the register.
-void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
- const MachineLocation &MLoc,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- assert(MLoc.isReg() && "MLoc must be a register");
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits);
-}
+void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ emitSectionOffset(S.getSymbol());
+ return;
+ }
-void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+ // Just emit the offset directly; no need for symbol math.
+ EmitInt32(S.getOffset());
}
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
- const MachineLocation &MLoc,
- bool Indirect) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+ const MachineLocation &MLoc) const {
+ DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(),
+ getDwarfDebug()->getDwarfVersion(), Streamer);
+ const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
+ int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
// We assume that pointers are always in an addressable register.
- if (Indirect || MLoc.isIndirect())
+ if (MLoc.isIndirect())
// FIXME: We have no reasonable way of handling errors in here. The
// caller might be in the middle of a dwarf expression. We should
// probably assert that Reg >= 0 once debug info generation is more
@@ -251,9 +216,7 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
}
if (MLoc.isIndirect())
- Expr.AddRegIndirect(Reg, MLoc.getOffset(), Indirect);
- else if (Indirect)
- Expr.AddRegIndirect(Reg, 0, false);
+ Expr.AddRegIndirect(Reg, MLoc.getOffset());
else
Expr.AddReg(Reg);
}
@@ -267,25 +230,82 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
- OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset());
+ OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfa:
- OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpDefCfaRegister:
- OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister());
+ OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister());
break;
case MCCFIInstruction::OpOffset:
- OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
case MCCFIInstruction::OpRegister:
- OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
break;
case MCCFIInstruction::OpWindowSave:
- OutStreamer.EmitCFIWindowSave();
+ OutStreamer->EmitCFIWindowSave();
break;
case MCCFIInstruction::OpSameValue:
- OutStreamer.EmitCFISameValue(Inst.getRegister());
+ OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
}
}
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+ // Get the abbreviation for this DIE.
+ const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+ // Emit the code (index) for the abbreviation.
+ if (isVerbose())
+ OutStreamer->AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+ "] 0x" + Twine::utohexstr(Die.getOffset()) +
+ ":0x" + Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Abbrev.getTag()));
+ EmitULEB128(Abbrev.getNumber());
+
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+ dwarf::Form Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (isVerbose()) {
+ OutStreamer->AddComment(dwarf::AttributeString(Attr));
+ if (Attr == dwarf::DW_AT_accessibility)
+ OutStreamer->AddComment(dwarf::AccessibilityString(
+ cast<DIEInteger>(Values[i])->getValue()));
+ }
+
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev.hasChildren()) {
+ for (auto &Child : Die.getChildren())
+ emitDwarfDIE(*Child);
+
+ OutStreamer->AddComment("End Of Children Mark");
+ EmitInt8(0);
+ }
+}
+
+void
+AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
+ // For each abbreviation.
+ for (const DIEAbbrev *Abbrev : Abbrevs) {
+ // Emit the abbreviation's code (base-1 index).
+ EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(this);
+ }
+
+ // Mark end of abbreviations.
+ EmitULEB128(0, "EOM(3)");
+}
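
The new emitDwarfDIE/emitDwarfAbbrevs helpers above lean heavily on
EmitULEB128. For reference, ULEB128 is the usual low-7-bits-per-byte encoding
with the high bit marking continuation; a standalone encoder equivalent in
effect to encodeULEB128 from llvm/Support/LEB128.h (a sketch, not LLVM's
code):

#include <cstdint>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low 7 bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // set the high bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}
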
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 31867dd..f1efe9d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -41,6 +41,10 @@ public:
/// call.
virtual void beginFunction(const MachineFunction *MF) = 0;
+ /// \brief Emit any function end marker (like .cfi_endproc). This is called
+ /// before endFunction and cannot switch sections.
+ virtual void markFunctionEnd();
+
/// \brief Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
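
The new markFunctionEnd hook slots between instruction emission and
endFunction. A hypothetical driver makes the ordering contract concrete
(emitFunction is illustrative; the real caller is AsmPrinter):

void emitFunction(AsmPrinterHandler &H, const MachineFunction *MF) {
  H.beginFunction(MF);
  // ... per-instruction emission ...
  H.markFunctionEnd();  // still in the function's section; .cfi_endproc etc.
  H.endFunction(MF);    // gathers post-function info, may switch sections
}
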
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index f6ce4a0..e7631dd 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -72,7 +73,9 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
}
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ const MCTargetOptions &MCOptions,
+ const MDNode *LocMDNode,
InlineAsm::AsmDialect Dialect) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
@@ -89,10 +92,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
const MCAsmInfo *MCAI = TM.getMCAsmInfo();
assert(MCAI && "No MCAsmInfo");
if (!MCAI->useIntegratedAssembler() &&
- !OutStreamer.isIntegratedAssemblerRequired()) {
- emitInlineAsmStart(TM.getSubtarget<MCSubtargetInfo>());
- OutStreamer.EmitRawText(Str);
- emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr);
+ !OutStreamer->isIntegratedAssemblerRequired()) {
+ emitInlineAsmStart();
+ OutStreamer->EmitRawText(Str);
+ emitInlineAsmEnd(STI, nullptr);
return;
}
@@ -122,27 +125,21 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
-
- // Initialize the parser with a fresh subtarget info. It is better to use a
- // new STI here because the parser may modify it and we do not want those
- // modifications to persist after parsing the inlineasm. The modifications
- // made by the parser will be seen by the code emitters because it passes
- // the current STI down to the EncodeInstruction() method.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
-
- // Preserve a copy of the original STI because the parser may modify it. For
- // example, when switching between arm and thumb mode. If the target needs to
- // emit code to return to the original state it can do so in
- // emitInlineAsmEnd().
- MCSubtargetInfo STIOrig = *STI;
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
- MCTargetOptions MCOptions;
- if (MF)
- MCOptions = MF->getTarget().Options.MCOptions;
- std::unique_ptr<MCTargetAsmParser> TAP(
- TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions));
+ // Create a temporary copy of the original STI because the parser may modify
+ // it. For example, when switching between arm and thumb mode. If the target
+ // needs to emit code to return to the original state it can do so in
+ // emitInlineAsmEnd().
+ MCSubtargetInfo TmpSTI = STI;
+
+ // We create a new MCInstrInfo here since we might be at the module level
+ // and not have a MachineFunction to initialize the TargetInstrInfo from,
+ // and we only need MCInstrInfo for asm parsing. We create one
+ // unconditionally because it's not subtarget dependent.
+ std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
+ TmpSTI, *Parser, *MII, MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -153,11 +150,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
}
- emitInlineAsmStart(STIOrig);
+ emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
- emitInlineAsmEnd(STIOrig, STI.get());
+ emitInlineAsmEnd(STI, &TmpSTI);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
@@ -452,14 +449,14 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// If this asmstr is empty, just print the #APP/#NOAPP markers.
// These are useful to see where empty asm's wound up.
if (AsmStr[0] == 0) {
- OutStreamer.emitRawComment(MAI->getInlineAsmStart());
- OutStreamer.emitRawComment(MAI->getInlineAsmEnd());
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
return;
}
// Emit the #APP start marker. This has to happen even if verbose-asm isn't
// enabled, so we use emitRawComment.
- OutStreamer.emitRawComment(MAI->getInlineAsmStart());
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
// Get the !srcloc metadata node if we have it, and decode the loc cookie from
// it.
@@ -492,11 +489,17 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
else
EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
- EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+ // Reset SanitizeAddress based on the function's attribute.
+ MCTargetOptions MCOptions = TM.Options.MCOptions;
+ MCOptions.SanitizeAddress =
+ MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress);
+
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
+ MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use emitRawComment.
- OutStreamer.emitRawComment(MAI->getInlineAsmEnd());
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
}
@@ -508,7 +511,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
if (!strcmp(Code, "private")) {
OS << DL->getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
@@ -569,7 +572,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return true;
}
-void AsmPrinter::emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const {}
+void AsmPrinter::emitInlineAsmStart() const {}
void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const {}
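
The inline-asm rework above stops creating a fresh MCSubtargetInfo per blob
and instead copies the caller's, so the parser can mutate the copy (for
example flipping between ARM and Thumb mode) while emitInlineAsmEnd still sees
the pristine original. The pattern reduced to its essentials (Mode and
runParser are illustrative stand-ins):

enum class Mode { ARM, Thumb };

static void runParser(Mode &M) { M = Mode::Thumb; } // parser may flip state

static void emitInlineAsm(Mode Orig) {
  Mode Tmp = Orig;  // scratch copy handed to the parser
  runParser(Tmp);
  if (Tmp != Orig) {
    // emitInlineAsmEnd(Orig, &Tmp): the target can emit code here to switch
    // back to the original mode.
  }
}
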
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 42be114..0cc829f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -19,6 +19,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>
namespace llvm {
class ByteStreamer {
@@ -38,15 +40,15 @@ private:
public:
APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
void EmitInt8(uint8_t Byte, const Twine &Comment) override {
- AP.OutStreamer.AddComment(Comment);
+ AP.OutStreamer->AddComment(Comment);
AP.EmitInt8(Byte);
}
void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
- AP.OutStreamer.AddComment(Comment);
+ AP.OutStreamer->AddComment(Comment);
AP.EmitSLEB128(DWord);
}
void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
- AP.OutStreamer.AddComment(Comment);
+ AP.OutStreamer->AddComment(Comment);
AP.EmitULEB128(DWord);
}
};
@@ -66,6 +68,41 @@ class HashingByteStreamer : public ByteStreamer {
Hash.addULEB128(DWord);
}
};
+
+class BufferByteStreamer : public ByteStreamer {
+private:
+ SmallVectorImpl<char> &Buffer;
+ SmallVectorImpl<std::string> &Comments;
+
+ /// \brief Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise. If false, comments passed in to
+ /// the emit methods will be ignored.
+ bool GenerateComments;
+
+public:
+ BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+ SmallVectorImpl<std::string> &Comments,
+ bool GenerateComments)
+ : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ Buffer.push_back(Byte);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeSLEB128(DWord, OSE);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeULEB128(DWord, OSE);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+};
+
}
#endif
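
The new BufferByteStreamer accumulates raw bytes in one SmallVector and, when
GenerateComments is set, one comment string per emit call in a parallel
vector, so a debug-loc expression can be encoded once and streamed later with
its verbose annotations intact. A usage sketch (assumes the surrounding LLVM
headers; the operand values are illustrative):

SmallVector<char, 32> Buffer;
SmallVector<std::string, 8> Comments;
BufferByteStreamer BS(Buffer, Comments, /*GenerateComments=*/true);

BS.EmitInt8(dwarf::DW_OP_fbreg, "DW_OP_fbreg"); // one byte appended to Buffer
BS.EmitSLEB128(-8, "offset");                   // SLEB128 bytes appended
// Buffer now holds the encoded expression; Comments holds the two strings a
// verbose printer can attach when the bytes are finally emitted.
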
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 64ba56b..1ccffe9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
/// Emit - Print the abbreviation using the specified asm printer.
///
-void DIEAbbrev::Emit(AsmPrinter *AP) const {
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
AP->EmitULEB128(Tag, dwarf::TagString(Tag));
@@ -190,9 +191,67 @@ void DIE::dump() {
}
#endif
-void DIEValue::anchor() { }
+void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Ty) {
+#define EMIT_VALUE_IMPL(Kind) \
+ case is##Kind: \
+ cast<DIE##Kind>(this)->EmitValueImpl(AP, Form); \
+ break;
+ EMIT_VALUE_IMPL(Integer)
+ EMIT_VALUE_IMPL(String)
+ EMIT_VALUE_IMPL(Expr)
+ EMIT_VALUE_IMPL(Label)
+ EMIT_VALUE_IMPL(Delta)
+ EMIT_VALUE_IMPL(Entry)
+ EMIT_VALUE_IMPL(TypeSignature)
+ EMIT_VALUE_IMPL(Block)
+ EMIT_VALUE_IMPL(Loc)
+ EMIT_VALUE_IMPL(LocList)
+#undef EMIT_VALUE_IMPL
+ }
+}
+
+unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Ty) {
+#define SIZE_OF_IMPL(Kind) \
+ case is##Kind: \
+ return cast<DIE##Kind>(this)->SizeOfImpl(AP, Form);
+ SIZE_OF_IMPL(Integer)
+ SIZE_OF_IMPL(String)
+ SIZE_OF_IMPL(Expr)
+ SIZE_OF_IMPL(Label)
+ SIZE_OF_IMPL(Delta)
+ SIZE_OF_IMPL(Entry)
+ SIZE_OF_IMPL(TypeSignature)
+ SIZE_OF_IMPL(Block)
+ SIZE_OF_IMPL(Loc)
+ SIZE_OF_IMPL(LocList)
+#undef SIZE_OF_IMPL
+ }
+ llvm_unreachable("Unknown DIE kind");
+}
#ifndef NDEBUG
+void DIEValue::print(raw_ostream &O) const {
+ switch (Ty) {
+#define PRINT_IMPL(Kind) \
+ case is##Kind: \
+ cast<DIE##Kind>(this)->printImpl(O); \
+ break;
+ PRINT_IMPL(Integer)
+ PRINT_IMPL(String)
+ PRINT_IMPL(Expr)
+ PRINT_IMPL(Label)
+ PRINT_IMPL(Delta)
+ PRINT_IMPL(Entry)
+ PRINT_IMPL(TypeSignature)
+ PRINT_IMPL(Block)
+ PRINT_IMPL(Loc)
+ PRINT_IMPL(LocList)
+#undef PRINT_IMPL
+ }
+}
+
void DIEValue::dump() const {
print(dbgs());
}
@@ -204,13 +263,13 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
-void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
- Asm->OutStreamer.AddBlankLine();
+ Asm->OutStreamer->AddBlankLine();
return;
case dwarf::DW_FORM_flag: // Fall thru
case dwarf::DW_FORM_ref1: // Fall thru
@@ -218,6 +277,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: Size = 2; break;
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
@@ -229,14 +289,17 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
Size = Asm->getDataLayout().getPointerSize(); break;
+ case dwarf::DW_FORM_ref_addr:
+ Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
+ break;
default: llvm_unreachable("DIE Value form not supported yet");
}
- Asm->OutStreamer.EmitIntValue(Integer, Size);
+ Asm->OutStreamer->EmitIntValue(Integer, Size);
}
/// SizeOf - Determine size of integer value in bytes.
///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
@@ -245,6 +308,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: return sizeof(int16_t);
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
@@ -255,12 +319,16 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
+ return AP->getDataLayout().getPointerSize();
+ return sizeof(int32_t);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
#ifndef NDEBUG
-void DIEInteger::print(raw_ostream &O) const {
+void DIEInteger::printImpl(raw_ostream &O) const {
O << "Int: " << (int64_t)Integer << " 0x";
O.write_hex(Integer);
}
@@ -272,13 +340,13 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
-void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
- AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
+void DIEExpr::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
///
-unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -286,10 +354,7 @@ unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
}
#ifndef NDEBUG
-void DIEExpr::print(raw_ostream &O) const {
- O << "Expr: ";
- Expr->print(O);
-}
+void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; }
#endif
//===----------------------------------------------------------------------===//
@@ -298,7 +363,7 @@ void DIEExpr::print(raw_ostream &O) const {
/// EmitValue - Emit label value.
///
-void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
@@ -307,7 +372,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of label value in bytes.
///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -315,7 +380,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
}
#ifndef NDEBUG
-void DIELabel::print(raw_ostream &O) const {
+void DIELabel::printImpl(raw_ostream &O) const {
O << "Lbl: " << Label->getName();
}
#endif
@@ -326,13 +391,13 @@ void DIELabel::print(raw_ostream &O) const {
/// EmitValue - Emit delta value.
///
-void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEDelta::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -340,7 +405,7 @@ unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
}
#ifndef NDEBUG
-void DIEDelta::print(raw_ostream &O) const {
+void DIEDelta::printImpl(raw_ostream &O) const {
O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
}
#endif
@@ -351,20 +416,50 @@ void DIEDelta::print(raw_ostream &O) const {
/// EmitValue - Emit string value.
///
-void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
- Access->EmitValue(AP, Form);
+void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+ assert(
+ (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
+ "Expected valid string form");
+
+ // Index of the string in the string table.
+ if (Form == dwarf::DW_FORM_GNU_str_index) {
+ DIEInteger(S.getIndex()).EmitValue(AP, Form);
+ return;
+ }
+
+ // Relocatable symbol.
+ assert(Form == dwarf::DW_FORM_strp);
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ DIELabel(S.getSymbol()).EmitValue(AP, Form);
+ return;
+ }
+
+ // Offset into the string table.
+ DIEInteger(S.getOffset()).EmitValue(AP, Form);
}
/// SizeOf - Determine size of string value in bytes.
///
-unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
- return Access->SizeOf(AP, Form);
+unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
+ assert(
+ (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
+ "Expected valid string form");
+
+ // Index of the string in the string table.
+ if (Form == dwarf::DW_FORM_GNU_str_index)
+ return DIEInteger(S.getIndex()).SizeOf(AP, Form);
+
+ // Relocatable symbol.
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ return DIELabel(S.getSymbol()).SizeOf(AP, Form);
+
+ // Offset into the string table.
+ return DIEInteger(S.getOffset()).SizeOf(AP, Form);
}
#ifndef NDEBUG
-void DIEString::print(raw_ostream &O) const {
- O << "String: " << Str << "\tSymbol: ";
- Access->print(O);
+void DIEString::printImpl(raw_ostream &O) const {
+ O << "String: " << S.getString();
}
#endif
@@ -372,32 +467,9 @@ void DIEString::print(raw_ostream &O) const {
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
-/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
-/// directive is specified by Size and Hi/Lo specify the labels.
-static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
- uint64_t Offset, const MCSymbol *Lo,
- unsigned Size) {
- MCContext &Context = Streamer.getContext();
-
- // Emit Hi+Offset - Lo
- // Get the Hi+Offset expression.
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
- MCConstantExpr::Create(Offset, Context), Context);
-
- // Get the Hi+Offset-Lo expression.
- const MCExpr *Diff = MCBinaryExpr::CreateSub(
- Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
-
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = Context.CreateTempSymbol();
- Streamer.EmitAssignment(SetLabel, Diff);
- Streamer.EmitSymbolValue(SetLabel, Size);
-}
-
/// EmitValue - Emit debug information entry offset.
///
-void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_ref_addr) {
const DwarfDebug *DD = AP->getDwarfDebug();
@@ -413,14 +485,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
DIEEntry::getRefAddrSize(AP));
else
- emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
- CU->getSectionSym(),
- DIEEntry::getRefAddrSize(AP));
+ AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
} else
AP->EmitInt32(Entry.getOffset());
}
-unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
+unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
// DWARF4: References that use the attribute form DW_FORM_ref_addr are
// specified to be four bytes in the DWARF 32-bit format and eight bytes
// in the DWARF 64-bit format, while DWARF Version 2 specifies that such
@@ -433,7 +503,7 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
}
#ifndef NDEBUG
-void DIEEntry::print(raw_ostream &O) const {
+void DIEEntry::printImpl(raw_ostream &O) const {
O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
}
#endif
@@ -441,17 +511,15 @@ void DIEEntry::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
// DIETypeSignature Implementation
//===----------------------------------------------------------------------===//
-void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIETypeSignature::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
assert(Form == dwarf::DW_FORM_ref_sig8);
- Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
+ Asm->OutStreamer->EmitIntValue(Unit.getTypeSignature(), 8);
}
#ifndef NDEBUG
-void DIETypeSignature::print(raw_ostream &O) const {
+void DIETypeSignature::printImpl(raw_ostream &O) const {
O << format("Type Unit: 0x%lx", Unit.getTypeSignature());
}
-
-void DIETypeSignature::dump() const { print(dbgs()); }
#endif
//===----------------------------------------------------------------------===//
@@ -460,7 +528,7 @@ void DIETypeSignature::dump() const { print(dbgs()); }
/// ComputeSize - calculate the size of the location expression.
///
-unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
+unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -472,7 +540,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit location data.
///
-void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -490,7 +558,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of location data in bytes.
///
-unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -503,7 +571,7 @@ unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
}
#ifndef NDEBUG
-void DIELoc::print(raw_ostream &O) const {
+void DIELoc::printImpl(raw_ostream &O) const {
O << "ExprLoc: ";
DIE::print(O, 5);
}
@@ -515,7 +583,7 @@ void DIELoc::print(raw_ostream &O) const {
/// ComputeSize - calculate the size of the block.
///
-unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
+unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -527,7 +595,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit block data.
///
-void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -543,7 +611,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of block data in bytes.
///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -554,7 +622,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
}
#ifndef NDEBUG
-void DIEBlock::print(raw_ostream &O) const {
+void DIEBlock::printImpl(raw_ostream &O) const {
O << "Blk: ";
DIE::print(O, 5);
}
@@ -564,7 +632,7 @@ void DIEBlock::print(raw_ostream &O) const {
// DIELocList Implementation
//===----------------------------------------------------------------------===//
-unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -574,18 +642,18 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit location list value.
///
-void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const {
DwarfDebug *DD = AP->getDwarfDebug();
- MCSymbol *Label = DD->getDebugLocEntries()[Index].Label;
+ MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
- AP->EmitSectionOffset(Label, DD->getDebugLocSym());
+ AP->emitSectionOffset(Label);
else
- AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4);
+ AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
#ifndef NDEBUG
-void DIELocList::print(raw_ostream &O) const {
+void DIELocList::printImpl(raw_ostream &O) const {
O << "LocList: " << Index;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1e2ba2c..a2e5aad 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -285,8 +285,8 @@ void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) {
void DIEHash::hashLocList(const DIELocList &LocList) {
HashingByteStreamer Streamer(*this);
DwarfDebug &DD = *AP->getDwarfDebug();
- for (const auto &Entry :
- DD.getDebugLocEntries()[LocList.getValue()].List)
+ const DebugLocStream &Locs = DD.getDebugLocs();
+ for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
DD.emitDebugLocEntry(Streamer, Entry);
}
@@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
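The three hunks above swap an unaligned, type-punning cast for LLVM's endian helpers. A minimal sketch of the idiom, assuming a raw 16-byte MD5 digest as input (the helper name low64 is illustrative, not from the patch):

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    // Take the least significant 8 bytes of a 16-byte MD5 digest.
    static uint64_t low64(const uint8_t *Digest) {
      // read64le performs an unaligned-safe little-endian load, avoiding the
      // reinterpret_cast to support::ulittle64_t that assumed the buffer was
      // suitably aligned for a 64-bit access.
      return llvm::support::endian::read64le(Digest + 8);
    }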
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 0c2a5e5..3c46a99 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
@@ -32,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
+void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
@@ -47,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
Ranges.push_back(std::make_pair(&MI, nullptr));
}
-void DbgValueHistoryMap::endInstrRange(const MDNode *Var,
+void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
const MachineInstr &MI) {
auto &Ranges = VarInstrRanges[Var];
// Verify that the current instruction range is not yet closed.
@@ -58,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var,
Ranges.back().second = &MI;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const {
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
const auto &I = VarInstrRanges.find(Var);
if (I == VarInstrRanges.end())
return 0;
@@ -70,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const {
namespace {
// Maps physreg numbers to the variables they describe.
-typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap;
+typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap;
}
// \brief Claim that @Var is not described by @RegNo anymore.
-static void dropRegDescribedVar(RegDescribedVarsMap &RegVars,
- unsigned RegNo, const MDNode *Var) {
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
@@ -88,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars,
}
// \brief Claim that @Var is now described by @RegNo.
-static void addRegDescribedVar(RegDescribedVarsMap &RegVars,
- unsigned RegNo, const MDNode *Var) {
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
@@ -202,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// Use the base variable (without any DW_OP_piece expressions)
// as index into History. The full variables including the
// piece expressions are attached to the MI.
- DIVariable Var = MI.getDebugVariable();
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
if (unsigned PrevReg = Result.getRegisterForVar(Var))
dropRegDescribedVar(RegVars, PrevReg, Var);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 4b62007..546d1b4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -17,7 +17,8 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
-class MDNode;
+class DILocalVariable;
+class DILocation;
class TargetRegisterInfo;
// For each user variable, keep a list of instruction ranges where this variable
@@ -31,16 +32,19 @@ class DbgValueHistoryMap {
public:
typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
typedef SmallVector<InstrRange, 4> InstrRanges;
- typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+ typedef std::pair<const DILocalVariable *, const DILocation *>
+ InlinedVariable;
+ typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap;
+
private:
InstrRangesMap VarInstrRanges;
public:
- void startInstrRange(const MDNode *Var, const MachineInstr &MI);
- void endInstrRange(const MDNode *Var, const MachineInstr &MI);
+ void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
// Returns the register currently describing @Var. If @Var is currently
// inaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(const MDNode *Var) const;
+ unsigned getRegisterForVar(InlinedVariable Var) const;
bool empty() const { return VarInstrRanges.empty(); }
void clear() { VarInstrRanges.clear(); }
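Keying the history map on a (variable, inlined-at) pair instead of a bare MDNode gives each inlined copy of a source variable its own history. A hedged usage sketch mirroring the calculateDbgValueHistory hunk above (Map and MI are assumed to be in scope):

    typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;

    // Build the per-inlining key for a DBG_VALUE machine instruction.
    const DILocalVariable *RawVar = MI.getDebugVariable();
    InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
    Map.startInstrRange(Var, MI); // ranges are now tracked per inlined copy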
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index b4fcada..6a943c6 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,52 +9,51 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
namespace llvm {
-class MDNode;
+class AsmPrinter;
+class DebugLocStream;
+
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
- // Begin and end symbols for the address range that this location is valid.
+ /// Begin and end symbols for the address range over which this location is valid.
const MCSymbol *Begin;
const MCSymbol *End;
public:
- /// A single location or constant.
+ /// \brief A single location or constant.
struct Value {
- Value(const MDNode *Var, const MDNode *Expr, int64_t i)
- : Variable(Var), Expression(Expr), EntryKind(E_Integer) {
+ Value(const DIExpression *Expr, int64_t i)
+ : Expression(Expr), EntryKind(E_Integer) {
Constant.Int = i;
}
- Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP)
- : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) {
+ Value(const DIExpression *Expr, const ConstantFP *CFP)
+ : Expression(Expr), EntryKind(E_ConstantFP) {
Constant.CFP = CFP;
}
- Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP)
- : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) {
+ Value(const DIExpression *Expr, const ConstantInt *CIP)
+ : Expression(Expr), EntryKind(E_ConstantInt) {
Constant.CIP = CIP;
}
- Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
- : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
- assert(DIVariable(Var).Verify());
- assert(DIExpression(Expr).Verify());
+ Value(const DIExpression *Expr, MachineLocation Loc)
+ : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(cast<DIExpression>(Expr)->isValid());
}
- // The variable to which this location entry corresponds.
- const MDNode *Variable;
-
- // Any complex address location expression for this Value.
- const MDNode *Expression;
+ /// Any complex address location expression for this Value.
+ const DIExpression *Expression;
- // Type of entry that this represents.
+ /// Type of entry that this represents.
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
enum EntryType EntryKind;
- // Either a constant,
+ /// Either a constant,
union {
int64_t Int;
const ConstantFP *CFP;
@@ -72,10 +71,8 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- const MDNode *getVariableNode() const { return Variable; }
- DIVariable getVariable() const { return DIVariable(Variable); }
- bool isVariablePiece() const { return getExpression().isVariablePiece(); }
- DIExpression getExpression() const { return DIExpression(Expression); }
+ bool isBitPiece() const { return getExpression()->isBitPiece(); }
+ const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
};
@@ -92,17 +89,14 @@ public:
}
/// \brief If this and Next are describing different pieces of the same
- // variable, merge them by appending Next's values to the current
- // list of values.
- // Return true if the merge was successful.
+ /// variable, merge them by appending Next's values to the current
+ /// list of values.
+ /// Return true if the merge was successful.
bool MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
- DIExpression Expr(Values[0].Expression);
- DIVariable Var(Values[0].Variable);
- DIExpression NextExpr(Next.Values[0].Expression);
- DIVariable NextVar(Next.Values[0].Variable);
- if (Var == NextVar && Expr.isVariablePiece() &&
- NextExpr.isVariablePiece()) {
+ auto *Expr = cast_or_null<DIExpression>(Values[0].Expression);
+ auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression);
+ if (Expr->isBitPiece() && NextExpr->isBitPiece()) {
addValues(Next.Values);
End = Next.End;
return true;
@@ -131,24 +125,28 @@ public:
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
- return V.isVariablePiece();
+ return V.isBitPiece();
}) && "value must be a piece");
}
- // Sort the pieces by offset.
+ // \brief Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
std::sort(Values.begin(), Values.end());
- Values.erase(std::unique(Values.begin(), Values.end(),
- [](const Value &A, const Value &B) {
- return A.getVariable() == B.getVariable() &&
- A.getExpression() == B.getExpression();
- }),
- Values.end());
+ Values.erase(
+ std::unique(
+ Values.begin(), Values.end(), [](const Value &A, const Value &B) {
+ return A.getExpression() == B.getExpression();
+ }),
+ Values.end());
}
+
+ /// \brief Lower this entry into a DWARF expression.
+ void finalize(const AsmPrinter &AP, DebugLocStream &Locs,
+ const DIBasicType *BT);
};
-/// Compare two Values for equality.
+/// \brief Compare two Values for equality.
inline bool operator==(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
if (A.EntryKind != B.EntryKind)
@@ -157,9 +155,6 @@ inline bool operator==(const DebugLocEntry::Value &A,
if (A.Expression != B.Expression)
return false;
- if (A.Variable != B.Variable)
- return false;
-
switch (A.EntryKind) {
case DebugLocEntry::Value::E_Location:
return A.Loc == B.Loc;
@@ -173,11 +168,11 @@ inline bool operator==(const DebugLocEntry::Value &A,
llvm_unreachable("unhandled EntryKind");
}
-/// Compare two pieces based on their offset.
+/// \brief Compare two pieces based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
- return A.getExpression().getPieceOffset() <
- B.getExpression().getPieceOffset();
+ return A.getExpression()->getBitPieceOffset() <
+ B.getExpression()->getBitPieceOffset();
}
}
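With the variable pointer gone from Value, MergeValues only needs a matching begin label and bit-piece expressions on both sides. An illustrative sketch, assuming the usual (Begin, End, Value) constructor; Begin, End, Lo, Hi, RAX, and RDX are hypothetical stand-ins (Lo and Hi would be valid DW_OP_bit_piece expressions):

    // Two registers each hold one piece of the same variable over one range.
    DebugLocEntry A(Begin, End, DebugLocEntry::Value(Lo, MachineLocation(RAX)));
    DebugLocEntry B(Begin, End, DebugLocEntry::Value(Hi, MachineLocation(RDX)));
    // Succeeds: same Begin and both expressions are bit pieces, so B's
    // values are appended to A and A's range is extended to B's End.
    bool Merged = A.MergeValues(B);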
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h
deleted file mode 100644
index 0f1d2ed..0000000
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
-
-#include "DebugLocEntry.h"
-#include "llvm/ADT/SmallVector.h"
-
-namespace llvm {
-class DwarfCompileUnit;
-class MCSymbol;
-struct DebugLocList {
- MCSymbol *Label;
- DwarfCompileUnit *CU;
- SmallVector<DebugLocEntry, 4> List;
-};
-}
-#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
new file mode 100644
index 0000000..3001da2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -0,0 +1,133 @@
+//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "ByteStreamer.h"
+
+namespace llvm {
+class DwarfCompileUnit;
+class MCSymbol;
+
+/// \brief Byte stream of .debug_loc entries.
+///
+/// Stores a unified stream of .debug_loc entries. There's a \a List for each
+/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry.
+///
+/// FIXME: Do we need all these temp symbols?
+/// FIXME: Why not output directly to the output stream?
+class DebugLocStream {
+public:
+ struct List {
+ DwarfCompileUnit *CU;
+ MCSymbol *Label;
+ size_t EntryOffset;
+ List(DwarfCompileUnit *CU, MCSymbol *Label, size_t EntryOffset)
+ : CU(CU), Label(Label), EntryOffset(EntryOffset) {}
+ };
+ struct Entry {
+ const MCSymbol *BeginSym;
+ const MCSymbol *EndSym;
+ size_t ByteOffset;
+ size_t CommentOffset;
+ Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset,
+ size_t CommentOffset)
+ : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset),
+ CommentOffset(CommentOffset) {}
+ };
+
+private:
+ SmallVector<List, 4> Lists;
+ SmallVector<Entry, 32> Entries;
+ SmallString<256> DWARFBytes;
+ SmallVector<std::string, 32> Comments;
+
+ /// \brief Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise.
+ bool GenerateComments;
+
+public:
+ DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) {}
+ size_t getNumLists() const { return Lists.size(); }
+ const List &getList(size_t LI) const { return Lists[LI]; }
+ ArrayRef<List> getLists() const { return Lists; }
+
+ /// \brief Start a new .debug_loc entry list.
+ ///
+ /// Start a new .debug_loc entry list. Return the new list's index so it can
+ /// be retrieved later via \a getList().
+ ///
+ /// Until the next call, \a startEntry() will add entries to this list.
+ size_t startList(DwarfCompileUnit *CU, MCSymbol *Label) {
+ size_t LI = Lists.size();
+ Lists.emplace_back(CU, Label, Entries.size());
+ return LI;
+ }
+
+ /// \brief Start a new .debug_loc entry.
+ ///
+ /// Until the next call, bytes added to the stream will be added to this
+ /// entry.
+ void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) {
+ Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size());
+ }
+
+ BufferByteStreamer getStreamer() {
+ return BufferByteStreamer(DWARFBytes, Comments, GenerateComments);
+ }
+
+ ArrayRef<Entry> getEntries(const List &L) const {
+ size_t LI = getIndex(L);
+ return makeArrayRef(Entries)
+ .slice(Lists[LI].EntryOffset, getNumEntries(LI));
+ }
+
+ ArrayRef<char> getBytes(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end())
+ .slice(Entries[EI].ByteOffset, getNumBytes(EI));
+ }
+ ArrayRef<std::string> getComments(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return makeArrayRef(Comments)
+ .slice(Entries[EI].CommentOffset, getNumComments(EI));
+ }
+
+private:
+ size_t getIndex(const List &L) const {
+ assert(&Lists.front() <= &L && &L <= &Lists.back() &&
+ "Expected valid list");
+ return &L - &Lists.front();
+ }
+ size_t getIndex(const Entry &E) const {
+ assert(&Entries.front() <= &E && &E <= &Entries.back() &&
+ "Expected valid entry");
+ return &E - &Entries.front();
+ }
+ size_t getNumEntries(size_t LI) const {
+ if (LI + 1 == Lists.size())
+ return Entries.size() - Lists[LI].EntryOffset;
+ return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset;
+ }
+ size_t getNumBytes(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return DWARFBytes.size() - Entries[EI].ByteOffset;
+ return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset;
+ }
+ size_t getNumComments(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return Comments.size() - Entries[EI].CommentOffset;
+ return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset;
+ }
+};
+}
+#endif
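A hedged usage sketch for the new stream (CU, Label, Begin, and End are assumed to come from the caller, and consume() is a hypothetical sink; in-tree, DwarfDebug drives this):

    DebugLocStream Locs(/*GenerateComments=*/false);
    size_t LI = Locs.startList(CU, Label);        // one List per variable
    Locs.startEntry(Begin, End);                  // one Entry per DebugLocEntry
    BufferByteStreamer BS = Locs.getStreamer();
    BS.EmitInt8(dwarf::DW_OP_reg0, "DW_OP_reg0"); // bytes land in the open entry
    for (const auto &E : Locs.getEntries(Locs.getList(LI)))
      consume(Locs.getBytes(E), Locs.getComments(E)); // slice back per entry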
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a71f35e..58b406b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -30,14 +30,14 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
: Header(8 + (atomList.size() * 4)), HeaderData(atomList),
Entries(Allocator) {}
-void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die,
+void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
char Flags) {
assert(Data.empty() && "Already finalized!");
// If the string is already in the list, add this die to it;
// otherwise add a new one.
- DataArray &DIEs = Entries[Name];
- assert(!DIEs.StrSym || DIEs.StrSym == StrSym);
- DIEs.StrSym = StrSym;
+ DataArray &DIEs = Entries[Name.getString()];
+ assert(!DIEs.Name || DIEs.Name == Name);
+ DIEs.Name = Name;
DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
}
@@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) {
// Then compute the bucket count, with a minimum of 1 bucket.
if (num > 1024)
Header.bucket_count = num / 4;
- if (num > 16)
+ else if (num > 16)
Header.bucket_count = num / 2;
else
Header.bucket_count = num > 0 ? num : 1;
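The added else matters: without it, the num > 1024 result was dead, immediately overwritten by the num > 16 branch. Distilled into a standalone sketch:

    // Bucket count selection after the fix.
    unsigned bucketCountFor(unsigned num) {
      if (num > 1024) return num / 4; // before the fix, always overwritten
      if (num > 16)   return num / 2;
      return num > 0 ? num : 1;       // minimum of one bucket
    }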
@@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
+ Data.reserve(Entries.size());
for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
EI != EE; ++EI) {
@@ -95,33 +96,42 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
for (size_t i = 0, e = Data.size(); i < e; ++i) {
uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
Buckets[bucket].push_back(Data[i]);
- Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ Data[i]->Sym = Asm->createTempSymbol(Prefix);
}
+
+ // Sort the contents of the buckets by hash value so that hash
+ // collisions end up together. Stable sort makes testing easier and
+ // doesn't cost much more.
+ for (size_t i = 0; i < Buckets.size(); ++i)
+ std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+ [] (HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
}
// Emits the header for the table via the AsmPrinter.
void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
- Asm->OutStreamer.AddComment("Header Magic");
+ Asm->OutStreamer->AddComment("Header Magic");
Asm->EmitInt32(Header.magic);
- Asm->OutStreamer.AddComment("Header Version");
+ Asm->OutStreamer->AddComment("Header Version");
Asm->EmitInt16(Header.version);
- Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->OutStreamer->AddComment("Header Hash Function");
Asm->EmitInt16(Header.hash_function);
- Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->OutStreamer->AddComment("Header Bucket Count");
Asm->EmitInt32(Header.bucket_count);
- Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->OutStreamer->AddComment("Header Hash Count");
Asm->EmitInt32(Header.hashes_count);
- Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->OutStreamer->AddComment("Header Data Length");
Asm->EmitInt32(Header.header_data_len);
- Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
Asm->EmitInt32(HeaderData.die_offset_base);
- Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->OutStreamer->AddComment("HeaderData Atom Count");
Asm->EmitInt32(HeaderData.Atoms.size());
for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
Atom A = HeaderData.Atoms[i];
- Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type));
+ Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type));
Asm->EmitInt16(A.type);
- Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form));
Asm->EmitInt16(A.form);
}
}
@@ -131,24 +141,37 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
unsigned index = 0;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ Asm->OutStreamer->AddComment("Bucket " + Twine(i));
if (Buckets[i].size() != 0)
Asm->EmitInt32(index);
else
Asm->EmitInt32(UINT32_MAX);
- index += Buckets[i].size();
+ // Buckets point into the list of hashes, not into the data. Do not
+ // increment the index multiple times in case of hash collisions.
+ uint64_t PrevHash = UINT64_MAX;
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
}
}
// Walk through the buckets and emit the individual hashes for each
// bucket.
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
- Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
- Asm->EmitInt32((*HI)->HashValue);
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32(HashValue);
+ PrevHash = HashValue;
}
}
}
@@ -157,17 +180,22 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
// element in each bucket. This is done via a symbol subtraction from the
// beginning of the section. The non-section symbol will be output later
// when we emit the actual data.
-void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
- Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
- MCContext &Context = Asm->OutStreamer.getContext();
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
+ Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer->getContext();
const MCExpr *Sub = MCBinaryExpr::CreateSub(
MCSymbolRefExpr::Create((*HI)->Sym, Context),
MCSymbolRefExpr::Create(SecBegin, Context), Context);
- Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
+ Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t));
}
}
}
@@ -175,18 +203,21 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
- MCSymbol *StrSym) {
- uint64_t PrevHash = UINT64_MAX;
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ uint64_t PrevHash = UINT64_MAX;
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ // Terminate the previous entry if there is no hash collision
+ // with the current one.
+ if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
// Remember to emit the label for our offset.
- Asm->OutStreamer.EmitLabel((*HI)->Sym);
- Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
- Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->OutStreamer->EmitLabel((*HI)->Sym);
+ Asm->OutStreamer->AddComment((*HI)->Str);
+ Asm->emitDwarfStringOffset((*HI)->Data.Name);
+ Asm->OutStreamer->AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
@@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
Asm->EmitInt8(HD->Flags);
}
}
- // Emit a 0 to terminate the data unless we have a hash collision.
- if (PrevHash != (*HI)->HashValue)
- Asm->EmitInt32(0);
PrevHash = (*HI)->HashValue;
}
+ // Emit the final end marker for the bucket.
+ if (!Buckets[i].empty())
+ Asm->EmitInt32(0);
}
}
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
- MCSymbol *StrSym) {
+void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
+ DwarfDebug *D) {
// Emit the header.
EmitHeader(Asm);
@@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
EmitHashes(Asm);
// Emit the offsets.
- EmitOffsets(Asm, SecBegin);
+ emitOffsets(Asm, SecBegin);
// Emit the hash data.
- EmitData(Asm, D, StrSym);
+ EmitData(Asm, D);
}
#ifndef NDEBUG
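Taken together, the table-emission hunks make entries that collide on the full 32-bit hash share one bucket slot. A distilled sketch of the new per-bucket data emission, where Bucket and emitHashData() are illustrative stand-ins for the loop body above:

    uint64_t PrevHash = UINT64_MAX;
    for (HashData *HD : Bucket) {
      // Close the previous entry only when the hash changes; colliding
      // entries share a single offset slot and run together.
      if (PrevHash != UINT64_MAX && PrevHash != HD->HashValue)
        Asm->EmitInt32(0);
      emitHashData(Asm, HD); // label, string offset, DIE count, DIE data
      PrevHash = HD->HashValue;
    }
    if (!Bucket.empty())
      Asm->EmitInt32(0); // terminate the bucket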
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4a6085b..4d81441 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -181,9 +181,8 @@ public:
private:
// String Data
struct DataArray {
- MCSymbol *StrSym;
+ DwarfStringPoolEntryRef Name;
std::vector<HashDataContents *> Values;
- DataArray() : StrSym(nullptr) {}
};
friend struct HashData;
struct HashData {
@@ -201,7 +200,7 @@ private:
O << " Hash Value: " << format("0x%x", HashValue) << "\n";
O << " Symbol: ";
if (Sym)
- Sym->print(O);
+ O << *Sym;
else
O << "<none>";
O << "\n";
@@ -215,15 +214,15 @@ private:
#endif
};
- DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
- void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
+ DwarfAccelTable(const DwarfAccelTable &) = delete;
+ void operator=(const DwarfAccelTable &) = delete;
// Internal Functions
void EmitHeader(AsmPrinter *);
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
- void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
+ void emitOffsets(AsmPrinter *, const MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -245,10 +244,9 @@ private:
// Public Implementation
public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
- void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
- char Flags = 0);
+ void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
- void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
+ void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 0dc52da..0bc873e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,9 +39,24 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
+ : EHStreamer(A), shouldEmitCFI(false) {}
+
+void DwarfCFIExceptionBase::markFunctionEnd() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->EmitCFIEndProc();
+
+ if (MMI->getLandingPads().empty())
+ return;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+}
+
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+ : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
+ shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -49,9 +64,10 @@ DwarfCFIException::~DwarfCFIException() {}
/// content.
void DwarfCFIException::endModule() {
if (moveTypeModule == AsmPrinter::CFI_M_Debug)
- Asm->OutStreamer.EmitCFISections(false, true);
+ Asm->OutStreamer->EmitCFISections(false, true);
- if (!Asm->MAI->usesItaniumLSDAForExceptions())
+ // SjLj uses this pass, but doesn't need this info.
+ if (!Asm->MAI->usesCFIForEH())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -67,12 +83,10 @@ void DwarfCFIException::endModule() {
if (!Personalities[i])
continue;
MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
- TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
+ TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym);
}
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -90,7 +104,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const Function *Per = MMI->getPersonality();
shouldEmitPersonality = hasLandingPads &&
PerEncoding != dwarf::DW_EH_PE_omit && Per;
@@ -99,10 +113,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- if (!shouldEmitPersonality && !shouldEmitMoves)
+ shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves;
+ if (!shouldEmitCFI)
return;
- Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false);
+ Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
// Indicate personality routine, if any.
if (!shouldEmitPersonality)
@@ -110,45 +125,20 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const MCSymbol *Sym =
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
- Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
-
- MCSymbol *EHBegin =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
- if (Asm->MAI->useAssignmentForEHBegin()) {
- MCContext &Ctx = Asm->OutContext;
- MCSymbol *CurPos = Ctx.CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(CurPos);
- Asm->OutStreamer.EmitAssignment(EHBegin,
- MCSymbolRefExpr::Create(CurPos, Ctx));
- } else {
- Asm->OutStreamer.EmitLabel(EHBegin);
- }
+ Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
// Provide LSDA information.
if (!shouldEmitLSDA)
return;
- Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()),
- LSDAEncoding);
+ Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding);
}
/// endFunction - Gather and emit post-function exception information.
///
void DwarfCFIException::endFunction(const MachineFunction *) {
- if (!shouldEmitPersonality && !shouldEmitMoves)
- return;
-
- Asm->OutStreamer.EmitCFIEndProc();
-
if (!shouldEmitPersonality)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
emitExceptionTable();
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index b4dba9c..c10e703 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,6 +1,7 @@
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -15,11 +16,11 @@
namespace llvm {
-DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
: DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
- Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) {
+ Skeleton(nullptr), BaseAddress(nullptr) {
insertDIE(Node, &getUnitDie());
}
@@ -63,9 +64,9 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
// FIXME: add a better feature test than hasRawTextSupport. Even better,
// extend .file to support this.
- return Asm->OutStreamer.EmitDwarfFileDirective(
+ return Asm->OutStreamer->EmitDwarfFileDirective(
0, DirName, FileName,
- Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
+ Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
}
// Return const expression if value is a GEP to access merged global
@@ -96,56 +97,57 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
}
/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
-DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
+DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
+ const DIGlobalVariable *GV) {
// Check for pre-existence.
if (DIE *Die = getDIE(GV))
return Die;
- assert(GV.isGlobalVariable());
+ assert(GV);
- DIScope GVContext = GV.getContext();
- DIType GTy = DD->resolve(GV.getType());
+ auto *GVContext = GV->getScope();
+ auto *GTy = DD->resolve(GV->getType());
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(GVContext);
// Add to map.
- DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
- DIScope DeclContext;
-
- if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) {
- DeclContext = resolve(SDMDecl.getContext());
- assert(SDMDecl.isStaticMember() && "Expected static member decl");
- assert(GV.isDefinition());
+ DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
+ DIScope *DeclContext;
+ if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
+ DeclContext = resolve(SDMDecl->getScope());
+ assert(SDMDecl->isStaticMember() && "Expected static member decl");
+ assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
} else {
- DeclContext = GV.getContext();
+ DeclContext = GV->getScope();
// Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
addType(*VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit())
+ if (!GV->isLocalToUnit())
addFlag(*VariableDIE, dwarf::DW_AT_external);
// Add line number info.
addSourceLine(*VariableDIE, GV);
}
- if (!GV.isDefinition())
+ if (!GV->isDefinition())
addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+ else
+ addGlobalName(GV->getName(), *VariableDIE, DeclContext);
// Add location.
bool addToAccelTable = false;
- bool isGlobalVariable = GV.getGlobal() != nullptr;
- if (isGlobalVariable) {
+ if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
addToAccelTable = true;
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
- if (GV.getGlobal()->isThreadLocal()) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
// FIXME: Make this work with -gsplit-dwarf.
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
@@ -164,28 +166,21 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
addUInt(*Loc, dwarf::DW_FORM_udata,
DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- // 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- // Add the linkage name.
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(*VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ addLinkageName(*VariableDIE, GV->getLinkageName());
} else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) {
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
addToAccelTable = true;
// GV is a merged global.
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
@@ -202,15 +197,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
}
if (addToAccelTable) {
- DD->addAccelName(GV.getName(), *VariableDIE);
+ DD->addAccelName(GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- DD->addAccelName(GV.getLinkageName(), *VariableDIE);
+ if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
+ DD->addAccelName(GV->getLinkageName(), *VariableDIE);
}
- addGlobalName(GV.getName(), *VariableDIE, DeclContext);
return VariableDIE;
}
@@ -243,10 +237,10 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
addSectionDelta(Die, Attribute, Label, Sec);
}
-void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
- Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
+ Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
stmtListIndex = UnitDie.getValues().size();
@@ -255,8 +249,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
- DwarfLineSectionSym);
+ TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
@@ -282,18 +277,17 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
-DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
- attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym());
+ attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
- const TargetRegisterInfo *RI =
- Asm->TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
if (RI->isPhysicalRegister(Location.getReg()))
addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
@@ -312,9 +306,9 @@ void DwarfCompileUnit::constructScopeDIE(
if (!Scope || !Scope->getScopeNode())
return;
- DIScope DS(Scope->getScopeNode());
+ auto *DS = Scope->getScopeNode();
- assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+ assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
"Only handle inlined subprograms here, use "
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
@@ -325,7 +319,7 @@ void DwarfCompileUnit::constructScopeDIE(
// avoid creating un-used children then removing them later when we find out
// the scope DIE is null.
std::unique_ptr<DIE> ScopeDIE;
- if (Scope->getParent() && DS.isSubprogram()) {
+ if (Scope->getParent() && isa<DISubprogram>(DS)) {
ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
@@ -347,7 +341,7 @@ void DwarfCompileUnit::constructScopeDIE(
// There is no need to emit empty lexical block DIE.
for (const auto &E : DD->findImportedEntitiesForScope(DS))
Children.push_back(
- constructImportedEntityDIE(DIImportedEntity(E.second)));
+ constructImportedEntityDIE(cast<DIImportedEntity>(E.second)));
}
// If there are only other scopes as children, put them directly in the
@@ -379,13 +373,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
// Emit offset in .debug_range as a relocatable label. emitDIE will handle
// emitting it appropriately.
- auto *RangeSectionSym = DD->getRangeSectionSym();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(
- Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()),
- std::move(Range));
+ RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
@@ -424,8 +419,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
std::unique_ptr<DIE>
DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
assert(Scope->getScopeNode());
- DIScope DS(Scope->getScopeNode());
- DISubprogram InlinedSP = getDISubprogram(DS);
+ auto *DS = Scope->getScopeNode();
+ auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
@@ -437,10 +432,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
// Add the call site information to the DIE.
- DILocation DL(Scope->getInlinedAt());
+ const DILocation *IA = Scope->getInlinedAt();
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
- getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
- addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+ getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
// Add the name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -486,7 +481,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// Add variable address.
- unsigned Offset = DV.getDotDebugLocOffset();
+ unsigned Offset = DV.getDebugLocListIndex();
if (Offset != ~0U) {
addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
return VariableDie;
@@ -516,15 +511,23 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
}
// .. else use frame index.
- int FI = DV.getFrameIndex();
- if (FI != ~0) {
+ if (DV.getFrameIndex().back() == ~0)
+ return VariableDie;
+
+ auto Expr = DV.getExpression().begin();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ for (auto FI : DV.getFrameIndex()) {
unsigned FrameReg = 0;
- const TargetFrameLowering *TFI =
- Asm->TM.getSubtargetImpl()->getFrameLowering();
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- addVariableAddress(DV, *VariableDie, Location);
+ assert(Expr != DV.getExpression().end() &&
+ "Wrong number of expressions");
+ DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
+ DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
+ ++Expr;
}
+ addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
return VariableDie;
}
@@ -560,16 +563,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(!Scope->getInlinedAt());
assert(!Scope->isAbstractScope());
- DISubprogram Sub(Scope->getScopeNode());
-
- assert(Sub.isSubprogram());
+ auto *Sub = cast<DISubprogram>(Scope->getScopeNode());
DD->getProcessedSPNodes().insert(Sub);
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
// If this is a variadic function, add an unspecified parameter.
- DITypeArray FnArgs = Sub.getType().getTypeArray();
+ DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
// Collect lexical scope children first.
// ObjectPointer might be a (non-argument) local variable if it's a
@@ -580,8 +581,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
// If we have a single null element, it is a function that returns void.
// If we have more than one element and the last one is null, it is a
// variadic function.
- if (FnArgs.getNumElements() > 1 &&
- !FnArgs.getElement(FnArgs.getNumElements() - 1) &&
+ if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
!includeMinimalInlineScopes())
ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
}
@@ -605,26 +605,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
if (AbsDef)
return;
- DISubprogram SP(Scope->getScopeNode());
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
DIE *ContextDIE;
if (includeMinimalInlineScopes())
ContextDIE = &getUnitDie();
// Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
- // the important distinction that the DIDescriptor is not associated with the
- // DIE (since the DIDescriptor will be associated with the concrete DIE, if
+ // the important distinction that the debug node is not associated with the
+ // DIE (since the debug node will be associated with the concrete DIE, if
// any). It could be refactored to some common utility function.
- else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ else if (auto *SPDecl = SP->getDeclaration()) {
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else
- ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+ ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
- // Passing null as the associated DIDescriptor because the abstract definition
+ // Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
- AbsDef =
- &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor());
+ AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
applySubprogramAttributesToDefinition(SP, *AbsDef);
if (!includeMinimalInlineScopes())
@@ -634,36 +633,33 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
}
std::unique_ptr<DIE>
-DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) {
- assert(Module.Verify() &&
- "Use one of the MDNode * overloads to handle invalid metadata");
- std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag());
+DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity *Module) {
+ std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie.get());
DIE *EntityDie;
- DIDescriptor Entity = resolve(Module.getEntity());
- if (Entity.isNameSpace())
- EntityDie = getOrCreateNameSpace(DINameSpace(Entity));
- else if (Entity.isSubprogram())
- EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity));
- else if (Entity.isType())
- EntityDie = getOrCreateTypeDIE(DIType(Entity));
- else if (Entity.isGlobalVariable())
- EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity));
+ auto *Entity = resolve(Module->getEntity());
+ if (auto *NS = dyn_cast<DINamespace>(Entity))
+ EntityDie = getOrCreateNameSpace(NS);
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity))
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ else if (auto *T = dyn_cast<DIType>(Entity))
+ EntityDie = getOrCreateTypeDIE(T);
+ else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
+ EntityDie = getOrCreateGlobalVariableDIE(GV);
else
EntityDie = getDIE(Entity);
assert(EntityDie);
- addSourceLine(*IMDie, Module.getLineNumber(),
- Module.getContext().getFilename(),
- Module.getContext().getDirectory());
+ addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
+ Module->getScope()->getDirectory());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
- StringRef Name = Module.getName();
+ StringRef Name = Module->getName();
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
return IMDie;
}
-void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) {
+void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
if (D)
@@ -680,39 +676,39 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) {
applySubprogramAttributesToDefinition(SP, *D);
}
}
-void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
- assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram");
- assert(SP.isDefinition() &&
+void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) {
+ assert(SP && "CU's subprogram list contains a non-subprogram");
+ assert(SP->isDefinition() &&
"CU's subprogram list contains a subprogram declaration");
- DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0)
+ auto Variables = SP->getVariables();
+ if (Variables.size() == 0)
return;
DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
if (!SPDIE)
SPDIE = getDIE(SP);
assert(SPDIE);
- for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
- DIVariable DV(Variables.getElement(vi));
- assert(DV.isVariable());
- DbgVariable NewVar(DV, DIExpression(nullptr), DD);
+ for (const DILocalVariable *DV : Variables) {
+ DbgVariable NewVar(DV, /* IA */ nullptr, /* Expr */ nullptr, DD);
auto VariableDie = constructVariableDIE(NewVar);
applyVariableAttributes(NewVar, *VariableDie);
SPDIE->addChild(std::move(VariableDie));
}
}
-void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
- Asm->OutStreamer.EmitLabel(LabelBegin);
+ if (!Skeleton) {
+ LabelBegin = Asm->createTempSymbol("cu_begin");
+ Asm->OutStreamer->EmitLabel(LabelBegin);
+ }
- DwarfUnit::emitHeader(ASectionSym);
+ DwarfUnit::emitHeader(UseOffsets);
}
/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
- DIScope Context) {
+ const DIScope *Context) {
if (includeMinimalInlineScopes())
return;
std::string FullName = getParentContextString(Context) + Name.str();
@@ -720,11 +716,11 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
}
/// Add a new global type to the unit.
-void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die,
- DIScope Context) {
+void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
if (includeMinimalInlineScopes())
return;
- std::string FullName = getParentContextString(Context) + Ty.getName().str();
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
}
@@ -737,18 +733,16 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
else if (DV.isBlockByrefVariable())
addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
else
- addAddress(Die, dwarf::DW_AT_location, Location,
- DV.getVariable().isIndirect());
+ addAddress(Die, dwarf::DW_AT_location, Location);
}
/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location,
- bool Indirect) {
+ const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
bool validReg;
- if (Location.isReg() && !Indirect)
+ if (Location.isReg())
validReg = addRegisterOpPiece(*Loc, Location.getReg());
else
validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
@@ -756,9 +750,6 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
if (!validReg)
return;
- if (!Location.isReg() && Indirect)
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
-
// Now attach the location information to the DIE.
addBlock(Die, Attribute, Loc);
}
@@ -772,22 +763,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- DIExpression Expr = DV.getExpression();
+ assert(DV.getExpression().size() == 1);
+ const DIExpression *Expr = DV.getExpression().back();
+ bool ValidReg;
if (Location.getOffset()) {
- if (DwarfExpr.AddMachineRegIndirect(Location.getReg(),
- Location.getOffset())) {
- DwarfExpr.AddExpression(Expr);
- assert(!DV.getVariable().isIndirect()
- && "double indirection not handled");
- }
- } else {
- if (DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()))
- if (DV.getVariable().isIndirect())
- DwarfExpr.EmitOp(dwarf::DW_OP_deref);
- }
+ ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(),
+ Location.getOffset());
+ if (ValidReg)
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
+ } else
+ ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg());
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ if (ValidReg)
+ addBlock(Die, Attribute, Loc);
}
/// Add a Dwarf loclistptr attribute data and value.
@@ -817,12 +806,12 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
Die.addValue((dwarf::Attribute)0, Form, Value);
}
-void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP,
- DIE &SPDie) {
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(
+ const DISubprogram *SP, DIE &SPDie) {
+ auto *SPDecl = SP->getDeclaration();
+ auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
- addGlobalName(SP.getName(), SPDie, Context);
+ addGlobalName(SP->getName(), SPDie, Context);
}
bool DwarfCompileUnit::isDwoUnit() const {
@@ -830,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const {
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
- return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly ||
+ return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 91164bc..50e4a54e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit {
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton;
- /// A label at the start of the non-dwo section related to this unit.
- MCSymbol *SectionSym;
-
/// The start of the unit within its section.
MCSymbol *LabelBegin;
@@ -69,20 +66,20 @@ class DwarfCompileUnit : public DwarfUnit {
bool includeMinimalInlineScopes() const;
public:
- DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
+ DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
}
- void initStmtList(MCSymbol *DwarfLineSectionSym);
+ void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
- DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV);
+ DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV);
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
@@ -116,7 +113,7 @@ public:
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
- DIE &updateSubprogramScopeDIE(DISubprogram SP);
+ DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
void constructScopeDIE(LexicalScope *Scope,
SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren);
@@ -159,31 +156,18 @@ public:
/// \brief Construct import_module DIE.
std::unique_ptr<DIE>
- constructImportedEntityDIE(const DIImportedEntity &Module);
+ constructImportedEntityDIE(const DIImportedEntity *Module);
- void finishSubprogramDefinition(DISubprogram SP);
+ void finishSubprogramDefinition(const DISubprogram *SP);
- void collectDeadVariables(DISubprogram SP);
+ void collectDeadVariables(const DISubprogram *SP);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
- MCSymbol *getSectionSym() const {
+ const MCSymbol *getSectionSym() const {
assert(Section);
- return SectionSym;
- }
-
- /// Pass in the SectionSym even though we could recreate it in every compile
- /// unit (type units will have actually distinct symbols once they're in
- /// comdat sections).
- void initSection(const MCSection *Section, MCSymbol *SectionSym) {
- DwarfUnit::initSection(Section);
- this->SectionSym = SectionSym;
-
- // Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
- LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+ return Section->getBeginSymbol();
}
unsigned getLength() {
@@ -191,7 +175,7 @@ public:
getHeaderSize() + UnitDie.getSize();
}
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
MCSymbol *getLabelBegin() const {
assert(Section);
@@ -199,10 +183,11 @@ public:
}
/// Add a new global name to the compile unit.
- void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override;
+ void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
/// Add a new global type to the compile unit.
- void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override;
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
@@ -213,7 +198,7 @@ public:
MachineLocation Location);
/// Add an address attribute to a die based on the location provided.
void addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect = false);
+ const MachineLocation &Location);
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable (navigating the
@@ -230,7 +215,8 @@ public:
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
- void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+ void applySubprogramAttributesToDefinition(const DISubprogram *SP,
+ DIE &SPDie);
/// getRangeLists - Get the vector of range lists.
const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index a587b46..105ff6c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,6 +14,7 @@
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
+#include "DebugLocEntry.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -105,24 +107,45 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
+void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+ BS.EmitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+ BS.EmitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+ BS.EmitULEB128(Value, Twine(Value));
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
+
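The three emitters above stream raw DWARF-expression bytes through the ByteStreamer. As a rough illustration of the wire format EmitULEB128 produces (a standalone sketch, not LLVM's implementation):

    #include <cstdint>
    #include <vector>

    // Encode Value as ULEB128: 7 payload bits per byte, high bit set on
    // every byte except the last.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out; // e.g. 624485 encodes as 0xE5 0x8E 0x26
    }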
//===----------------------------------------------------------------------===//
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
-template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const {
+template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const {
return DD->resolve(Ref);
}
bool DbgVariable::isBlockByrefVariable() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.isBlockByrefVariable(DD->getTypeIdentifierMap());
+ assert(Var && "Invalid complex DbgVariable!");
+ return Var->getType()
+ .resolve(DD->getTypeIdentifierMap())
+ ->isBlockByrefStruct();
}
-DIType DbgVariable::getType() const {
- DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap());
+const DIType *DbgVariable::getType() const {
+ DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap());
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
- if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) {
+ if (Ty->isBlockByrefStruct()) {
/* Byref variables, in Blocks, are declared by the programmer as
"SomeType VarName;", but the compiler creates a
__Block_byref_x_VarName struct, and gives the variable VarName
@@ -147,17 +170,17 @@ DIType DbgVariable::getType() const {
have a DW_AT_location that tells the debugger how to unwind through
the pointers and __Block_byref_x_VarName struct to find the actual
value of the variable. The function addBlockByrefType does this. */
- DIType subType = Ty;
- uint16_t tag = Ty.getTag();
+ DIType *subType = Ty;
+ uint16_t tag = Ty->getTag();
if (tag == dwarf::DW_TAG_pointer_type)
- subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom());
+ subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
- DIArray Elements = DICompositeType(subType).getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDerivedType DT(Elements.getElement(i));
- if (getName() == DT.getName())
- return (resolve(DT.getTypeDerivedFrom()));
+ auto Elements = cast<DICompositeTypeBase>(subType)->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *DT = cast<DIDerivedTypeBase>(Elements[i]);
+ if (getName() == DT->getName())
+ return resolve(DT->getBaseType());
}
}
return Ty;
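For readers unfamiliar with the Blocks runtime, the comment above describes a layout along these lines (illustrative only; the exact fields past `size` vary with copy/dispose helpers):

    typedef int SomeType; // stand-in for the type the programmer declared

    // Roughly what the compiler creates for "__block SomeType VarName;".
    // getType() walks through the pointer to this struct and returns the
    // type of the member whose name matches the variable.
    struct __Block_byref_x_VarName {
      void *isa;
      struct __Block_byref_x_VarName *forwarding; // -> heap copy once moved
      int flags;
      int size;
      SomeType VarName; // the member getType() resolves to
    };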
@@ -169,11 +192,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0),
- InfoHolder(A, *this, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
+ PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, *this, "skel_string", DIEValueAllocator),
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ IsPS4(Triple(A->getTargetTriple()).isPS4()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -182,17 +206,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
dwarf::DW_FORM_data4)),
AccelTypes(TypeAtoms) {
- DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr;
- DwarfLineSectionSym = nullptr;
- DwarfAddrSectionSym = nullptr;
- DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr;
- FunctionBeginSym = FunctionEndSym = nullptr;
CurFn = nullptr;
CurMI = nullptr;
// Turn on accelerator tables for Darwin by default, pubnames by
- // default for non-Darwin, and handle split dwarf.
+ // default for non-Darwin/PS4, and handle split dwarf.
if (DwarfAccelTables == Default)
HasDwarfAccelTables = IsDarwin;
else
@@ -204,7 +222,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
HasSplitDwarf = SplitDwarf == Enable;
if (DwarfPubSections == Default)
- HasDwarfPubSections = !IsDarwin;
+ HasDwarfPubSections = !IsDarwin && !IsPS4;
else
HasDwarfPubSections = DwarfPubSections == Enable;
@@ -212,7 +230,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
- Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
+ // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
+ // Everybody else uses GNU's.
+ UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+
+ Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
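The UseGNUTLSOpcode choice a few lines up reduces to a small predicate; a standalone restatement (assuming, as the comment says, that the standard TLS opcode only exists from DWARF 3 on):

    #include <cassert>

    // Standard TLS opcode only for Darwin/PS4, and only when the requested
    // DWARF version defines it; everything else gets the GNU extension.
    bool useGNUTLSOpcode(bool IsDarwin, bool IsPS4, unsigned DwarfVersion) {
      return !(IsDarwin || IsPS4) || DwarfVersion < 3;
    }

    int main() {
      assert(!useGNUTLSOpcode(true, false, 4));  // Darwin, DWARF 4: standard
      assert(useGNUTLSOpcode(false, false, 4));  // other targets: GNU
      assert(useGNUTLSOpcode(true, false, 2));   // DWARF 2 predates the opcode
    }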
@@ -223,19 +245,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() { }
-// Switch to the specified MCSection and emit an assembler
-// temporary label to it if SymbolStem is specified.
-static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = nullptr) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem)
- return nullptr;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
@@ -264,37 +273,30 @@ static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
-// Helper for sorting sections into a stable output order.
-static bool SectionSort(const MCSection *A, const MCSection *B) {
- std::string LA = (A ? A->getLabelBeginName() : "");
- std::string LB = (B ? B->getLabelBeginName() : "");
- return LA < LB;
-}
-
// Add the various names to the Dwarf accelerator table names.
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
// is only slightly different than the lookup of non-standard ObjC names.
-void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
- if (!SP.isDefinition())
+void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
+ if (!SP->isDefinition())
return;
- addAccelName(SP.getName(), Die);
+ addAccelName(SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
- if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
- addAccelName(SP.getLinkageName(), Die);
+ if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
+ addAccelName(SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
- if (isObjCClass(SP.getName())) {
+ if (isObjCClass(SP->getName())) {
StringRef Class, Category;
- getObjCClassCategory(SP.getName(), Class, Category);
+ getObjCClassCategory(SP->getName(), Class, Category);
addAccelObjC(Class, Die);
if (Category != "")
addAccelObjC(Category, Die);
// Also add the base method name to the name table.
- addAccelName(getObjCMethodName(SP.getName()), Die);
+ addAccelName(getObjCMethodName(SP->getName()), Die);
}
}
@@ -303,11 +305,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
if (!Context)
return false;
- DIDescriptor D(Context);
- if (D.isSubprogram())
+ if (isa<DISubprogram>(Context))
return true;
- if (D.isType())
- return isSubprogramContext(resolve(DIType(Context).getContext()));
+ if (auto *T = dyn_cast<DIType>(Context))
+ return isSubprogramContext(resolve(T->getScope()));
return false;
}
@@ -362,9 +363,10 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
-DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
- StringRef FN = DIUnit.getFilename();
- CompilationDir = DIUnit.getDirectory();
+DwarfCompileUnit &
+DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ StringRef FN = DIUnit->getFilename();
+ CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
@@ -378,17 +380,17 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
// compilation directory.
- if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU)
- Asm->OutStreamer.getContext().setMCLineTableCompilationDir(
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
- NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+ NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
- DIUnit.getLanguage());
+ DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
@@ -398,23 +400,21 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
addGnuPubAttributes(NewCU, Die);
}
- if (DIUnit.isOptimized())
+ if (DIUnit->isOptimized())
NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
- StringRef Flags = DIUnit.getFlags();
+ StringRef Flags = DIUnit->getFlags();
if (!Flags.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- if (unsigned RVer = DIUnit.getRunTimeVersion())
+ if (unsigned RVer = DIUnit->getRuntimeVersion())
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
if (useSplitDwarf())
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
- DwarfInfoDWOSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
CUMap.insert(std::make_pair(DIUnit, &NewCU));
CUDieMap.insert(std::make_pair(&Die, &NewCU));
@@ -422,11 +422,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
}
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N) {
- DIImportedEntity Module(N);
- assert(Module.Verify());
- if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
- D->addChild(TheCU.constructImportedEntityDIE(Module));
+ const DIImportedEntity *N) {
+ if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
+ D->addChild(TheCU.constructImportedEntityDIE(N));
}
// Emit all Dwarf sections that should come prior to the content. Create
@@ -445,54 +443,40 @@ void DwarfDebug::beginModule() {
return;
TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
- // Emit initial sections so we can reference labels later.
- emitSectionLabels();
-
SingleCU = CU_Nodes->getNumOperands() == 1;
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit CUNode(N);
+ auto *CUNode = cast<DICompileUnit>(N);
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
- DIArray ImportedEntities = CUNode.getImportedEntities();
- for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- ScopesWithImportedEntities.push_back(std::make_pair(
- DIImportedEntity(ImportedEntities.getElement(i)).getContext(),
- ImportedEntities.getElement(i)));
- std::sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), less_first());
- DIArray GVs = CUNode.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
- DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) {
- DIType Ty(EnumTypes.getElement(i));
+ for (auto *IE : CUNode->getImportedEntities())
+ ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
+ // Stable sort to preserve the order of appearance of imported entities.
+ // This is to avoid out-of-order processing of interdependent declarations
+ // within the same scope, e.g. { namespace A = base; namespace B = A; }
+ std::stable_sort(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(), less_first());
+ for (auto *GV : CUNode->getGlobalVariables())
+ CU.getOrCreateGlobalVariableDIE(GV);
+ for (auto *SP : CUNode->getSubprograms())
+ SPMap.insert(std::make_pair(SP, &CU));
+ for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- DIType UniqueTy(resolve(Ty.getRef()));
- CU.getOrCreateTypeDIE(UniqueTy);
+ CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
}
- DIArray RetainedTypes = CUNode.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
- DIType Ty(RetainedTypes.getElement(i));
+ for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- DIType UniqueTy(resolve(Ty.getRef()));
- CU.getOrCreateTypeDIE(UniqueTy);
+ CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
}
// Emit imported_modules last so that the relevant context is already
// available.
- for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i));
+ for (auto *IE : CUNode->getImportedEntities())
+ constructAndAddImportedEntityDIE(CU, IE);
}
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
- // Prime section data.
- SectionMap[Asm->getObjFileLowering().getTextSection()];
}
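The switch from std::sort to std::stable_sort in the hunk above is load-bearing: entries whose scopes compare equal must keep their order of appearance so that interdependent declarations such as `namespace A = base; namespace B = A;` are emitted in a valid order. A minimal demonstration, with plain pairs standing in for the scope/entity metadata:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      using Pair = std::pair<int, char>; // (scope id, imported entity)
      std::vector<Pair> V = {{2, 'z'}, {1, 'A'}, {1, 'B'}};
      // less_first orders by scope only, so equal scopes stay in source order.
      std::stable_sort(V.begin(), V.end(), [](const Pair &L, const Pair &R) {
        return L.first < R.first;
      });
      assert(V[0].second == 'A' && V[1].second == 'B'); // A still precedes B
    }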
void DwarfDebug::finishVariableDefinitions() {
@@ -504,7 +488,8 @@ void DwarfDebug::finishVariableDefinitions() {
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
assert(Unit);
- DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+ DbgVariable *AbsVar = getExistingAbstractVariable(
+ InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
if (AbsVar && AbsVar->getDIE()) {
Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
*AbsVar->getDIE());
@@ -516,7 +501,7 @@ void DwarfDebug::finishVariableDefinitions() {
void DwarfDebug::finishSubprogramDefinitions() {
for (const auto &P : SPMap)
forBothCUs(*P.second, [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(DISubprogram(P.first));
+ CU.finishSubprogramDefinition(cast<DISubprogram>(P.first));
});
}
@@ -527,14 +512,12 @@ void DwarfDebug::collectDeadVariables() {
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit TheCU(N);
+ auto *TheCU = cast<DICompileUnit>(N);
// Construct subprogram DIE and add variables DIEs.
DwarfCompileUnit *SPCU =
static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
assert(SPCU && "Unable to find Compile Unit!");
- DIArray Subprograms = TheCU.getSubprograms();
- for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
- DISubprogram SP(Subprograms.getElement(i));
+ for (auto *SP : TheCU->getSubprograms()) {
if (ProcessedSPNodes.count(SP) != 0)
continue;
SPCU->collectDeadVariables(SP);
@@ -544,6 +527,8 @@ void DwarfDebug::collectDeadVariables() {
}
void DwarfDebug::finalizeModuleInfo() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
finishSubprogramDefinitions();
finishVariableDefinitions();
@@ -573,13 +558,16 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty())
+ if (!AddrPool.isEmpty()) {
+ const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- DwarfAddrSectionSym, DwarfAddrSectionSym);
- if (!SkCU->getRangeLists().empty())
+ Sym, Sym);
+ }
+ if (!SkCU->getRangeLists().empty()) {
+ const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
- DwarfDebugRangeSectionSym,
- DwarfDebugRangeSectionSym);
+ Sym, Sym);
+ }
}
// If we have code split among multiple sections or non-contiguous
@@ -597,7 +585,7 @@ void DwarfDebug::finalizeModuleInfo() {
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
- TheCU.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.setBaseAddress(TheCU.getRanges().front().getStart());
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
}
@@ -608,53 +596,6 @@ void DwarfDebug::finalizeModuleInfo() {
SkeletonHolder.computeSizeAndOffsets();
}
-void DwarfDebug::endSections() {
- // Filter labels by section.
- for (const SymbolCU &SCU : ArangeLabels) {
- if (SCU.Sym->isInSection()) {
- // Make a note of this symbol and it's section.
- const MCSection *Section = &SCU.Sym->getSection();
- if (!Section->getKind().isMetadata())
- SectionMap[Section].push_back(SCU);
- } else {
- // Some symbols (e.g. common/bss on mach-o) can have no section but still
- // appear in the output. This sucks as we rely on sections to build
- // arange spans. We can do it without, but it's icky.
- SectionMap[nullptr].push_back(SCU);
- }
- }
-
- // Build a list of sections used.
- std::vector<const MCSection *> Sections;
- for (const auto &it : SectionMap) {
- const MCSection *Section = it.first;
- Sections.push_back(Section);
- }
-
- // Sort the sections into order.
- // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort);
-
- // Add terminating symbols for each section.
- for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
- const MCSection *Section = Sections[ID];
- MCSymbol *Sym = nullptr;
-
- if (Section) {
- // We can't call MCSection::getLabelEndName, as it's only safe to do so
- // if we know the section name up-front. For user-created sections, the
- // resulting label may not be valid to use as a label. (section names can
- // use a greater set of characters on some systems)
- Sym = Asm->GetTempSymbol("debug_end", ID);
- Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Sym);
- }
-
- // Insert a final terminator.
- SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
- }
-}
-
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
assert(CurFn == nullptr);
@@ -663,24 +604,26 @@ void DwarfDebug::endModule() {
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
// llvm.dbg.cu metadata node)
- if (!DwarfInfoSectionSym)
+ if (!MMI->hasDebugInfo())
return;
- // End any existing sections.
- // TODO: Does this need to happen?
- endSections();
-
// Finalize the debug info for the module.
finalizeModuleInfo();
emitDebugStr();
- // Emit all the DIEs into a debug info section.
- emitDebugInfo();
+ if (useSplitDwarf())
+ emitDebugLocDWO();
+ else
+ // Emit info into a debug loc section.
+ emitDebugLoc();
// Corresponding abbreviations into an abbrev section.
emitAbbreviations();
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
@@ -693,12 +636,9 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugLocDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
- } else
- // Emit info into a debug loc section.
- emitDebugLoc();
+ }
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
@@ -720,80 +660,80 @@ void DwarfDebug::endModule() {
}
// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
- DIVariable &Cleansed) {
- LLVMContext &Ctx = DV->getContext();
+DbgVariable *
+DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed) {
// More than one inlined variable corresponds to one abstract variable.
- // FIXME: This duplication of variables when inlining should probably be
- // removed. It's done to allow each DIVariable to describe its location
- // because the DebugLoc on the dbg.value/declare isn't accurate. We should
- // make it accurate then remove this duplication/cleansing stuff.
- Cleansed = cleanseInlinedVariable(DV, Ctx);
+ Cleansed = IV.first;
auto I = AbstractVariables.find(Cleansed);
if (I != AbstractVariables.end())
return I->second.get();
return nullptr;
}
-DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
- DIVariable Cleansed;
- return getExistingAbstractVariable(DV, Cleansed);
+DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
+ const DILocalVariable *Cleansed;
+ return getExistingAbstractVariable(IV, Cleansed);
}
-void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this);
+ auto AbsDbgVariable =
+ make_unique<DbgVariable>(Var, /* IA */ nullptr, /* Expr */ nullptr, this);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
-void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
const MDNode *ScopeNode) {
- DIVariable Cleansed = DV;
- if (getExistingAbstractVariable(DV, Cleansed))
+ const DILocalVariable *Cleansed = nullptr;
+ if (getExistingAbstractVariable(IV, Cleansed))
return;
- createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+ createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ cast<DILocalScope>(ScopeNode)));
}
-void
-DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
- const MDNode *ScopeNode) {
- DIVariable Cleansed = DV;
- if (getExistingAbstractVariable(DV, Cleansed))
+void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
+ InlinedVariable IV, const MDNode *ScopeNode) {
+ const DILocalVariable *Cleansed = nullptr;
+ if (getExistingAbstractVariable(IV, Cleansed))
return;
- if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+ if (LexicalScope *Scope =
+ LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
createAbstractVariable(Cleansed, Scope);
}
// Collect variable information from side table maintained by MMI.
void DwarfDebug::collectVariableInfoFromMMITable(
- SmallPtrSetImpl<const MDNode *> &Processed) {
+ DenseSet<InlinedVariable> &Processed) {
for (const auto &VI : MMI->getVariableDbgInfo()) {
if (!VI.Var)
continue;
- Processed.insert(VI.Var);
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
- DIVariable DV(VI.Var);
- DIExpression Expr(VI.Expr);
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this));
- DbgVariable *RegVar = ConcreteVariables.back().get();
- RegVar->setFrameIndex(VI.Slot);
- InfoHolder.addScopeVariable(Scope, RegVar);
+ const DIExpression *Expr = cast_or_null<DIExpression>(VI.Expr);
+ ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
+ auto RegVar =
+ make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot);
+ if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+ ConcreteVariables.push_back(std::move(RegVar));
}
}
// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
- const MDNode *Expr = MI->getDebugExpression();
- const MDNode *Var = MI->getDebugVariable();
+ const DIExpression *Expr = MI->getDebugExpression();
assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
@@ -804,26 +744,26 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- return DebugLocEntry::Value(Var, Expr, MLoc);
+ return DebugLocEntry::Value(Expr, MLoc);
}
if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
/// Determine whether two variable pieces overlap.
-static bool piecesOverlap(DIExpression P1, DIExpression P2) {
- if (!P1.isVariablePiece() || !P2.isVariablePiece())
+static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+ if (!P1->isBitPiece() || !P2->isBitPiece())
return true;
- unsigned l1 = P1.getPieceOffset();
- unsigned l2 = P2.getPieceOffset();
- unsigned r1 = l1 + P1.getPieceSize();
- unsigned r2 = l2 + P2.getPieceSize();
+ unsigned l1 = P1->getBitPieceOffset();
+ unsigned l2 = P2->getBitPieceOffset();
+ unsigned r1 = l1 + P1->getBitPieceSize();
+ unsigned r2 = l2 + P2->getBitPieceSize();
// True where [l1,r1[ and [l2,r2[ overlap.
return (l1 < r2) && (l2 < r1);
}
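The closing expression is the usual half-open interval test; as a standalone check (not LLVM code, offsets in bits as with getBitPieceOffset):

    #include <cassert>

    // [l1,r1) and [l2,r2) intersect exactly when each begins before the
    // other ends.
    bool overlap(unsigned l1, unsigned r1, unsigned l2, unsigned r2) {
      return l1 < r2 && l2 < r1;
    }

    int main() {
      assert(overlap(0, 32, 16, 48));  // share bits [16,32)
      assert(!overlap(0, 32, 32, 64)); // adjacent pieces: no overlap
    }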
@@ -842,7 +782,8 @@ static bool piecesOverlap(DIExpression P1, DIExpression P2) {
// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
// 2 | | ...
// 3 | [clobber reg0]
-// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+// x.
//
// Output:
//
@@ -868,7 +809,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
// If this piece overlaps with any open ranges, truncate them.
- DIExpression DIExpr = Begin->getDebugExpression();
+ const DIExpression *DIExpr = Begin->getDebugExpression();
auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
[&](DebugLocEntry::Value R) {
return piecesOverlap(DIExpr, R.getExpression());
@@ -882,7 +823,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (End != nullptr)
EndLabel = getLabelAfterInsn(End);
else if (std::next(I) == Ranges.end())
- EndLabel = FunctionEndSym;
+ EndLabel = Asm->getFunctionEnd();
else
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
@@ -894,7 +835,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
bool couldMerge = false;
// If this is a piece, it may belong to the current DebugLocEntry.
- if (DIExpr.isVariablePiece()) {
+ if (DIExpr->isBitPiece()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
@@ -916,54 +857,50 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
auto CurEntry = DebugLoc.rbegin();
- auto PrevEntry = std::next(CurEntry);
- if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
- DebugLoc.pop_back();
-
DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
- for (auto Value : CurEntry->getValues()) {
- Value.getVariable()->dump();
+ for (auto &Value : CurEntry->getValues())
Value.getExpression()->dump();
- }
dbgs() << "-----\n";
});
+
+ auto PrevEntry = std::next(CurEntry);
+ if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
+ DebugLoc.pop_back();
}
}
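The MergeRanges coalescing that this hunk moves below the DEBUG dump collapses a pair of entries describing the same location over abutting ranges; sketched with plain strings standing in for the MCSymbol labels:

    #include <cassert>
    #include <string>

    struct Entry {
      std::string Begin, End, Value; // [Begin,End) described by Value
      // Fold Next into this entry when ranges abut and values agree.
      bool mergeRanges(const Entry &Next) {
        if (End != Next.Begin || Value != Next.Value)
          return false;
        End = Next.End;
        return true;
      }
    };

    int main() {
      Entry A{"Ltmp0", "Ltmp1", "reg5"}, B{"Ltmp1", "Ltmp2", "reg5"};
      assert(A.mergeRanges(B) && A.End == "Ltmp2"); // one range [Ltmp0,Ltmp2)
    }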
// Find variables for each lexical scope.
-void
-DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
- SmallPtrSetImpl<const MDNode *> &Processed) {
+void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMMITable(Processed);
for (const auto &I : DbgValues) {
- DIVariable DV(I.first);
- if (Processed.count(DV))
+ InlinedVariable IV = I.first;
+ if (Processed.count(IV))
continue;
- // Instruction ranges, specifying where DV is accessible.
+ // Instruction ranges, specifying where IV is accessible.
const auto &Ranges = I.second;
if (Ranges.empty())
continue;
LexicalScope *Scope = nullptr;
- if (MDNode *IA = DV.getInlinedAt()) {
- DebugLoc DL = DebugLoc::getFromDILocation(IA);
- Scope = LScopes.findInlinedScope(DebugLoc::get(
- DL.getLine(), DL.getCol(), DV.getContext(), IA));
- } else
- Scope = LScopes.findLexicalScope(DV.getContext());
+ if (const DILocation *IA = IV.second)
+ Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(IV.first->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
- Processed.insert(DV);
+ Processed.insert(IV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
InfoHolder.addScopeVariable(Scope, RegVar);
@@ -973,29 +910,33 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
continue;
// Handle multiple DBG_VALUE instructions describing one variable.
- RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
-
- DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
- DebugLocList &LocList = DotDebugLocEntries.back();
- LocList.CU = &TheCU;
- LocList.Label =
- Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
+ RegVar->setDebugLocListIndex(
+ DebugLocs.startList(&TheCU, Asm->createTempSymbol("debug_loc")));
// Build the location list for this variable.
- buildLocationList(LocList.List, Ranges);
+ SmallVector<DebugLocEntry, 8> Entries;
+ buildLocationList(Entries, Ranges);
+
+ // If the variable has a DIBasicType, extract it. Basic types cannot have
+ // unique identifiers, so don't bother resolving the type with the
+ // identifier map.
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(IV.first->getType()));
+
+ // Finalize the entry by lowering it into a DWARF bytestream.
+ for (auto &Entry : Entries)
+ Entry.finalize(*Asm, DebugLocs, BT);
}
// Collect info for variables that were optimized out.
- DIArray Variables = SP.getVariables();
- for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
- DIVariable DV(Variables.getElement(i));
- assert(DV.isVariable());
- if (!Processed.insert(DV).second)
+ for (const DILocalVariable *DV : SP->getVariables()) {
+ if (!Processed.insert(InlinedVariable(DV, nullptr)).second)
continue;
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
- ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- DIExpression NoExpr;
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this));
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) {
+ ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr),
+ Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(
+ DV, /* IA */ nullptr, /* Expr */ nullptr, this));
InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get());
}
}
@@ -1020,23 +961,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
- if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = 0;
- PrevInstLoc = DL;
- if (DL == PrologEndLoc) {
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- PrologEndLoc = DebugLoc();
- Flags |= DWARF2_FLAG_IS_STMT;
- }
- if (DL.getLine() !=
- Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
- Flags |= DWARF2_FLAG_IS_STMT;
+ if (DL != PrevInstLoc) {
+ if (DL) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ Flags |= DWARF2_FLAG_IS_STMT;
+ }
+ if (DL.getLine() !=
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine())
+ Flags |= DWARF2_FLAG_IS_STMT;
- if (!DL.isUnknown()) {
- const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
- } else
+ } else if (UnknownLocations) {
+ PrevInstLoc = DL;
recordSourceLine(0, 0, nullptr, 0);
+ }
}
}
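The restructured branch above computes the .loc flags from two inputs; isolated below (the flag values are local stand-ins, not the MCDwarf constants):

    #include <cassert>

    enum : unsigned { IS_STMT = 1u << 0, PROLOGUE_END = 1u << 2 }; // stand-ins

    // PROLOGUE_END (plus IS_STMT) when this is the recorded prologue-end
    // location; IS_STMT whenever the line differs from the last .loc emitted.
    unsigned locFlags(bool AtPrologueEnd, unsigned Line, unsigned LastLine) {
      unsigned Flags = 0;
      if (AtPrologueEnd)
        Flags |= PROLOGUE_END | IS_STMT;
      if (Line != LastLine)
        Flags |= IS_STMT;
      return Flags;
    }

    int main() {
      assert(locFlags(false, 7, 7) == 0);          // same line, mid-function
      assert(locFlags(true, 7, 7) & PROLOGUE_END); // prologue end is a stmt
    }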
@@ -1053,8 +996,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
return;
if (!PrevLabel) {
- PrevLabel = MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(PrevLabel);
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
}
I->second = PrevLabel;
}
@@ -1081,8 +1024,8 @@ void DwarfDebug::endInstruction() {
// We need a label after this instruction.
if (!PrevLabel) {
- PrevLabel = MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(PrevLabel);
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
}
I->second = PrevLabel;
}
@@ -1119,7 +1062,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
- !MI.getDebugLoc().isUnknown()) {
+ MI.getDebugLoc()) {
// Did the target forget to set the FrameSetup flag for CFI insns?
assert(!MI.isCFIInstruction() &&
"First non-frame-setup instruction is a CFI instruction.");
@@ -1166,19 +1109,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// is absolute (such as an <> lookup header)))
DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
- if (Asm->OutStreamer.hasRawTextSupport())
+ if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
- Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
- Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
-
- // Emit a label for the function so that we have a beginning address.
- FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
// Calculate history for local variables.
- calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(),
+ calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues);
// Request labels for the full history.
@@ -1187,21 +1125,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Ranges.empty())
continue;
- // The first mention of a function argument gets the FunctionBeginSym
+ // The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
- DIVariable DIVar(Ranges.front().first->getDebugVariable());
- if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
- getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
- if (Ranges.front().first->getDebugExpression().isVariablePiece()) {
+ const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+ if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+ getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
+ LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
+ if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
// Mark all non-overlapping initial pieces.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- DIExpression Piece = I->first->getDebugExpression();
+ const DIExpression *Piece = I->first->getDebugExpression();
if (std::all_of(Ranges.begin(), I,
[&](DbgValueHistoryMap::InstrRange Pred) {
return !piecesOverlap(Piece, Pred.first->getDebugExpression());
}))
- LabelsBeforeInsn[I->first] = FunctionBeginSym;
+ LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
break;
}
@@ -1216,19 +1154,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
PrevInstLoc = DebugLoc();
- PrevLabel = FunctionBeginSym;
+ PrevLabel = Asm->getFunctionBegin();
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
- if (!PrologEndLoc.isUnknown()) {
- DebugLoc FnStartDL =
- PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
- recordSourceLine(
- FnStartDL.getLine(), FnStartDL.getCol(),
- FnStartDL.getScope(MF->getFunction()->getContext()),
- // We'd like to list the prologue as "not statements" but GDB behaves
- // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- DWARF2_FLAG_IS_STMT);
+ if (DILocation *L = PrologEndLoc) {
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ auto *SP = L->getInlinedAtScope()->getSubprogram();
+ recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
}
}
@@ -1247,27 +1181,22 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
return;
}
- // Define end label for subprogram.
- FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
// Set DwarfCompileUnitID in MCContext to default value.
- Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DISubprogram SP(FnScope->getScopeNode());
+ auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *SPMap.lookup(SP);
- SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ DenseSet<InlinedVariable> ProcessedVars;
collectVariableInfo(TheCU, SP, ProcessedVars);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym));
+ TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
- if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly &&
+ if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
assert(DbgValues.empty());
@@ -1286,16 +1215,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
- DISubprogram SP(AScope->getScopeNode());
- assert(SP.isSubprogram());
+ auto *SP = cast<DISubprogram>(AScope->getScopeNode());
// Collect info for variables that were optimized out.
- DIArray Variables = SP.getVariables();
- for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
- DIVariable DV(Variables.getElement(i));
- assert(DV && DV.isVariable());
- if (!ProcessedVars.insert(DV).second)
+ for (const DILocalVariable *DV : SP->getVariables()) {
+ if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
continue;
- ensureAbstractVariableIsCreated(DV, DV.getContext());
+ ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
+ DV->getScope());
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractVariableIsCreated inserted abstract scopes");
}
@@ -1327,122 +1253,28 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
StringRef Dir;
unsigned Src = 1;
unsigned Discriminator = 0;
- if (DIScope Scope = DIScope(S)) {
- assert(Scope.isScope());
- Fn = Scope.getFilename();
- Dir = Scope.getDirectory();
- if (Scope.isLexicalBlockFile())
- Discriminator = DILexicalBlockFile(S).getDiscriminator();
-
- unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
+ if (auto *Scope = cast_or_null<DIScope>(S)) {
+ Fn = Scope->getFilename();
+ Dir = Scope->getDirectory();
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
+
+ unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
.getOrCreateSourceID(Fn, Dir);
}
- Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
- Discriminator, Fn);
+ Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
+ Discriminator, Fn);
}
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
-// Emit initial Dwarf sections with a label at the start of each one.
-void DwarfDebug::emitSectionLabels() {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Dwarf sections base addresses.
- DwarfInfoSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- if (useSplitDwarf()) {
- DwarfInfoDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
- DwarfTypesDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
- }
- DwarfAbbrevSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- if (useSplitDwarf())
- DwarfAbbrevDWOSectionSym = emitSectionSym(
- Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo");
- if (GenerateARangeSection)
- emitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
- DwarfLineSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- if (GenerateGnuPubSections) {
- DwarfGnuPubNamesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection());
- DwarfGnuPubTypesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection());
- } else if (HasDwarfPubSections) {
- emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
- emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
- }
-
- DwarfStrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
- if (useSplitDwarf()) {
- DwarfStrDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
- DwarfAddrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc");
- } else
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc");
- DwarfDebugRangeSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range");
-}
-
-// Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE &Die) {
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die.getAbbrev();
-
- // Emit the code (index) for the abbreviation.
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
- "] 0x" + Twine::utohexstr(Die.getOffset()) +
- ":0x" + Twine::utohexstr(Die.getSize()) + " " +
- dwarf::TagString(Abbrev.getTag()));
- Asm->EmitULEB128(Abbrev.getNumber());
-
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
-
- // Emit the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i) {
- dwarf::Attribute Attr = AbbrevData[i].getAttribute();
- dwarf::Form Form = AbbrevData[i].getForm();
- assert(Form && "Too many attributes for DIE (check abbreviation)");
-
- if (Asm->isVerbose()) {
- Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
- if (Attr == dwarf::DW_AT_accessibility)
- Asm->OutStreamer.AddComment(dwarf::AccessibilityString(
- cast<DIEInteger>(Values[i])->getValue()));
- }
-
- // Emit an attribute using the defined form.
- Values[i]->EmitValue(Asm, Form);
- }
-
- // Emit the DIE children if any.
- if (Abbrev.hasChildren()) {
- for (auto &Child : Die.getChildren())
- emitDIE(*Child);
-
- Asm->OutStreamer.AddComment("End Of Children Mark");
- Asm->EmitInt8(0);
- }
-}
-
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-
- Holder.emitUnits(DwarfAbbrevSectionSym);
+ Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
@@ -1452,65 +1284,39 @@ void DwarfDebug::emitAbbreviations() {
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
-// Emit the last address of the section and the end of the line matrix.
-void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
- // Define last address of section.
- Asm->OutStreamer.AddComment("Extended Op");
- Asm->EmitInt8(0);
-
- Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
- Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
- Asm->OutStreamer.AddComment("Section end label");
-
- Asm->OutStreamer.EmitSymbolValue(
- Asm->GetTempSymbol("section_end", SectionEnd),
- Asm->getDataLayout().getPointerSize());
-
- // Mark end of matrix.
- Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
- Asm->EmitInt8(0);
- Asm->EmitInt8(1);
- Asm->EmitInt8(1);
-}
-
-void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName) {
+void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+ StringRef TableName) {
Accel.FinalizeTable(Asm, TableName);
- Asm->OutStreamer.SwitchSection(Section);
- auto *SectionBegin = Asm->GetTempSymbol(SymName);
- Asm->OutStreamer.EmitLabel(SectionBegin);
+ Asm->OutStreamer->SwitchSection(Section);
// Emit the full data.
- Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym);
+ Accel.emit(Asm, Section->getBeginSymbol(), this);
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
- "Names", "names_begin");
+ "Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
- "ObjC", "objc_begin");
+ "ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
- "namespac", "namespac_begin");
+ "namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
- "types", "types_begin");
+ "types");
}
// Public name handling.
@@ -1559,7 +1365,6 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
return dwarf::GIEK_TYPE;
case dwarf::DW_TAG_subprogram:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
- case dwarf::DW_TAG_constant:
case dwarf::DW_TAG_variable:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
case dwarf::DW_TAG_enumerator:
@@ -1573,16 +1378,16 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
- const MCSection *PSec =
- GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
- : Asm->getObjFileLowering().getDwarfPubNamesSection();
+ MCSection *PSec = GnuStyle
+ ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+ : Asm->getObjFileLowering().getDwarfPubNamesSection();
emitDebugPubSection(GnuStyle, PSec, "Names",
&DwarfCompileUnit::getGlobalNames);
}
void DwarfDebug::emitDebugPubSection(
- bool GnuStyle, const MCSection *PSec, StringRef Name,
+ bool GnuStyle, MCSection *PSec, StringRef Name,
const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
@@ -1594,26 +1399,25 @@ void DwarfDebug::emitDebugPubSection(
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
- unsigned ID = TheU->getUniqueID();
// Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(PSec);
+ Asm->OutStreamer->SwitchSection(PSec);
// Emit the header.
- Asm->OutStreamer.AddComment("Length of Public " + Name + " Info");
- MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID);
- MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID);
+ Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
+ MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+ MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
- Asm->OutStreamer.EmitLabel(BeginLabel);
+ Asm->OutStreamer->EmitLabel(BeginLabel);
- Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->OutStreamer->AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
- Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
+ Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
+ Asm->emitSectionOffset(TheU->getLabelBegin());
- Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->OutStreamer->AddComment("Compilation Unit Length");
Asm->EmitInt32(TheU->getLength());
// Emit the pubnames for this compilation unit.
@@ -1621,31 +1425,31 @@ void DwarfDebug::emitDebugPubSection(
const char *Name = GI.getKeyData();
const DIE *Entity = GI.second;
- Asm->OutStreamer.AddComment("DIE offset");
+ Asm->OutStreamer->AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
- Asm->OutStreamer.AddComment(
+ Asm->OutStreamer->AddComment(
Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->EmitInt8(Desc.toBits());
}
- Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ Asm->OutStreamer->AddComment("External Name");
+ Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
- Asm->OutStreamer.AddComment("End Mark");
+ Asm->OutStreamer->AddComment("End Mark");
Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(EndLabel);
+ Asm->OutStreamer->EmitLabel(EndLabel);
}
}
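
For orientation, a standalone model of one pubnames/pubtypes set as the loop above lays it out (field widths per the DWARF 2 pub-section format; the structs are illustrative, not the LLVM representation):

    #include <cstdint>
    #include <string>
    #include <vector>

    struct PubEntry {
      uint32_t DieOffset;   // "DIE offset" within the CU
      uint8_t GnuIndex;     // kind/linkage byte, GNU style only
      std::string Name;     // emitted NUL-terminated ("External Name")
    };

    struct PubSet {
      uint32_t UnitLength;  // EndLabel - BeginLabel, resolved via labels
      uint16_t Version;     // DW_PUBNAMES_VERSION (2)
      uint32_t CuOffset;    // "Offset of Compilation Unit Info"
      uint32_t CuLength;    // "Compilation Unit Length"
      std::vector<PubEntry> Entries; // followed by a 4-byte 0 "End Mark"
    };
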
void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
- const MCSection *PSec =
- GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
- : Asm->getObjFileLowering().getDwarfPubTypesSection();
+ MCSection *PSec = GnuStyle
+ ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+ : Asm->getObjFileLowering().getDwarfPubTypesSection();
emitDebugPubSection(GnuStyle, PSec, "Types",
&DwarfCompileUnit::getGlobalTypes);
@@ -1657,86 +1461,44 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
-/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
- const DITypeIdentifierMap &Map,
- ArrayRef<DebugLocEntry::Value> Values) {
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isVariablePiece();
- }) && "all values are expected to be pieces");
- assert(std::is_sorted(Values.begin(), Values.end()) &&
- "pieces are expected to be sorted");
-
- unsigned Offset = 0;
- for (auto Piece : Values) {
- const unsigned SizeOfByte = 8;
- DIExpression Expr = Piece.getExpression();
- unsigned PieceOffset = Expr.getPieceOffset();
- unsigned PieceSize = Expr.getPieceSize();
- assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
- if (Offset < PieceOffset) {
- // The DWARF spec seriously mandates pieces with no locations for gaps.
- Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*SizeOfByte);
- Offset += PieceOffset-Offset;
- }
- Offset += PieceSize;
-
-#ifndef NDEBUG
- DIVariable Var = Piece.getVariable();
- assert(!Var.isIndirect() && "indirect address for piece");
- unsigned VarSize = Var.getSizeInBits(Map);
- assert(PieceSize+PieceOffset <= VarSize/SizeOfByte
- && "piece is larger than or outside of variable");
- assert(PieceSize*SizeOfByte != VarSize
- && "piece covers entire variable");
-#endif
-
- emitDebugLocValue(Streamer, Piece, PieceOffset*SizeOfByte);
- }
-}
-
-
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
- const DebugLocEntry &Entry) {
- const DebugLocEntry::Value Value = Entry.getValues()[0];
- if (Value.isVariablePiece())
- // Emit all pieces that belong to the same variable and range.
- return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
-
- assert(Entry.getValues().size() == 1 && "only pieces may have >1 value");
- emitDebugLocValue(Streamer, Value);
-}
-
-void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits) {
- DIVariable DV = Value.getVariable();
- DebugLocDwarfExpression DwarfExpr(*Asm, Streamer);
-
+ const DebugLocStream::Entry &Entry) {
+ auto &&Comments = DebugLocs.getComments(Entry);
+ auto Comment = Comments.begin();
+ auto End = Comments.end();
+ for (uint8_t Byte : DebugLocs.getBytes(Entry))
+ Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
+}
+
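
The new emitDebugLocEntry walks two parallel streams, the pre-rendered expression bytes and their comments, and tolerates the comment stream running short. A self-contained sketch of that zip (printf stands in for the LLVM ByteStreamer):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    static void emitInt8(uint8_t B, const std::string &Comment) {
      std::printf("  .byte 0x%02x\t# %s\n", B, Comment.c_str());
    }

    static void emitBytes(const std::vector<uint8_t> &Bytes,
                          const std::vector<std::string> &Comments) {
      auto C = Comments.begin(), E = Comments.end();
      for (uint8_t B : Bytes) // comments are optional and may run out
        emitInt8(B, C != E ? *C++ : std::string());
    }
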
+static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits) {
+ DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
// Regular entry.
if (Value.isInt()) {
- DIBasicType BTy(resolve(DV.getType()));
- if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
- BTy.getEncoding() == dwarf::DW_ATE_signed_char))
+ if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
+ BT->getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.AddSignedConstant(Value.getInt());
else
DwarfExpr.AddUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
- DIExpression Expr = Value.getExpression();
- if (!Expr || (Expr.getNumElements() == 0))
+ const DIExpression *Expr = Value.getExpression();
+ if (!Expr || !Expr->getNumElements())
// Regular entry.
- Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+ AP.EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
if (Loc.getOffset()) {
DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset());
- DwarfExpr.AddExpression(Expr, PieceOffsetInBits);
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
+ PieceOffsetInBits);
} else
DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(),
PieceOffsetInBits);
- if (DV.isIndirect())
- DwarfExpr.EmitOp(dwarf::DW_OP_deref);
}
}
// else ... ignore constant fp. There is not any good way to
@@ -1744,61 +1506,95 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
// FIXME: ^
}
-void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
- Asm->OutStreamer.AddComment("Loc expr size");
- MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
- MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
- Asm->EmitLabelDifference(end, begin, 2);
- Asm->OutStreamer.EmitLabel(begin);
+void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream &Locs,
+ const DIBasicType *BT) {
+ Locs.startEntry(Begin, End);
+ BufferByteStreamer Streamer = Locs.getStreamer();
+ const DebugLocEntry::Value &Value = Values[0];
+ if (Value.isBitPiece()) {
+ // Emit all pieces that belong to the same variable and range.
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isBitPiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ const DIExpression *Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr->getBitPieceOffset();
+ unsigned PieceSize = Expr->getBitPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
+ Expr.AddOpPiece(PieceOffset-Offset, 0);
+ Offset += PieceOffset-Offset;
+ }
+ Offset += PieceSize;
+
+ emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset);
+ }
+ } else {
+ assert(Values.size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(AP, BT, Streamer, Value, 0);
+ }
+}
+
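
The piece loop in finalize relies on two invariants, sorted non-overlapping pieces and an explicit empty piece for every gap. A standalone model of that bookkeeping (printf stands in for the DWARF expression emitter):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct Piece { unsigned OffsetInBits, SizeInBits; };

    static void emitPieces(const std::vector<Piece> &Pieces) {
      unsigned Offset = 0;
      for (const Piece &P : Pieces) {
        assert(Offset <= P.OffsetInBits && "overlapping or duplicate pieces");
        if (Offset < P.OffsetInBits) // the gap gets an empty piece, no location
          std::printf("DW_OP_bit_piece %u 0\n", P.OffsetInBits - Offset);
        std::printf("<loc> DW_OP_bit_piece %u %u\n", P.SizeInBits,
                    P.OffsetInBits);
        Offset = P.OffsetInBits + P.SizeInBits;
      }
    }
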
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
+ // Emit the size.
+ Asm->OutStreamer->AddComment("Loc expr size");
+ Asm->EmitInt16(DebugLocs.getBytes(Entry).size());
+
// Emit the entry.
APByteStreamer Streamer(*Asm);
emitDebugLocEntry(Streamer, Entry);
- // Close the range.
- Asm->OutStreamer.EmitLabel(end);
}
// Emit locations into the debug loc section.
void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
- Asm->OutStreamer.SwitchSection(
+ Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
- for (const auto &DebugLoc : DotDebugLocEntries) {
- Asm->OutStreamer.EmitLabel(DebugLoc.Label);
- const DwarfCompileUnit *CU = DebugLoc.CU;
- for (const auto &Entry : DebugLoc.List) {
+ for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->EmitLabel(List.Label);
+ const DwarfCompileUnit *CU = List.CU;
+ for (const auto &Entry : DebugLocs.getEntries(List)) {
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
if (auto *Base = CU->getBaseAddress()) {
- Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size);
- Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size);
+ Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
+ Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
} else {
- Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size);
- Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size);
+ Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
+ Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
}
emitDebugLocEntryLocation(Entry);
}
- Asm->OutStreamer.EmitIntValue(0, Size);
- Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
}
}
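
The two branches above correspond to two encodings of a location-list bound: deltas against the CU base address when one exists, absolute symbol values otherwise. The arithmetic, as a standalone helper (illustrative, with raw addresses in place of MCSymbols):

    #include <cstdint>
    #include <utility>

    static std::pair<uint64_t, uint64_t>
    locBounds(uint64_t Begin, uint64_t End, const uint64_t *Base) {
      if (Base)             // relative to the CU's DW_AT_low_pc
        return {Begin - *Base, End - *Base};
      return {Begin, End};  // absolute (relocated) addresses
    }
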
void DwarfDebug::emitDebugLocDWO() {
- Asm->OutStreamer.SwitchSection(
+ Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
- for (const auto &DebugLoc : DotDebugLocEntries) {
- Asm->OutStreamer.EmitLabel(DebugLoc.Label);
- for (const auto &Entry : DebugLoc.List) {
+ for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->EmitLabel(List.Label);
+ for (const auto &Entry : DebugLocs.getEntries(List)) {
// Just always use start_length for now - at least that's one address
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
- unsigned idx = AddrPool.getIndex(Entry.getBeginSym());
+ unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
- Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4);
+ Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}
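
A standalone sketch of the start_length encoding chosen above: one ULEB128 index into the address pool plus a fixed 4-byte length. The entry-kind value 3 is an assumption based on the pre-DWARF-5 GNU location-list extension, and the helper names are illustrative:

    #include <cstdint>
    #include <vector>

    static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t B = V & 0x7f;
        V >>= 7;
        if (V)
          B |= 0x80; // continuation bit
        Out.push_back(B);
      } while (V);
    }

    static void emitStartLength(std::vector<uint8_t> &Out, unsigned PoolIdx,
                                uint32_t Length) {
      Out.push_back(3);          // assumed DW_LLE_start_length_entry code
      emitULEB128(Out, PoolIdx); // index of the start address in the pool
      for (int I = 0; I < 4; ++I) // 4-byte little-endian length
        Out.push_back(uint8_t(Length >> (8 * I)));
    }
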
@@ -1813,36 +1609,62 @@ struct ArangeSpan {
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
- // Start the dwarf aranges section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfARangesSection());
+ // Provides a unique id per text section.
+ MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
- typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType;
+ // Filter labels by section.
+ for (const SymbolCU &SCU : ArangeLabels) {
+ if (SCU.Sym->isInSection()) {
+ // Make a note of this symbol and its section.
+ MCSection *Section = &SCU.Sym->getSection();
+ if (!Section->getKind().isMetadata())
+ SectionMap[Section].push_back(SCU);
+ } else {
+ // Some symbols (e.g. common/bss on mach-o) can have no section but still
+ // appear in the output. This sucks as we rely on sections to build
+ // arange spans. We can do it without, but it's icky.
+ SectionMap[nullptr].push_back(SCU);
+ }
+ }
+
+ // Add terminating symbols for each section.
+ for (const auto &I : SectionMap) {
+ MCSection *Section = I.first;
+ MCSymbol *Sym = nullptr;
- SpansType Spans;
+ if (Section)
+ Sym = Asm->OutStreamer->endSection(Section);
- // Build a list of sections used.
- std::vector<const MCSection *> Sections;
- for (const auto &it : SectionMap) {
- const MCSection *Section = it.first;
- Sections.push_back(Section);
+ // Insert a final terminator.
+ SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
}
- // Sort the sections into order.
- // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort);
+ DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
- // Build a set of address spans, sorted by CU.
- for (const MCSection *Section : Sections) {
- SmallVector<SymbolCU, 8> &List = SectionMap[Section];
+ for (auto &I : SectionMap) {
+ const MCSection *Section = I.first;
+ SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 2)
continue;
+ // If we have no section (e.g. common), just write out
+ // individual spans for each symbol.
+ if (!Section) {
+ for (const SymbolCU &Cur : List) {
+ ArangeSpan Span;
+ Span.Start = Cur.Sym;
+ Span.End = nullptr;
+ if (Cur.CU)
+ Spans[Cur.CU].push_back(Span);
+ }
+ continue;
+ }
+
// Sort the symbols by offset within the section.
std::sort(List.begin(), List.end(),
[&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0;
+ unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
@@ -1853,35 +1675,27 @@ void DwarfDebug::emitDebugARanges() {
return IA < IB;
});
- // If we have no section (e.g. common), just write out
- // individual spans for each symbol.
- if (!Section) {
- for (const SymbolCU &Cur : List) {
+ // Build spans between each label.
+ const MCSymbol *StartSym = List[0].Sym;
+ for (size_t n = 1, e = List.size(); n < e; n++) {
+ const SymbolCU &Prev = List[n - 1];
+ const SymbolCU &Cur = List[n];
+
+ // Try and build the longest span we can within the same CU.
+ if (Cur.CU != Prev.CU) {
ArangeSpan Span;
- Span.Start = Cur.Sym;
- Span.End = nullptr;
- if (Cur.CU)
- Spans[Cur.CU].push_back(Span);
- }
- } else {
- // Build spans between each label.
- const MCSymbol *StartSym = List[0].Sym;
- for (size_t n = 1, e = List.size(); n < e; n++) {
- const SymbolCU &Prev = List[n - 1];
- const SymbolCU &Cur = List[n];
-
- // Try and build the longest span we can within the same CU.
- if (Cur.CU != Prev.CU) {
- ArangeSpan Span;
- Span.Start = StartSym;
- Span.End = Cur.Sym;
- Spans[Prev.CU].push_back(Span);
- StartSym = Cur.Sym;
- }
+ Span.Start = StartSym;
+ Span.End = Cur.Sym;
+ Spans[Prev.CU].push_back(Span);
+ StartSym = Cur.Sym;
}
}
}
+ // Start the dwarf aranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+
unsigned PtrSize = Asm->getDataLayout().getPointerSize();
// Build a list of CUs used.
@@ -1921,18 +1735,18 @@ void DwarfDebug::emitDebugARanges() {
ContentSize += (List.size() + 1) * TupleSize;
// For each compile unit, write the list of spans it covers.
- Asm->OutStreamer.AddComment("Length of ARange Set");
+ Asm->OutStreamer->AddComment("Length of ARange Set");
Asm->EmitInt32(ContentSize);
- Asm->OutStreamer.AddComment("DWARF Arange version number");
+ Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
- Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym());
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
+ Asm->emitSectionOffset(CU->getLabelBegin());
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
- Asm->OutStreamer.AddComment("Segment Size (in bytes)");
+ Asm->OutStreamer->AddComment("Segment Size (in bytes)");
Asm->EmitInt8(0);
- Asm->OutStreamer.EmitFill(Padding, 0xff);
+ Asm->OutStreamer->EmitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
Asm->EmitLabelReference(Span.Start, PtrSize);
@@ -1947,20 +1761,20 @@ void DwarfDebug::emitDebugARanges() {
if (Size == 0)
Size = 1;
- Asm->OutStreamer.EmitIntValue(Size, PtrSize);
+ Asm->OutStreamer->EmitIntValue(Size, PtrSize);
}
}
- Asm->OutStreamer.AddComment("ARange terminator");
- Asm->OutStreamer.EmitIntValue(0, PtrSize);
- Asm->OutStreamer.EmitIntValue(0, PtrSize);
+ Asm->OutStreamer->AddComment("ARange terminator");
+ Asm->OutStreamer->EmitIntValue(0, PtrSize);
+ Asm->OutStreamer->EmitIntValue(0, PtrSize);
}
}
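
A standalone model of one .debug_aranges set as the code above emits it (the structs are illustrative; note that a zero-length span is written with length 1 so consumers do not mistake it for the terminator):

    #include <cstdint>
    #include <vector>

    struct ArangeTuple { uint64_t Address, Length; }; // one span

    struct ArangeSet {
      uint32_t UnitLength; // "Length of ARange Set"
      uint16_t Version;    // DW_ARANGES_VERSION (2)
      uint32_t CuOffset;   // "Offset Into Debug Info Section"
      uint8_t AddrSize;    // pointer size in bytes
      uint8_t SegSize;     // 0: flat address space
      // header padded with 0xff up to a 2 * AddrSize tuple boundary, then:
      std::vector<ArangeTuple> Spans; // terminated by {0, 0}
    };
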
// Emit visible names into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
- Asm->OutStreamer.SwitchSection(
+ Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
// Size for our labels.
@@ -1976,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() {
// Iterate over the misc ranges for the compile units in the module.
for (const RangeSpanList &List : TheCU->getRangeLists()) {
// Emit our symbol so we can find the beginning of the range.
- Asm->OutStreamer.EmitLabel(List.getSym());
+ Asm->OutStreamer->EmitLabel(List.getSym());
for (const RangeSpan &Range : List.getRanges()) {
const MCSymbol *Begin = Range.getStart();
@@ -1987,14 +1801,14 @@ void DwarfDebug::emitDebugRanges() {
Asm->EmitLabelDifference(Begin, Base, Size);
Asm->EmitLabelDifference(End, Base, Size);
} else {
- Asm->OutStreamer.EmitSymbolValue(Begin, Size);
- Asm->OutStreamer.EmitSymbolValue(End, Size);
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
}
}
// And terminate the list with two 0 values.
- Asm->OutStreamer.EmitIntValue(0, Size);
- Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
}
}
}
@@ -2004,7 +1818,7 @@ void DwarfDebug::emitDebugRanges() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode().getSplitDebugFilename());
+ U.getCUNode()->getSplitDebugFilename());
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -2022,10 +1836,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
@@ -2036,9 +1849,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
- // Don't pass an abbrev symbol, using a constant zero instead so as not to
- // emit relocations into the dwo file.
- InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr);
+ // Don't emit relocations into the dwo file.
+ InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2050,9 +1862,9 @@ void DwarfDebug::emitDebugAbbrevDWO() {
void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
- Asm->OutStreamer.SwitchSection(
+ Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLineDWOSection());
- SplitTypeUnitFileTable.Emit(Asm->OutStreamer);
+ SplitTypeUnitFileTable.Emit(*Asm->OutStreamer);
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -2060,8 +1872,7 @@ void DwarfDebug::emitDebugLineDWO() {
// sections.
void DwarfDebug::emitDebugStrDWO() {
assert(useSplitDwarf() && "No split dwarf?");
- const MCSection *OffSec =
- Asm->getObjFileLowering().getDwarfStrOffDWOSection();
+ MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
OffSec);
}
@@ -2070,7 +1881,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
if (SingleCU)
- SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory());
+ SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
return &SplitTypeUnitFileTable;
}
@@ -2082,12 +1893,12 @@ static uint64_t makeTypeSignature(StringRef Identifier) {
// appropriately.
MD5::MD5Result Result;
Hash.final(Result);
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
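
An equivalent standalone computation for the signature above: hash the type's unique identifier with MD5 and read the trailing eight digest bytes as a little-endian 64-bit value, which is what support::endian::read64le does portably:

    #include <cstdint>

    static uint64_t read64le(const uint8_t *P) {
      uint64_t V = 0;
      for (int I = 7; I >= 0; --I)
        V = (V << 8) | P[I]; // assemble little-endian, host-independent
      return V;
    }

    // Digest is the 16-byte MD5 of the type's unique identifier.
    static uint64_t signatureFromMD5(const uint8_t Digest[16]) {
      return read64le(Digest + 8); // low-order 64 bits, per DWARF 4
    }
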
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
StringRef Identifier, DIE &RefDie,
- DICompositeType CTy) {
+ const DICompositeType *CTy) {
// Fast path if we're building some type units and one has already used the
// address pool we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
@@ -2146,7 +1957,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// This is inefficient because all the dependent types will be rebuilt
// from scratch, including building them in type units, discovering that
// they depend on addresses, throwing them out and rebuilding them.
- CU.constructTypeDIE(RefDie, CTy);
+ CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
return;
}
@@ -2165,27 +1976,23 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
- AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
- &Die);
+ AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
- AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
- &Die);
+ AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
if (!useDwarfAccelTables())
return;
- AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
- &Die);
+ AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
if (!useDwarfAccelTables())
return;
- AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
- &Die);
+ AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index a1a9426..700f736 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -16,11 +16,11 @@
#include "AsmPrinterHandler.h"
#include "DbgValueHistoryCalculator.h"
-#include "DebugLocEntry.h"
-#include "DebugLocList.h"
+#include "DebugLocStream.h"
#include "DwarfAccelTable.h"
#include "DwarfFile.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -41,6 +41,7 @@ class AsmPrinter;
class ByteStreamer;
class ConstantInt;
class ConstantFP;
+class DebugLocEntry;
class DwarfCompileUnit;
class DwarfDebug;
class DwarfTypeUnit;
@@ -67,81 +68,111 @@ public:
//===----------------------------------------------------------------------===//
/// \brief This class is used to track local variable information.
+///
+/// - Variables whose location changes over time have a DebugLocListIndex and
+/// the other fields are not used.
+///
+/// - Variables that are described by multiple MMI table entries have multiple
+/// expressions and frame indices.
class DbgVariable {
- DIVariable Var; // Variable Descriptor.
- DIExpression Expr; // Complex address location expression.
- DIE *TheDIE; // Variable DIE.
- unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
- const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
- int FrameIndex;
+ const DILocalVariable *Var; /// Variable Descriptor.
+ const DILocation *IA; /// Inlined at location.
+ SmallVector<const DIExpression *, 1>
+ Expr; /// Complex address location expression.
+ DIE *TheDIE; /// Variable DIE.
+ unsigned DebugLocListIndex; /// Offset in DebugLocs.
+ const MachineInstr *MInsn; /// DBG_VALUE instruction of the variable.
+ SmallVector<int, 1> FrameIndex; /// Frame index of the variable.
DwarfDebug *DD;
public:
- /// Construct a DbgVariable from a DIVariable.
- DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD)
- : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U),
- MInsn(nullptr), FrameIndex(~0), DD(DD) {
- assert(Var.Verify() && Expr.Verify());
+ /// Construct a DbgVariable from a variable.
+ DbgVariable(const DILocalVariable *V, const DILocation *IA,
+ const DIExpression *E, DwarfDebug *DD, int FI = ~0)
+ : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DebugLocListIndex(~0U),
+ MInsn(nullptr), DD(DD) {
+ FrameIndex.push_back(FI);
+ assert(!E || E->isValid());
}
/// Construct a DbgVariable from a DEBUG_VALUE.
/// AbstractVar may be NULL.
DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
- : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()),
- TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue),
- FrameIndex(~0), DD(DD) {}
+ : Var(DbgValue->getDebugVariable()),
+ IA(DbgValue->getDebugLoc()->getInlinedAt()),
+ Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr),
+ DebugLocListIndex(~0U), MInsn(DbgValue), DD(DD) {
+ FrameIndex.push_back(~0);
+ }
// Accessors.
- DIVariable getVariable() const { return Var; }
- DIExpression getExpression() const { return Expr; }
+ const DILocalVariable *getVariable() const { return Var; }
+ const DILocation *getInlinedAt() const { return IA; }
+ const ArrayRef<const DIExpression *> getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
- void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
- unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
- StringRef getName() const { return Var.getName(); }
+ void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
+ unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- int getFrameIndex() const { return FrameIndex; }
- void setFrameIndex(int FI) { FrameIndex = FI; }
+ const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+
+ void addMMIEntry(const DbgVariable &V) {
+ assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+ assert(V.Var == Var && "conflicting variable");
+ assert(V.IA == IA && "conflicting inlined-at location");
+
+ if (V.getFrameIndex().back() != ~0) {
+ auto E = V.getExpression();
+ auto FI = V.getFrameIndex();
+ Expr.append(E.begin(), E.end());
+ FrameIndex.append(FI.begin(), FI.end());
+ }
+ assert(Expr.size() > 1 ? std::all_of(Expr.begin(), Expr.end(),
+ [](const DIExpression *E) {
+ return E->isBitPiece();
+ })
+ : (true && "conflicting locations for variable"));
+ }
+
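
A standalone model of the merge addMMIEntry performs for variables described by several MMI table entries: the parallel expression and frame-index lists are concatenated, and a multi-entry variable is only coherent if every expression is a bit piece (a bool stands in for the DIExpression check):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct MMIVar {
      std::vector<int> FrameIndices;  // one stack slot per piece
      std::vector<bool> IsBitPiece;   // stand-in for DIExpression pieces

      void merge(const MMIVar &Other) { // mirrors DbgVariable::addMMIEntry
        FrameIndices.insert(FrameIndices.end(), Other.FrameIndices.begin(),
                            Other.FrameIndices.end());
        IsBitPiece.insert(IsBitPiece.end(), Other.IsBitPiece.begin(),
                          Other.IsBitPiece.end());
        // More than one entry is only coherent if every entry is a piece.
        assert(FrameIndices.size() <= 1 ||
               std::all_of(IsBitPiece.begin(), IsBitPiece.end(),
                           [](bool P) { return P; }));
      }
    };
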
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
- if (Var.getTag() == dwarf::DW_TAG_arg_variable)
+ if (Var->getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
}
/// \brief Return true if DbgVariable is artificial.
bool isArtificial() const {
- if (Var.isArtificial())
+ if (Var->isArtificial())
return true;
- if (getType().isArtificial())
+ if (getType()->isArtificial())
return true;
return false;
}
bool isObjectPointer() const {
- if (Var.isObjectPointer())
+ if (Var->isObjectPointer())
return true;
- if (getType().isObjectPointer())
+ if (getType()->isObjectPointer())
return true;
return false;
}
bool variableHasComplexAddress() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Expr.getNumElements() > 0;
+ assert(Var && "Invalid complex DbgVariable!");
+ assert(Expr.size() == 1 &&
+ "variableHasComplexAddress() invoked on multi-FI variable");
+ return Expr.back()->getNumElements() > 0;
}
bool isBlockByrefVariable() const;
- unsigned getNumAddrElements() const {
- assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Expr.getNumElements();
- }
- uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); }
- DIType getType() const;
+ const DIType *getType() const;
private:
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const;
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const;
};
@@ -178,10 +209,6 @@ class DwarfDebug : public AsmPrinterHandler {
// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- // Provides a unique id per text section.
- typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
- SectionMapType SectionMap;
-
LexicalScopes LScopes;
// Collection of abstract variables.
@@ -190,7 +217,7 @@ class DwarfDebug : public AsmPrinterHandler {
// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
// can refer to them in spite of insertions into this list.
- SmallVector<DebugLocList, 4> DotDebugLocEntries;
+ DebugLocStream DebugLocs;
// This is a collection of subprogram MDNodes that are processed to
// create DIEs.
@@ -224,25 +251,10 @@ class DwarfDebug : public AsmPrinterHandler {
// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
- // Section Symbols: these are assembler temporary labels that are emitted at
- // the beginning of each supported dwarf section. These are used to form
- // section offsets and are created by EmitSectionLabels.
- MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
- MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
- MCSymbol *FunctionBeginSym, *FunctionEndSym;
- MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
- MCSymbol *DwarfTypesDWOSectionSym;
- MCSymbol *DwarfStrDWOSectionSym;
- MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
-
// As an optimization, there is no need to emit an entry in the directory
// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // Counter for assigning globally unique IDs for ranges.
- unsigned GlobalRangeCount;
-
// Holder for the file specific debug information.
DwarfFile InfoHolder;
@@ -258,7 +270,9 @@ class DwarfDebug : public AsmPrinterHandler {
// them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
- SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction;
+ SmallVector<
+ std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
+ TypeUnitsUnderConstruction;
// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
@@ -270,6 +284,9 @@ class DwarfDebug : public AsmPrinterHandler {
// text.
bool UsedNonDefaultText;
+ // Whether to use the GNU TLS opcode (instead of the standard opcode).
+ bool UseGNUTLSOpcode;
+
// Version of dwarf we're emitting.
unsigned DwarfVersion;
@@ -298,6 +315,7 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
+ bool IsPS4;
AddressPool AddrPool;
@@ -306,7 +324,7 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfAccelTable AccelNamespace;
DwarfAccelTable AccelTypes;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
@@ -314,22 +332,21 @@ class DwarfDebug : public AsmPrinterHandler {
return InfoHolder.getUnits();
}
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+
/// \brief Find abstract variable associated with Var.
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
- DIVariable &Cleansed);
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
- void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
- void ensureAbstractVariableIsCreated(const DIVariable &Var,
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed);
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
+ void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
+ void ensureAbstractVariableIsCreated(InlinedVariable Var,
const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+ void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
const MDNode *Scope);
/// \brief Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// \brief Emit initial Dwarf sections with a label at the start of each one.
- void emitSectionLabels();
-
/// \brief Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -347,23 +364,15 @@ class DwarfDebug : public AsmPrinterHandler {
/// processed.
void finalizeModuleInfo();
- /// \brief Emit labels to close any remaining sections that have been left
- /// open.
- void endSections();
-
/// \brief Emit the debug info section.
void emitDebugInfo();
/// \brief Emit the abbreviation section.
void emitAbbreviations();
- /// \brief Emit the last address of the section and the end of
- /// the line matrix.
- void emitEndOfLineMatrix(unsigned SectionEnd);
-
/// \brief Emit a specified accelerator table.
- void emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName);
+ void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+ StringRef TableName);
/// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -391,7 +400,7 @@ class DwarfDebug : public AsmPrinterHandler {
void emitDebugPubTypes(bool GnuStyle = false);
void emitDebugPubSection(
- bool GnuStyle, const MCSection *PSec, StringRef Name,
+ bool GnuStyle, MCSection *PSec, StringRef Name,
const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
/// \brief Emit visible names into a debug str section.
@@ -444,11 +453,11 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
- DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit);
+ DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
/// \brief Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N);
+ const DIImportedEntity *N);
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
@@ -461,8 +470,8 @@ class DwarfDebug : public AsmPrinterHandler {
void identifyScopeMarkers();
/// \brief Populate LexicalScope entries with variables' info.
- void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
- SmallPtrSetImpl<const MDNode *> &ProcessedVars);
+ void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedVariable> &ProcessedVars);
/// \brief Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
@@ -471,7 +480,7 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Collect variable information from the side table maintained
/// by MMI.
- void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P);
+ void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
/// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
@@ -513,7 +522,7 @@ public:
/// \brief Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
- DIE &Die, DICompositeType CTy);
+ DIE &Die, const DICompositeType *CTy);
/// \brief Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
@@ -524,8 +533,9 @@ public:
SymSize[Sym] = Size;
}
- /// \brief Recursively Emits a debug information entry.
- void emitDIE(DIE &Die);
+ /// \brief Returns whether to use DW_OP_GNU_push_tls_address instead of the
+ /// standard DW_OP_form_tls_address opcode.
+ bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
// Experimental DWARF5 features.
@@ -540,42 +550,23 @@ public:
/// Returns the Dwarf Version.
unsigned getDwarfVersion() const { return DwarfVersion; }
- /// Returns the section symbol for the .debug_loc section.
- MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
-
- /// Returns the section symbol for the .debug_str section.
- MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; }
-
- /// Returns the section symbol for the .debug_ranges section.
- MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; }
-
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
/// Returns the entries for the .debug_loc section.
- const SmallVectorImpl<DebugLocList> &
- getDebugLocEntries() const {
- return DotDebugLocEntries;
- }
+ const DebugLocStream &getDebugLocs() const { return DebugLocs; }
/// \brief Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
- void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
- /// \brief emit a single value for the debug loc section.
- void emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits = 0);
- /// Emits an optimal (=sorted) sequence of DW_OP_pieces.
- void emitLocPieces(ByteStreamer &Streamer,
- const DITypeIdentifierMap &Map,
- ArrayRef<DebugLocEntry::Value> Values);
+ void emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocStream::Entry &Entry);
/// Emit the location for a debug loc entry, including the size header.
- void emitDebugLocEntryLocation(const DebugLocEntry &Entry);
+ void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry);
/// Find the MDNode for the given reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return Ref.resolve(TypeIdentifierMap);
}
@@ -592,7 +583,7 @@ public:
/// or another context nested inside a subprogram.
bool isSubprogramContext(const MDNode *Context);
- void addSubprogramNames(DISubprogram SP, DIE &Die);
+ void addSubprogramNames(const DISubprogram *SP, DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
@@ -605,8 +596,6 @@ public:
void addAccelType(StringRef Name, const DIE &Die, char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
- const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; }
- const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; }
iterator_range<ImportedEntityMap::const_iterator>
findImportedEntitiesForScope(const MDNode *Scope) const {
@@ -626,12 +615,6 @@ public:
/// \brief Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- // FIXME: Consider rolling ranges up into DwarfDebug since we use a single
- // range_base anyway, so there's no need to keep them as separate per-CU range
- // lists. (though one day we might end up with a range.dwo section, in which
- // case it'd go to DwarfFile)
- unsigned getNextRangeNumber() { return GlobalRangeCount++; }
-
// FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index e8867c0a..a4fd36f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -21,17 +21,24 @@ namespace llvm {
class MachineFunction;
class ARMTargetStreamer;
-class DwarfCFIException : public EHStreamer {
- /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
- /// should be emitted.
+class DwarfCFIExceptionBase : public EHStreamer {
+protected:
+ DwarfCFIExceptionBase(AsmPrinter *A);
+
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI;
+
+ void markFunctionEnd() override;
+};
+
+class DwarfCFIException : public DwarfCFIExceptionBase {
+ /// Per-function flag to indicate if .cfi_personality should be emitted.
bool shouldEmitPersonality;
- /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
- /// should be emitted.
+ /// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA;
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
+ /// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
AsmPrinter::CFIMoveType moveTypeModule;
@@ -41,44 +48,38 @@ public:
// Main entry points.
//
DwarfCFIException(AsmPrinter *A);
- virtual ~DwarfCFIException();
+ ~DwarfCFIException() override;
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-class ARMException : public EHStreamer {
+class ARMException : public DwarfCFIExceptionBase {
void emitTypeInfos(unsigned TTypeEncoding) override;
ARMTargetStreamer &getTargetStreamer();
- /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
- /// should be emitted.
- bool shouldEmitCFI;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
ARMException(AsmPrinter *A);
- virtual ~ARMException();
+ ~ARMException() override;
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
} // End of namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 8e85eff..a2799b8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -22,14 +22,6 @@
using namespace llvm;
-const TargetRegisterInfo *DwarfExpression::getTRI() const {
- return AP.TM.getSubtargetImpl()->getRegisterInfo();
-}
-
-unsigned DwarfExpression::getDwarfVersion() const {
- return AP.getDwarfDebug()->getDwarfVersion();
-}
-
void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
@@ -74,25 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
}
bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
- int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
if (isFrameRegister(MachineReg)) {
// If variable offset is based in frame register then use fbreg.
EmitOp(dwarf::DW_OP_fbreg);
EmitSigned(Offset);
- } else {
- AddRegIndirect(DwarfReg, Offset);
+ return true;
}
+
+ int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
+ if (DwarfReg < 0)
+ return false;
+
+ AddRegIndirect(DwarfReg, Offset);
return true;
}
bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) {
- const TargetRegisterInfo *TRI = getTRI();
- int Reg = TRI->getDwarfRegNum(MachineReg, false);
+ if (!TRI.isPhysicalRegister(MachineReg))
+ return false;
+
+ int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
@@ -104,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
- for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ Reg = TRI.getDwarfRegNum(*SR, false);
if (Reg >= 0) {
- unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
+ unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
AddReg(Reg, "super-register");
if (PieceOffsetInBits == RegOffset) {
AddOpPiece(Size, RegOffset);
@@ -133,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// efficient DW_OP_piece.
unsigned CurPos = PieceOffsetInBits;
// The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
// can avoid emitting redundant aliasing subregs.
SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned Offset = TRI.getSubRegIdxOffset(Idx);
+ Reg = TRI.getDwarfRegNum(*SR, false);
// Intersection between the bits we already emitted and the bits
// covered by this subregister.
@@ -177,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) {
// value, so the producers and consumers started to rely on heuristics
// to disambiguate the value vs. location status of the expression.
// See PR21176 for more details.
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
@@ -185,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) {
EmitOp(dwarf::DW_OP_constu);
EmitUnsigned(Value);
// cf. comment in DwarfExpression::AddSignedConstant().
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
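
Both constant emitters above end with the same version check, which the comment in AddSignedConstant explains: DWARF 4 introduced DW_OP_stack_value to mark the expression result as the variable's value rather than its address. A standalone sketch:

    #include <cstdio>

    static void emitUnsignedConst(unsigned DwarfVersion,
                                  unsigned long long V) {
      std::printf("DW_OP_constu %llu\n", V);
      if (DwarfVersion >= 4) // value, not a memory location
        std::printf("DW_OP_stack_value\n");
    }
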
@@ -197,64 +192,74 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits,
return OffsetInBits;
}
-bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
+bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
unsigned MachineReg,
unsigned PieceOffsetInBits) {
- unsigned N = Expr.getNumElements();
- unsigned I = 0;
- bool ValidReg = false;
+ auto I = Expr->expr_op_begin();
+ auto E = Expr->expr_op_end();
+ if (I == E)
+ return AddMachineRegPiece(MachineReg);
+
// Pattern-match combinations for which more efficient representations exist
// first.
- if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_piece) {
- unsigned SizeOfByte = 8;
- unsigned OffsetInBits = Expr.getElement(1) * SizeOfByte;
- unsigned SizeInBits = Expr.getElement(2) * SizeOfByte;
- ValidReg =
- AddMachineRegPiece(MachineReg, SizeInBits,
- getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
- I = 3;
- } else if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_plus &&
- Expr.getElement(2) == dwarf::DW_OP_deref) {
+ bool ValidReg = false;
+ switch (I->getOp()) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
+ // Piece always comes at the end of the expression.
+ return AddMachineRegPiece(MachineReg, SizeInBits,
+ getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ }
+ case dwarf::DW_OP_plus: {
// [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
- unsigned Offset = Expr.getElement(1);
- ValidReg = AddMachineRegIndirect(MachineReg, Offset);
- I = 3;
- } else if (N >= 1 && Expr.getElement(0) == dwarf::DW_OP_deref) {
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(MachineReg);
- I = 1;
- } else
- ValidReg = AddMachineRegPiece(MachineReg);
+ auto N = I.getNext();
+ if (N != E && N->getOp() == dwarf::DW_OP_deref) {
+ unsigned Offset = I->getArg(0);
+ ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+ std::advance(I, 2);
+ break;
+ } else
+ ValidReg = AddMachineRegPiece(MachineReg);
+ break; // don't fall through into the DW_OP_deref case
+ }
+ case dwarf::DW_OP_deref: {
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(MachineReg);
+ ++I;
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported operand");
+ }
if (!ValidReg)
return false;
// Emit remaining elements of the expression.
- AddExpression(Expr, I);
+ AddExpression(I, E, PieceOffsetInBits);
return true;
}
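
A standalone sketch of the prefix folding performed above: a leading [DW_OP_plus Off, DW_OP_deref] pair collapses into a register-relative DW_OP_breg, a bare DW_OP_deref into DW_OP_breg with offset 0, and anything else starts from a plain register location (the bit-piece case in the real code returns early). The enum and printf output are illustrative:

    #include <cstdio>
    #include <vector>

    enum Op { Plus, Deref };
    struct Elt { Op O; unsigned Arg0 = 0; };

    // Returns how many leading elements were consumed by the fold; the
    // remaining ops are emitted afterwards, as AddExpression does above.
    static size_t foldRegPrefix(unsigned Reg, const std::vector<Elt> &Ex) {
      if (Ex.size() > 1 && Ex[0].O == Plus && Ex[1].O == Deref) {
        std::printf("DW_OP_breg%u %u\n", Reg, Ex[0].Arg0);
        return 2;
      }
      if (!Ex.empty() && Ex[0].O == Deref) {
        std::printf("DW_OP_breg%u 0\n", Reg);
        return 1;
      }
      std::printf("DW_OP_reg%u\n", Reg); // plain register location
      return 0;
    }
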
-void DwarfExpression::AddExpression(DIExpression Expr, unsigned I,
+void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
+ DIExpression::expr_op_iterator E,
unsigned PieceOffsetInBits) {
- unsigned N = Expr.getNumElements();
- for (; I < N; ++I) {
- switch (Expr.getElement(I)) {
- case dwarf::DW_OP_piece: {
- unsigned SizeOfByte = 8;
- unsigned OffsetInBits = Expr.getElement(++I) * SizeOfByte;
- unsigned SizeInBits = Expr.getElement(++I) * SizeOfByte;
+ for (; I != E; ++I) {
+ switch (I->getOp()) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
break;
}
case dwarf::DW_OP_plus:
EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(Expr.getElement(++I));
+ EmitUnsigned(I->getArg(0));
break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
break;
default:
- llvm_unreachable("unhandled opcode found in DIExpression");
+ llvm_unreachable("unhandled opcode found in expression");
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 92e4d5d..78ec937 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -30,21 +30,22 @@ class DIELoc;
/// entry.
class DwarfExpression {
protected:
- const AsmPrinter &AP;
// Various convenience accessors that extract things out of AsmPrinter.
- const TargetRegisterInfo *getTRI() const;
- unsigned getDwarfVersion() const;
+ const TargetRegisterInfo &TRI;
+ unsigned DwarfVersion;
public:
- DwarfExpression(const AsmPrinter &AP) : AP(AP) {}
+ DwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion)
+ : TRI(TRI), DwarfVersion(DwarfVersion) {}
virtual ~DwarfExpression() {}
/// Output a dwarf operand and an optional assembler comment.
virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int Value) = 0;
+ virtual void EmitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(unsigned Value) = 0;
+ virtual void EmitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(unsigned MachineReg) = 0;
@@ -87,17 +88,19 @@ public:
/// Emit an unsigned constant.
void AddUnsignedConstant(unsigned Value);
- /// Emit an entire DIExpression on top of a machine register location.
+ /// \brief Emit an entire expression on top of a machine register location.
+ ///
/// \param PieceOffsetInBits If this is one piece out of a fragmented
/// location, this is the offset of the piece inside the entire variable.
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg,
+ bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg,
unsigned PieceOffsetInBits = 0);
- /// Emit a the operations in a DIExpression, starting from element I.
+ /// Emit the operations remaining in the DIExpression iterator range [I, E).
/// \param PieceOffsetInBits If this is one piece out of a fragmented
/// location, this is the offset of the piece inside the entire variable.
- void AddExpression(DIExpression Expr, unsigned PieceOffsetInBits = 0,
- unsigned I = 0);
+ void AddExpression(DIExpression::expr_op_iterator I,
+ DIExpression::expr_op_iterator E,
+ unsigned PieceOffsetInBits = 0);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -105,27 +108,27 @@ class DebugLocDwarfExpression : public DwarfExpression {
ByteStreamer &BS;
public:
- DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
- : DwarfExpression(AP), BS(BS) {}
+ DebugLocDwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion, ByteStreamer &BS)
+ : DwarfExpression(TRI, DwarfVersion), BS(BS) {}
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression : public DwarfExpression {
+ const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE)
- : DwarfExpression(AP), DU(DU), DIE(DIE) {}
-
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 549abf8..10b58d4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -17,9 +17,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA)
- : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {}
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), StrPool(DA, *Asm, Pref) {}
DwarfFile::~DwarfFile() {}
@@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
-void DwarfFile::emitUnits(const MCSymbol *ASectionSym) {
+void DwarfFile::emitUnits(bool UseOffsets) {
for (const auto &TheU : CUs) {
DIE &Die = TheU->getUnitDie();
- const MCSection *USection = TheU->getSection();
- Asm->OutStreamer.SwitchSection(USection);
+ MCSection *USection = TheU->getSection();
+ Asm->OutStreamer->SwitchSection(USection);
- TheU->emitHeader(ASectionSym);
+ TheU->emitHeader(UseOffsets);
- DD.emitDIE(Die);
+ Asm->emitDwarfDIE(Die);
}
}
@@ -120,37 +119,26 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
Die.setSize(Offset - Die.getOffset());
return Offset;
}
-void DwarfFile::emitAbbrevs(const MCSection *Section) {
+
+void DwarfFile::emitAbbrevs(MCSection *Section) {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
- Asm->OutStreamer.SwitchSection(Section);
-
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbreviations) {
- // Emit the abbrevations code (base 1 index.)
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(Asm);
- }
-
- // Mark end of abbreviations.
- Asm->EmitULEB128(0, "EOM(3)");
+ Asm->OutStreamer->SwitchSection(Section);
+ Asm->emitDwarfAbbrevs(Abbreviations);
}
}
// Emit strings into a string section.
-void DwarfFile::emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection) {
+void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
StrPool.emit(*Asm, StrSection, OffsetSection);
}
-void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
- DIVariable DV = Var->getVariable();
+ const DILocalVariable *DV = Var->getVariable();
// Variables with positive arg numbers are parameters.
- if (unsigned ArgNum = DV.getArgNumber()) {
+ if (unsigned ArgNum = DV->getArg()) {
// Keep all parameters in order at the start of the variable list to ensure
// function types are correct (no out-of-order parameters)
//
@@ -160,7 +148,7 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// rather than linear search.
auto I = Vars.begin();
while (I != Vars.end()) {
- unsigned CurNum = (*I)->getVariable().getArgNumber();
+ unsigned CurNum = (*I)->getVariable()->getArg();
// A local (non-parameter) variable has been found, insert immediately
// before it.
if (CurNum == 0)
@@ -168,18 +156,17 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// A later indexed parameter has been found, insert immediately before it.
if (CurNum > ArgNum)
break;
- // FIXME: There are still some cases where two inlined functions are
- // conflated together (two calls to the same function at the same
- // location (eg: via a macro, or without column info, etc)) and then
- // their arguments are conflated as well.
- assert((LS->getParent() || CurNum != ArgNum) &&
- "Duplicate argument for top level (non-inlined) function");
+ if (CurNum == ArgNum) {
+ (*I)->addMMIEntry(*Var);
+ return false;
+ }
++I;
}
Vars.insert(I, Var);
- return;
+ return true;
}
Vars.push_back(Var);
+ return true;
}
}
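
The addScopeVariable change above replaces the old duplicate-argument assert with a merge: a parameter whose argument number is already present is folded into the existing entry via addMMIEntry, and the return value reports whether an insertion actually happened. A standalone sketch of that contract, with Var standing in for DbgVariable:

#include <vector>

struct Var {
  unsigned ArgNum;               // 0 for local (non-parameter) variables
  void mergeWith(const Var &) { /* stands in for addMMIEntry */ }
};

bool addScopeVariable(std::vector<Var *> &Vars, Var *V) {
  if (unsigned ArgNum = V->ArgNum) {
    auto I = Vars.begin();
    while (I != Vars.end()) {
      unsigned CurNum = (*I)->ArgNum;
      if (CurNum == 0 || CurNum > ArgNum)
        break;                   // insert before locals / later parameters
      if (CurNum == ArgNum) {    // duplicate argument number: merge, not insert
        (*I)->mergeWith(*V);
        return false;
      }
      ++I;
    }
    Vars.insert(I, V);
    return true;
  }
  Vars.push_back(V);             // locals keep arrival order at the end
  return true;
}
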
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index f14d673..532ed96 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -28,17 +28,15 @@ class DwarfUnit;
class DIEAbbrev;
class MCSymbol;
class DIE;
-class DISubprogram;
class LexicalScope;
class StringRef;
class DwarfDebug;
class MCSection;
+class MDNode;
class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
- DwarfDebug &DD;
-
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -59,11 +57,10 @@ class DwarfFile {
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
/// of in DwarfCompileUnit.
- DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+ DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap;
public:
- DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA);
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
~DwarfFile();
@@ -83,19 +80,19 @@ public:
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
- void emitUnits(const MCSymbol *ASectionSym);
+ void emitUnits(bool UseOffsets);
/// \brief Emit a set of abbreviations to the specific section.
- void emitAbbrevs(const MCSection *);
+ void emitAbbrevs(MCSection *);
/// \brief Emit all of the strings to the section given.
- void emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr);
+ void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr);
/// \brief Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; }
- void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ /// \returns false if the variable was merged with a previous one.
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
return ScopeVariables;
@@ -106,10 +103,10 @@ public:
}
void insertDIE(const MDNode *TypeMD, DIE *Die) {
- MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
}
DIE *getDIE(const MDNode *TypeMD) {
- return MDTypeNodeToDieMap.lookup(TypeMD);
+ return DITypeNodeToDieMap.lookup(TypeMD);
}
};
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index d76b66c..2066f74 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -8,63 +8,67 @@
//===----------------------------------------------------------------------===//
#include "DwarfStringPool.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
-static std::pair<MCSymbol *, unsigned> &
-getEntry(AsmPrinter &Asm,
- StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool,
- StringRef Prefix, StringRef Str) {
- std::pair<MCSymbol *, unsigned> &Entry = Pool[Str];
- if (!Entry.first) {
- Entry.second = Pool.size() - 1;
- Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
- }
- return Entry;
-}
+DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
+ StringRef Prefix)
+ : Pool(A), Prefix(Prefix),
+ ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
-MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) {
- return getEntry(Asm, Pool, Prefix, Str).first;
-}
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ if (I.second) {
+ auto &Entry = I.first->second;
+ Entry.Index = Pool.size() - 1;
+ Entry.Offset = NumBytes;
+ Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
-unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) {
- return getEntry(Asm, Pool, Prefix, Str).second;
+ NumBytes += Str.size() + 1;
+ assert(NumBytes > Entry.Offset && "Unexpected overflow");
+ }
+ return EntryRef(*I.first);
}
-void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection) {
+void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection) {
if (Pool.empty())
return;
// Start the dwarf str section.
- Asm.OutStreamer.SwitchSection(StrSection);
+ Asm.OutStreamer->SwitchSection(StrSection);
// Get all of the string pool entries and put them in an array by their ID so
// we can sort them.
- SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64>
- Entries(Pool.size());
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
for (const auto &E : Pool)
- Entries[E.getValue().second] = &E;
+ Entries[E.getValue().Index] = &E;
for (const auto &Entry : Entries) {
+ assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
+ "Mismatch between setting and entry");
+
// Emit a label for reference from debug information entries.
- Asm.OutStreamer.EmitLabel(Entry->getValue().first);
+ if (ShouldCreateSymbols)
+ Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol);
// Emit the string itself with a terminating null byte.
- Asm.OutStreamer.EmitBytes(
+ Asm.OutStreamer->AddComment("string offset=" +
+ Twine(Entry->getValue().Offset));
+ Asm.OutStreamer->EmitBytes(
StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
}
// If we've got an offset section go ahead and emit that now as well.
if (OffsetSection) {
- Asm.OutStreamer.SwitchSection(OffsetSection);
- unsigned offset = 0;
+ Asm.OutStreamer->SwitchSection(OffsetSection);
unsigned size = 4; // FIXME: DWARF64 is 8.
- for (const auto &Entry : Entries) {
- Asm.OutStreamer.EmitIntValue(offset, size);
- offset += Entry->getKeyLength() + 1;
- }
+ for (const auto &Entry : Entries)
+ Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
}
}
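
getEntry now computes each string's .debug_str offset at insertion time, which is what lets emit() write the offset table directly from the entries instead of re-walking string lengths, and lets it skip label creation entirely when cross-section relocations are unavailable. A standalone sketch of the bookkeeping, with a plain std::map standing in for the allocator-backed StringMap:

#include <cstdint>
#include <map>
#include <string>

struct Entry { unsigned Index; uint64_t Offset; };

struct StringPool {
  std::map<std::string, Entry> Pool;
  uint64_t NumBytes = 0;

  const Entry &getEntry(const std::string &Str) {
    auto I = Pool.insert({Str, Entry()});
    if (I.second) {                    // first occurrence of this string
      I.first->second.Index = unsigned(Pool.size() - 1);
      I.first->second.Offset = NumBytes;
      NumBytes += Str.size() + 1;      // account for the terminating NUL
    }
    return I.first->second;
  }
};
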
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 63e3412..93a1684 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -11,12 +11,13 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
#include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/Support/Allocator.h"
#include <utility>
namespace llvm {
+class AsmPrinter;
class MCSymbol;
class MCSection;
class StringRef;
@@ -25,25 +26,24 @@ class StringRef;
// A String->Symbol mapping of strings used by indirect
// references.
class DwarfStringPool {
- StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool;
+ typedef DwarfStringPoolEntry EntryTy;
+ StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
+ unsigned NumBytes = 0;
+ bool ShouldCreateSymbols;
public:
- DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix)
- : Pool(A), Prefix(Prefix) {}
+ typedef DwarfStringPoolEntryRef EntryRef;
- void emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr);
+ DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
- /// \brief Returns an entry into the string pool with the given
- /// string text.
- MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str);
-
- /// \brief Returns the index into the string pool with the given
- /// string text.
- unsigned getIndex(AsmPrinter &Asm, StringRef Str);
+ void emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection = nullptr);
bool empty() const { return Pool.empty(); }
+
+ /// Get a reference to an entry in the string pool.
+ EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
};
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 455258e..04836c6 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -17,6 +17,7 @@
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,23 +44,28 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+ DIELoc &DIE)
+ : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion()),
+ AP(AP), DU(DU), DIE(DIE) {}
+
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int Value) {
+void DIEDwarfExpression::EmitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(unsigned Value) {
+void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
- return MachineReg == getTRI()->getFrameRegister(*AP.MF);
+ return MachineReg == TRI.getFrameRegister(*AP.MF);
}
-
-/// Unit - Unit constructor.
-DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
- AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag,
+ const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU)
: UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
assert(UnitTag == dwarf::DW_TAG_compile_unit ||
@@ -76,7 +82,6 @@ DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
}
-/// ~Unit - Destructor for compile unit.
DwarfUnit::~DwarfUnit() {
for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
DIEBlocks[j]->~DIEBlock();
@@ -84,15 +89,11 @@ DwarfUnit::~DwarfUnit() {
DIELocs[j]->~DIELoc();
}
-/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
-/// information entry.
DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) {
DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
return Value;
}
-/// getDefaultLowerBound - Return the default lower bound for an array. If the
-/// DWARF version doesn't handle the language, return -1.
int64_t DwarfUnit::getDefaultLowerBound() const {
switch (getLanguage()) {
default:
@@ -130,13 +131,37 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
if (dwarf::DWARF_VERSION >= 4)
return 1;
break;
+
+ // The languages below have valid values only if the DWARF version >= 5.
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_Rust:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_Swift:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Modula3:
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 1;
+ break;
}
return -1;
}
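
The new cases give DWARF 5 consumers correct implicit array bounds for these languages: 0-based for the C-family additions, 1-based for the Fortran dialects, Modula-3, and Julia. Tracking the default matters because a DW_AT_lower_bound that matches it can be omitted from subrange DIEs. A hedged sketch of that consumer-side check (the helper name is illustrative, not part of this patch):

#include <cstdint>

// Illustrative only (constructSubrangeDIE-style logic, not from this
// hunk): report whether DW_AT_lower_bound must be emitted explicitly.
bool needsExplicitLowerBound(int64_t LowerBound, int64_t DefaultLB) {
  // -1 means "no known default for this language": always emit.
  return DefaultLB == -1 || LowerBound != DefaultLB;
}
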
/// Check whether the DIE for this MDNode can be shared across CUs.
-static bool isShareableAcrossCUs(DIDescriptor D) {
+static bool isShareableAcrossCUs(const DINode *D) {
// When the MDNode can be part of the type system, the DIE can be shared
// across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
@@ -144,25 +169,18 @@ static bool isShareableAcrossCUs(DIDescriptor D) {
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
- return (D.isType() ||
- (D.isSubprogram() && !DISubprogram(D).isDefinition())) &&
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
!GenerateDwarfTypeUnits;
}
-/// getDIE - Returns the debug information entry map slot for the
-/// specified debug variable. We delegate the request to DwarfDebug
-/// when the DIE for this MDNode can be shared across CUs. The mappings
-/// will be kept in DwarfDebug for shareable DIEs.
-DIE *DwarfUnit::getDIE(DIDescriptor D) const {
+DIE *DwarfUnit::getDIE(const DINode *D) const {
if (isShareableAcrossCUs(D))
return DU->getDIE(D);
return MDNodeToDieMap.lookup(D);
}
-/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
-/// when the DIE for this MDNode can be shared across CUs. The mappings
-/// will be kept in DwarfDebug for shareable DIEs.
-void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
+void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
if (isShareableAcrossCUs(Desc)) {
DU->insertDIE(Desc, D);
return;
@@ -170,7 +188,6 @@ void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
MDNodeToDieMap.insert(std::make_pair(Desc, D));
}
-/// addFlag - Add a flag that is true.
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
@@ -178,8 +195,6 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
}
-/// addUInt - Add an unsigned integer attribute data and value.
-///
void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
@@ -193,8 +208,6 @@ void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
-/// addSInt - Add an signed integer attribute data and value.
-///
void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
@@ -208,43 +221,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
-/// addString - Add a string attribute data and value. We always emit a
-/// reference to the string pool instead of immediate strings so that DIEs have
-/// more predictable sizes. In the case of split dwarf we emit an index
-/// into another table which gets us the static offset into the string
-/// table.
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
- if (!isDwoUnit())
- return addLocalString(Die, Attribute, String);
-
- addIndexedString(Die, Attribute, String);
-}
-
-void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- unsigned idx = DU->getStringPool().getIndex(*Asm, String);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
-}
-
-/// addLocalString - Add a string attribute data and value. This is guaranteed
-/// to be in the local string pool instead of indirected.
-void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
- DIEValue *Value;
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- Value = new (DIEValueAllocator) DIELabel(Symb);
- else
- Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
+ Die.addValue(Attribute,
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
+ new (DIEValueAllocator)
+ DIEString(DU->getStringPool().getEntry(*Asm, String)));
}
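
With the pool handing back a full entry reference, the three string helpers (addIndexedString, addLocalString, and the dispatching addString) collapse into this single path; only the DWARF form distinguishes a split-DWARF DWO unit from a normal one. A standalone sketch of the two renderings, with PoolEntry standing in for DwarfStringPoolEntryRef:

#include <cstdint>

// Stand-in for DwarfStringPoolEntryRef: an index for split-DWARF units,
// a byte offset (or relocated label) for everything else.
struct PoolEntry { unsigned Index; uint64_t Offset; };

// DW_FORM_GNU_str_index emits the pool index, resolved through
// .debug_str_offsets; DW_FORM_strp emits the offset into .debug_str.
uint64_t renderString(bool IsDwoUnit, const PoolEntry &E) {
  return IsDwoUnit ? E.Index : E.Offset;
}
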
-/// addLabel - Add a Dwarf label attribute data and value.
-///
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label) {
DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
@@ -255,8 +239,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
-/// addSectionOffset - Add an offset into a section attribute data and value.
-///
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
if (DD->getDwarfVersion() >= 4)
@@ -270,9 +252,6 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirNam
: getCU().getOrCreateSourceID(FileName, DirName);
}
-/// addOpAddress - Add a dwarf op address data and value using the
-/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
if (!DD->useSplitDwarf()) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -290,8 +269,6 @@ void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
Die.addValue(Attribute, dwarf::DW_FORM_data4, Value);
}
-/// addDIEEntry - Add a DIE attribute data and value.
-///
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
addDIEEntry(Die, Attribute, createDIEEntry(Entry));
}
@@ -321,9 +298,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
Entry);
}
-/// Create a DIE with the given Tag, add the DIE to its parent, and
-/// call insertDIE if MD is not null.
-DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
assert(Tag != dwarf::DW_TAG_auto_variable &&
Tag != dwarf::DW_TAG_arg_variable);
Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
@@ -333,8 +308,6 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
return Die;
}
-/// addBlock - Add block data.
-///
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
Loc->ComputeSize(Asm);
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
@@ -348,8 +321,6 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
Die.addValue(Attribute, Block->BestForm(), Block);
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
StringRef Directory) {
if (Line == 0)
@@ -361,58 +332,41 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) {
- assert(V.isVariable());
+void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
+ assert(V);
- addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(),
- V.getContext().getDirectory());
+ addSourceLine(Die, V->getLine(), V->getScope()->getFilename(),
+ V->getScope()->getDirectory());
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) {
- assert(G.isGlobalVariable());
+void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
+ assert(G);
- addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory());
+ addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory());
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) {
- assert(SP.isSubprogram());
+void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
+ assert(SP);
- addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory());
+ addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory());
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) {
- assert(Ty.isType());
+void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
+ assert(Ty);
- addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) {
- assert(Ty.isObjCProperty());
+void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
+ assert(Ty);
- DIFile File = Ty.getFile();
- addSourceLine(Die, Ty.getLineNumber(), File.getFilename(),
- File.getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
}
-/// addSourceLine - Add location information to specified debug information
-/// entry.
-void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
- assert(NS.Verify());
-
- addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
+void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
+ addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
}
-/// addRegisterOp - Add register operand.
bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
unsigned SizeInBits, unsigned OffsetInBits) {
DIEDwarfExpression Expr(*Asm, *this, TheDie);
@@ -420,7 +374,6 @@ bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
return true;
}
-/// addRegisterOffset - Add register offset.
bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
DIEDwarfExpression Expr(*Asm, *this, TheDie);
@@ -481,39 +434,31 @@ bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
That is what this function does. */
-/// addBlockByrefAddress - Start with the address based on the location
-/// provided, and generate the DWARF information necessary to find the
-/// actual Block variable (navigating the Block struct) based on the
-/// starting location. Add the DWARF information to the die. For
-/// more information, read large comment just above here.
-///
void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
- DIType Ty = DV.getType();
- DIType TmpTy = Ty;
- uint16_t Tag = Ty.getTag();
+ const DIType *Ty = DV.getType();
+ const DIType *TmpTy = Ty;
+ uint16_t Tag = Ty->getTag();
bool isPointer = false;
StringRef varName = DV.getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy(Ty);
- TmpTy = resolve(DTy.getTypeDerivedFrom());
+ auto *DTy = cast<DIDerivedType>(Ty);
+ TmpTy = resolve(DTy->getBaseType());
isPointer = true;
}
- DICompositeType blockStruct(TmpTy);
-
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DIArray Fields = blockStruct.getElements();
- DIDerivedType varField;
- DIDerivedType forwardingField;
+ DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements();
+ const DIDerivedType *varField = nullptr;
+ const DIDerivedType *forwardingField = nullptr;
- for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
- DIDerivedType DT(Fields.getElement(i));
- StringRef fieldName = DT.getName();
+ for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
+ auto *DT = cast<DIDerivedType>(Fields[i]);
+ StringRef fieldName = DT->getName();
if (fieldName == "__forwarding")
forwardingField = DT;
else if (fieldName == varName)
@@ -521,8 +466,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
}
// Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3;
- unsigned varFieldOffset = varField.getOffsetInBits() >> 2;
+ unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
+ unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -567,10 +512,9 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
}
/// Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
- DIDerivedType DTy(Ty);
- if (DTy.isDerivedType()) {
- dwarf::Tag T = (dwarf::Tag)Ty.getTag();
+static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
+ if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
// Encode pointer constants as unsigned bytes. This is used at least for
// null pointer constant emission.
// (Pieces of) aggregate types that get hacked apart by SROA may also be
@@ -584,64 +528,64 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
T == dwarf::DW_TAG_ptr_to_member_type ||
T == dwarf::DW_TAG_reference_type ||
T == dwarf::DW_TAG_rvalue_reference_type ||
- T == dwarf::DW_TAG_structure_type)
+ T == dwarf::DW_TAG_structure_type ||
+ T == dwarf::DW_TAG_union_type)
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
T == dwarf::DW_TAG_restrict_type ||
T == dwarf::DW_TAG_enumeration_type);
- if (DITypeRef Deriv = DTy.getTypeDerivedFrom())
+ if (DITypeRef Deriv = DTy->getBaseType())
return isUnsignedDIType(DD, DD->resolve(Deriv));
// FIXME: Enums without a fixed underlying type have unknown signedness
// here, leading to incorrectly emitted constants.
- assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type);
+ assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
return false;
}
- DIBasicType BTy(Ty);
- assert(BTy.isBasicType());
- unsigned Encoding = BTy.getEncoding();
+ auto *BTy = cast<DIBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
assert((Encoding == dwarf::DW_ATE_unsigned ||
Encoding == dwarf::DW_ATE_unsigned_char ||
Encoding == dwarf::DW_ATE_signed ||
Encoding == dwarf::DW_ATE_signed_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- (Ty.getTag() == dwarf::DW_TAG_unspecified_type &&
- Ty.getName() == "decltype(nullptr)")) &&
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
- return (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- Ty.getTag() == dwarf::DW_TAG_unspecified_type);
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
}
/// If this type is derived from a base type then return base type size.
-static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
- unsigned Tag = Ty.getTag();
+static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) {
+ unsigned Tag = Ty->getTag();
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
Tag != dwarf::DW_TAG_restrict_type)
- return Ty.getSizeInBits();
+ return Ty->getSizeInBits();
- DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
+ auto *BaseType = DD->resolve(Ty->getBaseType());
- assert(BaseType.isValid());
+ assert(BaseType && "Unexpected invalid base type");
// If this is a derived type, go ahead and get the base type, unless it's a
// reference then it's just the size of the field. Pointer types have no need
// of this since they're a different type of qualification on the type.
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
- BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
- return Ty.getSizeInBits();
+ if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty->getSizeInBits();
- if (BaseType.isDerivedType())
- return getBaseTypeSize(DD, DIDerivedType(BaseType));
+ if (auto *DT = dyn_cast<DIDerivedType>(BaseType))
+ return getBaseTypeSize(DD, DT);
- return BaseType.getSizeInBits();
+ return BaseType->getSizeInBits();
}
-/// addConstantFPValue - Add constant value entry in variable DIE.
void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
assert(MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
@@ -664,20 +608,18 @@ void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
-/// addConstantFPValue - Add constant value entry in variable DIE.
void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
}
-/// addConstantValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) {
+void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
+ const DIType *Ty) {
addConstantValue(Die, CI->getValue(), Ty);
}
-/// addConstantValue - Add constant value entry in variable DIE.
void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
- DIType Ty) {
+ const DIType *Ty) {
assert(MO.isImm() && "Invalid machine operand!");
addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
@@ -690,11 +632,10 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
}
-void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) {
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
addConstantValue(Die, Val, isUnsignedDIType(DD, Ty));
}
-// addConstantValue - Add constant value entry in variable DIE.
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
@@ -724,67 +665,67 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
-/// addTemplateParams - Add template parameters into buffer.
-void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
+ if (!LinkageName.empty())
+ addString(Die,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+}
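
This new helper centralizes the choice between the standard DW_AT_linkage_name (DWARF 4 and later) and the pre-standard DW_AT_MIPS_linkage_name, and silently drops empty names. Call sites reduce to a guarded one-liner, as in the subprogram hunk further down:

// From applySubprogramDefinitionAttributes below: emit the linkage
// name only when the declaration DIE did not already carry it.
if (DeclLinkageName.empty())
  addLinkageName(SPDie, LinkageName);
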
+
+void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
// Add template parameters.
- for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
- DIDescriptor Element = TParams.getElement(i);
- if (Element.isTemplateTypeParameter())
- constructTemplateTypeParameterDIE(Buffer,
- DITemplateTypeParameter(Element));
- else if (Element.isTemplateValueParameter())
- constructTemplateValueParameterDIE(Buffer,
- DITemplateValueParameter(Element));
+ for (const auto *Element : TParams) {
+ if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element))
+ constructTemplateTypeParameterDIE(Buffer, TTP);
+ else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element))
+ constructTemplateValueParameterDIE(Buffer, TVP);
}
}
-/// getOrCreateContextDIE - Get context owner's DIE.
-DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) {
- if (!Context || Context.isFile())
+DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (!Context || isa<DIFile>(Context))
return &getUnitDie();
- if (Context.isType())
- return getOrCreateTypeDIE(DIType(Context));
- if (Context.isNameSpace())
- return getOrCreateNameSpace(DINameSpace(Context));
- if (Context.isSubprogram())
- return getOrCreateSubprogramDIE(DISubprogram(Context));
+ if (auto *T = dyn_cast<DIType>(Context))
+ return getOrCreateTypeDIE(T);
+ if (auto *NS = dyn_cast<DINamespace>(Context))
+ return getOrCreateNameSpace(NS);
+ if (auto *SP = dyn_cast<DISubprogram>(Context))
+ return getOrCreateSubprogramDIE(SP);
return getDIE(Context);
}
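
This dispatch is representative of the whole migration: DIDescriptor's predicate methods (isType(), isNameSpace(), and so on) give way to ordinary LLVM RTTI over typed node pointers, so each branch receives an already-typed value with no re-wrapping. A self-contained analogue using standard C++ RTTI in place of isa<>/dyn_cast<>:

struct DIScope { virtual ~DIScope() = default; };  // local stand-ins,
struct DIType : DIScope {};                        // not the real LLVM
struct DINamespace : DIScope {};                   // node classes
struct DISubprogram : DIScope {};

const char *classify(const DIScope *Context) {
  if (!Context) return "unit";
  if (dynamic_cast<const DIType *>(Context)) return "type";
  if (dynamic_cast<const DINamespace *>(Context)) return "namespace";
  if (dynamic_cast<const DISubprogram *>(Context)) return "subprogram";
  return "other";                                  // falls back to getDIE()
}
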
-DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
- DIScope Context = resolve(Ty.getContext());
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+ auto *Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
- constructTypeDIE(TyDIE, Ty);
+ constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
-/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
-/// given DIType.
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
- DIType Ty(TyNode);
- assert(Ty.isType());
- assert(Ty == resolve(Ty.getRef()) &&
+ auto *Ty = cast<DIType>(TyNode);
+ assert(Ty == resolve(Ty->getRef()) &&
"type was not uniqued, possible ODR violation.");
// DW_TAG_restrict_type is not supported in DWARF2
- if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
- return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom()));
+ if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
+ return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIScope Context = resolve(Ty.getContext());
+ auto *Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
@@ -792,50 +733,49 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
return TyDIE;
// Create new type.
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
- if (Ty.isBasicType())
- constructTypeDIE(TyDIE, DIBasicType(Ty));
- else if (Ty.isCompositeType()) {
- DICompositeType CTy(Ty);
- if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
- if (MDString *TypeId = CTy.getIdentifier()) {
+ if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ constructTypeDIE(TyDIE, STy);
+ else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (GenerateDwarfTypeUnits && !Ty->isForwardDecl())
+ if (MDString *TypeId = CTy->getRawIdentifier()) {
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
// Skip updating the accelerator tables since this is not the full type.
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
} else {
- assert(Ty.isDerivedType() && "Unknown kind of DIType");
- constructTypeDIE(TyDIE, DIDerivedType(Ty));
+ constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
}
return &TyDIE;
}
-void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
- const DIE &TyDIE) {
- if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
+ const DIType *Ty, const DIE &TyDIE) {
+ if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = 0;
- if (Ty.isCompositeType()) {
- DICompositeType CT(Ty);
+ if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
- IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
+ IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(Ty.getName(), TyDIE, Flags);
+ DD->addAccelType(Ty->getName(), TyDIE, Flags);
- if (!Context || Context.isCompileUnit() || Context.isFile() ||
- Context.isNameSpace())
+ if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
-/// addType - Add a new type attribute to the specified entity.
-void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
+void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
+ dwarf::Attribute Attribute) {
assert(Ty && "Trying to add a type that doesn't exist?");
// Check for pre-existence.
@@ -855,12 +795,7 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
addDIEEntry(Entity, Attribute, Entry);
}
-/// getParentContextString - Walks the metadata parent chain in a language
-/// specific manner (using the compile unit language) and returns
-/// it as a string. This is done at the metadata level because DIEs may
-/// not currently have been added to the parent context and walking the
-/// DIEs looking for names is more expensive than walking the metadata.
-std::string DwarfUnit::getParentContextString(DIScope Context) const {
+std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
if (!Context)
return "";
@@ -869,11 +804,11 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
return "";
std::string CS;
- SmallVector<DIScope, 1> Parents;
- while (!Context.isCompileUnit()) {
+ SmallVector<const DIScope *, 1> Parents;
+ while (!isa<DICompileUnit>(Context)) {
Parents.push_back(Context);
- if (Context.getContext())
- Context = resolve(Context.getContext());
+ if (Context->getScope())
+ Context = resolve(Context->getScope());
else
// Structure, etc types will have a NULL context if they're at the top
// level.
@@ -882,12 +817,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(),
- E = Parents.rend();
- I != E; ++I) {
- DIScope Ctx = *I;
- StringRef Name = Ctx.getName();
- if (Name.empty() && Ctx.isNameSpace())
+ for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
+ const DIScope *Ctx = *I;
+ StringRef Name = Ctx->getName();
+ if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
@@ -897,34 +830,32 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
return CS;
}
-/// constructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
// Get core information.
- StringRef Name = BTy.getName();
+ StringRef Name = BTy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// An unspecified type only has a name attribute.
- if (BTy.getTag() == dwarf::DW_TAG_unspecified_type)
+ if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
+ BTy->getEncoding());
- uint64_t Size = BTy.getSizeInBits() >> 3;
+ uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
-/// constructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Get core information.
- StringRef Name = DTy.getName();
- uint64_t Size = DTy.getSizeInBits() >> 3;
+ StringRef Name = DTy->getName();
+ uint64_t Size = DTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
- DIType FromTy = resolve(DTy.getTypeDerivedFrom());
+ const DIType *FromTy = resolve(DTy->getBaseType());
if (FromTy)
addType(Buffer, FromTy);
@@ -938,35 +869,62 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
- *getOrCreateTypeDIE(resolve(DTy.getClassType())));
+ addDIEEntry(
+ Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType())));
// Add source line info if available and TyDesc is not a forward declaration.
- if (!DTy.isForwardDecl())
+ if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
}
-/// constructSubprogramArguments - Construct function argument DIEs.
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) {
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIType Ty = resolve(Args.getElement(i));
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
+ for (unsigned i = 1, N = Args.size(); i < N; ++i) {
+ const DIType *Ty = resolve(Args[i]);
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, Ty);
- if (Ty.isArtificial())
+ if (Ty->isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
}
-/// constructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
+ // Add return type. A void return won't have a type.
+ auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
+ if (Elements.size())
+ if (auto RTy = resolve(Elements[0]))
+ addType(Buffer, RTy);
+
+ bool isPrototyped = true;
+ if (Elements.size() == 2 && !Elements[1])
+ isPrototyped = false;
+
+ constructSubprogramArguments(Buffer, Elements);
+
+ // Add prototype flag if we're dealing with a C language and the function has
+ // been prototyped.
+ uint16_t Language = getLanguage();
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(Buffer, dwarf::DW_AT_prototyped);
+
+ if (CTy->isLValueReference())
+ addFlag(Buffer, dwarf::DW_AT_reference);
+
+ if (CTy->isRValueReference())
+ addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
+}
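
Hoisting DW_TAG_subroutine_type out of the composite-type switch (it is deleted from that switch in the next hunk) gives subroutine types their own overload without changing the encoding of the type array: element 0 is the return type, and null entries mean "unspecified". A minimal sketch of that convention, with a null pointer standing in for an unresolved type reference:

#include <vector>

// Type-array convention used above: element 0 is the return type (null
// for void); a trailing null parameter entry encodes
// DW_TAG_unspecified_parameters, and a signature whose only entry
// after the return type is that null is unprototyped.
bool isPrototyped(const std::vector<const void *> &Elements) {
  return !(Elements.size() == 2 && !Elements[1]);
}
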
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
- StringRef Name = CTy.getName();
+ StringRef Name = CTy->getName();
- uint64_t Size = CTy.getSizeInBits() >> 3;
+ uint64_t Size = CTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
switch (Tag) {
@@ -976,82 +934,39 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
case dwarf::DW_TAG_enumeration_type:
constructEnumTypeDIE(Buffer, CTy);
break;
- case dwarf::DW_TAG_subroutine_type: {
- // Add return type. A void return won't have a type.
- DITypeArray Elements = DISubroutineType(CTy).getTypeArray();
- DIType RTy(resolve(Elements.getElement(0)));
- if (RTy)
- addType(Buffer, RTy);
-
- bool isPrototyped = true;
- if (Elements.getNumElements() == 2 &&
- !Elements.getElement(1))
- isPrototyped = false;
-
- constructSubprogramArguments(Buffer, Elements);
-
- // Add prototype flag if we're dealing with a C language and the
- // function has been prototyped.
- uint16_t Language = getLanguage();
- if (isPrototyped &&
- (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
- Language == dwarf::DW_LANG_ObjC))
- addFlag(Buffer, dwarf::DW_AT_prototyped);
-
- if (CTy.isLValueReference())
- addFlag(Buffer, dwarf::DW_AT_reference);
-
- if (CTy.isRValueReference())
- addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
- } break;
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Add elements to structure type.
- DIArray Elements = CTy.getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.isSubprogram())
- getOrCreateSubprogramDIE(DISubprogram(Element));
- else if (Element.isDerivedType()) {
- DIDerivedType DDTy(Element);
- if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ DINodeArray Elements = CTy->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element))
+ getOrCreateSubprogramDIE(SP);
+ else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
- addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()),
- dwarf::DW_AT_friend);
- } else if (DDTy.isStaticMember()) {
+ addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+ } else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else {
constructMemberDIE(Buffer, DDTy);
}
- } else if (Element.isObjCProperty()) {
- DIObjCProperty Property(Element);
- DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer);
- StringRef PropertyName = Property.getObjCPropertyName();
+ } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
+ DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
+ StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- if (Property.getType())
- addType(ElemDie, Property.getType());
+ if (Property->getType())
+ addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
- StringRef GetterName = Property.getObjCPropertyGetterName();
+ StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
- StringRef SetterName = Property.getObjCPropertySetterName();
+ StringRef SetterName = Property->getSetterName();
if (!SetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
- unsigned PropertyAttributes = 0;
- if (Property.isReadOnlyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
- if (Property.isReadWriteObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
- if (Property.isAssignObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
- if (Property.isRetainObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
- if (Property.isCopyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
- if (Property.isNonAtomicObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
- if (PropertyAttributes)
+ if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
PropertyAttributes);
@@ -1060,28 +975,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Entry = createDIEEntry(ElemDie);
insertDIEEntry(Element, Entry);
}
- } else
- continue;
+ }
}
- if (CTy.isAppleBlockExtension())
+ if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
- DICompositeType ContainingType(resolve(CTy.getContainingType()));
- if (ContainingType)
+ if (auto *ContainingType =
+ dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder())))
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
- if (CTy.isObjcClassComplete())
+ if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add template parameters to a class, structure or union types.
// FIXME: The support isn't in the metadata for this yet.
if (Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- addTemplateParams(Buffer, CTy.getTemplateParams());
+ addTemplateParams(Buffer, CTy->getTemplateParams());
break;
}
@@ -1100,55 +1014,50 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// TODO: Do we care about size for enum forward declarations?
if (Size)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
- else if (!CTy.isForwardDecl())
+ else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
- if (CTy.isForwardDecl())
+ if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
- if (!CTy.isForwardDecl())
+ if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
// No harm in adding the runtime language to the declaration.
- unsigned RLang = CTy.getRunTimeLang();
+ unsigned RLang = CTy->getRuntimeLang();
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
}
}
-/// constructTemplateTypeParameterDIE - Construct new DIE for the given
-/// DITemplateTypeParameter.
-void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
- DITemplateTypeParameter TP) {
+void DwarfUnit::constructTemplateTypeParameterDIE(
+ DIE &Buffer, const DITemplateTypeParameter *TP) {
DIE &ParamDIE =
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists, it could be void and therefore no type.
- if (TP.getType())
- addType(ParamDIE, resolve(TP.getType()));
- if (!TP.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+ if (TP->getType())
+ addType(ParamDIE, resolve(TP->getType()));
+ if (!TP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
}
-/// constructTemplateValueParameterDIE - Construct new DIE for the given
-/// DITemplateValueParameter.
-void
-DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
- DITemplateValueParameter VP) {
- DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+void DwarfUnit::constructTemplateValueParameterDIE(
+ DIE &Buffer, const DITemplateValueParameter *VP) {
+ DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
// Add the type if there is one, template template and template parameter
// packs will not have a type.
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter)
- addType(ParamDIE, resolve(VP.getType()));
- if (!VP.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
- if (Metadata *Val = VP.getValue()) {
+ if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
+ addType(ParamDIE, resolve(VP->getType()));
+ if (!VP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
+ if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI, resolve(VP.getType()));
+ addConstantValue(ParamDIE, CI, resolve(VP->getType()));
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
@@ -1158,51 +1067,47 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
// parameter, rather than a pointer to it.
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
- } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
cast<MDString>(Val)->getString());
- } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
- assert(isa<MDNode>(Val));
- DIArray A(cast<MDNode>(Val));
- addTemplateParams(ParamDIE, A);
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ addTemplateParams(ParamDIE, cast<MDTuple>(Val));
}
}
}
-/// getOrCreateNameSpace - Create a DIE for DINameSpace.
-DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
+DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
+ DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
if (DIE *NDie = getDIE(NS))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
- StringRef Name = NS.getName();
+ StringRef Name = NS->getName();
if (!Name.empty())
- addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
DD->addAccelNamespace(Name, NDie);
- addGlobalName(Name, NDie, NS.getContext());
+ addGlobalName(Name, NDie, NS->getScope());
addSourceLine(NDie, NS);
return &NDie;
}
-/// getOrCreateSubprogramDIE - Create new DIE using SP.
-DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
+DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
- Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext()));
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
if (DIE *SPDie = getDIE(SP))
return SPDie;
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
@@ -1216,36 +1121,35 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
// Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances or not.
- if (SP.isDefinition())
+ if (SP->isDefinition())
return &SPDie;
applySubprogramAttributes(SP, SPDie);
return &SPDie;
}
-bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
+bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ if (auto *SPDecl = SP->getDeclaration()) {
DeclDie = getDIE(SPDecl);
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
- DeclLinkageName = SPDecl.getLinkageName();
+ DeclLinkageName = SPDecl->getLinkageName();
}
// Add function template parameters.
- addTemplateParams(SPDie, SP.getTemplateParams());
+ addTemplateParams(SPDie, SP->getTemplateParams());
// Add the linkage name if we have one and it isn't in the Decl.
- StringRef LinkageName = SP.getLinkageName();
+ StringRef LinkageName = SP->getLinkageName();
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
- if (!LinkageName.empty() && DeclLinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ if (DeclLinkageName.empty())
+ addLinkageName(SPDie, LinkageName);
if (!DeclDie)
return false;
@@ -1256,15 +1160,15 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
return true;
}
-void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool Minimal) {
if (!Minimal)
if (applySubprogramDefinitionAttributes(SP, SPDie))
return;
// Constructors and operators for anonymous aggregates do not have names.
- if (!SP.getName().empty())
- addString(SPDie, dwarf::DW_AT_name, SP.getName());
+ if (!SP->getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP->getName());
// Skip the rest of the attributes under -gmlt to save space.
if (Minimal)
@@ -1275,33 +1179,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
  // Add the prototype if we have a prototype and we have a C-like
  // language.
uint16_t Language = getLanguage();
- if (SP.isPrototyped() &&
+ if (SP->isPrototyped() &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- DISubroutineType SPTy = SP.getType();
- assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
+ const DISubroutineType *SPTy = SP->getType();
+ assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
"the type of a subprogram should be a subroutine");
- DITypeArray Args = SPTy.getTypeArray();
+ auto Args = SPTy->getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
- if (resolve(Args.getElement(0)))
- addType(SPDie, DIType(resolve(Args.getElement(0))));
+ if (Args.size())
+ if (auto Ty = resolve(Args[0]))
+ addType(SPDie, Ty);
- unsigned VK = SP.getVirtuality();
+ unsigned VK = SP->getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIELoc *Block = getDIELoc();
addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
ContainingTypeMap.insert(
- std::make_pair(&SPDie, resolve(SP.getContainingType())));
+ std::make_pair(&SPDie, resolve(SP->getContainingType())));
}
- if (!SP.isDefinition()) {
+ if (!SP->isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments. Do not add arguments for subprogram definition. They will
@@ -1309,41 +1214,40 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
constructSubprogramArguments(SPDie, Args);
}
- if (SP.isArtificial())
+ if (SP->isArtificial())
addFlag(SPDie, dwarf::DW_AT_artificial);
- if (!SP.isLocalToUnit())
+ if (!SP->isLocalToUnit())
addFlag(SPDie, dwarf::DW_AT_external);
- if (SP.isOptimized())
+ if (SP->isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
- if (unsigned isa = Asm->getISAEncoding()) {
+ if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
- }
- if (SP.isLValueReference())
+ if (SP->isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
- if (SP.isRValueReference())
+ if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
- if (SP.isProtected())
+ if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (SP.isPrivate())
+ else if (SP->isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else if (SP.isPublic())
+ else if (SP->isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (SP.isExplicit())
+ if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
}
-/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
+void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
+ DIE *IndexTy) {
DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
@@ -1351,9 +1255,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
- int64_t LowerBound = SR.getLo();
+ int64_t LowerBound = SR->getLowerBound();
int64_t DefaultLowerBound = getDefaultLowerBound();
- int64_t Count = SR.getCount();
+ int64_t Count = SR->getCount();
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
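
As a concrete illustration of the lower-bound guard above (a standalone sketch, not the LLVM code; the array declarations are hypothetical): the default lower bound is language-dependent, e.g. 0 for C/C++ and 1 for Fortran, and -1 means the DWARF version defines no default for the language.

    #include <cstdint>
    #include <cstdio>

    // Hedged sketch of the guard above. For C99 `int a[10]`, LowerBound is 0
    // and matches the default, so DW_AT_lower_bound is omitted; for Fortran
    // `DIMENSION A(5:14)`, LowerBound is 5 and differs from the default of 1.
    static void emitLowerBound(int64_t LowerBound, int64_t DefaultLowerBound) {
      if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
        std::printf("DW_AT_lower_bound(%lld)\n", (long long)LowerBound);
      else
        std::printf("(omitted)\n");
    }

    int main() {
      emitLowerBound(0, 0); // C/C++: omitted
      emitLowerBound(5, 1); // Fortran 5:14: DW_AT_lower_bound(5)
    }
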
@@ -1376,13 +1280,12 @@ DIE *DwarfUnit::getIndexTyDie() {
return IndexTyDie;
}
-/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
- if (CTy.isVector())
+void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isVector())
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
// Emit the element type.
- addType(Buffer, resolve(CTy.getTypeDerivedFrom()));
+ addType(Buffer, resolve(CTy->getBaseType()));
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1390,45 +1293,42 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIE *IdxTy = getIndexTyDie();
// Add subranges to array type.
- DIArray Elements = CTy.getElements();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.getTag() == dwarf::DW_TAG_subrange_type)
- constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ DINodeArray Elements = CTy->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ // FIXME: Should this really be such a loose cast?
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[i]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
}
}
-/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
-void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
- DIArray Elements = CTy.getElements();
+void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIEnumerator Enum(Elements.getElement(i));
- if (Enum.isEnumerator()) {
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]);
+ if (Enum) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
- StringRef Name = Enum.getName();
+ StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
- int64_t Value = Enum.getEnumValue();
+ int64_t Value = Enum->getValue();
addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
Value);
}
}
- DIType DTy = resolve(CTy.getTypeDerivedFrom());
+ const DIType *DTy = resolve(CTy->getBaseType());
if (DTy) {
addType(Buffer, DTy);
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
}
-/// constructContainingTypeDIEs - Construct DIEs for types that contain
-/// vtables.
void DwarfUnit::constructContainingTypeDIEs() {
- for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
- CE = ContainingTypeMap.end();
+ for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end();
CI != CE; ++CI) {
DIE &SPDie = *CI->first;
- DIDescriptor D(CI->second);
+ const DINode *D = CI->second;
if (!D)
continue;
DIE *NDie = getDIE(D);
@@ -1438,18 +1338,17 @@ void DwarfUnit::constructContainingTypeDIEs() {
}
}
-/// constructMemberDIE - Construct member DIE from DIDerivedType.
-void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
- DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer);
- StringRef Name = DT.getName();
+void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
+ DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
+ StringRef Name = DT->getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- addType(MemberDie, resolve(DT.getTypeDerivedFrom()));
+ addType(MemberDie, resolve(DT->getBaseType()));
addSourceLine(MemberDie, DT);
- if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) {
+ if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
    // For C++, virtual base classes are not at a fixed offset. Use the
    // following expression to extract the appropriate offset from the vtable.
@@ -1459,14 +1358,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
- uint64_t Size = DT.getSizeInBits();
+ uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = getBaseTypeSize(DD, DT);
uint64_t OffsetInBytes;
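
Stepping back to the virtual-base location block assembled just above: it is a small DWARF stack program (DW_OP_dup, DW_OP_deref, DW_OP_constu, DW_OP_minus, DW_OP_deref, DW_OP_plus). A hedged sketch of how a consumer would evaluate it, with a hypothetical readWord callback standing in for target-memory reads:

    #include <cstdint>
    #include <vector>

    // Illustrative only: evaluates the emitted op sequence on a toy operand
    // stack. The debugger pushes the object address before running it.
    uint64_t virtualBaseAddr(uint64_t Obj, uint64_t OffsetInBits,
                             uint64_t (*readWord)(uint64_t)) {
      std::vector<uint64_t> S{Obj};
      S.push_back(S.back());            // DW_OP_dup:   [obj, obj]
      S.back() = readWord(S.back());    // DW_OP_deref: [obj, vptr]
      S.push_back(OffsetInBits);        // DW_OP_constu:[obj, vptr, off]
      uint64_t Off = S.back(); S.pop_back();
      S.back() -= Off;                  // DW_OP_minus: [obj, vptr - off]
      S.back() = readWord(S.back());    // DW_OP_deref: [obj, stored offset]
      uint64_t Adj = S.back(); S.pop_back();
      S.back() += Adj;                  // DW_OP_plus:  base subobject address
      return S.back();
    }
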
@@ -1475,8 +1374,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
- uint64_t Offset = DT.getOffsetInBits();
- uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t Offset = DT->getOffsetInBits();
+ uint64_t AlignMask = ~(DT->getAlignInBits() - 1);
uint64_t HiMark = (Offset + FieldSize) & AlignMask;
uint64_t FieldOffset = (HiMark - FieldSize);
Offset -= FieldOffset;
@@ -1491,7 +1390,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
OffsetInBytes = FieldOffset >> 3;
} else
// This is not a bitfield.
- OffsetInBytes = DT.getOffsetInBits() >> 3;
+ OffsetInBytes = DT->getOffsetInBits() >> 3;
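
A worked instance of the bitfield arithmetic above (values are hypothetical; this is plain C++, not the DwarfUnit code): for `struct { uint32_t a : 3, b : 7; }`, member `b` has Offset = 3 bits, FieldSize = 32 (the storage unit), and 32-bit alignment.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Offset = 3, FieldSize = 32, AlignInBits = 32;
      uint64_t AlignMask = ~(AlignInBits - 1);            // ...1110'0000
      uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 35 & mask = 32
      uint64_t FieldOffset = HiMark - FieldSize;          // 32 - 32 = 0
      Offset -= FieldOffset;                              // bit offset: 3
      // DW_AT_data_member_location gets the byte offset of the storage unit;
      // the bit offset attribute is derived from Offset and target endianness.
      std::printf("byte offset %llu, bit offset within unit %llu\n",
                  (unsigned long long)(FieldOffset >> 3),
                  (unsigned long long)Offset);
    }
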
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc();
@@ -1503,49 +1402,48 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
OffsetInBytes);
}
- if (DT.isProtected())
+ if (DT->isProtected())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (DT.isPrivate())
+ else if (DT->isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else if (DT.isPublic())
+ else if (DT->isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (DT.isVirtual())
+ if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
- if (MDNode *PNode = DT.getObjCProperty())
+ if (MDNode *PNode = DT->getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
PropertyDie);
- if (DT.isArtificial())
+ if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
}
-/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member.
-DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
- if (!DT.Verify())
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
+ if (!DT)
return nullptr;
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext()));
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
if (DIE *StaticMemberDIE = getDIE(DT))
return StaticMemberDIE;
- DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
+ DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
- DIType Ty = resolve(DT.getTypeDerivedFrom());
+ const DIType *Ty = resolve(DT->getBaseType());
- addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
addSourceLine(StaticMemberDIE, DT);
addFlag(StaticMemberDIE, dwarf::DW_AT_external);
@@ -1553,57 +1451,59 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
- if (DT.isProtected())
+ if (DT->isProtected())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
- else if (DT.isPrivate())
+ else if (DT->isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else if (DT.isPublic())
+ else if (DT->isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
- if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
- if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfUnit::emitHeader(bool UseOffsets) {
// Emit size of content not including length itself
- Asm->OutStreamer.AddComment("Length of Unit");
+ Asm->OutStreamer->AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
- Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->OutStreamer->AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
- Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
- if (ASectionSym)
- Asm->EmitSectionOffset(ASectionSym, ASectionSym);
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (!UseOffsets)
+ Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol());
else
- // Use a constant value when no symbol is provided.
Asm->EmitInt32(0);
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
+
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
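
For reference, the four fields streamed above form the fixed DWARF32 v2–v4 compile-unit header. A hedged sketch of the layout (a packed struct stands in for the streamed bytes; real output uses a relocation for the abbrev offset unless UseOffsets is set, and a type unit appends further fields, shown later):

    #include <cstdint>

    #pragma pack(push, 1)
    struct CompileUnitHeader {
      uint32_t UnitLength;   // "Length of Unit": size after this field
      uint16_t Version;      // "DWARF version number"
      uint32_t AbbrevOffset; // "Offset Into Abbrev. Section" (0 w/ UseOffsets)
      uint8_t  AddressSize;  // "Address Size (in bytes)"
    };
    #pragma pack(pop)

    static_assert(sizeof(CompileUnitHeader) == 11, "4-byte length + 7 bytes");
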
-void DwarfUnit::initSection(const MCSection *Section) {
+void DwarfUnit::initSection(MCSection *Section) {
assert(!this->Section);
this->Section = Section;
}
-void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
- DwarfUnit::emitHeader(ASectionSym);
- Asm->OutStreamer.AddComment("Type Signature");
- Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature));
- Asm->OutStreamer.AddComment("Type DIE Offset");
+void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ DwarfUnit::emitHeader(UseOffsets);
+ Asm->OutStreamer->AddComment("Type Signature");
+ Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
+ Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
- Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0,
- sizeof(Ty->getOffset()));
+ Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0,
+ sizeof(Ty->getOffset()));
}
bool DwarfTypeUnit::isDwoUnit() const {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7a5e47d..0d01a9e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -63,15 +63,15 @@ public:
};
//===----------------------------------------------------------------------===//
-/// Unit - This dwarf writer support class manages information associated
-/// with a source file.
+/// This dwarf writer support class manages information associated with a
+/// source file.
class DwarfUnit {
protected:
- /// UniqueID - a numeric ID unique among all CUs in the module
+ /// A numeric ID unique among all CUs in the module
unsigned UniqueID;
- /// Node - MDNode for the compile unit.
- DICompileUnit CUNode;
+ /// MDNode for the compile unit.
+ const DICompileUnit *CUNode;
/// Unit debug information entry.
DIE UnitDie;
@@ -79,60 +79,62 @@ protected:
/// Offset of the UnitDie from beginning of debug info section.
unsigned DebugInfoOffset;
- /// Asm - Target of Dwarf emission.
+ /// Target of Dwarf emission.
AsmPrinter *Asm;
// Holders for some common dwarf information.
DwarfDebug *DD;
DwarfFile *DU;
- /// IndexTyDie - An anonymous type for index type. Owned by UnitDie.
+ /// An anonymous type for index type. Owned by UnitDie.
DIE *IndexTyDie;
- /// MDNodeToDieMap - Tracks the mapping of unit level debug information
- /// variables to debug information entries.
+ /// Tracks the mapping of unit level debug information variables to debug
+ /// information entries.
DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
- /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
- /// descriptors to debug information entries using a DIEEntry proxy.
+ /// Tracks the mapping of unit level debug information descriptors to debug
+ /// information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// DIEBlocks - A list of all the DIEBlocks in use.
+ /// A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
-
- /// DIELocs - A list of all the DIELocs in use.
+
+ /// A list of all the DIELocs in use.
std::vector<DIELoc *> DIELocs;
- /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
- /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// This map is used to keep track of subprogram DIEs that need
+ /// DW_AT_containing_type attribute. This attribute points to a DIE that
/// corresponds to the MDNode mapped with the subprogram DIE.
- DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ DenseMap<DIE *, const DINode *> ContainingTypeMap;
- // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ // All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently.
+ // A preallocated DIEValue because 1 is used frequently.
DIEInteger *DIEIntegerOne;
/// The section this unit will be emitted in.
- const MCSection *Section;
+ MCSection *Section;
- DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
+ DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- void initSection(const MCSection *Section);
-
- /// Add a string attribute data and value.
+ /// \brief Add a string attribute data and value.
+ ///
+ /// This is guaranteed to be in the local string pool instead of indirected.
void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie);
+ bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
public:
virtual ~DwarfUnit();
- const MCSection *getSection() const {
+ void initSection(MCSection *Section);
+
+ MCSection *getSection() const {
assert(Section);
return Section;
}
@@ -140,119 +142,133 @@ public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
unsigned getUniqueID() const { return UniqueID; }
- uint16_t getLanguage() const { return CUNode.getLanguage(); }
- DICompileUnit getCUNode() const { return CUNode; }
+ uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
+ const DICompileUnit *getCUNode() const { return CUNode; }
DIE &getUnitDie() { return UnitDie; }
unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
- /// hasContent - Return true if this compile unit has something to write out.
+ /// \brief Return true if this compile unit has something to write out.
bool hasContent() const { return !UnitDie.getChildren().empty(); }
- /// getParentContextString - Get a string containing the language specific
- /// context for a global name.
- std::string getParentContextString(DIScope Context) const;
+ /// \brief Get string containing language specific context for a global name.
+ ///
+ /// Walks the metadata parent chain in a language specific manner (using the
+ /// compile unit language) and returns it as a string. This is done at the
+ /// metadata level because DIEs may not currently have been added to the
+ /// parent context and walking the DIEs looking for names is more expensive
+ /// than walking the metadata.
+ std::string getParentContextString(const DIScope *Context) const;
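
As a concrete (hypothetical) example of the context string: for a type nested in namespace `ns` inside class `Outer`, the returned prefix is `"ns::Outer::"`, so a global-name entry for `Inner` reads `ns::Outer::Inner`. A minimal sketch of the shape of that walk, with the scope names precomputed:

    #include <string>
    #include <vector>

    // Hedged sketch: the real routine walks DIScope metadata and is
    // language-aware; here the chain is just a list of names, outermost first.
    std::string parentContextString(const std::vector<std::string> &Scopes) {
      std::string Out;
      for (const std::string &S : Scopes)
        Out += S + "::";
      return Out; // {"ns", "Outer"} -> "ns::Outer::"
    }
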
/// Add a new global name to the compile unit.
- virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {}
+ virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) {
+ }
/// Add a new global type to the compile unit.
- virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {}
+ virtual void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {}
- /// addAccelNamespace - Add a new name to the namespace accelerator table.
+ /// \brief Add a new name to the namespace accelerator table.
void addAccelNamespace(StringRef Name, const DIE &Die);
- /// getDIE - Returns the debug information entry map slot for the
- /// specified debug variable. We delegate the request to DwarfDebug
- /// when the MDNode can be part of the type system, since DIEs for
- /// the type system can be shared across CUs and the mappings are
- /// kept in DwarfDebug.
- DIE *getDIE(DIDescriptor D) const;
+ /// \brief Returns the DIE map slot for the specified debug variable.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ DIE *getDIE(const DINode *D) const;
- /// getDIELoc - Returns a fresh newly allocated DIELoc.
+ /// \brief Returns a fresh newly allocated DIELoc.
DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); }
- /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
- /// when the MDNode can be part of the type system, since DIEs for
- /// the type system can be shared across CUs and the mappings are
- /// kept in DwarfDebug.
- void insertDIE(DIDescriptor Desc, DIE *D);
+ /// \brief Insert DIE into the map.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ void insertDIE(const DINode *Desc, DIE *D);
- /// addFlag - Add a flag that is true to the DIE.
+ /// \brief Add a flag that is true to the DIE.
void addFlag(DIE &Die, dwarf::Attribute Attribute);
- /// addUInt - Add an unsigned integer attribute data and value.
+ /// \brief Add an unsigned integer attribute data and value.
void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
uint64_t Integer);
void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
- /// addSInt - Add an signed integer attribute data and value.
+ /// \brief Add a signed integer attribute data and value.
void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
int64_t Integer);
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
- /// addString - Add a string attribute data and value.
+ /// \brief Add a string attribute data and value.
+ ///
+ /// We always emit a reference to the string pool instead of immediate
+ /// strings so that DIEs have more predictable sizes. In the case of split
+ /// dwarf we emit an index into another table which gets us the static offset
+ /// into the string table.
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- /// addLabel - Add a Dwarf label attribute data and value.
+ /// \brief Add a Dwarf label attribute data and value.
void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label);
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
- /// addSectionOffset - Add an offset into a section attribute data and value.
- ///
+ /// \brief Add an offset into a section attribute data and value.
void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
- /// addOpAddress - Add a dwarf op address data and value using the
- /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ /// \brief Add a dwarf op address data and value using the form given and an
+ /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addOpAddress(DIELoc &Die, const MCSymbol *Label);
- /// addLabelDelta - Add a label delta attribute data and value.
+ /// \brief Add a label delta attribute data and value.
void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
- /// addDIEEntry - Add a DIE attribute data and value.
+ /// \brief Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
- /// addDIEEntry - Add a DIE attribute data and value.
+ /// \brief Add a DIE attribute data and value.
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry);
void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
- /// addBlock - Add block data.
+ /// \brief Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
- /// addBlock - Add block data.
+ /// \brief Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
- /// addSourceLine - Add location information to specified debug information
- /// entry.
+ /// \brief Add location information to specified debug information entry.
void addSourceLine(DIE &Die, unsigned Line, StringRef File,
StringRef Directory);
- void addSourceLine(DIE &Die, DIVariable V);
- void addSourceLine(DIE &Die, DIGlobalVariable G);
- void addSourceLine(DIE &Die, DISubprogram SP);
- void addSourceLine(DIE &Die, DIType Ty);
- void addSourceLine(DIE &Die, DINameSpace NS);
- void addSourceLine(DIE &Die, DIObjCProperty Ty);
-
- /// addConstantValue - Add constant value entry in variable DIE.
- void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty);
- void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty);
- void addConstantValue(DIE &Die, const APInt &Val, DIType Ty);
+ void addSourceLine(DIE &Die, const DILocalVariable *V);
+ void addSourceLine(DIE &Die, const DIGlobalVariable *G);
+ void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DIType *Ty);
+ void addSourceLine(DIE &Die, const DINamespace *NS);
+ void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
+
+ /// \brief Add constant value entry in variable DIE.
+ void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty);
+ void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
- /// addConstantFPValue - Add constant value entry in variable DIE.
+ /// \brief Add constant value entry in variable DIE.
void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
- /// addTemplateParams - Add template parameters in buffer.
- void addTemplateParams(DIE &Buffer, DIArray TParams);
+ /// \brief Add a linkage name, if it isn't empty.
+ void addLinkageName(DIE &Die, StringRef LinkageName);
+
+ /// \brief Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DINodeArray TParams);
/// \brief Add register operand.
/// \returns false if the register does not exist, e.g., because it was never
@@ -266,51 +282,45 @@ public:
bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
// FIXME: Should be reformulated in terms of addComplexAddress.
- /// addBlockByrefAddress - Start with the address based on the location
- /// provided, and generate the DWARF information necessary to find the
- /// actual Block variable (navigating the Block struct) based on the
- /// starting location. Add the DWARF information to the die. Obsolete,
- /// please use addComplexAddress instead.
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual Block variable (navigating
+ /// the Block struct) based on the starting location. Add the DWARF
+ /// information to the die. Obsolete, please use addComplexAddress instead.
void addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
- /// addType - Add a new type attribute to the specified entity. This takes
- /// and attribute parameter because DW_AT_friend attributes are also
- /// type references.
- void addType(DIE &Entity, DIType Ty,
+ /// \brief Add a new type attribute to the specified entity.
+ ///
+ /// This takes an attribute parameter because DW_AT_friend attributes are
+ /// also type references.
+ void addType(DIE &Entity, const DIType *Ty,
dwarf::Attribute Attribute = dwarf::DW_AT_type);
- /// getOrCreateNameSpace - Create a DIE for DINameSpace.
- DIE *getOrCreateNameSpace(DINameSpace NS);
-
- /// getOrCreateSubprogramDIE - Create new DIE using SP.
- DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false);
+ DIE *getOrCreateNameSpace(const DINamespace *NS);
+ DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
- void applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+ void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool Minimal = false);
- /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
- /// given DIType.
+ /// \brief Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);
- /// getOrCreateContextDIE - Get context owner's DIE.
- DIE *createTypeDIE(DICompositeType Ty);
+ /// \brief Get context owner's DIE.
+ DIE *createTypeDIE(const DICompositeType *Ty);
- /// getOrCreateContextDIE - Get context owner's DIE.
- DIE *getOrCreateContextDIE(DIScope Context);
+ /// \brief Get context owner's DIE.
+ DIE *getOrCreateContextDIE(const DIScope *Context);
- /// constructContainingTypeDIEs - Construct DIEs for types that contain
- /// vtables.
+ /// \brief Construct DIEs for types that contain vtables.
void constructContainingTypeDIEs();
- /// constructSubprogramArguments - Construct function argument DIEs.
- void constructSubprogramArguments(DIE &Buffer, DITypeArray Args);
+ /// \brief Construct function argument DIEs.
+ void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args);
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
- DIE &createAndAddDIE(unsigned Tag, DIE &Parent,
- DIDescriptor N = DIDescriptor());
+ DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr);
/// Compute the size of a header for this unit, not including the initial
/// length field.
@@ -321,85 +331,68 @@ public:
}
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(const MCSymbol *ASectionSym) const;
+ virtual void emitHeader(bool UseOffsets);
virtual DwarfCompileUnit &getCU() = 0;
- /// constructTypeDIE - Construct type DIE from DICompositeType.
- void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
+ void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
protected:
- /// getOrCreateStaticMemberDIE - Create new static data member DIE.
- DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
+ /// \brief Create new static data member DIE.
+ DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
/// Look up the source ID with the given directory and source file names. If
/// none currently exists, create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
- /// resolve - Look in the DwarfDebug map for the MDNode that
- /// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
+ /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the
+ /// reference.
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return DD->resolve(Ref);
}
private:
- /// constructTypeDIE - Construct basic type die from DIBasicType.
- void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
-
- /// constructTypeDIE - Construct derived type die from DIDerivedType.
- void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
-
- /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
- void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
- /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy);
-
- /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
- void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy);
-
- /// constructMemberDIE - Construct member DIE from DIDerivedType.
- void constructMemberDIE(DIE &Buffer, DIDerivedType DT);
-
- /// constructTemplateTypeParameterDIE - Construct new DIE for the given
- /// DITemplateTypeParameter.
+ void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
+ void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy);
+ void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
+ void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
void constructTemplateTypeParameterDIE(DIE &Buffer,
- DITemplateTypeParameter TP);
-
- /// constructTemplateValueParameterDIE - Construct new DIE for the given
- /// DITemplateValueParameter.
+ const DITemplateTypeParameter *TP);
void constructTemplateValueParameterDIE(DIE &Buffer,
- DITemplateValueParameter TVP);
+ const DITemplateValueParameter *TVP);
- /// getLowerBoundDefault - Return the default lower bound for an array. If the
- /// DWARF version doesn't handle the language, return -1.
+ /// \brief Return the default lower bound for an array.
+ ///
+ /// If the DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
- /// getDIEEntry - Returns the debug information entry for the specified
- /// debug variable.
+ /// \brief Returns the DIE entry for the specified debug variable.
DIEEntry *getDIEEntry(const MDNode *N) const {
return MDNodeToDIEEntryMap.lookup(N);
}
- /// insertDIEEntry - Insert debug information entry into the map.
+ /// \brief Insert debug information entry into the map.
void insertDIEEntry(const MDNode *N, DIEEntry *E) {
MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
}
- // getIndexTyDie - Get an anonymous type for index type.
+ /// \brief Get an anonymous type for index type.
DIE *getIndexTyDie();
- // setIndexTyDie - Set D as anonymous type for index which can be reused
- // later.
+ /// \brief Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
- /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
- /// information entry.
+ /// \brief Creates a new DIEEntry to be a proxy for a debug information
+ /// entry.
DIEEntry *createDIEEntry(DIE &Entry);
/// If this is a named finished type then include it in the list of types for
/// the accelerator tables.
- void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE);
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
virtual bool isDwoUnit() const = 0;
};
@@ -423,12 +416,11 @@ public:
void setType(const DIE *Ty) { this->Ty = Ty; }
/// Emit the header for this unit, not including the initial length field.
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
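
A quick check of the size arithmetic above (hedged, assuming DWARF32): the base header after the length field is version (2) + abbrev offset (4) + address size (1) = 7 bytes, and a type unit appends an 8-byte signature plus a 4-byte type-DIE offset.

    #include <cstdint>

    constexpr unsigned BaseHeader =
        sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t); // 7
    constexpr unsigned TypeUnitHeader =
        BaseHeader + sizeof(uint64_t) + sizeof(uint32_t);      // 19
    static_assert(BaseHeader == 7 && TypeUnitHeader == 19, "DWARF32 sizes");
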
- using DwarfUnit::initSection;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index f112120..1be3fd7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
int TypeID = TypeIds[J];
assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
- int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ int ValueForTypeID =
+ isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
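
The TypeID-to-value mapping above is easy to misread, since filter selectors are negative and index FilterOffsets via -1 - TypeID. A hedged standalone sketch (the FilterOffsets contents are made up; in the real table they are negative byte offsets into the type-id list):

    #include <cstdio>
    #include <vector>

    static bool isFilterEHSelector(int Selector) { return Selector < 0; }

    int main() {
      std::vector<int> FilterOffsets = {-1, -2, -4}; // hypothetical
      for (int TypeID : {2, -1, -3}) {
        int ValueForTypeID =
            isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
        // 2 -> 2 (catch), -1 -> FilterOffsets[0] = -1, -3 -> FilterOffsets[2] = -4
        std::printf("TypeID %d -> %d\n", TypeID, ValueForTypeID);
      }
    }
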
@@ -187,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
return MarkedNoUnwind;
}
-/// Compute the call-site table. The entry for an invoke has a try-range
-/// containing the call, a non-zero landing pad, and an appropriate action. The
-/// entry for an ordinary call has a try-range containing the call and zero for
-/// the landing pad and the action. Calls marked 'nounwind' have no entry and
-/// must not be contained in the try-range of any entry - they form gaps in the
-/// table. Entries must be ordered by try-range address.
-void EHStreamer::
-computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions) {
+void EHStreamer::computePadMap(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap) {
// Invokes and nounwind calls have entries in PadMap (due to being bracketed
// by try-range labels when lowered). Ordinary calls do not, so appropriate
  // try-ranges for them need to be deduced so we can put them in the LSDA.
- RangeMapType PadMap;
for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
const LandingPadInfo *LandingPad = LandingPads[i];
for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
@@ -210,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
PadMap[BeginLabel] = P;
}
}
+}
+
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
// The end label of the previous invoke or nounwind try-range.
MCSymbol *LastLabel = nullptr;
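
A hedged sketch of the pad map being built here, with toy types (ints stand in for MCSymbol* labels): each try-range begin label maps to which landing pad, and which of that pad's label ranges, it belongs to.

    #include <map>
    #include <vector>

    struct PadRange { unsigned PadIndex, RangeIndex; };

    // BeginLabels[i] holds the begin labels of landing pad i's try-ranges.
    std::map<int, PadRange>
    computePadMap(const std::vector<std::vector<int>> &BeginLabels) {
      std::map<int, PadRange> PadMap;
      for (unsigned i = 0; i != BeginLabels.size(); ++i)
        for (unsigned j = 0; j != BeginLabels[i].size(); ++j)
          PadMap[BeginLabels[i][j]] = PadRange{i, j};
      return PadMap;
    }
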
@@ -252,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing && !IsSJLJ) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) {
CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -269,14 +276,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
CallSiteEntry Site = {
BeginLabel,
LastLabel,
- LandingPad->LandingPadLabel,
+ LandingPad,
FirstActions[P.PadIndex]
};
// Try to merge with the previous call-site. SJLJ doesn't do this
if (PreviousIsInvoke && !IsSJLJ) {
CallSiteEntry &Prev = CallSites.back();
- if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
Prev.EndLabel = Site.EndLabel;
continue;
@@ -381,7 +388,7 @@ void EHStreamer::emitExceptionTable() {
}
// Type infos.
- const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
unsigned TTypeEncoding;
unsigned TypeFormatSize;
@@ -427,20 +434,15 @@ void EHStreamer::emitExceptionTable() {
// Sometimes we want not to emit the data into separate section (e.g. ARM
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
- Asm->OutStreamer.SwitchSection(LSDASection);
+ Asm->OutStreamer->SwitchSection(LSDASection);
Asm->EmitAlignment(2);
// Emit the LSDA.
MCSymbol *GCCETSym =
- Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+ Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
- Asm->OutStreamer.EmitLabel(GCCETSym);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()));
-
- if (IsSJLJ)
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer->EmitLabel(GCCETSym);
+ Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym());
// Emit the LSDA header.
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
@@ -484,7 +486,7 @@ void EHStreamer::emitExceptionTable() {
SizeAlign = 0;
}
- bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
// SjLj Exception handling
if (IsSJLJ) {
@@ -502,8 +504,8 @@ void EHStreamer::emitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
if (VerboseAsm) {
- Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
- Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx));
}
Asm->EmitULEB128(idx);
@@ -512,10 +514,10 @@ void EHStreamer::emitExceptionTable() {
// the action table), and 0 indicates that there are no actions.
if (VerboseAsm) {
if (S.Action == 0)
- Asm->OutStreamer.AddComment(" Action: cleanup");
+ Asm->OutStreamer->AddComment(" Action: cleanup");
else
- Asm->OutStreamer.AddComment(" Action: " +
- Twine((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer->AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -551,40 +553,38 @@ void EHStreamer::emitExceptionTable() {
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
MCSymbol *BeginLabel = S.BeginLabel;
if (!BeginLabel)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
if (!EndLabel)
- EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-
+ EndLabel = Asm->getFunctionEnd();
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
if (VerboseAsm)
- Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine(" Call between ") +
- BeginLabel->getName() + " and " +
- EndLabel->getName());
+ Asm->OutStreamer->AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- if (!S.PadLabel) {
+ if (!S.LPad) {
if (VerboseAsm)
- Asm->OutStreamer.AddComment(" has no landing pad");
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
+ Asm->OutStreamer->AddComment(" has no landing pad");
+ Asm->OutStreamer->EmitIntValue(0, 4/*size*/);
} else {
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine(" jumps to ") +
- S.PadLabel->getName());
- Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+ Asm->OutStreamer->AddComment(Twine(" jumps to ") +
+ S.LPad->LandingPadLabel->getName());
+ Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
}
// Offset of the first associated action record, relative to the start of
@@ -592,10 +592,10 @@ void EHStreamer::emitExceptionTable() {
// the action table), and 0 indicates that there are no actions.
if (VerboseAsm) {
if (S.Action == 0)
- Asm->OutStreamer.AddComment(" On action: cleanup");
+ Asm->OutStreamer->AddComment(" On action: cleanup");
else
- Asm->OutStreamer.AddComment(" On action: " +
- Twine((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer->AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -609,7 +609,7 @@ void EHStreamer::emitExceptionTable() {
if (VerboseAsm) {
// Emit comments that decode the action table.
- Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
+ Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<");
}
// Type Filter
@@ -618,13 +618,13 @@ void EHStreamer::emitExceptionTable() {
// type of the catch clauses or the types in the exception specification.
if (VerboseAsm) {
if (Action.ValueForTypeID > 0)
- Asm->OutStreamer.AddComment(" Catch TypeInfo " +
- Twine(Action.ValueForTypeID));
+ Asm->OutStreamer->AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
else if (Action.ValueForTypeID < 0)
- Asm->OutStreamer.AddComment(" Filter TypeInfo " +
- Twine(Action.ValueForTypeID));
+ Asm->OutStreamer->AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
else
- Asm->OutStreamer.AddComment(" Cleanup");
+ Asm->OutStreamer->AddComment(" Cleanup");
}
Asm->EmitSLEB128(Action.ValueForTypeID);
@@ -634,10 +634,10 @@ void EHStreamer::emitExceptionTable() {
// or 0 if there is no next action record.
if (VerboseAsm) {
if (Action.NextAction == 0) {
- Asm->OutStreamer.AddComment(" No further actions");
+ Asm->OutStreamer->AddComment(" No further actions");
} else {
unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
+ Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction));
}
}
Asm->EmitSLEB128(Action.NextAction);
@@ -652,13 +652,13 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
int Entry = 0;
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
- Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer.AddBlankLine();
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
Entry = TypeInfos.size();
}
@@ -666,14 +666,14 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalValue *GV = *I;
if (VerboseAsm)
- Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
}
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
- Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer.AddBlankLine();
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
@@ -681,26 +681,10 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
unsigned TypeID = *I;
if (VerboseAsm) {
--Entry;
- if (TypeID != 0)
- Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ if (isFilterEHSelector(TypeID))
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
}
Asm->EmitULEB128(TypeID);
}
}
-
-/// Emit all exception information that should come after the content.
-void EHStreamer::endModule() {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather pre-function exception information. Assumes it's being emitted
-/// immediately after the function entry point.
-void EHStreamer::beginFunction(const MachineFunction *MF) {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather and emit post-function exception information.
-void EHStreamer::endFunction(const MachineFunction *) {
- llvm_unreachable("Should be implemented");
-}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index e93055c..65973fa 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -23,6 +23,8 @@ class MachineModuleInfo;
class MachineInstr;
class MachineFunction;
class AsmPrinter;
+class MCSymbol;
+class MCSymbolRefExpr;
template <typename T>
class SmallVectorImpl;
@@ -60,11 +62,11 @@ protected:
/// Structure describing an entry in the call-site table.
struct CallSiteEntry {
// The 'try-range' is BeginLabel .. EndLabel.
- MCSymbol *BeginLabel; // zero indicates the start of the function.
- MCSymbol *EndLabel; // zero indicates the end of the function.
+ MCSymbol *BeginLabel; // Null indicates the start of the function.
+ MCSymbol *EndLabel; // Null indicates the end of the function.
- // The landing pad starts at PadLabel.
- MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ // LPad contains the landing pad start labels.
+ const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
unsigned Action;
};
@@ -78,13 +80,15 @@ protected:
/// `false' otherwise.
bool callToNoUnwindFunction(const MachineInstr *MI);
+ void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap);
+
/// Compute the call-site table. The entry for an invoke has a try-range
/// containing the call, a non-zero landing pad and an appropriate action.
/// The entry for an ordinary call has a try-range containing the call and
/// zero for the landing pad and the action. Calls marked 'nounwind' have
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
-
void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
const SmallVectorImpl<const LandingPadInfo *> &LPs,
const SmallVectorImpl<unsigned> &FirstActions);
@@ -112,19 +116,16 @@ protected:
virtual void emitTypeInfos(unsigned TTypeEncoding);
+ // Helpers for identifying what kind of clause an EH typeid or selector
+ // corresponds to. Negative selectors are for filter clauses, the zero
+ // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+ static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+ static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
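
A tiny usage sketch of these predicates (illustrative values only; the predicates are restated as free functions so the snippet stands alone):

    // Hedged sketch, mirroring the class helpers above.
    static bool isFilter(int S)  { return S < 0; }
    static bool isCleanup(int S) { return S == 0; }

    static const char *classifySelector(int S) {
      return isFilter(S) ? "filter" : isCleanup(S) ? "cleanup" : "catch";
    }
    // classifySelector(-2) == "filter", (0) == "cleanup", (3) == "catch"
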
public:
EHStreamer(AsmPrinter *A);
- virtual ~EHStreamer();
-
- /// Emit all exception information that should come after the content.
- void endModule() override;
-
- /// Gather pre-function exception information. Assumes being emitted
- /// immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
+ ~EHStreamer() override;
// Unused.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index e293acd..eb9e4c1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -34,33 +34,30 @@ using namespace llvm;
namespace {
- class ErlangGCPrinter : public GCMetadataPrinter {
- public:
- void finishAssembly(Module &M, GCModuleInfo &Info,
- AsmPrinter &AP) override;
- };
-
+class ErlangGCPrinter : public GCMetadataPrinter {
+public:
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
}
static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
-X("erlang", "erlang-compatible garbage collector");
+ X("erlang", "erlang-compatible garbage collector");
-void llvm::linkErlangGCPrinter() { }
+void llvm::linkErlangGCPrinter() {}
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- MCStreamer &OS = AP.OutStreamer;
- unsigned IntPtrSize =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ MCStreamer &OS = *AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
// Put this in a custom .note section.
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
- .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
- SectionKind::getDataRel()));
+ OS.SwitchSection(
+ AP.getObjFileLowering().getContext().getELFSection(".note.gc",
+ ELF::SHT_PROGBITS, 0));
// For each function...
for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
- IE = Info.funcinfo_end();
+ IE = Info.funcinfo_end();
FI != IE; ++FI) {
GCFunctionInfo &MD = **FI;
if (MD.getStrategy().getName() != getStrategy().getName())
@@ -91,7 +88,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Emit the address of the safe point.
OS.AddComment("safe point address");
MCSymbol *Label = PI->Label;
- AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
}
  // Stack information never changes in safe points! Only print info from the
@@ -104,8 +101,9 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Emit stack arity, i.e. the number of stacked arguments.
unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
- unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
- MD.getFunction().arg_size() - RegisteredArgs : 0;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs
+ ? MD.getFunction().arg_size() - RegisteredArgs
+ : 0;
OS.AddComment("stack arity");
AP.EmitInt16(StackArity);
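
A worked instance of the arity computation (hedged; the argument counts are hypothetical): on a 64-bit target six arguments travel in registers, so only arguments beyond the sixth contribute to stack arity.

    #include <cstdio>

    int main() {
      unsigned IntPtrSize = 8; // assume a 64-bit target
      unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
      for (unsigned NumArgs : {3u, 6u, 9u}) {
        unsigned StackArity =
            NumArgs > RegisteredArgs ? NumArgs - RegisteredArgs : 0;
        std::printf("%u args -> stack arity %u\n", NumArgs, StackArity);
        // 3 -> 0, 6 -> 0, 9 -> 3
      }
    }
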
@@ -116,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// And for each live root...
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
LE = MD.live_end(PI);
- LI != LE; ++LI) {
+ LI != LE; ++LI) {
// Emit live root's offset within the stack frame.
OS.AddComment("stack index (offset / wordsize)");
AP.EmitInt16(LI->StackOffset / IntPtrSize);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index ddb14a0..802456b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -32,20 +32,17 @@ using namespace llvm;
namespace {
- class OcamlGCMetadataPrinter : public GCMetadataPrinter {
- public:
- void beginAssembly(Module &M, GCModuleInfo &Info,
- AsmPrinter &AP) override;
- void finishAssembly(Module &M, GCModuleInfo &Info,
- AsmPrinter &AP) override;
- };
-
+class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+public:
+ void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
}
static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
-Y("ocaml", "ocaml 3.10-compatible collector");
+ Y("ocaml", "ocaml 3.10-compatible collector");
-void llvm::linkOcamlGCPrinter() { }
+void llvm::linkOcamlGCPrinter() {}
static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
const std::string &MId = M.getModuleIdentifier();
@@ -63,18 +60,18 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
SmallString<128> TmpStr;
AP.Mang->getNameWithPrefix(TmpStr, SymName);
- MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+ MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr);
- AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
- AP.OutStreamer.EmitLabel(Sym);
+ AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer->EmitLabel(Sym);
}
void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_begin");
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_begin");
}
@@ -96,24 +93,24 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
///
void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- unsigned IntPtrSize =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_end");
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_end");
// FIXME: Why does ocaml emit this??
- AP.OutStreamer.EmitIntValue(0, IntPtrSize);
+ AP.OutStreamer->EmitIntValue(0, IntPtrSize);
- AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end(); I != IE; ++I) {
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
GCFunctionInfo &FI = **I;
if (FI.getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
@@ -123,7 +120,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
}
}
- if (NumDescriptors >= 1<<16) {
+ if (NumDescriptors >= 1 << 16) {
// Very rude!
report_fatal_error(" Too much descriptor for ocaml GC");
}
@@ -131,45 +128,50 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
- IE = Info.funcinfo_end(); I != IE; ++I) {
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
GCFunctionInfo &FI = **I;
if (FI.getStrategy().getName() != getStrategy().getName())
// this function is managed by some other GC
continue;
uint64_t FrameSize = FI.getFrameSize();
- if (FrameSize >= 1<<16) {
+ if (FrameSize >= 1 << 16) {
// Very rude!
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
- "Frame size " + Twine(FrameSize) + ">= 65536.\n"
- "(" + Twine(uintptr_t(&FI)) + ")");
+ "Frame size " +
+ Twine(FrameSize) + ">= 65536.\n"
+ "(" +
+ Twine(uintptr_t(&FI)) + ")");
}
- AP.OutStreamer.AddComment("live roots for " +
- Twine(FI.getFunction().getName()));
- AP.OutStreamer.AddBlankLine();
+ AP.OutStreamer->AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer->AddBlankLine();
for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
size_t LiveCount = FI.live_size(J);
- if (LiveCount >= 1<<16) {
+ if (LiveCount >= 1 << 16) {
// Very rude!
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
- "Live root count "+Twine(LiveCount)+" >= 65536.");
+ "Live root count " +
+ Twine(LiveCount) + " >= 65536.");
}
- AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
+ AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize);
AP.EmitInt16(FrameSize);
AP.EmitInt16(LiveCount);
for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
- KE = FI.live_end(J); K != KE; ++K) {
- if (K->StackOffset >= 1<<16) {
+ KE = FI.live_end(J);
+ K != KE; ++K) {
+ if (K->StackOffset >= 1 << 16) {
// Very rude!
report_fatal_error(
- "GC root stack offset is outside of fixed stack frame and out "
- "of range for ocaml GC!");
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
}
AP.EmitInt16(K->StackOffset);
}
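Inferred shape of one frametable record emitted by the loop above; the struct and field names are ours. Every count is emitted with EmitInt16, which is why the code aborts as soon as any value reaches 1 << 16.

  #include <cstdint>

  struct OcamlFrameDescriptor {
    uintptr_t ReturnAddress;  // EmitSymbolValue(J->Label, IntPtrSize)
    uint16_t FrameSize;       // must stay < 65536
    uint16_t NumLiveOffsets;  // must stay < 65536
    uint16_t LiveOffsets[1];  // NumLiveOffsets stack offsets of GC roots
  };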
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 0f0ad75..dc6df9c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -48,8 +49,6 @@ Win64Exception::~Win64Exception() {}
void Win64Exception::endModule() {
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -60,7 +59,7 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
+ const Function *Per = MF->getMMI().getPersonality();
shouldEmitPersonality = hasLandingPads &&
PerEncoding != dwarf::DW_EH_PE_omit && Per;
@@ -69,39 +68,437 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ // If this was an outlined handler, we need to define the label corresponding
+ // to the offset of the parent frame relative to the stack pointer after the
+ // prologue.
+ const Function *F = MF->getFunction();
+ const Function *ParentF = MMI->getWinEHParent(F);
+ if (F != ParentF) {
+ WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+ auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F);
+ if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) {
+ MCSymbol *HandlerTypeParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(F->getName()));
+
+ // Emit a symbol assignment.
+ Asm->OutStreamer->EmitAssignment(
+ HandlerTypeParentFrameOffset,
+ MCConstantExpr::Create(I->second, Asm->OutContext));
+ }
+ }
+
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym);
+ Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym);
if (!shouldEmitPersonality)
return;
const MCSymbol *PersHandlerSym =
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
- Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
}
/// endFunction - Gather and emit post-function exception information.
///
-void Win64Exception::endFunction(const MachineFunction *) {
+void Win64Exception::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
+ EHPersonality Per = MMI->getPersonalityType();
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ // Get rid of any dead landing pads if we're not using a Windows EH scheme. In
+ // Windows EH schemes, the landing pad is not actually reachable. It only
+ // exists so that we can emit the right table data.
+ if (!isMSVCEHPersonality(Per))
+ MMI->TidyLandingPads();
if (shouldEmitPersonality) {
- Asm->OutStreamer.PushSection();
- Asm->OutStreamer.EmitWinEHHandlerData();
- emitExceptionTable();
- Asm->OutStreamer.PopSection();
+ Asm->OutStreamer->PushSection();
+
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
+ // Emit the tables appropriate to the personality function in use. If we
+ // don't recognize the personality, assume it uses an Itanium-style LSDA.
+ if (Per == EHPersonality::MSVC_Win64SEH)
+ emitCSpecificHandlerTable();
+ else if (Per == EHPersonality::MSVC_CXX)
+ emitCXXFrameHandler3Table(MF);
+ else
+ emitExceptionTable();
+
+ Asm->OutStreamer->PopSection();
+ }
+ Asm->OutStreamer->EmitWinCFIEndProc();
+}
+
+const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
+ if (!Value)
+ return MCConstantExpr::Create(0, Asm->OutContext);
+ return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) {
+ if (!GV)
+ return MCConstantExpr::Create(0, Asm->OutContext);
+ return createImageRel32(Asm->getSymbol(GV));
+}
+
+/// Emit the language-specific data that __C_specific_handler expects. This
+/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
+/// up after faults with __try, __except, and __finally. The typeinfo values
+/// are not really RTTI data, but pointers to filter functions that return an
+/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
+/// blocks and other cleanups, the landing pad label is zero, and the filter
+/// function is actually a cleanup handler with the same prototype. A catch-all
+/// entry is modeled with a null filter function field and a non-zero landing
+/// pad label.
+///
+/// Possible filter function return values:
+/// EXCEPTION_EXECUTE_HANDLER (1):
+/// Jump to the landing pad label after cleanups.
+/// EXCEPTION_CONTINUE_SEARCH (0):
+/// Continue searching this table or continue unwinding.
+/// EXCEPTION_CONTINUE_EXECUTION (-1):
+/// Resume execution at the trapping PC.
+///
+/// Inferred table structure:
+/// struct Table {
+/// int NumEntries;
+/// struct Entry {
+/// imagerel32 LabelStart;
+/// imagerel32 LabelEnd;
+/// imagerel32 FilterOrFinally; // One means catch-all.
+/// imagerel32 LabelLPad; // Zero means __finally.
+/// } Entries[NumEntries];
+/// };
+void Win64Exception::emitCSpecificHandlerTable() {
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Simplifying assumptions for first implementation:
+ // - Cleanups are not implemented.
+ // - Filters are not implemented.
+
+ // The Itanium LSDA table sorts similar landing pads together to simplify the
+ // actions table, but we don't need that.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (const auto &LP : PadInfos)
+ LandingPads.push_back(&LP);
+
+ // Compute label ranges for call sites as we would for the Itanium LSDA, but
+ // use an all zero action table because we aren't using these actions.
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.resize(LandingPads.size());
+ SmallVector<CallSiteEntry, 64> CallSites;
+ computeCallSiteTable(CallSites, LandingPads, FirstActions);
+
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *EHFuncEndSym = Asm->getFunctionEnd();
+
+ // Emit the number of table entries.
+ unsigned NumEntries = 0;
+ for (const CallSiteEntry &CSE : CallSites) {
+ if (!CSE.LPad)
+ continue; // Ignore gaps.
+ NumEntries += CSE.LPad->SEHHandlers.size();
+ }
+ Asm->OutStreamer->EmitIntValue(NumEntries, 4);
+
+ // If there are no actions, we don't need to iterate again.
+ if (NumEntries == 0)
+ return;
+
+ // Emit the four-label records for each call site entry. The table has to be
+ // sorted in layout order, and the call sites should already be sorted.
+ for (const CallSiteEntry &CSE : CallSites) {
+ // Ignore gaps. Unlike the Itanium model, unwinding through a frame without
+ // an EH table entry will propagate the exception rather than terminating
+ // the program.
+ if (!CSE.LPad)
+ continue;
+ const LandingPadInfo *LPad = CSE.LPad;
+
+ // Compute the label range. We may reuse the function begin and end labels
+ // rather than forming new ones.
+ const MCExpr *Begin =
+ createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
+ const MCExpr *End;
+ if (CSE.EndLabel) {
+ // The interval is half-open, so we have to add one to include the return
+ // address of the last invoke in the range.
+ End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel),
+ MCConstantExpr::Create(1, Asm->OutContext),
+ Asm->OutContext);
+ } else {
+ End = createImageRel32(EHFuncEndSym);
+ }
+
+ // Emit an entry for each action.
+ for (SEHHandler Handler : LPad->SEHHandlers) {
+ Asm->OutStreamer->EmitValue(Begin, 4);
+ Asm->OutStreamer->EmitValue(End, 4);
+
+ // Emit the filter or finally function pointer, if present. Otherwise,
+ // emit '1' to indicate a catch-all.
+ const Function *F = Handler.FilterOrFinally;
+ if (F)
+ Asm->OutStreamer->EmitValue(createImageRel32(Asm->getSymbol(F)), 4);
+ else
+ Asm->OutStreamer->EmitIntValue(1, 4);
+
+ // Emit the recovery address, if present. Otherwise, this must be a
+ // finally.
+ const BlockAddress *BA = Handler.RecoverBA;
+ if (BA)
+ Asm->OutStreamer->EmitValue(
+ createImageRel32(Asm->GetBlockAddressSymbol(BA)), 4);
+ else
+ Asm->OutStreamer->EmitIntValue(0, 4);
+ }
+ }
+}
+
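A hypothetical MSVC-only source example (not part of this patch) of what one table entry corresponds to: the __try range supplies LabelStart/LabelEnd, the filter supplies FilterOrFinally, and the __except block is the landing pad (LabelLPad).

  #include <windows.h>

  static int filter(unsigned Code) {
    // Maps to the FilterOrFinally field; returning
    // EXCEPTION_EXECUTE_HANDLER (1) selects this handler.
    return Code == EXCEPTION_INT_DIVIDE_BY_ZERO ? EXCEPTION_EXECUTE_HANDLER
                                                : EXCEPTION_CONTINUE_SEARCH;
  }

  int demo(int X) {
    __try {
      return 100 / X;                         // guarded label range
    } __except (filter(GetExceptionCode())) { // filter decides disposition
      return -1;                              // landing pad
    }
  }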
+void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ const Function *ParentF = MMI->getWinEHParent(F);
+ auto &OS = *Asm->OutStreamer;
+ WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF);
+
+ StringRef ParentLinkageName =
+ GlobalValue::getRealLinkageName(ParentF->getName());
+
+ MCSymbol *FuncInfoXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName));
+ OS.EmitValue(createImageRel32(FuncInfoXData), 4);
+
+ // The Itanium LSDA table sorts similar landing pads together to simplify the
+ // actions table, but we don't need that.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ LandingPads.reserve(PadInfos.size());
+ for (const auto &LP : PadInfos)
+ LandingPads.push_back(&LP);
+
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
+
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ int LastEHState = -2;
+
+ // The parent function and the catch handlers contribute to the 'ip2state'
+ // table.
+
+ // Include ip2state entries for the beginning of the main function and
+ // for catch handler functions.
+ if (F == ParentF) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
+ LastEHState = -1;
+ } else if (FuncInfo.HandlerBaseState.count(F)) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel,
+ FuncInfo.HandlerBaseState[F]));
+ LastEHState = FuncInfo.HandlerBaseState[F];
+ }
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // FIXME: Should this be using FuncInfo.HandlerBaseState?
+ if (SawPotentiallyThrowing && LastEHState != -1) {
+ FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1));
+ SawPotentiallyThrowing = false;
+ LastEHState = -1;
+ }
+
+ if (LandingPad->WinEHState != LastEHState)
+ FuncInfo.IPToStateList.push_back(
+ std::make_pair(BeginLabel, LandingPad->WinEHState));
+ LastEHState = LandingPad->WinEHState;
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ }
+ }
+
+ // Defer emission until we've visited the parent function and all the catch
+ // handlers. Cleanups don't contribute to the ip2state table yet, so don't
+ // count them.
+ if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F))
+ return;
+ ++FuncInfo.NumIPToStateFuncsVisited;
+ if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size())
+ return;
+
+ MCSymbol *UnwindMapXData = nullptr;
+ MCSymbol *TryBlockMapXData = nullptr;
+ MCSymbol *IPToStateXData = nullptr;
+ if (!FuncInfo.UnwindMap.empty())
+ UnwindMapXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$stateUnwindMap$", ParentLinkageName));
+ if (!FuncInfo.TryBlockMap.empty())
+ TryBlockMapXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$tryMap$", ParentLinkageName));
+ if (!FuncInfo.IPToStateList.empty())
+ IPToStateXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$ip2state$", ParentLinkageName));
+
+ // FuncInfo {
+ // uint32_t MagicNumber
+ // int32_t MaxState;
+ // UnwindMapEntry *UnwindMap;
+ // uint32_t NumTryBlocks;
+ // TryBlockMapEntry *TryBlockMap;
+ // uint32_t IPMapEntries;
+ // IPToStateMapEntry *IPToStateMap;
+ // uint32_t UnwindHelp; // (x64/ARM only)
+ // ESTypeList *ESTypeList;
+ // int32_t EHFlags;
+ // }
+ // EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
+ // EHFlags & 2 -> ???
+ // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
+ OS.EmitLabel(FuncInfoXData);
+ OS.EmitIntValue(0x19930522, 4); // MagicNumber
+ OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState
+ OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap
+ OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks
+ OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap
+ OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries
+ OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap
+ OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp
+ OS.EmitIntValue(0, 4); // ESTypeList
+ OS.EmitIntValue(1, 4); // EHFlags
+
+ // UnwindMapEntry {
+ // int32_t ToState;
+ // void (*Action)();
+ // };
+ if (UnwindMapXData) {
+ OS.EmitLabel(UnwindMapXData);
+ for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) {
+ OS.EmitIntValue(UME.ToState, 4); // ToState
+ OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action
+ }
+ }
+
+ // TryBlockMap {
+ // int32_t TryLow;
+ // int32_t TryHigh;
+ // int32_t CatchHigh;
+ // int32_t NumCatches;
+ // HandlerType *HandlerArray;
+ // };
+ if (TryBlockMapXData) {
+ OS.EmitLabel(TryBlockMapXData);
+ SmallVector<MCSymbol *, 1> HandlerMaps;
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = nullptr;
+
+ if (!TBME.HandlerArray.empty())
+ HandlerMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$")
+ .concat(Twine(I))
+ .concat("$")
+ .concat(ParentLinkageName));
+
+ HandlerMaps.push_back(HandlerMapXData);
+
+ int CatchHigh = -1;
+ for (WinEHHandlerType &HT : TBME.HandlerArray)
+ CatchHigh =
+ std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]);
+
+ assert(TBME.TryLow <= TBME.TryHigh);
+ OS.EmitIntValue(TBME.TryLow, 4); // TryLow
+ OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh
+ OS.EmitIntValue(CatchHigh, 4); // CatchHigh
+ OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches
+ OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray
+ }
+
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = HandlerMaps[I];
+ if (!HandlerMapXData)
+ continue;
+ // HandlerType {
+ // int32_t Adjectives;
+ // TypeDescriptor *Type;
+ // int32_t CatchObjOffset;
+ // void (*Handler)();
+ // int32_t ParentFrameOffset; // x64 only
+ // };
+ OS.EmitLabel(HandlerMapXData);
+ for (const WinEHHandlerType &HT : TBME.HandlerArray) {
+ MCSymbol *ParentFrameOffset =
+ Asm->OutContext.getOrCreateParentFrameOffsetSymbol(
+ GlobalValue::getRealLinkageName(HT.Handler->getName()));
+ const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create(
+ ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+
+ // Get the frame escape label with the offset of the catch object. If
+ // the index is -1, then there is no catch object, and we should emit an
+ // offset of zero, indicating that no copy will occur.
+ const MCExpr *FrameAllocOffsetRef = nullptr;
+ if (HT.CatchObjRecoverIdx >= 0) {
+ MCSymbol *FrameAllocOffset =
+ Asm->OutContext.getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(ParentF->getName()),
+ HT.CatchObjRecoverIdx);
+ FrameAllocOffsetRef = MCSymbolRefExpr::Create(
+ FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext);
+ } else {
+ FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext);
+ }
+
+ OS.EmitIntValue(HT.Adjectives, 4); // Adjectives
+ OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type
+ OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset
+ OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler
+ OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset
+ }
+ }
+ }
+
+ // IPToStateMapEntry {
+ // void *IP;
+ // int32_t State;
+ // };
+ if (IPToStateXData) {
+ OS.EmitLabel(IPToStateXData);
+ for (auto &IPStatePair : FuncInfo.IPToStateList) {
+ OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP
+ OS.EmitIntValue(IPStatePair.second, 4); // State
+ }
}
- Asm->OutStreamer.EmitWinCFIEndProc();
}
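A conceptual consumer of the ip2state table emitted above, as a sketch (function and parameter names are ours): entries are in layout order, and the EH state at a return address is that of the last entry whose IP is at or below it, with -1 meaning "no state".

  #include <cstdint>
  #include <utility>
  #include <vector>

  int stateForIP(const std::vector<std::pair<uintptr_t, int>> &IPToState,
                 uintptr_t RetAddr) {
    int State = -1; // no enclosing try-range
    for (const auto &Entry : IPToState) {
      if (Entry.first > RetAddr)
        break;            // entries are sorted by IP
      State = Entry.second;
    }
    return State;
  }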
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h
index 538e132..5f4237f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h
@@ -17,7 +17,9 @@
#include "EHStreamer.h"
namespace llvm {
+class GlobalValue;
class MachineFunction;
+class MCExpr;
class Win64Exception : public EHStreamer {
/// Per-function flag to indicate if personality info should be emitted.
@@ -29,12 +31,19 @@ class Win64Exception : public EHStreamer {
/// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
+ void emitCSpecificHandlerTable();
+
+ void emitCXXFrameHandler3Table(const MachineFunction *MF);
+
+ const MCExpr *createImageRel32(const MCSymbol *Value);
+ const MCExpr *createImageRel32(const GlobalValue *GV);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
Win64Exception(AsmPrinter *A);
- virtual ~Win64Exception();
+ ~Win64Exception() override;
/// Emit all exception information that should come after the content.
void endModule() override;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index b5e0929..371e20a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -20,14 +20,13 @@ namespace llvm {
StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
assert(S);
- DIDescriptor D(S);
- assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() ||
- D.isLexicalBlockFile() || D.isLexicalBlock()) &&
+ assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) ||
+ isa<DILexicalBlockBase>(S)) &&
"Unexpected scope info");
- DIScope Scope(S);
- StringRef Dir = Scope.getDirectory(),
- Filename = Scope.getFilename();
+ auto *Scope = cast<DIScope>(S);
+ StringRef Dir = Scope->getDirectory(),
+ Filename = Scope->getFilename();
char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
if (Result)
return Result;
@@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
if (Filename.find(':') == 1)
Filepath = Filename;
else
- Filepath = (Dir + Twine("\\") + Filename).str();
+ Filepath = (Dir + "\\" + Filename).str();
// Canonicalize the path. We have to do it textually because we may no longer
  // have access to the file in the filesystem.
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
const MachineFunction *MF) {
- const MDNode *Scope = DL.getScope(MF->getFunction()->getContext());
+ const MDNode *Scope = DL.getScope();
if (!Scope)
return;
StringRef Filename = getFullFilepath(Scope);
@@ -95,8 +94,8 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
}
FileNameRegistry.add(Filename);
- MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(MCL);
+ MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(MCL);
CurFn->Instrs.push_back(MCL);
InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
}
@@ -121,7 +120,7 @@ void WinCodeViewLineTables::endModule() {
return;
assert(Asm != nullptr);
- Asm->OutStreamer.SwitchSection(
+ Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
@@ -136,7 +135,7 @@ void WinCodeViewLineTables::endModule() {
emitDebugInfoForFunction(VisitedFunctions[I]);
  // This subsection holds a table mapping file indices to string table offsets.
- Asm->OutStreamer.AddComment("File index to string table offset subsection");
+ Asm->OutStreamer->AddComment("File index to string table offset subsection");
Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
size_t NumFilenames = FileNameRegistry.Infos.size();
Asm->EmitInt32(8 * NumFilenames);
@@ -149,7 +148,7 @@ void WinCodeViewLineTables::endModule() {
}
// This subsection holds the string table.
- Asm->OutStreamer.AddComment("String table");
+ Asm->OutStreamer->AddComment("String table");
Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
Asm->EmitInt32(FileNameRegistry.LastOffset);
// The payload starts with a null character.
@@ -157,12 +156,12 @@ void WinCodeViewLineTables::endModule() {
for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
// Just emit unique filenames one by one, separated by a null character.
- Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+ Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
Asm->EmitInt8(0);
}
// No more subsections. Fill with zeros to align the end of the section by 4.
- Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+ Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
clear();
}
@@ -190,8 +189,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
return;
assert(FI.End && "Don't know where the function ends?");
- StringRef FuncName = getDISubprogram(GV).getDisplayName(),
- GVName = GV->getName();
+ StringRef GVName = GV->getName();
+ StringRef FuncName;
+ if (auto *SP = getDISubprogram(GV))
+ FuncName = SP->getDisplayName();
+
// FIXME Clang currently sets DisplayName to "bar" for a C++
// "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
// to demangle display names anyways, so let's just put a mangled name into
@@ -199,41 +201,41 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
if (GVName.startswith("\01?"))
FuncName = GVName.substr(1);
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
- MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
+ *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
- EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
- Asm->OutStreamer.EmitLabel(SymbolsBegin);
+ EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
+ Asm->OutStreamer->EmitLabel(SymbolsBegin);
{
- MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
- Asm->OutStreamer.EmitLabel(ProcSegmentBegin);
+ MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
+ *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
+ EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
+ Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
// Some bytes of this segment don't seem to be required for basic debugging,
// so just fill them with zeroes.
- Asm->OutStreamer.EmitFill(12, 0);
+ Asm->OutStreamer->EmitFill(12, 0);
// This is the important bit that tells the debugger where the function
    // code is located and what its size is:
- EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
- Asm->OutStreamer.EmitFill(12, 0);
- Asm->OutStreamer.EmitCOFFSecRel32(Fn);
- Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
+ Asm->OutStreamer->EmitFill(12, 0);
+ Asm->OutStreamer->EmitCOFFSecRel32(Fn);
+ Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
Asm->EmitInt8(0);
// Emit the function display name as a null-terminated string.
- Asm->OutStreamer.EmitBytes(FuncName);
+ Asm->OutStreamer->EmitBytes(FuncName);
Asm->EmitInt8(0);
- Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+ Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
// We're done with this function.
Asm->EmitInt16(0x0002);
Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
}
- Asm->OutStreamer.EmitLabel(SymbolsEnd);
+ Asm->OutStreamer->EmitLabel(SymbolsEnd);
// Every subsection must be aligned to a 4-byte boundary.
- Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+ Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
// PCs/Instructions are grouped into segments sharing the same filename.
// Pre-calculate the lengths (in instructions) of these segments and store
@@ -252,21 +254,21 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
  // Emit a line table subsection, required to do PC-to-file:line lookup.
- Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName));
+ Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
- MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd);
- Asm->OutStreamer.EmitLabel(LineTableBegin);
+ MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
+ *LineTableEnd = Asm->MMI->getContext().createTempSymbol();
+ EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd);
+ Asm->OutStreamer->EmitLabel(LineTableBegin);
// Identify the function this subsection is for.
- Asm->OutStreamer.EmitCOFFSecRel32(Fn);
- Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ Asm->OutStreamer->EmitCOFFSecRel32(Fn);
+ Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
// Insert padding after a 16-bit section index.
Asm->EmitInt16(0);
// Length of the function's code, in bytes.
- EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
+ EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
// PC-to-linenumber lookup table:
MCSymbol *FileSegmentEnd = nullptr;
@@ -277,17 +279,17 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
if (FilenameSegmentLengths.count(J)) {
// We came to a beginning of a new filename segment.
if (FileSegmentEnd)
- Asm->OutStreamer.EmitLabel(FileSegmentEnd);
+ Asm->OutStreamer->EmitLabel(FileSegmentEnd);
StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
assert(FileNameRegistry.Infos.count(CurFilename));
size_t IndexInStringTable =
FileNameRegistry.Infos[CurFilename].FilenameID;
// Each segment starts with the offset of the filename
// in the string table.
- Asm->OutStreamer.AddComment(
+ Asm->OutStreamer->AddComment(
"Segment for file '" + Twine(CurFilename) + "' begins");
- MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(FileSegmentBegin);
+ MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(FileSegmentBegin);
Asm->EmitInt32(8 * IndexInStringTable);
// Number of PC records in the lookup table.
@@ -296,18 +298,18 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
// Full size of the segment for this filename, including the prev two
// records.
- FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
+ FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
+ EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
}
// The first PC with the given linenumber and the linenumber itself.
- EmitLabelDiff(Asm->OutStreamer, Fn, Instr);
+ EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
Asm->EmitInt32(InstrInfo[Instr].LineNumber);
}
if (FileSegmentEnd)
- Asm->OutStreamer.EmitLabel(FileSegmentEnd);
- Asm->OutStreamer.EmitLabel(LineTableEnd);
+ Asm->OutStreamer->EmitLabel(FileSegmentEnd);
+ Asm->OutStreamer->EmitLabel(LineTableEnd);
}
void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
@@ -327,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
DebugLoc PrologEndLoc;
bool EmptyPrologue = true;
for (const auto &MBB : *MF) {
- if (!PrologEndLoc.isUnknown())
+ if (PrologEndLoc)
break;
for (const auto &MI : MBB) {
if (MI.isDebugValue())
@@ -336,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
// FIXME: do we need the first subcondition?
- if (!MI.getFlag(MachineInstr::FrameSetup) &&
- (!MI.getDebugLoc().isUnknown())) {
+ if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
PrologEndLoc = MI.getDebugLoc();
break;
}
@@ -345,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
}
}
// Record beginning of function if we have a non-empty prologue.
- if (!PrologEndLoc.isUnknown() && !EmptyPrologue) {
- DebugLoc FnStartDL =
- PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
maybeRecordLocation(FnStartDL, MF);
}
}
@@ -364,10 +364,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
FnDebugInfo.erase(GV);
VisitedFunctions.pop_back();
} else {
- // Define end label for subprogram.
- MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
- CurFn->End = FunctionEndSym;
+ CurFn->End = Asm->getFunctionEnd();
}
CurFn = nullptr;
}
@@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
return;
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || DL.isUnknown())
+ if (DL == PrevInstLoc || !DL)
return;
maybeRecordLocation(DL, Asm->MF);
}
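For reference, the shape of one per-file segment in the line table subsection, inferred from the EmitInt32/EmitLabelDiff calls above (struct and field names are ours; the on-disk format is only partially documented):

  #include <cstdint>

  struct FileSegment {
    uint32_t StringTableOffset; // 8 * IndexInStringTable
    uint32_t NumPCRecords;      // PC records in this segment
    uint32_t SegmentSize;       // FileSegmentBegin..FileSegmentEnd, in bytes
    struct {
      uint32_t PCOffset;        // offset of the PC from function start
      uint32_t LineNumber;
    } Records[1];               // NumPCRecords entries follow
  };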
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 8492eac..c66d141 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler {
public:
WinCodeViewLineTables(AsmPrinter *Asm);
- ~WinCodeViewLineTables() {
+ ~WinCodeViewLineTables() override {
for (DirAndFilenameToFilepathMapTy::iterator
I = DirAndFilenameToFilepathMap.begin(),
E = DirAndFilenameToFilepathMap.end();
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 12f6bd7..fa17108 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -31,10 +31,11 @@ using namespace llvm;
namespace {
class AtomicExpand: public FunctionPass {
const TargetMachine *TM;
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
@@ -47,7 +48,7 @@ namespace {
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -67,9 +68,9 @@ FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
}
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
return false;
- auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -91,7 +92,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
auto FenceOrdering = Monotonic;
bool IsStore, IsLoad;
- if (TargetLowering->getInsertFencesForAtomic()) {
+ if (TLI->getInsertFencesForAtomic()) {
if (LI && isAtLeastAcquire(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(Monotonic);
@@ -107,9 +108,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
- (isAtLeastRelease(CASI->getSuccessOrdering()) ||
- isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
@@ -125,20 +126,22 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
}
- if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
+ if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
// - into a Cmpxchg/LL-SC loop otherwise
// we try them in that order.
- MadeChange |= (isIdempotentRMW(RMWI) &&
- simplifyIdempotentRMW(RMWI)) ||
- (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
- expandAtomicRMW(RMWI));
- } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
+ } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
@@ -149,13 +152,9 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad) {
IRBuilder<> Builder(I);
- auto LeadingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
- Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
- auto TrailingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
- Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
@@ -171,16 +170,13 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
}
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+ if (TLI->hasLoadLinkedStoreConditional())
return expandAtomicLoadToLL(LI);
else
return expandAtomicLoadToCmpXchg(LI);
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
IRBuilder<> Builder(LI);
// On some architectures, load-linked instructions are atomic for larger
@@ -218,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
- // It is the responsibility of the target to only return true in
+ // It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
@@ -227,16 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
+ return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicRMWExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+ assert(TLI->hasLoadLinkedStoreConditional() &&
+ "TargetLowering requested we expand AtomicRMW instruction into "
+ "load-linked/store-conditional combos, but such instructions aren't "
+ "supported");
+
return expandAtomicRMWToLLSC(AI);
- else
+ }
+ case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
return expandAtomicRMWToCmpXchg(AI);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
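Illustrative only: the CmpXChg expansion kind rewrites an atomicrmw (fetch-add here) into a load plus a compare-exchange retry loop of this shape; compare_exchange_weak reloads the observed value on failure, so the loop body is just the retry.

  #include <atomic>

  int fetchAddViaCmpXchg(std::atomic<int> &A, int V) {
    int Loaded = A.load(std::memory_order_relaxed);
    while (!A.compare_exchange_weak(Loaded, Loaded + V)) {
      // Loaded now holds the freshly observed value; retry.
    }
    return Loaded; // atomicrmw yields the value before the operation
  }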
/// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -277,7 +283,6 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering MemOpOrder = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
@@ -397,7 +402,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
@@ -551,13 +555,10 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
}
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
expandAtomicLoad(ResultingLoad);
return true;
}
-
return false;
}
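A rough C++ analogue of the fence bracketing above, assuming a fence-based target: the acquire load is weakened to a monotonic (relaxed) access and its ordering is restored by a trailing fence, mirroring emitLeadingFence/emitTrailingFence. A leading fence is not needed for a pure load on most such targets.

  #include <atomic>

  int acquireLoadViaFences(const std::atomic<int> &A) {
    int V = A.load(std::memory_order_relaxed);           // ordering dropped to monotonic
    std::atomic_thread_fence(std::memory_order_acquire); // trailing fence
    return V;
  }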
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index 72da806..82f5c48 100644
--- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,633 +15,23 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;
-static cl::opt<unsigned>
-PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
- cl::desc("Threshold for partial unrolling"), cl::Hidden);
-
#define DEBUG_TYPE "basictti"
-namespace {
-
-class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
- const TargetMachine *TM;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
- /// Estimate the cost overhead of SK_Alternate shuffle.
- unsigned getAltShuffleOverhead(Type *Ty) const;
-
- const TargetLoweringBase *getTLI() const {
- return TM->getSubtargetImpl()->getTargetLowering();
- }
-
-public:
- BasicTTI() : ImmutablePass(ID), TM(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) {
- initializeBasicTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- bool hasBranchDivergence() const override;
-
- /// \name Scalar TTI Implementations
- /// @{
-
- bool isLegalAddImmediate(int64_t imm) const override;
- bool isLegalICmpImmediate(int64_t imm) const override;
- bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const override;
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const override;
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
- bool isTypeLegal(Type *Ty) const override;
- unsigned getJumpBufAlignment() const override;
- unsigned getJumpBufSize() const override;
- bool shouldBuildLookupTables() const override;
- bool haveFastSqrt(Type *Ty) const override;
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override;
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- unsigned getCFInstrCost(unsigned Opcode) const override;
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
- unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
- ArrayRef<Type*> Tys) const override;
- unsigned getNumberOfParts(Type *Tp) const override;
- unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override;
- unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const override;
-
- /// @}
-};
-
-}
-
-INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
- "Target independent code generator's TTI", true, true, false)
-char BasicTTI::ID = 0;
-
-ImmutablePass *
-llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
- return new BasicTTI(TM);
-}
-
-bool BasicTTI::hasBranchDivergence() const { return false; }
-
-bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
- return getTLI()->isLegalAddImmediate(imm);
-}
-
-bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
- return getTLI()->isLegalICmpImmediate(imm);
-}
-
-bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const {
- TargetLoweringBase::AddrMode AM;
- AM.BaseGV = BaseGV;
- AM.BaseOffs = BaseOffset;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Scale;
- return getTLI()->isLegalAddressingMode(AM, Ty);
-}
-
-int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) const {
- TargetLoweringBase::AddrMode AM;
- AM.BaseGV = BaseGV;
- AM.BaseOffs = BaseOffset;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Scale;
- return getTLI()->getScalingFactorCost(AM, Ty);
-}
-
-bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return getTLI()->isTruncateFree(Ty1, Ty2);
-}
-
-bool BasicTTI::isTypeLegal(Type *Ty) const {
- EVT T = getTLI()->getValueType(Ty);
- return getTLI()->isTypeLegal(T);
-}
-
-unsigned BasicTTI::getJumpBufAlignment() const {
- return getTLI()->getJumpBufAlignment();
-}
-
-unsigned BasicTTI::getJumpBufSize() const {
- return getTLI()->getJumpBufSize();
-}
-
-bool BasicTTI::shouldBuildLookupTables() const {
- const TargetLoweringBase *TLI = getTLI();
- return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
-}
-
-bool BasicTTI::haveFastSqrt(Type *Ty) const {
- const TargetLoweringBase *TLI = getTLI();
- EVT VT = TLI->getValueType(Ty);
- return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
-}
-
-void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- // This unrolling functionality is target independent, but to provide some
- // motivation for its intended use, for x86:
-
- // According to the Intel 64 and IA-32 Architectures Optimization Reference
- // Manual, Intel Core models and later have a loop stream detector
- // (and associated uop queue) that can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have no more than 4 (8 for Nehalem and later) branches
- // taken, and none of them may be calls.
- // - The loop can have no more than 18 (28 for Nehalem and later) uops.
-
- // According to the Software Optimization Guide for AMD Family 15h Processors,
- // models 30h-4fh (Steamroller and later) have a loop predictor and loop
- // buffer which can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have fewer than 16 branches
- // - The loop must have less than 40 uops in all executed loop branches
-
- // The number of taken branches in a loop is hard to estimate here, and
- // benchmarking has revealed that it is better not to be conservative when
- // estimating the branch count. As a result, we'll ignore the branch limits
- // until someone finds a case where it matters in practice.
-
- unsigned MaxOps;
- const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F);
- if (PartialUnrollingThreshold.getNumOccurrences() > 0)
- MaxOps = PartialUnrollingThreshold;
- else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
- MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
- else
- return;
-
- // Scan the loop: don't unroll loops with calls.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
- if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
- ImmutableCallSite CS(J);
- if (const Function *F = CS.getCalledFunction()) {
- if (!TopTTI->isLoweredToCall(F))
- continue;
- }
-
- return;
- }
- }
-
- // Enable runtime and partial unrolling up to the specified size.
- UP.Partial = UP.Runtime = true;
- UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by the vectorizers.
-//
-//===----------------------------------------------------------------------===//
-
-unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) const {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
-unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
- return 1;
-}
-
-unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
- return 32;
-}
-
-unsigned BasicTTI::getMaxInterleaveFactor() const {
- return 1;
-}
-
-unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind, OperandValueKind,
- OperandValueProperties,
- OperandValueProperties) const {
- // Check if any of the operands are vector operands.
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
-
- bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
- // Assume that floating point arithmetic operations cost twice as much as
- // integer operations.
- unsigned OpCost = (IsFloat ? 2 : 1);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- return LT.first * 2 * OpCost;
- return LT.first * 1 * OpCost;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered then assume
- // that the code is twice as expensive.
- return LT.first * 2 * OpCost;
- }
-
- // Else, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // Return the cost of multiple scalar invocations plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return OpCost;
-}
-
-unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
- assert(Ty->isVectorTy() && "Can only shuffle vectors");
- unsigned Cost = 0;
- // Shuffle cost is equal to the cost of extracting elements from its
- // arguments plus the cost of inserting them into the result vector.
-
- // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
- // index 0 of the first vector, index 1 of the second vector, index 2 of the
- // first vector, and finally index 3 of the second vector, and insert them
- // at indices <0,1,2,3> of the result vector.
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
- return Cost;
-}
-
-unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
- if (Kind == SK_Alternate) {
- return getAltShuffleOverhead(Tp);
- }
- return 1;
-}
-
-unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
-
- // Check for NOOP conversions.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Bitcasts between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
- }
-
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- // If the cast is marked as legal (or promote) then assume low cost.
- if (SrcLT.first == DstLT.first &&
- TLI->isOperationLegalOrPromote(ISD, DstLT.second))
- return 1;
-
- // Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
- // Just check the op cost. If the operation is legal then assume it costs 1.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return 1;
-
- // Assume that illegal scalar instructions are expensive.
- return 4;
- }
-
- // Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
-
- // If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Assume that Zext is done using AND.
- if (Opcode == Instruction::ZExt)
- return 1;
-
- // Assume that sext is done using SHL and SRA.
- if (Opcode == Instruction::SExt)
- return 2;
-
- // Just check the op cost. If the operation is legal then assume it costs
- // 1 and multiply by the type-legalization overhead.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return SrcLT.first * 1;
- }
-
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
-
- // Return the cost of multiple scalar invocations plus the cost of
- // inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
- }
-
- // We already handled vector-to-vector and scalar-to-scalar conversions. This
- // is where we handle bitcasts between vectors and scalars. We need to assume
- // that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
- // Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
- (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
-
- llvm_unreachable("Unhandled cast");
-}
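For the same-sized vector-to-vector tier above, the constants follow directly from the assumed lowerings. A hypothetical summary (CastKind and sameSizeVectorCastCost are illustrative names only):

    enum class CastKind { ZExt, SExt, OtherLegal };

    // Hypothetical summary of the same-sized vector cast costs above.
    unsigned sameSizeVectorCastCost(CastKind K, unsigned NumParts) {
      switch (K) {
      case CastKind::ZExt:
        return 1; // assumed lowered to a single AND
      case CastKind::SExt:
        return 2; // assumed lowered to SHL followed by SRA
      case CastKind::OtherLegal:
        return NumParts; // one op per legalized piece (SrcLT.first)
      }
      return 0;
    }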
-
-unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
- // Branches are assumed to be predicted.
- return 0;
-}
-
-unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- const TargetLoweringBase *TLI = getTLI();
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- // Selects on vectors are actually vector selects.
- if (ISD == ISD::SELECT) {
- assert(CondTy && "CondTy must exist");
- if (CondTy->isVectorTy())
- ISD = ISD::VSELECT;
- }
-
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
-
- if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
- !TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Otherwise, assume that the operation is scalarized.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
- if (CondTy)
- CondTy = CondTy->getScalarType();
- unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- CondTy);
-
- // Return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
- }
-
- // Unknown scalar opcode.
- return 1;
-}
-
-unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType());
-
- return LT.first;
-}
-
-unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- assert(!Src->isVoidTy() && "Invalid type");
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);
-
- // Assume that all loads of legal types cost 1.
- unsigned Cost = LT.first;
-
- if (Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
- // This is a vector load that legalizes to a larger type than the vector
- // itself. Unless the corresponding extending load or truncating store is
- // legal, this will scalarize.
- TargetLowering::LegalizeAction LA = TargetLowering::Expand;
- EVT MemVT = getTLI()->getValueType(Src, true);
- if (MemVT.isSimple() && MemVT != MVT::Other) {
- if (Opcode == Instruction::Store)
- LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
- else
- LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
- }
-
- if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
- // This is a vector load/store for some illegal type that is scalarized.
- // We must account for the cost of building or decomposing the vector.
- Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
- Opcode == Instruction::Store);
- }
- }
-
- return Cost;
-}
-
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys) const {
- unsigned ISD = 0;
- switch (IID) {
- default: {
- // Assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
- ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
- }
- }
-
- return ScalarCalls + ScalarizationCost;
- }
- // Look for intrinsics that can be lowered directly or turned into a scalar
- // intrinsic call.
- case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
- case Intrinsic::sin: ISD = ISD::FSIN; break;
- case Intrinsic::cos: ISD = ISD::FCOS; break;
- case Intrinsic::exp: ISD = ISD::FEXP; break;
- case Intrinsic::exp2: ISD = ISD::FEXP2; break;
- case Intrinsic::log: ISD = ISD::FLOG; break;
- case Intrinsic::log10: ISD = ISD::FLOG10; break;
- case Intrinsic::log2: ISD = ISD::FLOG2; break;
- case Intrinsic::fabs: ISD = ISD::FABS; break;
- case Intrinsic::minnum: ISD = ISD::FMINNUM; break;
- case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break;
- case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
- case Intrinsic::floor: ISD = ISD::FFLOOR; break;
- case Intrinsic::ceil: ISD = ISD::FCEIL; break;
- case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
- case Intrinsic::nearbyint:
- ISD = ISD::FNEARBYINT; break;
- case Intrinsic::rint: ISD = ISD::FRINT; break;
- case Intrinsic::round: ISD = ISD::FROUND; break;
- case Intrinsic::pow: ISD = ISD::FPOW; break;
- case Intrinsic::fma: ISD = ISD::FMA; break;
- case Intrinsic::fmuladd: ISD = ISD::FMA; break;
- // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return 0;
- }
-
- const TargetLoweringBase *TLI = getTLI();
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
-
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- return LT.first * 2;
- return LT.first * 1;
- }
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered then assume
- // that the code is twice as expensive.
- return LT.first * 2;
- }
-
- // If we can't lower fmuladd into an FMA, estimate the cost as a
- // floating-point mul followed by an add.
- if (IID == Intrinsic::fmuladd)
- return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
- TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
-
- // Else, assume that we need to scalarize this intrinsic. For math builtins
- // this will emit a costly libcall, adding call overhead and spills. Make it
- // very expensive.
- if (RetTy->isVectorTy()) {
- unsigned Num = RetTy->getVectorNumElements();
- unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
- Tys);
- return 10 * Cost * Num;
- }
-
- // This is going to be turned into a library call, make it expensive.
- return 10;
-}
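The last two fallbacks above can be read as a small formula. A hypothetical condensation (expandedIntrinsicCost is illustrative only), keeping the same assumed 10x libcall overhead factor:

    // Hypothetical condensation of the fallbacks above: fmuladd without FMA
    // support decomposes into fmul + fadd; any other expanded intrinsic is
    // priced as one libcall per lane with an assumed 10x overhead factor.
    unsigned expandedIntrinsicCost(bool IsFMulAdd, unsigned FMulCost,
                                   unsigned FAddCost, unsigned NumLanes,
                                   unsigned ScalarCallCost) {
      if (IsFMulAdd)
        return FMulCost + FAddCost;
      return 10 * ScalarCallCost * NumLanes; // NumLanes == 1 for scalars
    }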
-
-unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
- return LT.first;
-}
-
-unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
- return 0;
-}
+// This flag is used by the template base class for BasicTTIImpl; this file
+// provides its definition.
+cl::opt<unsigned>
+ llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"),
+ cl::Hidden);
-unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const {
- assert(Ty->isVectorTy() && "Expect a vector type");
- unsigned NumVecElts = Ty->getVectorNumElements();
- unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned ArithCost = NumReduxLevels *
- TopTTI->getArithmeticInstrCost(Opcode, Ty);
- // Assume the pairwise shuffles add a cost.
- unsigned ShuffleCost =
- NumReduxLevels * (IsPairwise + 1) *
- TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
- return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
-}
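A worked form of the reduction cost above, with hypothetical unit costs of 1 per vector op and per shuffle: an <8 x float> reduction has log2(8) = 3 levels, so a split reduction costs 3 shuffles + 3 arithmetic ops + 8 lane extracts = 14, while a pairwise one pays two shuffles per level:

    #include <cstdint>

    // Hypothetical worked form of getReductionCost; Log2_32 is modeled with a
    // shift loop, and all per-operation costs are assumed to be 1.
    unsigned reductionCost(unsigned NumVecElts, bool IsPairwise) {
      unsigned Levels = 0;
      for (uint32_t N = NumVecElts; N > 1; N >>= 1)
        ++Levels;                                       // Log2_32(NumVecElts)
      unsigned Arith = Levels;                          // one vector op per level
      unsigned Shuffle = Levels * (IsPairwise ? 2 : 1); // pairwise: 2 per level
      unsigned Extracts = NumVecElts;                   // scalarization overhead
      return Shuffle + Arith + Extracts;
    }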
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 2128da1..b8d9a1a 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -601,8 +602,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
@@ -728,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
+ auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
+ auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
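hasIdenticalMMOs is the classic equal-ranges pattern. An equivalent formulation on a stand-in element type (the real code walks MachineMemOperand* ranges, hence the double dereference in the comparison):

    #include <algorithm>
    #include <vector>

    // Equivalent range-based sketch of hasIdenticalMMOs, on a hypothetical
    // Operand type that provides operator==.
    template <typename Operand>
    bool identicalOperandLists(const std::vector<Operand> &A,
                               const std::vector<Operand> &B) {
      return A.size() == B.size() && std::equal(A.begin(), A.end(), B.begin());
    }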
+
+static void
+removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
+ // Remove MMOs from memory operations in the common block
+ // when they do not match the ones from the block being tail-merged.
+ // This ensures later passes conservatively compute dependencies.
+ MachineBasicBlock *MBB = MBBIStartPos->getParent();
+ // Note that CommonTailLen does not necessarily match the size of the
+ // common BB or the number of its instructions, because the blocks may
+ // differ in debug instructions.
+ unsigned CommonTailLen = 0;
+ for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
+ ++CommonTailLen;
+
+ MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
+ MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
+ MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
+ MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
+
+ while (CommonTailLen--) {
+ assert(MBBI != MBBIE && "Reached BB end within common tail length!");
+ (void)MBBIE;
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+
+ while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+ ++MBBICommon;
+
+ assert(MBBICommon != MBBIECommon &&
+ "Reached BB end within common tail length!");
+ assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
+
+ if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
+ MBBICommon->clearMemRefs();
+
+ ++MBBI;
+ ++MBBICommon;
+ }
+}
+
// See if any of the blocks in MergePotentials (which all have a common single
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
@@ -762,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+ array_pod_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
@@ -841,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
<< (i == e-1 ? "" : ", "));
+ // Remove MMOs from memory operations as needed.
+ removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
@@ -1145,6 +1203,11 @@ ReoptimizeBlock:
if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
+ } else if (FallThrough->isLandingPad()) {
+ // Don't rewrite to a landing pad fallthrough. That could lead to the case
+ // where a BB jumps to more than one landing pad.
+ // TODO: Is it ever worth rewriting predecessors which don't already
+ // jump to a landing pad, and so can safely jump to the fallthrough?
} else {
// Rewrite all predecessors of the old block to go to the fallthrough
// instead.
@@ -1620,8 +1683,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// Also avoid moving code above predicated instruction since it's hard to
// reason about register liveness with predicated instruction.
bool DontMoveAcrossStore = true;
- if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) ||
- TII->isPredicated(PI))
+ if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI))
return MBB->end();
@@ -1759,7 +1821,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
bool DontMoveAcrossStore = true;
- if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore))
+ if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))
break;
// Remove kills from LocalDefsSet, these registers had short live ranges.
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 307dec5..2c6eaf3 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -21,13 +21,13 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
- initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
+ initializeExpandPostRAPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
@@ -37,31 +37,35 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
- initializeMachineCopyPropagationPass(Registry);
- initializeMachineCombinerPass(Registry);
initializeMachineCSEPass(Registry);
+ initializeMachineCombinerPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
- initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
+ initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
- initializePEIPass(Registry);
initializeRegisterCoalescerPass(Registry);
+ initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeStackProtectorPass(Registry);
initializeStackColoringPass(Registry);
+ initializeStackMapLivenessPass(Registry);
+ initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
initializeTargetPassConfigPass(Registry);
@@ -71,9 +75,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
- initializeLowerIntrinsicsPass(Registry);
- initializeMachineFunctionPrinterPassPass(Registry);
- initializeStackMapLivenessPass(Registry);
+ initializeWinEHPreparePass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 82cd380..2c1858b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -32,13 +33,13 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -119,7 +124,6 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -137,8 +141,7 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in any way, dominator tree may need to
- /// be updated.
+ /// ModifiedDT - True if the CFG was modified in any way.
bool ModifiedDT;
/// OptSize - True if optimizing for size.
@@ -156,8 +159,8 @@ class TypePromotionTransaction;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -181,8 +184,9 @@ class TypePromotionTransaction;
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInst);
+ unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
+ bool simplifyOffsetableRelocate(Instruction &I);
};
}
@@ -205,14 +209,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifiedDT = false;
if (TM)
- TLI = TM->getSubtargetImpl()->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -248,9 +248,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BasicBlock *BB = I++;
bool ModifiedDTOnIteration = false;
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
-
+
// Restart BB iteration if the dominator tree of the Function was changed
- ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
break;
}
@@ -293,13 +292,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
- if (MadeChange)
- ModifiedDT = true;
EverMadeChange |= MadeChange;
}
- if (ModifiedDT && DT)
- DT->recalculate(F);
+ if (!DisableGCOpts) {
+ SmallVector<Instruction *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (isStatepoint(I))
+ Statepoints.push_back(&I);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
return EverMadeChange;
}
@@ -326,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -466,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -508,19 +512,188 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
BB->eraseFromParent();
++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<User *> &AllRelocateCalls,
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
+ RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
+ for (auto &U : AllRelocateCalls) {
+ GCRelocateOperands ThisRelocate(U);
+ IntrinsicInst *I = cast<IntrinsicInst>(U);
+ auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
+ ThisRelocate.getDerivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, I));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ IntrinsicInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
+ }
+}
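To make the keying concrete: a statepoint that relocates a base pointer at argument index 4 and a derived pointer at index 5 produces the keys (4,4) and (4,5) in RelocateIdxMap; only the (4,5) entry finds a BaseKey and lands in RelocateInstMap. A hypothetical miniature with strings standing in for the relocate instructions:

    #include <cassert>
    #include <map>
    #include <utility>

    int main() {
      // Keys are (base index, derived index) pairs, as in RelocateIdxMap above.
      std::map<std::pair<unsigned, unsigned>, const char *> RelocateIdxMap = {
          {{4, 4}, "%base.relocated"}, {{4, 5}, "%ptr.relocated"}};
      // The derived entry (4,5) looks up its base under BaseKey (4,4).
      assert(RelocateIdxMap.count({4, 4}) == 1);
      return 0;
    }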
+
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
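A standalone restatement of the predicate above, assuming the indices have already been established to be integer constants (the real code additionally rejects any operand that is not a ConstantInt); the 20 is the same hard-coded "small" threshold:

    #include <vector>

    // Hypothetical stand-alone check: every GEP index must be a small
    // non-negative constant, mirroring Op->getZExtValue() > 20 above.
    bool allSmallConstantIndices(const std::vector<long long> &Indices) {
      for (long long Idx : Indices)
        if (Idx < 0 || Idx > 20)
          return false;
      return true;
    }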
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and applies it.
+static bool
+simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
+ const SmallVectorImpl<IntrinsicInst *> &Targets) {
+ bool MadeChange = false;
+ for (auto &ToReplace : Targets) {
+ GCRelocateOperands MasterRelocate(RelocatedBase);
+ GCRelocateOperands ThisRelocate(ToReplace);
+
+ assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ Value *Base = ThisRelocate.getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ assert(RelocatedBase->getNextNode() &&
+        "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But there could
+ // be cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
+ //
+ // In this case, we cannot find the bitcast any more, so we insert a new
+ // bitcast whether one already exists or not. This way we handle all cases,
+ // and the extra bitcast should be optimized away by later passes.
+ Instruction *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ }
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
+ Instruction *ReplacementInst = cast<Instruction>(Replacement);
+ Replacement->takeName(ToReplace);
+ // If the newly generated derived pointer's type does not match the original
+ // derived pointer's type, cast the new derived pointer to match it. Same
+ // reasoning as above.
+ Instruction *ActualReplacement = ReplacementInst;
+ if (ReplacementInst->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<User *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (isGCRelocate(dyn_cast<Instruction>(U)))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(U);
+
+ // We need at least one base pointer relocation and one derived pointer
+ // relocation to mangle.
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange |= simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
/// SinkCast - Sink the specified cast instruction into its user blocks
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -555,11 +728,11 @@ static bool SinkCast(CastInst *CI) {
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
- MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ MadeChange = true;
++NumCastUses;
}
@@ -609,13 +782,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
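The shape of source code this combine targets, as a hypothetical C++ fragment: comparing the sum unsigned-less-than against one addend is the overflow idiom that m_UAddWithOverflow matches, and it becomes the second result of llvm.uadd.with.overflow:

    // Hypothetical source-level pattern for the combine above.
    unsigned checkedAdd(unsigned A, unsigned B, bool &Overflow) {
      unsigned Sum = A + B; // the AddI instruction
      Overflow = Sum < A;   // the icmp ult that CombineUAddWithOverflow sees
      return Sum;
    }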
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
@@ -649,21 +869,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
- MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ MadeChange = true;
++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty())
+ if (CI->use_empty()) {
CI->eraseFromParent();
+ MadeChange = true;
+ }
return MadeChange;
}
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with shift instruction, which includes:
/// 1. Truncate instruction
@@ -943,8 +1175,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
//
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
-
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst* Load = Builder.CreateLoad(Gep, false);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
@@ -1038,7 +1271,8 @@ static void ScalarizeMaskedStore(CallInst *CI) {
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
Builder.CreateStore(OneElt, Gep);
// Create "else" block, fill it in the next iteration
@@ -1072,6 +1306,54 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
+ AllocaInst *AI;
+ if ((AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) &&
+ GV->hasUniqueInitializer() &&
+ !GV->hasSection() &&
+ GV->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(
+ GV->getType()->getElementType()) >= MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
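The alloca/global realignment test above reduces to a small predicate. A hypothetical restatement (shouldRealign is illustrative only), assuming PrefAlign is a power of two so the modulo matches the bit-mask test:

    // Hypothetical restatement of the realignment condition above.
    bool shouldRealign(unsigned long long ObjSize, unsigned long long Offset,
                       unsigned CurAlign, unsigned PrefAlign,
                       unsigned long long MinSize) {
      if (Offset % PrefAlign != 0) // (Offset2 & (PrefAlign - 1)) != 0
        return false;
      return CurAlign < PrefAlign && ObjSize >= MinSize + Offset;
    }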
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
@@ -1088,8 +1370,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
+ TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -1116,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
return false;
}
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+ if (!ExtVal || !ExtVal->hasOneUse() ||
+ ExtVal->getParent() == CI->getParent())
+ return false;
+ // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+ ExtVal->moveBefore(CI);
+ return true;
+ }
}
if (TLI) {
@@ -1131,15 +1422,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// From here on out we're working with named functions.
if (!CI->getCalledFunction()) return false;
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
- if (!TD) return false;
-
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// to fortified library functions (e.g. __memcpy_chk) that have the default
// "don't know" as the objectsize. Anything else should be left alone.
- FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
if (Value *V = Simplifier.optimizeCall(CI)) {
CI->replaceAllUsesWith(V);
CI->eraseFromParent();
@@ -1672,7 +1959,7 @@ class TypePromotionTransaction {
Inst->removeFromParent();
}
- ~InstructionRemover() { delete Replacer; }
+ ~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
@@ -1802,6 +2089,7 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetMachine &TM;
const TargetLowering &TLI;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -1825,13 +2113,15 @@ class AddressingModeMatcher {
/// always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedTruncs,
+ AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
+ const TargetMachine &TM, Type *AT, Instruction *MI,
+ ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ : AddrModeInsts(AMI), TM(TM),
+ TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
+ ->getTargetLowering()),
+ AccessTy(AT), MemoryInst(MI), AddrMode(AM),
InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
@@ -1848,13 +2138,13 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
+ const TargetMachine &TM,
const SetOfInstrs &InsertedTruncs,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -1869,7 +2159,7 @@ private:
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
@@ -2003,7 +2293,7 @@ class TypePromotionHelper {
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
@@ -2011,53 +2301,55 @@ class TypePromotionHelper {
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *
- promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *
- signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, true);
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
}
/// \see promoteOperandForOther.
- static Value *
- zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, false);
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -2174,16 +2466,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
if (isa<ZExtInst>(SExtOpnd)) {
// Replace s|zext(zext(opnd))
// => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
Value *ZExt =
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
TPT.replaceAllUsesWith(SExt, ZExt);
@@ -2194,7 +2488,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// => z|sext(opnd).
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
}
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
@@ -2203,8 +2497,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
- if (ExtInst && Exts)
- Exts->push_back(ExtInst);
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
return ExtVal;
}
@@ -2217,13 +2514,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
if (!ExtOpnd->hasOneUse()) {
// ExtOpnd will be promoted.
// All its uses, but Ext, will need to use a truncated value of the
@@ -2298,7 +2596,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
continue;
}
ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- ++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
@@ -2307,6 +2604,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Move the sign extension before the insertion point.
TPT.moveBefore(ExtForOpnd, ExtOpnd);
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
// If more sext are required, new instructions will have to be created.
ExtForOpnd = nullptr;
}
@@ -2319,22 +2617,22 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// IsPromotionProfitable - Check whether or not promoting an instruction
/// to a wider type was profitable.
-/// \p MatchedSize gives the number of instructions that have been matched
-/// in the addressing mode after the promotion was applied.
-/// \p SizeWithPromotion gives the number of created instructions for
-/// the promotion plus the number of instructions that have been
-/// matched in the addressing mode before the promotion.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been matched in the addressing
+/// mode before the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool
-AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
- unsigned SizeWithPromotion,
- Value *PromotedOperand) const {
- // We folded fewer instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
// This is not profitable.
- if (MatchedSize < SizeWithPromotion)
+ if (NewCost > OldCost)
return false;
- if (MatchedSize > SizeWithPromotion)
+ if (NewCost < OldCost)
return true;
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
@@ -2374,7 +2672,6 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
@@ -2385,6 +2682,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
AddrInst->getOperand(0)->getType() != AddrInst->getType())
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
+ case Instruction::AddrSpaceCast: {
+ unsigned SrcAS
+ = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
+ if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
case Instruction::Add: {
// Check to see if we can merge in the RHS then the LHS. If so, we win.
ExtAddrMode BackupAddrMode = AddrMode;
@@ -2532,9 +2837,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- unsigned CreatedInsts = 0;
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
Value *PromotedOperand =
- TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2556,7 +2862,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned OldSize = AddrModeInsts.size();
if (!MatchAddr(PromotedOperand, Depth) ||
- !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+ // The total new cost is equal to the cost of the created instructions.
+ // The total old cost is equal to the cost of the extension plus what we
+ // have saved in the addressing mode.
+ !IsPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
@@ -2658,13 +2969,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// inline asm call are due to memory operands. If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ const TargetMachine &TM) {
+ const Function *F = CI->getParent()->getParent();
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -2680,10 +2995,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSetImpl<Instruction*> &ConsideredInsts,
- const TargetLowering &TLI) {
+static bool FindAllMemoryUses(
+ Instruction *I,
+ SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -2713,12 +3028,12 @@ static bool FindAllMemoryUses(Instruction *I,
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TM))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
return true;
}
@@ -2806,7 +3121,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// uses.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -2831,7 +3146,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
MemoryInst, Result, InsertedTruncs,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -2906,15 +3221,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
+ for (Value *IncValue : P->incoming_values())
+ worklist.push_back(IncValue);
continue;
}
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
+ V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -2989,8 +3304,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
- TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+ } else if (AddrSinkUsingGEPs ||
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
+ TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
+ ->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -3041,7 +3358,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return false;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
@@ -3093,7 +3411,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -3104,7 +3422,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
@@ -3213,8 +3531,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI->ParseConstraints(TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -3308,7 +3628,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInsts = 0) {
+ unsigned CreatedInstsCost = 0) {
// Iterate over all the extensions to see if one forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
@@ -3330,10 +3650,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
- unsigned NewCreatedInsts = 0;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
// Promote.
- Value *PromotedVal =
- TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
@@ -3343,9 +3664,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
- unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
- (TotalCreatedInsts > 1 ||
+ (TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
@@ -3353,8 +3675,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
- if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
// the load, otherwise we may degrade the code quality.
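// A worked sketch (an assumption, not from the patch) of the cost accounting
// above: promoting one non-free extension that creates two new extensions
// gives
//   unsigned CreatedInstsCost = 0, NewCreatedInstsCost = 2, ExtCost = 1;
//   long long TotalCreatedInstsCost =
//       CreatedInstsCost + NewCreatedInstsCost - ExtCost; // == 1
// which is not > 1, so the promotion is kept rather than rolled back.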
@@ -3969,148 +4291,6 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
}
-// See if we can speculate calls to intrinsic cttz/ctlz.
-//
-// Example:
-// entry:
-// ...
-// %cmp = icmp eq i64 %val, 0
-// br i1 %cmp, label %end.bb, label %then.bb
-//
-// then.bb:
-// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true)
-// br label %EndBB
-//
-// end.bb:
-// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ]
-//
-// ==>
-//
-// entry:
-// ...
-// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false)
-//
-static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
- assert(BrInst->isConditional() && "Expected a conditional branch!");
- BasicBlock *ThenBB = BrInst->getSuccessor(1);
- BasicBlock *EndBB = BrInst->getSuccessor(0);
-
- // See if ThenBB contains only one instruction (excluding the
- // terminator and DbgInfoIntrinsic calls).
- IntrinsicInst *II = nullptr;
- CastInst *CI = nullptr;
- for (BasicBlock::iterator I = ThenBB->begin(),
- E = std::prev(ThenBB->end()); I != E; ++I) {
- // Skip debug info.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Check if this is a zero extension or a truncate of a previously
- // matched call to intrinsic cttz/ctlz.
- if (II) {
- // Early exit if we already found a "free" zero extend/truncate.
- if (CI)
- return false;
-
- Type *SrcTy = II->getType();
- Type *DestTy = I->getType();
- Value *V;
-
- if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
- // Speculate this zero extend only if it is "free" for the target.
- if (TLI.isZExtFree(SrcTy, DestTy)) {
- CI = cast<CastInst>(I);
- continue;
- }
- } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
- // Speculate this truncate only if it is "free" for the target.
- if (TLI.isTruncateFree(SrcTy, DestTy)) {
- CI = cast<CastInst>(I);
- continue;
- }
- } else {
- // Avoid speculating more than one instruction.
- return false;
- }
- }
-
- // See if this is a call to intrinsic cttz/ctlz.
- if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
- // Avoid speculating expensive intrinsic calls.
- if (!TLI.isCheapToSpeculateCttz())
- return false;
- }
- else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) {
- // Avoid speculating expensive intrinsic calls.
- if (!TLI.isCheapToSpeculateCtlz())
- return false;
- } else
- return false;
-
- II = cast<IntrinsicInst>(I);
- }
-
- // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'.
- BasicBlock *EntryBB = BrInst->getParent();
- for (BasicBlock::iterator I = EndBB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
- Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
-
- if (!OrigV)
- return false;
-
- if (ThenV != II && (!CI || ThenV != CI))
- return false;
-
- if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
- unsigned BitWidth = II->getType()->getIntegerBitWidth();
-
- // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
- // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
- // of the value in input to the cttz/ctlz.
- if (CInt->getValue() != BitWidth)
- return false;
-
- // Hoist the call to cttz/ctlz from ThenBB into EntryBB.
- EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(),
- ThenBB->begin(), std::prev(ThenBB->end()));
-
- // Update PN setting ThenV as the incoming value from both 'EntryBB'
- // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this
- // phi node if all the incoming values are the same.
- PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV);
- PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV);
-
- // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call.
- if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
- Type *Ty = II->getArgOperand(0)->getType();
- Value *Args[] = { II->getArgOperand(0),
- ConstantInt::getFalse(II->getContext()) };
- Module *M = EntryBB->getParent()->getParent();
- Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
- IRBuilder<> Builder(II);
- Instruction *NewI = Builder.CreateCall(IF, Args);
-
- // Replace the old call to cttz/ctlz.
- II->replaceAllUsesWith(NewI);
- II->eraseFromParent();
- }
-
- // Update BrInst condition so that the branch to EndBB is always taken.
- // Later on, method 'ConstantFoldTerminator' will simplify this branch
- // replacing it with a direct branch to 'EndBB'.
- // As a side effect, CodeGenPrepare will attempt to simplify the control
- // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into
- // 'EndBB' (calling method 'EliminateFallThrough').
- BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext()));
- return true;
- }
- }
-
- return false;
-}
-
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
@@ -4171,8 +4351,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, DT)) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -4263,34 +4443,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (isa<ExtractElementInst>(I))
return OptimizeExtractElementInst(I);
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) {
- // Check if the branch condition compares a value agaist zero.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ &&
- match(ICI->getOperand(1), m_Zero())) {
- BasicBlock *ThenBB = BI->getSuccessor(1);
- BasicBlock *EndBB = BI->getSuccessor(0);
-
- // Check if ThenBB is only reachable from this basic block; also,
- // check if EndBB has more than one predecessor.
- if (ThenBB->getSinglePredecessor() &&
- !EndBB->getSinglePredecessor()) {
- TerminatorInst *TI = ThenBB->getTerminator();
-
- if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB &&
- // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'.
- OptimizeBranchInst(BI, *TLI)) {
- ModifiedDT = true;
- return true;
- }
- }
- }
- }
- }
- return false;
- }
-
return false;
}
@@ -4469,8 +4621,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F) {
- if (!TM || TM->Options.EnableFastISel != true ||
- !TLI || TLI->isJumpExpensive())
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
bool MadeChange = false;
@@ -4631,10 +4782,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Request DOM Tree update.
// Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare and the existing update code is broken
- // anyways.
+ // available to CodeGenPrepare.
ModifiedDT = true;
MadeChange = true;
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
new file mode 100644
index 0000000..28c97ba
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
@@ -0,0 +1,54 @@
+//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a GCStrategy for the CoreCLR Runtime.
+// The strategy is similar to Statepoint-example GC, but differs from it in
+// certain aspects, such as:
+// 1) Base-pointers need not be explicitly tracked and reported for
+// interior pointers
+// 2) Uses a different format for encoding stack-maps
+// 3) Location of Safe-point polls: polls are only needed before loop-back edges
+// and before tail-calls (not needed at function-entry)
+//
+// The above differences in behavior are to be implemented in upcoming checkins.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+namespace {
+class CoreCLRGC : public GCStrategy {
+public:
+ CoreCLRGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific, we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Value *V) const override {
+ // Method is only valid on pointer typed values.
+ PointerType *PT = cast<PointerType>(V->getType());
+ // We pick addrspace(1) as our GC managed heap.
+ return (1 == PT->getAddressSpace());
+ }
+};
+}
+
+static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC");
+
+namespace llvm {
+void linkCoreCLRGC() {}
+}
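// An illustrative usage sketch (an assumption, not part of the patch), given
// an LLVMContext Ctx: only pointers in address space 1 are reported as
// GC-managed by this strategy.
//   Type *I8 = Type::getInt8Ty(Ctx);
//   Argument Heap(PointerType::get(I8, 1), "heap"); // i8 addrspace(1)*
//   Argument Raw(PointerType::get(I8, 0), "raw");   // plain i8*
//   CoreCLRGC GC;
//   // *GC.isGCManagedPointer(&Heap) yields true;
//   // *GC.isGCManagedPointer(&Raw) yields false.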
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index ceef74d..af011a0 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -69,7 +69,7 @@ class TargetRegisterInfo;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
- ~CriticalAntiDepBreaker();
+ ~CriticalAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index c17a35d..963d573 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -65,7 +65,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Don't delete instructions with side effects.
bool SawStore = false;
- if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI())
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI())
return false;
// Examine each operand.
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 75b74d9..42656fb 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
@@ -39,15 +39,28 @@ namespace {
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
+ DominatorTree *DT;
+ const TargetLowering *TLI;
+
bool InsertUnwindResumeCalls(Function &Fn);
Value *GetExceptionObject(ResumeInst *RI);
+ size_t
+ pruneUnreachableResumes(Function &Fn,
+ SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
public:
static char ID; // Pass identification, replacement for typeid.
+
+ // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
+ // practice.
+ DwarfEHPrepare()
+ : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
+
DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {
- initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
bool runOnFunction(Function &Fn) override;
@@ -56,7 +69,7 @@ namespace {
return false;
}
- void getAnalysisUsage(AnalysisUsage &AU) const override { }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
const char *getPassName() const override {
return "Exception handling preparation";
@@ -65,11 +78,22 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
return new DwarfEHPrepare(TM);
}
+void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
/// GetExceptionObject - Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
/// instructions, including the 'resume' instruction.
@@ -99,34 +123,93 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
RI->eraseFromParent();
if (EraseIVIs) {
- if (SelIVI->getNumUses() == 0)
+ if (SelIVI->use_empty())
SelIVI->eraseFromParent();
- if (ExcIVI->getNumUses() == 0)
+ if (ExcIVI->use_empty())
ExcIVI->eraseFromParent();
- if (SelLoad && SelLoad->getNumUses() == 0)
+ if (SelLoad && SelLoad->use_empty())
SelLoad->eraseFromParent();
}
return ExnObj;
}
+/// Replace resumes that are not reachable from a cleanup landing pad with
+/// unreachable and then simplify those blocks.
+size_t DwarfEHPrepare::pruneUnreachableResumes(
+ Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ BitVector ResumeReachable(Resumes.size());
+ size_t ResumeIndex = 0;
+ for (auto *RI : Resumes) {
+ for (auto *LP : CleanupLPads) {
+ if (isPotentiallyReachable(LP, RI, DT)) {
+ ResumeReachable.set(ResumeIndex);
+ break;
+ }
+ }
+ ++ResumeIndex;
+ }
+
+ // If everything is reachable, there is no change.
+ if (ResumeReachable.all())
+ return Resumes.size();
+
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+ LLVMContext &Ctx = Fn.getContext();
+
+ // Otherwise, insert unreachable instructions and call simplifycfg.
+ size_t ResumesLeft = 0;
+ for (size_t I = 0, E = Resumes.size(); I < E; ++I) {
+ ResumeInst *RI = Resumes[I];
+ if (ResumeReachable[I]) {
+ Resumes[ResumesLeft++] = RI;
+ } else {
+ BasicBlock *BB = RI->getParent();
+ new UnreachableInst(Ctx, RI);
+ RI->eraseFromParent();
+ SimplifyCFG(BB, TTI, 1);
+ }
+ }
+ Resumes.resize(ResumesLeft);
+ return ResumesLeft;
+}
+
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
SmallVector<ResumeInst*, 16> Resumes;
- for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
+ SmallVector<LandingPadInst*, 16> CleanupLPads;
+ bool FoundLP = false;
+ for (BasicBlock &BB : Fn) {
+ if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
+ if (auto *LP = BB.getLandingPadInst()) {
+ if (LP->isCleanup())
+ CleanupLPads.push_back(LP);
+ // Check the personality on the first landingpad. Don't do anything if
+ // it's for MSVC.
+ if (!FoundLP) {
+ FoundLP = true;
+ EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn());
+ if (isMSVCEHPersonality(Pers))
+ return false;
+ }
+ }
}
if (Resumes.empty())
return false;
+ LLVMContext &Ctx = Fn.getContext();
+
+ size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ if (ResumesLeft == 0)
+ return true; // We pruned them all.
+
// Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (!RewindFunction) {
- LLVMContext &Ctx = Resumes[0]->getContext();
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
@@ -134,10 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
// Create the basic block where the _Unwind_Resume call will live.
- LLVMContext &Ctx = Fn.getContext();
- unsigned ResumesSize = Resumes.size();
-
- if (ResumesSize == 1) {
+ if (ResumesLeft == 1) {
// Instead of creating a new BB and PHI node, just append the call to
// _Unwind_Resume to the end of the single resume block.
ResumeInst *RI = Resumes.front();
@@ -154,14 +234,12 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
- for (SmallVectorImpl<ResumeInst*>::iterator
- I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
- ResumeInst *RI = *I;
+ for (ResumeInst *RI : Resumes) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
@@ -181,6 +259,11 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ assert(TM && "DWARF EH preparation requires a target machine");
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
+ DT = nullptr;
+ TLI = nullptr;
return Changed;
}
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 995606f..092b7f8 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -220,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+ if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
DEBUG(dbgs() << "Can't speculate: " << *I);
return false;
}
@@ -777,15 +777,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
<< "********** Function: " << MF.getName() << '\n');
// Only run if-conversion if the target wants it.
- if (!MF.getTarget()
- .getSubtarget<TargetSubtargetInfo>()
- .enableEarlyIfConversion())
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ if (!STI.enableEarlyIfConversion())
return false;
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- SchedModel =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -799,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
// if-conversion in a single pass. The tryConvertIf() function may erase
// blocks, but only blocks dominated by the head block. This makes it safe to
// update the dominator tree while the post-order iterator is still active.
- for (po_iterator<MachineDominatorTree*>
- I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
- if (tryConvertIf(I->getBlock()))
+ for (auto DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
Changed = true;
return Changed;
diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
index 85b0893..024946d 100644
--- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp
+++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
@@ -27,56 +27,20 @@ using namespace llvm;
namespace {
- class ErlangGC : public GCStrategy {
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const;
- public:
- ErlangGC();
- bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override;
- };
-
+class ErlangGC : public GCStrategy {
+public:
+ ErlangGC();
+};
}
-static GCRegistry::Add<ErlangGC>
-X("erlang", "erlang-compatible garbage collector");
+static GCRegistry::Add<ErlangGC> X("erlang",
+ "erlang-compatible garbage collector");
-void llvm::linkErlangGC() { }
+void llvm::linkErlangGC() {}
ErlangGC::ErlangGC() {
InitRoots = false;
NeededSafePoints = 1 << GC::PostCall;
UsesMetadata = true;
CustomRoots = false;
- CustomSafePoints = true;
-}
-
-MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
- const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
- MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
- return Label;
-}
-
-bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
- ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
- MI != ME; ++MI)
-
- if (MI->getDesc().isCall()) {
-
- // Do not treat tail call sites as safe points.
- if (MI->getDesc().isTerminator())
- continue;
-
- /* Code copied from VisitCallPoint(...) */
- MachineBasicBlock::iterator RAI = MI; ++RAI;
- MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
- FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
- }
-
- return false;
}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index b3a22c8..5b09cf1 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -113,7 +113,7 @@ struct DomainValue {
}
namespace {
-/// LiveReg - Information about a live register.
+/// Information about a live register.
struct LiveReg {
/// Value currently in this register, or NULL when no value is being tracked.
/// This counts as a DomainValue reference.
@@ -125,7 +125,7 @@ struct LiveReg {
/// will be a negative number.
int Def;
};
-} // anonynous namespace
+} // anonymous namespace
namespace {
class ExeDepsFix : public MachineFunctionPass {
@@ -174,7 +174,7 @@ public:
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndizes(unsigned Reg) const;
+ regIndices(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
@@ -205,10 +205,10 @@ private:
char ExeDepsFix::ID = 0;
-/// Translate TRI register number to a list of indizes into our stmaller tables
+/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndizes(unsigned Reg) const {
+ExeDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
@@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
return dv;
}
-/// release - Release a reference to DV. When the last reference is released,
+/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
void ExeDepsFix::release(DomainValue *DV) {
while (DV) {
@@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) {
}
}
-/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
-/// reached. Update the referenced pointer when necessary.
+/// Follow the chain of dead DomainValues until a live DomainValue is reached.
+/// Update the referenced pointer when necessary.
DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
@@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
setLiveReg(rx, alloc(domain));
}
-/// Merge - All instructions and registers in B are moved to A, and B is
-/// released.
+/// All instructions and registers in B are moved to A, and B is released.
bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
@@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
return true;
}
-// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+/// Set up LiveRegs by merging predecessor live-out values.
void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Detect back-edges from predecessors we haven't processed yet.
SeenUnknownBackEdge = false;
@@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
if (MBB->pred_empty()) {
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- for (int rx : regIndizes(*i)) {
+ for (int rx : regIndices(*i)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
@@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
- for (int rx : regIndizes(reg)) {
+ for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
@@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
break;
if (MO.isUse())
continue;
- for (int rx : regIndizes(MO.getReg())) {
+ for (int rx : regIndices(MO.getReg())) {
// This instruction explicitly defines rx.
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
@@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
force(rx, domain);
}
}
@@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
kill(rx);
force(rx, domain);
}
@@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
DomainValue *dv = LiveRegs[rx].Value;
if (dv == nullptr)
continue;
@@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
ii != ee; ++ii) {
MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
kill(rx);
setLiveReg(rx, dv);
diff --git a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp
deleted file mode 100644
index 63c3699..0000000
--- a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp
+++ /dev/null
@@ -1,374 +0,0 @@
-//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief A pass that instruments code with fast checks for indirect calls and
-/// hooks for a function to check violations.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "cfi"
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-STATISTIC(NumCFIIndirectCalls,
- "Number of indirect call sites rewritten by the CFI pass");
-
-char ForwardControlFlowIntegrity::ID = 0;
-INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTables);
-INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass() {
- return new ForwardControlFlowIntegrity();
-}
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- StringRef CFIFuncName) {
- return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing,
- CFIFuncName);
-}
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-static bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning";
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity()
- : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single),
- CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- std::string CFIFuncName)
- : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType),
- CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {}
-
-void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
- AU.addRequired<JumpInstrTables>();
-}
-
-void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) {
- // To get the indirect calls, we iterate over all functions and iterate over
- // the list of basic blocks in each. We extract a total list of indirect calls
- // before modifying any of them, since our modifications will modify the list
- // of basic blocks.
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- CallSite CS(&I);
- if (!(CS && isIndirectCall(CS)))
- continue;
-
- Value *CalledValue = CS.getCalledValue();
-
- // Don't rewrite this instruction if the indirect call is actually just
- // inline assembly, since our transformation will generate an invalid
- // module in that case.
- if (isa<InlineAsm>(CalledValue))
- continue;
-
- IndirectCalls.push_back(&I);
- }
- }
- }
-}
-
-void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M,
- CFITables &CFIT) {
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- for (Instruction *I : IndirectCalls) {
- CallSite CS(I);
- Value *CalledValue = CS.getCalledValue();
-
- // Get the function type for this call and look it up in the tables.
- Type *VTy = CalledValue->getType();
- PointerType *PTy = dyn_cast<PointerType>(VTy);
- Type *EltTy = PTy->getElementType();
- FunctionType *FunTy = dyn_cast<FunctionType>(EltTy);
- FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy);
- ++NumCFIIndirectCalls;
- Constant *JumpTableStart = nullptr;
- Constant *JumpTableMask = nullptr;
- Constant *JumpTableSize = nullptr;
-
- // Some call sites have function types that don't correspond to any
- // address-taken function in the module. This happens when function pointers
- // are passed in from external code.
- auto it = CFIT.find(TransformedTy);
- if (it == CFIT.end()) {
- // In this case, make sure that the function pointer will change by
- // setting the mask and the start to be 0 so that the transformed
- // function is 0.
- JumpTableStart = ConstantInt::get(Int64Ty, 0);
- JumpTableMask = ConstantInt::get(Int64Ty, 0);
- JumpTableSize = ConstantInt::get(Int64Ty, 0);
- } else {
- JumpTableStart = it->second.StartValue;
- JumpTableMask = it->second.MaskValue;
- JumpTableSize = it->second.Size;
- }
-
- rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask,
- JumpTableSize);
- }
-
- return;
-}
-
-bool ForwardControlFlowIntegrity::runOnModule(Module &M) {
- JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>();
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // JumpInstrTableInfo stores information about the alignment of each entry.
- // The alignment returned by JumpInstrTableInfo is alignment in bytes, not
- // in the exponent.
- ByteAlignment = JITI->entryByteAlignment();
- LogByteAlignment = llvm::Log2_64(ByteAlignment);
-
- // Set up tables for control-flow integrity based on information about the
- // jump-instruction tables.
- CFITables CFIT;
- for (const auto &KV : JITI->getTables()) {
- uint64_t Size = static_cast<uint64_t>(KV.second.size());
- uint64_t TableSize = NextPowerOf2(Size);
-
- int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;
- Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue);
- Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size);
-
- // The base of the table is defined to be the first jumptable function in
- // the table.
- Function *First = KV.second.begin()->second;
- Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy);
- CFIT[KV.first].StartValue = JumpTableStartValue;
- CFIT[KV.first].MaskValue = JumpTableMaskValue;
- CFIT[KV.first].Size = JumpTableSize;
- }
-
- if (CFIT.empty())
- return false;
-
- getIndirectCalls(M);
-
- if (!CFIEnforcing) {
- addWarningFunction(M);
- }
-
- // Update the instructions with the check and the indirect jump through our
- // table.
- updateIndirectCalls(M, CFIT);
-
- return true;
-}
-
-void ForwardControlFlowIntegrity::addWarningFunction(Module &M) {
- PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // Get the type of the Warning Function: void (i8*, i8*),
- // where the first argument is the name of the function in which the violation
- // occurs, and the second is the function pointer that violates CFI.
- SmallVector<Type *, 2> WarningFunArgs;
- WarningFunArgs.push_back(CharPtrTy);
- WarningFunArgs.push_back(CharPtrTy);
- FunctionType *WarningFunTy =
- FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false);
-
- if (!CFIFuncName.empty()) {
- Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy);
- if (!FailureFun)
- report_fatal_error("Could not get or insert the function specified by"
- " -cfi-func-name");
- } else {
- // The default warning function swallows the warning and lets the call
- // continue, since there's no generic way for it to print out this
- // information.
- Function *WarningFun = M.getFunction(cfi_failure_func_name);
- if (!WarningFun) {
- WarningFun =
- Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage,
- cfi_failure_func_name, &M);
- }
-
- BasicBlock *Entry =
- BasicBlock::Create(M.getContext(), "entry", WarningFun, 0);
- ReturnInst::Create(M.getContext(), Entry);
- }
-}
-
-void ForwardControlFlowIntegrity::rewriteFunctionPointer(
- Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart,
- Constant *JumpTableMask, Constant *JumpTableSize) {
- IRBuilder<> TempBuilder(I);
-
- Type *OrigFunType = FunPtr->getType();
-
- BasicBlock *CurBB = cast<BasicBlock>(I->getParent());
- Function *CurF = cast<Function>(CurBB->getParent());
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
-
- Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty);
- Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty);
-
- Value *NewFunPtr = nullptr;
- Value *Check = nullptr;
- switch (CFIType) {
- case CFIntegrity::Sub: {
- // This is the subtract, mask, and add version.
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Mask the difference to force this to be a table offset.
- Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask);
-
- // Add it back to the base.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- case CFIntegrity::Ror: {
- // This is the subtract and rotate version.
- // Rotate right by the alignment value. The optimizer should recognize
- // this sequence as a rotation.
-
- // This cast is safe, since unsigned is always a subset of uint64_t.
- uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment);
- Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64);
- Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64);
-
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Create the equivalent of a rotate-right instruction.
- Value *Shr = TempBuilder.CreateLShr(Sub, RightShift);
- Value *Shl = TempBuilder.CreateShl(Sub, LeftShift);
- Value *Or = TempBuilder.CreateOr(Shr, Shl);
-
- // Perform unsigned comparison to check for inclusion in the table.
- Check = TempBuilder.CreateICmpULT(Or, JumpTableSize);
- NewFunPtr = FunPtr;
- break;
- }
- case CFIntegrity::Add: {
- // This is the mask and add version.
- // Mask the function pointer to turn it into an offset into the table.
- Value *And = TempBuilder.CreateAnd(TI, JumpTableMask);
-
- // Then or this offset to the base and get the pointer value.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- }
-
- if (!CFIEnforcing) {
- // If a check hasn't been added (in the rotation version), then check to see
- // if it's the same as the original function. This check determines whether
- // or not we call the CFI failure function.
- if (!Check)
- Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr);
- BasicBlock *InvalidPtrBlock =
- BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0);
- BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I);
-
- // Remove the unconditional branch that connects the two blocks.
- TerminatorInst *TermInst = CurBB->getTerminator();
- TermInst->eraseFromParent();
-
- // Add a conditional branch that depends on the Check above.
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB);
-
- // Call the warning function for this pointer, then continue.
- Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock);
- insertWarning(M, InvalidPtrBlock, BI, FunPtr);
- } else {
- // Modify the instruction to call this value.
- CallSite CS(I);
- CS.setCalledFunction(NewFunPtr);
- }
-}
-
-void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block,
- Instruction *I, Value *FunPtr) {
- Function *ParentFun = cast<Function>(Block->getParent());
-
- // Get the function to call right before the instruction.
- Function *WarningFun = nullptr;
- if (CFIFuncName.empty()) {
- WarningFun = M.getFunction(cfi_failure_func_name);
- } else {
- WarningFun = M.getFunction(CFIFuncName);
- }
-
- assert(WarningFun && "Could not find the CFI failure function");
-
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- IRBuilder<> WarningInserter(I);
- // Create a mergeable GlobalVariable containing the name of the function.
- Value *ParentNameGV =
- WarningInserter.CreateGlobalString(ParentFun->getName());
- Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy);
- Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy);
- WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr);
-}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index 6101c67..c8116a4 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -24,22 +24,20 @@
using namespace llvm;
namespace {
-
- class Printer : public FunctionPass {
- static char ID;
- raw_ostream &OS;
-
- public:
- explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
- const char *getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
- bool runOnFunction(Function &F) override;
- bool doFinalization(Module &M) override;
- };
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+};
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
@@ -48,7 +46,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
// -----------------------------------------------------------------------------
GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
- : F(F), S(S), FrameSize(~0LL) {}
+ : F(F), S(S), FrameSize(~0LL) {}
GCFunctionInfo::~GCFunctionInfo() {}
@@ -56,41 +54,19 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
-GCModuleInfo::GCModuleInfo()
- : ImmutablePass(ID) {
+GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
}
-GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
- const std::string &Name) {
- strategy_map_type::iterator NMI = StrategyMap.find(Name);
- if (NMI != StrategyMap.end())
- return NMI->getValue();
-
- for (GCRegistry::iterator I = GCRegistry::begin(),
- E = GCRegistry::end(); I != E; ++I) {
- if (Name == I->getName()) {
- std::unique_ptr<GCStrategy> S = I->instantiate();
- S->Name = Name;
- StrategyMap[Name] = S.get();
- StrategyList.push_back(std::move(S));
- return StrategyList.back().get();
- }
- }
-
- dbgs() << "unsupported GC: " << Name << "\n";
- llvm_unreachable(nullptr);
-}
-
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
assert(F.hasGC());
-
+
finfo_map_type::iterator I = FInfoMap.find(&F);
if (I != FInfoMap.end())
return *I->second;
-
- GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+
+ GCStrategy *S = getGCStrategy(F.getGC());
Functions.push_back(make_unique<GCFunctionInfo>(F, *S));
GCFunctionInfo *GFI = Functions.back().get();
FInfoMap[&F] = GFI;
@@ -100,8 +76,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
void GCModuleInfo::clear() {
Functions.clear();
FInfoMap.clear();
- StrategyMap.clear();
- StrategyList.clear();
+ GCStrategyList.clear();
}
// -----------------------------------------------------------------------------
@@ -112,7 +87,6 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
return new Printer(OS);
}
-
const char *Printer::getPassName() const {
return "Print Garbage Collector Information";
}
@@ -125,42 +99,45 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- case GC::Loop: return "loop";
- case GC::Return: return "return";
- case GC::PreCall: return "pre-call";
- case GC::PostCall: return "post-call";
+ case GC::PreCall:
+ return "pre-call";
+ case GC::PostCall:
+ return "post-call";
}
llvm_unreachable("Invalid point kind");
}
bool Printer::runOnFunction(Function &F) {
- if (F.hasGC()) return false;
-
+ if (F.hasGC())
+ return false;
+
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
-
+
OS << "GC roots for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
- RE = FD->roots_end(); RI != RE; ++RI)
+ RE = FD->roots_end();
+ RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
-
+
OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
- for (GCFunctionInfo::iterator PI = FD->begin(),
- PE = FD->end(); PI != PE; ++PI) {
-
- OS << "\t" << PI->Label->getName() << ": "
- << DescKind(PI->Kind) << ", live = {";
-
+ for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
+ ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ << ", live = {";
+
for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
- RE = FD->live_end(PI);;) {
+ RE = FD->live_end(PI);
+ ;) {
OS << " " << RI->Num;
if (++RI == RE)
break;
OS << ",";
}
-
+
OS << " }\n";
}
-
+
return false;
}
@@ -170,3 +147,31 @@ bool Printer::doFinalization(Module &M) {
GMI->clear();
return false;
}
+
+GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
+ // TODO: Arguably, just doing a linear search would be faster for small N
+ auto NMI = GCStrategyMap.find(Name);
+ if (NMI != GCStrategyMap.end())
+ return NMI->getValue();
+
+ for (auto& Entry : GCRegistry::entries()) {
+ if (Name == Entry.getName()) {
+ std::unique_ptr<GCStrategy> S = Entry.instantiate();
+ S->Name = Name;
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
+ }
+ }
+
+ if (GCRegistry::begin() == GCRegistry::end()) {
+ // In normal operation, the registry should not be empty. There should
+ // be the builtin GCs if nothing else. The most likely explanation is
+ // that we reached this point without running the initializers used by
+ // the Registry itself and its registration mechanism.
+ const std::string error = ("unsupported GC: " + Name).str() +
+ " (did you remember to link and initialize the CodeGen library?)";
+ report_fatal_error(error);
+ } else
+ report_fatal_error(std::string("unsupported GC: ") + Name);
+}
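// An illustrative usage sketch (an assumption, not part of the patch), given
// a GCModuleInfo GMI: the first lookup instantiates the strategy through
// GCRegistry and caches it; later lookups return the cached instance.
//   GCStrategy *S1 = GMI.getGCStrategy("coreclr"); // instantiated and cached
//   GCStrategy *S2 = GMI.getGCStrategy("coreclr"); // cache hit: S1 == S2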
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
index fdff4a7..bb8cfa1 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -14,6 +14,6 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
using namespace llvm;
-GCMetadataPrinter::GCMetadataPrinter() { }
+GCMetadataPrinter::GCMetadataPrinter() {}
-GCMetadataPrinter::~GCMetadataPrinter() { }
+GCMetadataPrinter::~GCMetadataPrinter() {}
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
new file mode 100644
index 0000000..d8edd7e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -0,0 +1,354 @@
+//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering for the gc.root mechanism.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+/// directed by the GCStrategy. It also performs automatic root initialization
+/// and custom intrinsic lowering.
+class LowerIntrinsics : public FunctionPass {
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+
+public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+};
+
+/// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+/// function representation to identify safe points for the garbage collector
+/// in the machine code. It inserts labels at safe points and populates a
+/// GCMetadata record for each function.
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier() || !C.customReadBarrier() ||
+ C.initializeRoots();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ return false;
+}
+
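The strategy instantiation above keys on Function::hasGC(), which a front end sets per function; assuming the hypothetical "mygc" registration sketched earlier:

  // Mark F as managed by the "mygc" collector. From then on F.hasGC() is
  // true, and LowerIntrinsics / GCMachineCodeAnalysis will process F.
  F.setGC("mygc");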
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+  // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+ isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (Intrinsic::ID IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP))
+ ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst *, 16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst *SI = new StoreInst(
+ ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
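For reference, this is the entry-block shape the initializer insertion expects and completes. A hedged IRBuilder sketch (the helper name is hypothetical) of what a front end emits for one root, including the null store that InsertRootInitializers would otherwise add:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"

  // Sketch: declare one GC root slot at the top of F's entry block.
  // Assumes F.setGC(...) has already been called.
  static void declareRoot(llvm::Function &F) {
    llvm::BasicBlock &Entry = F.getEntryBlock();
    llvm::IRBuilder<> B(&Entry, Entry.begin());
    llvm::PointerType *I8Ptr = B.getInt8PtrTy();                        // i8*
    llvm::AllocaInst *Slot = B.CreateAlloca(I8Ptr, nullptr, "gc.slot"); // i8**
    // void @llvm.gcroot(i8** %slot, i8* %metadata)
    B.CreateCall(llvm::Intrinsic::getDeclaration(F.getParent(),
                                                 llvm::Intrinsic::gcroot),
                 {Slot, llvm::ConstantPointerNull::get(I8Ptr)});
    // The null initialization InsertRootInitializers would add if missing:
    B.CreateStore(llvm::ConstantPointerNull::get(I8Ptr), Slot);
  }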
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
+ SmallVector<AllocaInst *, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St =
+ new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
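Which of the two labels actually gets emitted is decided entirely by the strategy's NeededSafePoints bitmask. A sketch of a constructor opting into post-call points only (the collector is hypothetical; field and enum names are as used in this file):

  // Emit a GC_LABEL only after each call, as VisitCallPoint does for
  // strategies whose mask includes GC::PostCall.
  class PostCallGC : public llvm::GCStrategy {
  public:
    PostCallGC() {
      NeededSafePoints = 1 << llvm::GC::PostCall;
      UsesMetadata = true; // let a GCMetadataPrinter emit the tables
    }
  };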
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+ if (MI->isCall()) {
+          // Do not treat tail or sibling call sites as safe points. This is
+          // legal since any arguments passed to the callee which live in the
+          // remnants of the caller's frame will be owned and updated by the
+          // callee if required.
+ if (MI->isTerminator())
+ continue;
+ VisitCallPoint(MI);
+ }
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+  // Find the size of the stack frame. There may be no correct static frame
+  // size; if so, we use UINT64_MAX to represent it.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ RegInfo->needsStackRealignment(MF);
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().needsSafePoints())
+ FindSafePoints(MF);
+
+  // Find the concrete stack offsets for all roots (stack slots).
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 05c36fc..554d326 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -1,4 +1,4 @@
-//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,399 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements target- and collector-independent garbage collection
-// infrastructure.
-//
-// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
-// Roots are identified in SelectionDAGISel.
+// This file implements the policy object GCStrategy which describes the
+// behavior of a given garbage collector.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-namespace {
-
- /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
- /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
- /// directed by the GCStrategy. It also performs automatic root initialization
- /// and custom intrinsic lowering.
- class LowerIntrinsics : public FunctionPass {
- static bool NeedsDefaultLoweringPass(const GCStrategy &C);
- static bool NeedsCustomLoweringPass(const GCStrategy &C);
- static bool CouldBecomeSafePoint(Instruction *I);
- bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
- static bool InsertRootInitializers(Function &F,
- AllocaInst **Roots, unsigned Count);
-
- public:
- static char ID;
-
- LowerIntrinsics();
- const char *getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- };
-
-
- /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
- /// function representation to identify safe points for the garbage collector
- /// in the machine code. It inserts labels at safe points and populates a
- /// GCMetadata record for each function.
- class GCMachineCodeAnalysis : public MachineFunctionPass {
- const TargetMachine *TM;
- GCFunctionInfo *FI;
- MachineModuleInfo *MMI;
- const TargetInstrInfo *TII;
-
- void FindSafePoints(MachineFunction &MF);
- void VisitCallPoint(MachineBasicBlock::iterator MI);
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const;
-
- void FindStackOffsets(MachineFunction &MF);
-
- public:
- static char ID;
-
- GCMachineCodeAnalysis();
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- };
-
-}
-
-// -----------------------------------------------------------------------------
-
-GCStrategy::GCStrategy() :
- UseStatepoints(false),
- NeededSafePoints(0),
- CustomReadBarriers(false),
- CustomWriteBarriers(false),
- CustomRoots(false),
- CustomSafePoints(false),
- InitRoots(true),
- UsesMetadata(false)
-{}
-
-// -----------------------------------------------------------------------------
-
-INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
-INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
-
-FunctionPass *llvm::createGCLoweringPass() {
- return new LowerIntrinsics();
-}
-
-char LowerIntrinsics::ID = 0;
-
-LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(ID) {
- initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
- }
-
-const char *LowerIntrinsics::getPassName() const {
- return "Lower Garbage Collection Instructions";
-}
-
-void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<GCModuleInfo>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-}
-
-/// doInitialization - If this module uses the GC intrinsics, find them now.
-bool LowerIntrinsics::doInitialization(Module &M) {
- // FIXME: This is rather antisocial in the context of a JIT since it performs
- // work against the entire module. But this cannot be done at
- // runFunction time (initializeCustomLowering likely needs to change
- // the module).
- GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && I->hasGC())
- MI->getFunctionInfo(*I); // Instantiate the GC strategy.
-
- bool MadeChange = false;
- for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
- if (NeedsCustomLoweringPass(**I))
- if ((*I)->initializeCustomLowering(M))
- MadeChange = true;
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
- unsigned Count) {
- // Scroll past alloca instructions.
- BasicBlock::iterator IP = F.getEntryBlock().begin();
- while (isa<AllocaInst>(IP)) ++IP;
-
- // Search for initializers in the initial BB.
- SmallPtrSet<AllocaInst*,16> InitedRoots;
- for (; !CouldBecomeSafePoint(IP); ++IP)
- if (StoreInst *SI = dyn_cast<StoreInst>(IP))
- if (AllocaInst *AI =
- dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
- InitedRoots.insert(AI);
-
- // Add root initializers.
- bool MadeChange = false;
-
- for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I)) {
- StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
- cast<PointerType>((*I)->getType())->getElementType())),
- *I);
- SI->insertAfter(*I);
- MadeChange = true;
- }
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !C.customWriteBarrier()
- || !C.customReadBarrier()
- || C.initializeRoots();
-}
-
-bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
- // Custom lowering is only necessary if enabled for some action.
- return C.customWriteBarrier()
- || C.customReadBarrier()
- || C.customRoots();
-}
-
-/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
-/// instruction could introduce a safe point.
-bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
- // The natural definition of instructions which could introduce safe points
- // are:
- //
- // - call, invoke (AfterCall, BeforeCall)
- // - phis (Loops)
- // - invoke, ret, unwind (Exit)
- //
- // However, instructions as seemingly inoccuous as arithmetic can become
- // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
- // it is necessary to take a conservative approach.
-
- if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<StoreInst>(I) || isa<LoadInst>(I))
- return false;
-
- // llvm.gcroot is safe because it doesn't do anything at runtime.
- if (CallInst *CI = dyn_cast<CallInst>(I))
- if (Function *F = CI->getCalledFunction())
- if (unsigned IID = F->getIntrinsicID())
- if (IID == Intrinsic::gcroot)
- return false;
-
- return true;
-}
-
-/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
-/// Leave gcroot intrinsics; the code generator needs to see those.
-bool LowerIntrinsics::runOnFunction(Function &F) {
- // Quick exit for functions that do not use GC.
- if (!F.hasGC())
- return false;
-
- GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- GCStrategy &S = FI.getStrategy();
-
- bool MadeChange = false;
-
- if (NeedsDefaultLoweringPass(S))
- MadeChange |= PerformDefaultLowering(F, S);
-
- bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
- if (UseCustomLoweringPass)
- MadeChange |= S.performCustomLowering(F);
-
- // Custom lowering may modify the CFG, so dominators must be recomputed.
- if (UseCustomLoweringPass) {
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().recalculate(F);
- }
-
- return MadeChange;
-}
-
-bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
- bool LowerWr = !S.customWriteBarrier();
- bool LowerRd = !S.customReadBarrier();
- bool InitRoots = S.initializeRoots();
-
- SmallVector<AllocaInst*, 32> Roots;
-
- bool MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
- Function *F = CI->getCalledFunction();
- switch (F->getIntrinsicID()) {
- case Intrinsic::gcwrite:
- if (LowerWr) {
- // Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getArgOperand(0),
- CI->getArgOperand(2), CI);
- CI->replaceAllUsesWith(St);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcread:
- if (LowerRd) {
- // Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
- Ld->takeName(CI);
- CI->replaceAllUsesWith(Ld);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcroot:
- if (InitRoots) {
- // Initialize the GC root, but do not delete the intrinsic. The
- // backend needs the intrinsic to flag the stack slot.
- Roots.push_back(cast<AllocaInst>(
- CI->getArgOperand(0)->stripPointerCasts()));
- }
- break;
- default:
- continue;
- }
-
- MadeChange = true;
- }
- }
- }
-
- if (Roots.size())
- MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
-
- return MadeChange;
-}
-
-// -----------------------------------------------------------------------------
-
-char GCMachineCodeAnalysis::ID = 0;
-char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
-
-INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
- "Analyze Machine Code For Garbage Collection", false, false)
-
-GCMachineCodeAnalysis::GCMachineCodeAnalysis()
- : MachineFunctionPass(ID) {}
-
-void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
- AU.addRequired<GCModuleInfo>();
-}
-
-MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
- MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
- return Label;
-}
-
-void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
- // Find the return address (next instruction), too, so as to bracket the call
- // instruction.
- MachineBasicBlock::iterator RAI = CI;
- ++RAI;
-
- if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
- MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
- FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
- }
-
- if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
- MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
- FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
- }
-}
-
-void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(),
- BBE = MF.end(); BBI != BBE; ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(),
- ME = BBI->end(); MI != ME; ++MI)
- if (MI->isCall())
- VisitCallPoint(MI);
-}
-
-void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering();
- assert(TFI && "TargetRegisterInfo not available!");
-
- for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
- RI != FI->roots_end();) {
- // If the root references a dead object, no need to keep it.
- if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
- RI = FI->removeStackRoot(RI);
- } else {
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
- ++RI;
- }
- }
-}
-
-bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
- // Quick exit for functions that do not use GC.
- if (!MF.getFunction()->hasGC())
- return false;
-
- FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
- if (!FI->getStrategy().needsSafePoints())
- return false;
-
- TM = &MF.getTarget();
- MMI = &getAnalysis<MachineModuleInfo>();
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- // Find the size of the stack frame.
- FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
- // Find all safe points.
- if (FI->getStrategy().customSafePoints()) {
- FI->getStrategy().findCustomSafePoints(*FI, MF);
- } else {
- FindSafePoints(MF);
- }
-
- // Find the stack offsets for all roots.
- FindStackOffsets(MF);
-
- return false;
-}
+GCStrategy::GCStrategy()
+ : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
+ CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
+ UsesMetadata(false) {}
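The one flag without a gc.root-era counterpart is UseStatepoints; a strategy built on gc.statepoint flips it and turns the root-lowering machinery off. A sketch modeled on the in-tree statepoint-example collector (the class name here is illustrative):

  class MyStatepointGC : public llvm::GCStrategy {
  public:
    MyStatepointGC() {
      UseStatepoints = true;
      // The remaining flags are gc.root specific; clear them so the
      // gc.root lowering code does not run for this strategy.
      NeededSafePoints = 0;
      UsesMetadata = false;
      InitRoots = false;
    }
  };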
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 8257567..79de175 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -49,9 +49,19 @@
// str r0, [r5], #4
//
// note that we saved 2 registers here almost "for free".
+//
+// However, merging globals can have tradeoffs:
+// - it confuses debuggers, tools, and users
+// - it makes linker optimizations less useful (order files, LOHs, ...)
+// - it forces usage of indexed addressing (which isn't necessarily "free")
+// - it can increase register pressure when the uses are disparate enough.
+//
+// We use heuristics to discover the best global grouping we can (cf cl::opts).
// ===---------------------------------------------------------------------===//
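A minimal illustration of the rewrite this pass performs (hypothetical globals; the struct is conceptual shorthand for the struct-typed _MergedGlobals variable, GEP constants, and aliases the pass actually emits):

  // Before: two symbols, each needing its own base address.
  static int x;
  static int y;

  // After, conceptually: one symbol reached once, then small offsets.
  static struct { int x; int y; } _MergedGlobals;
  // uses of x become &_MergedGlobals.x; uses of y become &_MergedGlobals.y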
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/Passes.h"
@@ -66,18 +76,31 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "global-merge"
+// FIXME: This is only useful as a last-resort way to disable the pass.
static cl::opt<bool>
EnableGlobalMerge("enable-global-merge", cl::Hidden,
- cl::desc("Enable global merge pass"),
+ cl::desc("Enable the global merge pass"),
cl::init(true));
+static cl::opt<bool> GlobalMergeGroupByUse(
+ "global-merge-group-by-use", cl::Hidden,
+ cl::desc("Improve global merge pass to look at uses"), cl::init(true));
+
+static cl::opt<bool> GlobalMergeIgnoreSingleUse(
+ "global-merge-ignore-single-use", cl::Hidden,
+ cl::desc("Improve global merge pass to ignore globals only used alone"),
+ cl::init(true));
+
static cl::opt<bool>
EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
cl::desc("Enable global merge pass on constants"),
@@ -90,13 +113,24 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
cl::desc("Enable global merge pass on external linkage"),
cl::init(false));
-STATISTIC(NumMerged , "Number of globals merged");
+STATISTIC(NumMerged, "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
const TargetMachine *TM;
+ const DataLayout *DL;
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions), see the code that passes in the offset in the ARM backend
+ // for more information.
+ unsigned MaxOffset;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Merge everything in \p Globals for which the corresponding bit
+ /// in \p GlobalSet is set.
+ bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const;
/// \brief Check if the given variable has been identified as must keep
/// \pre setMustKeepGlobalVariables must have been called on the Module that
@@ -117,8 +151,10 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ explicit GlobalMerge(const TargetMachine *TM = nullptr,
+ unsigned MaximalOffset = 0)
+ : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
+ MaxOffset(MaximalOffset) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -138,48 +174,243 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
+INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
+INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- const DataLayout *DL = TLI->getDataLayout();
-
- // FIXME: Infer the maximum possible offset depending on the actual users
- // (these max offsets are different for the users inside Thumb or ARM
- // functions)
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
// FIXME: Find better heuristics
std::stable_sort(Globals.begin(), Globals.end(),
- [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ [this](const GlobalVariable *GV1, const GlobalVariable *GV2) {
Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
});
+ // If we want to just blindly group all globals together, do so.
+ if (!GlobalMergeGroupByUse) {
+ BitVector AllGlobals(Globals.size());
+ AllGlobals.set();
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // If we want to be smarter, look at all uses of each global, to try to
+ // discover all sets of globals used together, and how many times each of
+  // these sets occurred.
+ //
+ // Keep this reasonably efficient, by having an append-only list of all sets
+  // discovered so far (UsedGlobalSets), and mapping each "together-ness"
+  // unit of code (currently, a Function) to the set of globals seen so far
+  // that are used together in that unit (GlobalUsesByFunction).
+ //
+  // When we look at the Nth global, we know that any new set is either:
+ // - the singleton set {N}, containing this global only, or
+ // - the union of {N} and a previously-discovered set, containing some
+ // combination of the previous N-1 globals.
+ // Using that knowledge, when looking at the Nth global, we can keep:
+ // - a reference to the singleton set {N} (CurGVOnlySetIdx)
+ // - a list mapping each previous set to its union with {N} (EncounteredUGS),
+ // if it actually occurs.
+
+ // We keep track of the sets of globals used together "close enough".
+ struct UsedGlobalSet {
+ UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
+ BitVector Globals;
+ unsigned UsageCount;
+ };
+
+ // Each set is unique in UsedGlobalSets.
+ std::vector<UsedGlobalSet> UsedGlobalSets;
+
+ // Avoid repeating the create-global-set pattern.
+ auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+ UsedGlobalSets.emplace_back(Globals.size());
+ return UsedGlobalSets.back();
+ };
+
+ // The first set is the empty set.
+ CreateGlobalSet().UsageCount = 0;
+
+ // We define "close enough" to be "in the same function".
+ // FIXME: Grouping uses by function is way too aggressive, so we should have
+ // a better metric for distance between uses.
+ // The obvious alternative would be to group by BasicBlock, but that's in
+  // turn too conservative.
+ // Anything in between wouldn't be trivial to compute, so just stick with
+ // per-function grouping.
+
+ // The value type is an index into UsedGlobalSets.
+ // The default (0) conveniently points to the empty set.
+ DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+ // Now, look at each merge-eligible global in turn.
+
+ // Keep track of the sets we already encountered to which we added the
+ // current global.
+ // Each element matches the same-index element in UsedGlobalSets.
+ // This lets us efficiently tell whether a set has already been expanded to
+ // include the current global.
+ std::vector<size_t> EncounteredUGS;
+
+ for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+ GlobalVariable *GV = Globals[GI];
+
+ // Reset the encountered sets for this global...
+ std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+ // ...and grow it in case we created new sets for the previous global.
+ EncounteredUGS.resize(UsedGlobalSets.size());
+
+ // We might need to create a set that only consists of the current global.
+ // Keep track of its index into UsedGlobalSets.
+ size_t CurGVOnlySetIdx = 0;
+
+ // For each global, look at all its Uses.
+ for (auto &U : GV->uses()) {
+ // This Use might be a ConstantExpr. We're interested in Instruction
+ // users, so look through ConstantExpr...
+ Use *UI, *UE;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ UI = &*CE->use_begin();
+ UE = nullptr;
+ } else if (isa<Instruction>(U.getUser())) {
+ UI = &U;
+ UE = UI->getNext();
+ } else {
+ continue;
+ }
+
+ // ...to iterate on all the instruction users of the global.
+ // Note that we iterate on Uses and not on Users to be able to getNext().
+ for (; UI != UE; UI = UI->getNext()) {
+ Instruction *I = dyn_cast<Instruction>(UI->getUser());
+ if (!I)
+ continue;
+
+ Function *ParentFn = I->getParent()->getParent();
+ size_t UGSIdx = GlobalUsesByFunction[ParentFn];
+
+        // If this is the first global the function uses, map it to the set
+ // consisting of this global only.
+ if (!UGSIdx) {
+ // If that set doesn't exist yet, create it.
+ if (!CurGVOnlySetIdx) {
+ CurGVOnlySetIdx = UsedGlobalSets.size();
+ CreateGlobalSet().Globals.set(GI);
+ } else {
+ ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
+ }
+
+ GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
+ continue;
+ }
+
+        // If this function's set already includes this global, just
+        // increment the counter.
+ if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
+ ++UsedGlobalSets[UGSIdx].UsageCount;
+ continue;
+ }
+
+ // If not, the previous set wasn't actually used in this function.
+ --UsedGlobalSets[UGSIdx].UsageCount;
+
+ // If we already expanded the previous set to include this global, just
+ // reuse that expanded set.
+ if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
+ ++UsedGlobalSets[ExpandedIdx].UsageCount;
+ GlobalUsesByFunction[ParentFn] = ExpandedIdx;
+ continue;
+ }
+
+ // If not, create a new set consisting of the union of the previous set
+ // and this global. Mark it as encountered, so we can reuse it later.
+ GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
+ UsedGlobalSets.size();
+
+ UsedGlobalSet &NewUGS = CreateGlobalSet();
+ NewUGS.Globals.set(GI);
+ NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
+ }
+ }
+ }
+
+ // Now we found a bunch of sets of globals used together. We accumulated
+  // the number of times we encountered the sets (i.e., the number of
+  // functions whose uses matched that exact set of globals).
+ //
+ // Multiply that by the size of the set to give us a crude profitability
+ // metric.
+ std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+ [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+ return UGS1.Globals.count() * UGS1.UsageCount <
+ UGS2.Globals.count() * UGS2.UsageCount;
+ });
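To make the metric concrete: a set of three globals whose uses were encountered four times scores 3 * 4 = 12 and sorts above a pair of globals encountered five times (2 * 5 = 10), so the selection loops below, walking from the high-scoring end, would consider the triple first.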
+
+ // We can choose to merge all globals together, but ignore globals never used
+ // with another global. This catches the obviously non-profitable cases of
+ // having a single global, but is aggressive enough for any other case.
+ if (GlobalMergeIgnoreSingleUse) {
+ BitVector AllGlobals(Globals.size());
+ for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+ const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ if (UGS.UsageCount == 0)
+ continue;
+ if (UGS.Globals.count() > 1)
+ AllGlobals |= UGS.Globals;
+ }
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // Starting from the sets with the best (=biggest) profitability, find a
+ // good combination.
+ // The ideal (and expensive) solution can only be found by trying all
+ // combinations, looking for the one with the best profitability.
+ // Don't be smart about it, and just pick the first compatible combination,
+ // starting with the sets with the best profitability.
+ BitVector PickedGlobals(Globals.size());
+ bool Changed = false;
+
+ for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+ const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ if (UGS.UsageCount == 0)
+ continue;
+ if (PickedGlobals.anyCommon(UGS.Globals))
+ continue;
+ PickedGlobals |= UGS.Globals;
+ // If the set only contains one global, there's no point in merging.
+ // Ignore the global for inclusion in other sets though, so keep it in
+ // PickedGlobals.
+ if (UGS.Globals.count() < 2)
+ continue;
+ Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
+ }
+
+ return Changed;
+}
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const {
+
Type *Int32Ty = Type::getInt32Ty(M.getContext());
assert(Globals.size() > 1);
- // FIXME: This simple solution merges globals all together as maximum as
- // possible. However, with this solution it would be hard to remove dead
- // global symbols at link-time. An alternative solution could be checking
- // global symbols references function by function, and make the symbols
- // being referred in the same function merged and we would probably need
- // to introduce heuristic algorithm to solve the merge conflict from
- // different functions.
- for (size_t i = 0, e = Globals.size(); i != e; ) {
- size_t j = 0;
+ DEBUG(dbgs() << " Trying to merge set, starts with #"
+ << GlobalSet.find_first() << "\n");
+
+ ssize_t i = GlobalSet.find_first();
+ while (i != -1) {
+ ssize_t j = 0;
uint64_t MergedSize = 0;
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
bool HasExternal = false;
GlobalVariable *TheFirstExternal = 0;
- for (j = i; j != e; ++j) {
+ for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getType()->getElementType();
MergedSize += DL->getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
@@ -212,23 +443,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
: "_MergedGlobals",
nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
- for (size_t k = i; k < j; ++k) {
+ for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, k-i)
+ ConstantInt::get(Int32Ty, idx++)
};
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Constant *GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
if (Linkage != GlobalValue::InternalLinkage) {
// Generate a new alias...
auto *PTy = cast<PointerType>(GEP->getType());
- GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- Linkage, Name, GEP, &M);
+ GlobalAlias::create(PTy, Linkage, Name, GEP, &M);
}
NumMerged++;
@@ -282,9 +513,6 @@ bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- const DataLayout *DL = TLI->getDataLayout();
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
setMustKeepGlobalVariables(M);
@@ -357,6 +585,6 @@ bool GlobalMerge::doFinalization(Module &M) {
return false;
}
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
- return new GlobalMerge(TM);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
+ return new GlobalMerge(TM, Offset);
}
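Because the knobs above are ordinary cl::opt flags, the heuristics can be toggled from the command line when experimenting, e.g. "llc -global-merge-group-by-use=false test.ll" to fall back to blind grouping, or "llc -global-merge-ignore-single-use=false test.ll" to re-admit globals never used with another global (flags as defined above; the llc invocation itself is an assumed example).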
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index e84d25d9..0d59c72 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -247,7 +246,7 @@ namespace {
return true;
else if (Incr1 == Incr2) {
// Favors subsumption.
- if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ if (!C1->NeedSubsumption && C2->NeedSubsumption)
return true;
else if (C1->NeedSubsumption == C2->NeedSubsumption) {
// Favors diamond over triangle, etc.
@@ -271,15 +270,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- TLI = MF.getSubtarget().getTargetLowering();
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TLI = ST.getTargetLowering();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
-
- const TargetSubtargetInfo &ST =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(ST.getSchedModel(), &ST, TII);
if (!TII) return false;
@@ -290,7 +287,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, *MBFI, *MBPI);
- BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -728,6 +725,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
if (BBI.IsDone || BBI.IsUnpredicable)
return false;
+ // If it is already predicated but we couldn't analyze its terminator, the
+  // latter might fall through, but we can't determine where to.
+ // Conservatively avoid if-converting again.
+ if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+ return false;
+
// If it is already predicated, check if the new predicate subsumes
// its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
@@ -971,26 +974,37 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
/// values defined in MI which are not live/used by MI.
static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) {
- for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
- if (!Ops->isReg() || !Ops->isKill())
- continue;
- unsigned Reg = Ops->getReg();
- if (Reg == 0)
- continue;
- Redefs.removeReg(Reg);
- }
- for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
- if (!Ops->isReg() || !Ops->isDef())
- continue;
- unsigned Reg = Ops->getReg();
- if (Reg == 0 || Redefs.contains(Reg))
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+ Redefs.stepForward(*MI, Clobbers);
+
+ // Now add the implicit uses for each of the clobbered values.
+ for (auto Reg : Clobbers) {
+ // FIXME: Const cast here is nasty, but better than making StepForward
+ // take a mutable instruction instead of const.
+ MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second);
+ MachineInstr *OpMI = Op.getParent();
+ MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
+ if (Op.isRegMask()) {
+ // First handle regmasks. They clobber any entries in the mask which
+ // means that we need a def for those registers.
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+
+ // We also need to add an implicit def of this register for the later
+ // use to read from.
+ // For the register allocator to have allocated a register clobbered
+ // by the call which is used later, it must be the case that
+ // the call doesn't return.
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Define);
continue;
- Redefs.addReg(Reg);
-
- MachineOperand &Op = *Ops;
- MachineInstr *MI = Op.getParent();
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
- MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
+ }
+ assert(Op.isReg() && "Register operand required");
+ if (Op.isDead()) {
+ // If we found a dead def, but it needs to be live, then remove the dead
+ // flag.
+ if (Redefs.contains(Op.getReg()))
+ Op.setIsDead(false);
+ }
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
}
}
@@ -1370,7 +1384,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
++I) {
- Redefs.stepForward(*I);
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
+ Redefs.stepForward(*I, IgnoredClobbers);
}
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
@@ -1504,10 +1519,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
static bool MaySpeculate(const MachineInstr *MI,
- SmallSet<unsigned, 4> &LaterRedefs,
- const TargetInstrInfo *TII) {
+ SmallSet<unsigned, 4> &LaterRedefs) {
bool SawStore = true;
- if (!MI->isSafeToMove(TII, nullptr, SawStore))
+ if (!MI->isSafeToMove(nullptr, SawStore))
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -1538,7 +1552,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
// It may be possible not to predicate an instruction if it's the 'true'
// side of a diamond and the 'false' side may re-define the instruction's
// defs.
- if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ if (MaySpec && MaySpeculate(I, *LaterRedefs)) {
AnyUnpred = true;
continue;
}
@@ -1557,7 +1571,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
UpdatePredRedefs(I, Redefs);
}
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+ BBI.Predicate.append(Cond.begin(), Cond.end());
BBI.IsAnalyzed = false;
BBI.NonPredSize = 0;
@@ -1622,9 +1636,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
}
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ ToBBI.Predicate.append(Cond.begin(), Cond.end());
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.IsAnalyzed = false;
@@ -1663,8 +1676,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (NBB && !FromBBI.BB->isSuccessor(NBB))
FromBBI.BB->addSuccessor(NBB);
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index df889f7..9989f23 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -135,7 +135,7 @@ private:
// Dead defs generated during spilling.
SmallVector<MachineInstr*, 8> DeadDefs;
- ~InlineSpiller() {}
+ ~InlineSpiller() override {}
public:
InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
@@ -576,8 +576,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
std::tie(SVI, Inserted) =
SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
// Add all the PHIs as dependents of NonPHI.
- for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
- SVI->second.Deps.push_back(PHIs[pi]);
+ SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
+ PHIs.end());
// This is the first time we see NonPHI, add it to the worklist.
if (Inserted)
WorkList.push_back(std::make_pair(Reg, NonPHI));
@@ -921,7 +921,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i].second);
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewVReg);
MO.setIsKill();
@@ -1100,6 +1100,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
SmallVector<unsigned, 8> FoldOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
unsigned Idx = Ops[i].second;
+ assert(MI == Ops[i].first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
if (MO.isImplicit()) {
ImpReg = MO.getReg();
@@ -1142,13 +1143,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
continue;
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
- if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
- SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
- if (VNInfo *VNI = LR->getVNInfoAt(Idx))
- LR->removeValNo(VNI);
- }
- }
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ LIS.removePhysRegDefAt(Reg, Idx);
}
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
@@ -1237,6 +1233,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DebugLoc DL = MI->getDebugLoc();
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
.addFrameIndex(StackSlot)
.addImm(Offset)
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
index 187e015..fd5749b 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -21,7 +21,8 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Static member used for null interference cursors.
-InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
index 1791afb..6519a80 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.h
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -170,8 +170,8 @@ public:
/// Cursor - The primary query interface for the block interference cache.
class Cursor {
Entry *CacheEntry;
- BlockInterference *Current;
- static BlockInterference NoInterference;
+ const BlockInterference *Current;
+ static const BlockInterference NoInterference;
void setEntry(Entry *E) {
Current = nullptr;
diff --git a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp b/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp
deleted file mode 100644
index 75fa261..0000000
--- a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief An implementation of jump-instruction tables.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jt"
-
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <vector>
-
-using namespace llvm;
-
-char JumpInstrTables::ID = 0;
-
-INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-
-STATISTIC(NumJumpTables, "Number of indirect call tables generated");
-STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
-
-ModulePass *llvm::createJumpInstrTablesPass() {
- // The default implementation uses a single table for all functions.
- return new JumpInstrTables(JumpTable::Single);
-}
-
-ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
- return new JumpInstrTables(JTT);
-}
-
-namespace {
-static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
-static const char jump_section_prefix[] = ".jump.instr.table.text.";
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-// Replaces Functions and GlobalAliases with a different Value.
-bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
- User *Us = U->getUser();
- if (!Us)
- return false;
- if (Instruction *I = dyn_cast<Instruction>(Us)) {
- CallSite CS(I);
-
- // Don't do the replacement if this use is a direct call to this function.
- // If the use is not the called value, then replace it.
- if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
- return false;
- }
-
- U->set(V);
- } else if (Constant *C = dyn_cast<Constant>(Us)) {
- // Don't replace calls to bitcasts of function symbols, since they get
- // translated to direct calls.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- // This bitcast must have exactly one user.
- if (CE->user_begin() != CE->user_end()) {
- User *ParentUs = *CE->user_begin();
- if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
- CallSite CS(CI);
- Use &CEU = *CE->use_begin();
- if (CS.isCallee(&CEU)) {
- return false;
- }
- }
- }
- }
- }
-
- // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier
- // requires alias to point to a defined function. So, GlobalAlias is handled
- // as a separate case in runOnModule.
- if (!isa<GlobalAlias>(C))
- C->replaceUsesOfWithOnConstant(GV, V, U);
- } else {
- llvm_unreachable("The Use of a Function symbol is neither an instruction "
- "nor a constant");
- }
-
- return true;
-}
-
-// Replaces all replaceable address-taken uses of GV with a pointer to a
-// jump-instruction table entry.
-void replaceValueWithFunction(GlobalValue *GV, Function *F) {
- // Go through all uses of this function and replace the uses of GV with the
- // jump-table version of the function. Get the uses as a vector before
- // replacing them, since replacing them changes the use list and invalidates
- // the iterator otherwise.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
- Use &U = *I++;
-
- // Replacement of constants replaces all instances in the constant. So, some
- // uses might have already been handled by the time we reach them here.
- if (U.get() == GV)
- replaceGlobalValueIndirectUse(GV, F, &U);
- }
-
- return;
-}
-} // end anonymous namespace
-
-JumpInstrTables::JumpInstrTables()
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
- JTType(JumpTable::Single) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::~JumpInstrTables() {}
-
-void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
-}
-
-Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
- FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(JTType, OrigFunTy);
-
- JumpMap::iterator it = Metadata.find(FunTy);
- if (Metadata.end() == it) {
- struct TableMeta Meta;
- Meta.TableNum = TableCount;
- Meta.Count = 0;
- Metadata[FunTy] = Meta;
- it = Metadata.find(FunTy);
- ++NumJumpTables;
- ++TableCount;
- }
-
- it->second.Count++;
-
- std::string NewName(jump_func_prefix);
- NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
- Function *JumpFun =
- Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
- // The section for this table
- JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
- JITI->insertEntry(FunTy, Target, JumpFun);
-
- ++NumFuncsInJumpTables;
- return JumpFun;
-}
-
-bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(JTType, FunTy);
- return Metadata.end() != Metadata.find(TransTy);
-}
-
-FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
- FunctionType *FunTy) {
- // Returning nullptr forces all types into the same table, since all types map
- // to the same type
- Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
-
- // Ignore the return type.
- Type *RetTy = VoidPtrTy;
- bool IsVarArg = FunTy->isVarArg();
- std::vector<Type *> ParamTys(FunTy->getNumParams());
- FunctionType::param_iterator PI, PE;
- int i = 0;
-
- std::vector<Type *> EmptyParams;
- Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
- FunctionType *VoidFnTy = FunctionType::get(
- Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
- switch (JTT) {
-  case JumpTable::Single:
-    return FunctionType::get(RetTy, EmptyParams, false);
- case JumpTable::Arity:
- // Transform all types to void* so that all functions with the same arity
- // end up in the same table.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- PI++, i++) {
- ParamTys[i] = VoidPtrTy;
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Simplified:
-    // Project all parameter types onto one of three types: composite,
-    // integer, and function, matching the three subclasses of Type.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- ++PI, ++i) {
- assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
- isa<CompositeType>(*PI)) &&
- "This type is not an Integer or a Composite or a Function");
- if (isa<CompositeType>(*PI)) {
- ParamTys[i] = VoidPtrTy;
- } else if (isa<FunctionType>(*PI)) {
- ParamTys[i] = VoidFnTy;
- } else if (isa<IntegerType>(*PI)) {
- ParamTys[i] = Int32Ty;
- }
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Full:
- // Don't transform this type at all.
- return FunTy;
- }
-
- return nullptr;
-}
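The four strategies trade table count against call-site precision. A hedged sketch with toy types — Sig and tableKey are invented here, not LLVM API — showing that Single, Arity, and Simplified put two different signatures into one table while Full keeps them apart:

#include <cassert>
#include <string>
#include <vector>

enum class JTKind { Single, Arity, Simplified, Full };

using Sig = std::vector<std::string>; // parameter type names, e.g. {"i32","i8*"}

static std::string tableKey(JTKind K, const Sig &S) {
  switch (K) {
  case JTKind::Single:
    return "*";                           // everything shares one table
  case JTKind::Arity:
    return std::to_string(S.size());      // one table per arity
  case JTKind::Simplified: {
    std::string Key;
    for (const std::string &P : S)        // classify each parameter
      Key += P.back() == '*' ? 'P' : 'I'; // pointer-like vs. integer-like
    return Key;
  }
  case JTKind::Full: {
    std::string Key;                      // exact signature match
    for (const std::string &P : S)
      Key += P + ',';
    return Key;
  }
  }
  return {};
}

int main() {
  Sig A = {"i32", "i8*"}, B = {"i64", "i32*"};
  assert(tableKey(JTKind::Single, A) == tableKey(JTKind::Single, B));
  assert(tableKey(JTKind::Arity, A) == tableKey(JTKind::Arity, B));
  assert(tableKey(JTKind::Simplified, A) == tableKey(JTKind::Simplified, B));
  assert(tableKey(JTKind::Full, A) != tableKey(JTKind::Full, B));
}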
-
-bool JumpInstrTables::runOnModule(Module &M) {
- JITI = &getAnalysis<JumpInstrTableInfo>();
-
- // Get the set of jumptable-annotated functions that have their address taken.
- DenseMap<Function *, Function *> Functions;
- for (Function &F : M) {
- if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) {
- assert(F.hasUnnamedAddr() &&
- "Attribute 'jumptable' requires 'unnamed_addr'");
- Functions[&F] = nullptr;
- }
- }
-
- // Create the jump-table functions.
- for (auto &KV : Functions) {
- Function *F = KV.first;
- KV.second = insertEntry(M, F);
- }
-
- // GlobalAlias is a special case, because the target of an alias statement
- // must be a defined function. So, instead of replacing a given function in
- // the alias, we replace all uses of aliases that target jumptable functions.
- // Note that there's no need to create these functions, since only aliases
- // that target known jumptable functions are replaced, and there's no way to
- // put the jumptable annotation on a global alias.
- DenseMap<GlobalAlias *, Function *> Aliases;
- for (GlobalAlias &GA : M.aliases()) {
- Constant *Aliasee = GA.getAliasee();
- if (Function *F = dyn_cast<Function>(Aliasee)) {
- auto it = Functions.find(F);
- if (it != Functions.end()) {
- Aliases[&GA] = it->second;
- }
- }
- }
-
- // Replace each address taken function with its jump-instruction table entry.
- for (auto &KV : Functions)
- replaceValueWithFunction(KV.first, KV.second);
-
- for (auto &KV : Aliases)
- replaceValueWithFunction(KV.first, KV.second);
-
- return !Functions.empty();
-}
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 9018314..610c9f4 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,32 +12,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -49,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
- *getSubtargetImpl()->getRegisterInfo(), getTargetTriple());
+ MRI = TheTarget.createMCRegInfo(getTargetTriple());
+ MII = TheTarget.createMCInstrInfo();
+ // FIXME: Having an MCSubtargetInfo on the target machine is a hack due
+ // to some backends having subtarget feature dependent module level
+ // code generation. This is similar to the hack in the AsmPrinter for
+ // module level assembly etc.
+ STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(),
+ getTargetFeatureString());
+
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -68,17 +70,20 @@ void LLVMTargetMachine::initAsmInfo() {
AsmInfo = TmpAsmInfo;
}
-LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ StringRef DataLayoutString,
+ StringRef Triple, StringRef CPU,
+ StringRef FS, TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, Triple, CPU, FS, Options) {
+ : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) {
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
}
-void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- PM.add(createBasicTargetTransformInfoPass(this));
+TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(BasicTTIImpl(this, F));
+ });
}
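The replacement API hands the pass manager a per-function construction recipe instead of a pre-built pass; the lambda captures this so each query can consult the target. A minimal sketch of that callback-holder pattern with stand-in types (Function, FnInfo, Analysis — not LLVM's TargetIRAnalysis/TargetTransformInfo):

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

struct Function { std::string Name; };
struct FnInfo { std::size_t Cost; };           // per-function result

class Analysis {
  std::function<FnInfo(const Function &)> Run; // captured recipe
public:
  explicit Analysis(std::function<FnInfo(const Function &)> R)
      : Run(std::move(R)) {}
  FnInfo run(const Function &F) const { return Run(F); }
};

int main() {
  int Boost = 2; // plays the role of the captured `this`
  Analysis A([Boost](const Function &F) {
    return FnInfo{F.Name.size() * Boost};
  });
  std::cout << A.run(Function{"main"}).Cost << '\n'; // prints: 8
}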
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
@@ -89,7 +94,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
AnalysisID StopAfter) {
// Add internal analysis passes from the target machine.
- TM->addAnalysisPasses(PM);
+ PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
// Targets may override createPassConfig to provide a target-specific
// subclass.
@@ -112,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(
- *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
- &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering());
+ *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -136,22 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
return &MMI->getContext();
}
-bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
- // Passes to handle jumptable function annotations. These can't be handled at
- // JIT time, so we don't add them directly to addPassesToGenerateCode.
- PM.add(createJumpInstrTableInfoPass(
- getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound()));
- PM.add(createJumpInstrTablesPass(Options.JTType));
- if (Options.FCFI)
- PM.add(createForwardControlFlowIntegrityPass(
- Options.JTType, Options.CFIType, Options.CFIEnforcing,
- Options.getCFIFuncName()));
-
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -171,27 +162,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ const MCInstrInfo &MII = *getMCInstrInfo();
+
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
case CGFT_AssemblyFile: {
- MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
- MII, MRI, STI);
+ MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
+ Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
if (Options.MCOptions.ShowMCEncoding)
- MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context);
+ MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
- *Context, Out, Options.MCOptions.AsmVerbose,
+ *Context, std::move(FOut), Options.MCOptions.AsmVerbose,
Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
@@ -200,17 +192,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI,
- *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
- AsmStreamer.reset(
- getTarget()
- .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE,
- STI, Options.MCOptions.MCRelaxAll));
+ // Don't waste memory on names of temp labels.
+ Context->setUseNamesOnTempLabels(false);
+
+ Triple T(getTargetTriple());
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(
+ T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
break;
}
case CGFT_Null:
@@ -221,13 +215,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
}
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
if (!Printer)
return true;
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.release();
-
PM.add(Printer);
return false;
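The rewrite above also retires the manual AsmStreamer.release(): ownership now rides a unique_ptr through std::move, so the "takes ownership" comment is enforced by the type system. A self-contained sketch of that handoff (Streamer and Printer are stand-ins, not the MC classes):

#include <cassert>
#include <memory>

struct Streamer {};

struct Printer {
  std::unique_ptr<Streamer> S;
  explicit Printer(std::unique_ptr<Streamer> S) : S(std::move(S)) {}
};

int main() {
  auto AsmStreamer = std::make_unique<Streamer>();
  Printer P(std::move(AsmStreamer)); // ownership transferred here
  assert(!AsmStreamer && P.S);       // the caller's pointer is now null
}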
@@ -238,9 +230,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
/// code is not supported. It fills the MCContext Ctx pointer which can be
/// used to build custom MCStreamer.
///
-bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
- MCContext *&Ctx,
- raw_ostream &Out,
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+ raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr);
@@ -252,29 +243,26 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(
- *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx);
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ MCCodeEmitter *MCE =
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
- std::unique_ptr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget()
- .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB,
- Out, MCE, STI,
- Options.MCOptions.MCRelaxAll));
+ Triple T(getTargetTriple());
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
+ std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
+ T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
if (!Printer)
return true;
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.release();
-
PM.add(Printer);
return false; // success!
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index cdf505e..4321849 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
-
-#ifdef NDEBUG
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
- LatencyPriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index b621e3b..be61a20 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes(
for (const auto &MBB : *MF) {
const MachineInstr *RangeBeginMI = nullptr;
const MachineInstr *PrevMI = nullptr;
- DebugLoc PrevDL;
+ const DILocation *PrevDL = nullptr;
for (const auto &MInsn : MBB) {
// Check if instruction has valid location information.
- const DebugLoc MIDL = MInsn.getDebugLoc();
- if (MIDL.isUnknown()) {
+ const DILocation *MIDL = MInsn.getDebugLoc();
+ if (!MIDL) {
PrevMI = &MInsn;
continue;
}
@@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes(
}
// Create last instruction range.
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
+ if (RangeBeginMI && PrevMI && PrevDL) {
InsnRange R(RangeBeginMI, PrevMI);
MIRanges.push_back(R);
MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
@@ -104,30 +104,19 @@ void LexicalScopes::extractLexicalScopes(
}
}
-LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) {
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
- auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
- return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
-}
-
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
/// given DebugLoc. Return NULL if not found.
-LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
+ DILocalScope *Scope = DL->getScope();
if (!Scope)
return nullptr;
// The scope that we were created with could have an extra file - which
// isn't what we care about in this case.
- DIDescriptor D = DIDescriptor(Scope);
- if (D.isLexicalBlockFile())
- Scope = DILexicalBlockFile(Scope).getScope();
+ if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
+ Scope = File->getScope();
- if (IA) {
+ if (auto *IA = DL->getInlinedAt()) {
auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
}
@@ -136,47 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
-LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
- if (DL.isUnknown())
- return nullptr;
- MDNode *Scope = nullptr;
- MDNode *InlinedAt = nullptr;
- DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
-
- if (InlinedAt) {
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
+ const DILocation *IA) {
+ if (IA) {
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
// Create an inlined scope for inlined function.
- return getOrCreateInlinedScope(Scope, InlinedAt);
+ return getOrCreateInlinedScope(Scope, IA);
}
return getOrCreateRegularScope(Scope);
}
/// getOrCreateRegularScope - Find or create a regular lexical scope.
-LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
- DIDescriptor D = DIDescriptor(Scope);
- if (D.isLexicalBlockFile()) {
- Scope = DILexicalBlockFile(Scope).getScope();
- D = DIDescriptor(Scope);
- }
+LexicalScope *
+LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
+ if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
+ Scope = File->getScope();
auto I = LexicalScopeMap.find(Scope);
if (I != LexicalScopeMap.end())
return &I->second;
+ // FIXME: Should the following dyn_cast be DILexicalBlock?
LexicalScope *Parent = nullptr;
- if (D.isLexicalBlock())
- Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
- // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
- // compatibility is no longer required.
- I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope),
- std::make_tuple(Parent, DIDescriptor(Scope),
- nullptr, false)).first;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateLexicalScope(Block->getScope());
+ I = LexicalScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope, nullptr,
+ false)).first;
if (!Parent) {
- assert(DIDescriptor(Scope).isSubprogram());
- assert(DISubprogram(Scope).describes(MF->getFunction()));
+ assert(cast<DISubprogram>(Scope)->describes(MF->getFunction()));
assert(!CurrentFnLexicalScope);
CurrentFnLexicalScope = &I->second;
}
@@ -185,51 +166,49 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
}
/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode,
- MDNode *InlinedAt) {
- std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt);
+LexicalScope *
+LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
+ const DILocation *InlinedAt) {
+ std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt);
auto I = InlinedLexicalScopeMap.find(P);
if (I != InlinedLexicalScopeMap.end())
return &I->second;
LexicalScope *Parent;
- DILexicalBlock Scope(ScopeNode);
- if (Scope.isSubprogram())
- Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt));
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt);
else
- Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt);
+ Parent = getOrCreateLexicalScope(InlinedAt);
- // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
- // compatibility is no longer required.
I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
- std::make_tuple(P),
- std::make_tuple(Parent, Scope, InlinedAt,
- false)).first;
+ std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope,
+ InlinedAt, false))
+ .first;
return &I->second;
}
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
-LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
- assert(N && "Invalid Scope encoding!");
+LexicalScope *
+LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
- DIDescriptor Scope(N);
- if (Scope.isLexicalBlockFile())
- Scope = DILexicalBlockFile(Scope).getScope();
+ if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
+ Scope = File->getScope();
auto I = AbstractScopeMap.find(Scope);
if (I != AbstractScopeMap.end())
return &I->second;
+ // FIXME: Should the following isa be DILexicalBlock?
LexicalScope *Parent = nullptr;
- if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(Scope);
- DIDescriptor ParentDesc = DB.getContext();
- Parent = getOrCreateAbstractScope(ParentDesc);
- }
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateAbstractScope(Block->getScope());
+
I = AbstractScopeMap.emplace(std::piecewise_construct,
std::forward_as_tuple(Scope),
std::forward_as_tuple(Parent, Scope,
nullptr, true)).first;
- if (Scope.isSubprogram())
+ if (isa<DISubprogram>(Scope))
AbstractScopesList.push_back(&I->second);
return &I->second;
}
@@ -290,7 +269,7 @@ void LexicalScopes::assignInstructionRanges(
/// have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void LexicalScopes::getMachineBasicBlocks(
- DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+ const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
MBBs.clear();
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
@@ -313,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks(
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
/// machine instruction's lexical scope in a given machine basic block.
-bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
+bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
return false;
@@ -325,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
bool Result = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
- DebugLoc IDL = I->getDebugLoc();
- if (IDL.isUnknown())
- continue;
- if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
- if (Scope->dominates(IScope))
- return true;
+ if (const DILocation *IDL = I->getDebugLoc())
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
}
return Result;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index dc936a3..1571551 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -157,10 +158,10 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset,
- bool indirect) const {
- return Var == Variable && Expr == Expression && Offset == offset &&
- indirect == IsIndirect;
+ bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA,
+ unsigned Offset, bool indirect) const {
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA &&
+ Offset == offset && indirect == IsIndirect;
}
/// merge - Merge equivalence classes.
@@ -268,15 +269,9 @@ public:
void emitDebugValues(VirtRegMap *VRM,
LiveIntervals &LIS, const TargetInstrInfo &TRI);
- /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
- /// variable may have more than one corresponding DBG_VALUE instructions.
- /// Only first one needs DebugLoc to identify variable's lexical scope
- /// in source file.
- DebugLoc findDebugLoc();
-
/// getDebugLoc - Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
- void print(raw_ostream&, const TargetMachine*);
+ void print(raw_ostream &, const TargetRegisterInfo *);
};
} // namespace
@@ -362,10 +357,47 @@ public:
};
} // namespace
-void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
- DIVariable DV(Variable);
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL)
+ return;
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ // Omit the directory, because it's likely to be long and uninteresting.
+ CommentOS << Scope->getFilename();
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+
+ DebugLoc InlinedAtDL = DL.getInlinedAt();
+ if (!InlinedAtDL)
+ return;
+
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+}
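printDebugLoc walks the inlined-at chain by recursing on the outer location. The same shape on a hand-rolled Loc struct, independent of DebugLoc/DILocation:

#include <iostream>

struct Loc {
  const char *File;
  int Line;
  const Loc *InlinedAt; // where this frame was inlined, or null
};

static void printLoc(const Loc &L, std::ostream &OS) {
  OS << L.File << ':' << L.Line;
  if (L.InlinedAt) {
    OS << " @[ ";
    printLoc(*L.InlinedAt, OS); // recurse down the inline chain
    OS << " ]";
  }
}

int main() {
  Loc Outer{"main.c", 10, nullptr};
  Loc Inner{"util.h", 3, &Outer};
  printLoc(Inner, std::cout); // prints: util.h:3 @[ main.c:10 ]
  std::cout << '\n';
}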
+
+static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
+ const DILocation *DL) {
+ const LLVMContext &Ctx = V->getContext();
+ StringRef Res = V->getName();
+ if (!Res.empty())
+ OS << Res << "," << V->getLine();
+ if (auto *InlinedAt = DL->getInlinedAt()) {
+ if (DebugLoc InlinedAtDL = InlinedAt) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ auto *DV = cast<DILocalVariable>(Variable);
OS << "!\"";
- DV.printExtendedName(OS);
+ printExtendedName(OS, DV, dl);
+
OS << "\"\t";
if (offset)
OS << '+' << offset;
@@ -378,7 +410,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
OS << " Loc" << i << '=';
- locations[i].print(OS, TM);
+ locations[i].print(OS, TRI);
}
OS << '\n';
}
@@ -386,7 +418,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
void LDVImpl::print(raw_ostream &OS) {
OS << "********** DEBUG VARIABLES **********\n";
for (unsigned i = 0, e = userValues.size(); i != e; ++i)
- userValues[i]->print(OS, &MF->getTarget());
+ userValues[i]->print(OS, TRI);
}
void UserValue::coalesceLocation(unsigned LocNo) {
@@ -432,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, Offset, IsIndirect))
+ if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
return UV;
}
@@ -941,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
std::next(MachineBasicBlock::iterator(MI));
}
-DebugLoc UserValue::findDebugLoc() {
- DebugLoc D = dl;
- dl = DebugLoc();
- return D;
-}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
unsigned LocNo,
LiveIntervals &LIS,
@@ -954,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
MachineOperand &Loc = locations[LocNo];
++NumInsertedDebugValues;
+ assert(cast<DILocalVariable>(Variable)
+ ->isValidLocationForIntrinsic(getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
if (Loc.isReg())
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
.addOperand(Loc)
.addImm(offset)
.addMetadata(Variable)
@@ -1004,7 +1034,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ DEBUG(userValues[i]->print(dbgs(), TRI));
userValues[i]->rewriteLocations(*VRM, *TRI);
userValues[i]->emitDebugValues(VRM, *LIS, *TII);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index 9748329..ac2d1a1 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -33,12 +33,13 @@ class VirtRegMap;
class LiveDebugVariables : public MachineFunctionPass {
void *pImpl;
- DenseMap<const Function*, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
+
public:
static char ID; // Pass identification, replacement for typeid
LiveDebugVariables();
- ~LiveDebugVariables();
+ ~LiveDebugVariables() override;
/// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
/// @param OldReg Old virtual register that is going away.
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 9423edc..d75e441 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -32,6 +32,274 @@
#include <algorithm>
using namespace llvm;
+namespace {
+//===----------------------------------------------------------------------===//
+// Implementation of various methods necessary for calculation of live ranges.
+// The implementation of the methods abstracts from the concrete type of the
+// segment collection.
+//
+// The implementation follows the Template Method design pattern. The base
+// class contains generic algorithms that call collection-specific methods,
+// which are provided in concrete subclasses. In order to avoid virtual calls,
+// these methods are supplied through C++ template instantiation (the
+// Curiously Recurring Template Pattern): the base class calls the methods of
+// the subclass through method impl(), which casts the 'this' pointer to the
+// type of the subclass.
+//
+//===----------------------------------------------------------------------===//
+
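A minimal, self-contained illustration of that scheme — the generic base dispatches statically into the concrete collection through impl(), with no vtable; names here are illustrative only:

#include <cstddef>
#include <iostream>
#include <vector>

template <typename ImplT> class CalcBase {
  ImplT &impl() { return *static_cast<ImplT *>(this); } // the CRTP cast
public:
  // Generic algorithm; storage details are delegated to the subclass.
  void addTwice(int V) {
    impl().insert(V);
    impl().insert(V);
  }
};

class VectorCalc : public CalcBase<VectorCalc> {
  std::vector<int> Storage;
public:
  void insert(int V) { Storage.push_back(V); }
  std::size_t size() const { return Storage.size(); }
};

int main() {
  VectorCalc C;
  C.addTwice(7);
  std::cout << C.size() << '\n'; // prints: 2
}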
+template <typename ImplT, typename IteratorT, typename CollectionT>
+class CalcLiveRangeUtilBase {
+protected:
+ LiveRange *LR;
+
+protected:
+ CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
+
+public:
+ typedef LiveRange::Segment Segment;
+ typedef IteratorT iterator;
+
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+
+ iterator I = impl().find(Def);
+ if (I == segments().end()) {
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ Segment *S = segmentAt(I);
+ if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert(S->valno->def == S->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, S->start);
+ if (Def != S->start)
+ S->start = S->valno->def = Def;
+ return S->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return nullptr;
+ iterator I =
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ if (I == segments().begin())
+ return nullptr;
+ --I;
+ if (I->end <= StartIdx)
+ return nullptr;
+ if (I->end < Use)
+ extendSegmentEndTo(I, Use);
+ return I->valno;
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to end at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap
+ /// with. The iterator is not invalidated.
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = std::next(I);
+ for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo)
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+
+ // If NewEnd was in the middle of a segment, make sure to get its endpoint.
+ S->end = std::max(NewEnd, std::prev(MergeTo)->end);
+
+ // If the newly formed segment now touches the segment after it and if they
+ // have the same value number, merge the two segments into one segment.
+ if (MergeTo != segments().end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ S->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead segments.
+ segments().erase(std::next(I), MergeTo);
+ }
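The merge behaviour of extendSegmentEndTo can be exercised on a plain vector of [start,end) pairs; this sketch assumes a single value number throughout, so the valno asserts drop out:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

using Seg = std::pair<int, int>; // [start, end)

// Extend the segment at index I to NewEnd, swallowing overlapped successors.
static void extendEndTo(std::vector<Seg> &Segs, std::size_t I, int NewEnd) {
  std::size_t MergeTo = I + 1;
  while (MergeTo != Segs.size() && NewEnd >= Segs[MergeTo].second)
    ++MergeTo;                               // fully covered: drop it later
  Segs[I].second = std::max(NewEnd, Segs[MergeTo - 1].second);
  if (MergeTo != Segs.size() && Segs[MergeTo].first <= Segs[I].second) {
    Segs[I].second = Segs[MergeTo].second;   // touches the next segment
    ++MergeTo;
  }
  Segs.erase(Segs.begin() + I + 1, Segs.begin() + MergeTo);
}

int main() {
  std::vector<Seg> Segs = {{0, 2}, {4, 6}, {8, 10}};
  extendEndTo(Segs, 0, 5); // overlaps {4,6}, stays clear of {8,10}
  for (const Seg &S : Segs)
    std::cout << '[' << S.first << ',' << S.second << ") ";
  // prints: [0,6) [8,10)
}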
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to start at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap with.
+ iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = I;
+ do {
+ if (MergeTo == segments().begin()) {
+ S->start = NewStart;
+ segments().erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another segment, just delete a range and
+ // extend that segment.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ segmentAt(MergeTo)->end = S->end;
+ } else {
+ // Otherwise, extend the segment right after.
+ ++MergeTo;
+ Segment *MergeToSeg = segmentAt(MergeTo);
+ MergeToSeg->start = NewStart;
+ MergeToSeg->end = S->end;
+ }
+
+ segments().erase(std::next(MergeTo), std::next(I));
+ return MergeTo;
+ }
+
+ iterator addSegment(Segment S) {
+ SlotIndex Start = S.start, End = S.end;
+ iterator I = impl().findInsertPos(S);
+
+ // If the inserted segment starts in the middle or right at the end of
+ // another segment, just extend that segment to contain the segment of S.
+ if (I != segments().begin()) {
+ iterator B = std::prev(I);
+ if (S.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendSegmentEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two segments with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this segment ends in the middle of, or right next
+ // to, another segment, merge it into that segment.
+ if (I != segments().end()) {
+ if (S.valno == I->valno) {
+ if (I->start <= End) {
+ I = extendSegmentStartTo(I, Start);
+
+ // If S is a complete superset of a segment, we may need to grow its
+ // endpoint as well.
+ if (End > I->end)
+ extendSegmentEndTo(I, End);
+ return I;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(I->start >= End &&
+ "Cannot overlap two segments with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new segment that doesn't interact with
+ // anything.
+ // Insert it.
+ return segments().insert(I, S);
+ }
+
+private:
+ ImplT &impl() { return *static_cast<ImplT *>(this); }
+
+ CollectionT &segments() { return impl().segmentsColl(); }
+
+ Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment vector.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilVector;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+ LiveRange::Segments> CalcLiveRangeUtilVectorBase;
+
+class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
+public:
+ CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilVectorBase;
+
+ LiveRange::Segments &segmentsColl() { return LR->segments; }
+
+ void insertAtEnd(const Segment &S) { LR->segments.push_back(S); }
+
+ iterator find(SlotIndex Pos) { return LR->find(Pos); }
+
+ iterator findInsertPos(Segment S) {
+ return std::upper_bound(LR->begin(), LR->end(), S.start);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment set.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilSet;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet,
+ LiveRange::SegmentSet::iterator,
+ LiveRange::SegmentSet> CalcLiveRangeUtilSetBase;
+
+class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
+public:
+ CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilSetBase;
+
+ LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; }
+
+ void insertAtEnd(const Segment &S) {
+ LR->segmentSet->insert(LR->segmentSet->end(), S);
+ }
+
+ iterator find(SlotIndex Pos) {
+ iterator I =
+ LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr));
+ if (I == LR->segmentSet->begin())
+ return I;
+ iterator PrevI = std::prev(I);
+ if (Pos < (*PrevI).end)
+ return PrevI;
+ return I;
+ }
+
+ iterator findInsertPos(Segment S) {
+ iterator I = LR->segmentSet->upper_bound(S);
+ if (I != LR->segmentSet->end() && !(S.start < *I))
+ ++I;
+ return I;
+ }
+};
+} // namespace
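The set-based find above answers "which segment contains Pos?" with one upper_bound plus a look at the predecessor. The same query on a std::set of toy [start,end) segments ordered by start:

#include <iostream>
#include <iterator>
#include <set>

struct Seg {
  int start, end; // [start, end)
  bool operator<(const Seg &O) const { return start < O.start; }
};

static const Seg *findSeg(const std::set<Seg> &Segs, int Pos) {
  auto I = Segs.upper_bound(Seg{Pos, 0}); // first segment starting after Pos
  if (I == Segs.begin())
    return nullptr;
  auto Prev = std::prev(I);
  return Pos < Prev->end ? &*Prev : nullptr; // is Pos inside the previous one?
}

int main() {
  std::set<Seg> Segs = {{0, 2}, {4, 6}};
  std::cout << (findSeg(Segs, 5) != nullptr)  // 1: inside [4,6)
            << (findSeg(Segs, 3) != nullptr)  // 0: in the gap
            << '\n';
}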
+
+//===----------------------------------------------------------------------===//
+// LiveRange methods
+//===----------------------------------------------------------------------===//
+
LiveRange::iterator LiveRange::find(SlotIndex Pos) {
// This algorithm is basically std::upper_bound.
// Unfortunately, std::upper_bound cannot be used with mixed types until we
@@ -52,30 +320,11 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) {
VNInfo *LiveRange::createDeadDef(SlotIndex Def,
VNInfo::Allocator &VNInfoAllocator) {
- assert(!Def.isDead() && "Cannot define a value at the dead slot");
- iterator I = find(Def);
- if (I == end()) {
- VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- segments.push_back(Segment(Def, Def.getDeadSlot(), VNI));
- return VNI;
- }
- if (SlotIndex::isSameInstr(Def, I->start)) {
- assert(I->valno->def == I->start && "Inconsistent existing value def");
-
- // It is possible to have both normal and early-clobber defs of the same
- // register on an instruction. It doesn't make a lot of sense, but it is
- // possible to specify in inline assembly.
- //
- // Just convert everything to early-clobber.
- Def = std::min(Def, I->start);
- if (Def != I->start)
- I->start = I->valno->def = Def;
- return I->valno;
- }
- assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
- VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI));
- return VNI;
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator);
}
// overlaps - Return true if the intersection of the two live ranges is
@@ -236,68 +485,18 @@ void LiveRange::RenumberValues() {
}
}
-/// This method is used when we want to extend the segment specified by I to end
-/// at the specified endpoint. To do this, we should merge and eliminate all
-/// segments that this will overlap with. The iterator is not invalidated.
-void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
- assert(I != end() && "Not a valid segment!");
- VNInfo *ValNo = I->valno;
-
- // Search for the first segment that we can't merge with.
- iterator MergeTo = std::next(I);
- for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) {
- assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
- }
-
- // If NewEnd was in the middle of a segment, make sure to get its endpoint.
- I->end = std::max(NewEnd, std::prev(MergeTo)->end);
-
- // If the newly formed segment now touches the segment after it and if they
- // have the same value number, merge the two segments into one segment.
- if (MergeTo != end() && MergeTo->start <= I->end &&
- MergeTo->valno == ValNo) {
- I->end = MergeTo->end;
- ++MergeTo;
- }
-
- // Erase any dead segments.
- segments.erase(std::next(I), MergeTo);
+void LiveRange::addSegmentToSet(Segment S) {
+ CalcLiveRangeUtilSet(this).addSegment(S);
}
-
-/// This method is used when we want to extend the segment specified by I to
-/// start at the specified endpoint. To do this, we should merge and eliminate
-/// all segments that this will overlap with.
-LiveRange::iterator
-LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) {
- assert(I != end() && "Not a valid segment!");
- VNInfo *ValNo = I->valno;
-
- // Search for the first segment that we can't merge with.
- iterator MergeTo = I;
- do {
- if (MergeTo == begin()) {
- I->start = NewStart;
- segments.erase(MergeTo, I);
- return I;
- }
- assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
- --MergeTo;
- } while (NewStart <= MergeTo->start);
-
- // If we start in the middle of another segment, just delete a range and
- // extend that segment.
- if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
- MergeTo->end = I->end;
- } else {
- // Otherwise, extend the segment right after.
- ++MergeTo;
- MergeTo->start = NewStart;
- MergeTo->end = I->end;
+LiveRange::iterator LiveRange::addSegment(Segment S) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr) {
+ addSegmentToSet(S);
+ return end();
}
-
- segments.erase(std::next(MergeTo), std::next(I));
- return MergeTo;
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).addSegment(S);
}
void LiveRange::append(const Segment S) {
@@ -306,69 +505,15 @@ void LiveRange::append(const Segment S) {
segments.push_back(S);
}
-LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) {
- SlotIndex Start = S.start, End = S.end;
- iterator it = std::upper_bound(From, end(), Start);
-
- // If the inserted segment starts in the middle or right at the end of
- // another segment, just extend that segment to contain the segment of S.
- if (it != begin()) {
- iterator B = std::prev(it);
- if (S.valno == B->valno) {
- if (B->start <= Start && B->end >= Start) {
- extendSegmentEndTo(B, End);
- return B;
- }
- } else {
- // Check to make sure that we are not overlapping two live segments with
- // different valno's.
- assert(B->end <= Start &&
- "Cannot overlap two segments with differing ValID's"
- " (did you def the same reg twice in a MachineInstr?)");
- }
- }
-
- // Otherwise, if this segment ends in the middle of, or right next to, another
- // segment, merge it into that segment.
- if (it != end()) {
- if (S.valno == it->valno) {
- if (it->start <= End) {
- it = extendSegmentStartTo(it, Start);
-
- // If S is a complete superset of a segment, we may need to grow its
- // endpoint as well.
- if (End > it->end)
- extendSegmentEndTo(it, End);
- return it;
- }
- } else {
- // Check to make sure that we are not overlapping two live segments with
- // different valno's.
- assert(it->start >= End &&
- "Cannot overlap two segments with differing ValID's");
- }
- }
-
- // Otherwise, this is just a new segment that doesn't interact with anything.
- // Insert it.
- return segments.insert(it, S);
-}
-
/// extendInBlock - If this range is live before Kill in the basic
/// block that starts at StartIdx, extend it to be live up to Kill and return
/// the value. If there is no live range before Kill, return NULL.
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
- if (empty())
- return nullptr;
- iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
- if (I == begin())
- return nullptr;
- --I;
- if (I->end <= StartIdx)
- return nullptr;
- if (I->end < Kill)
- extendSegmentEndTo(I, Kill);
- return I->valno;
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill);
}
/// Remove the specified segment from this range. Note that the segment must
@@ -424,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- iterator I = end();
- iterator E = begin();
- do {
- --I;
- if (I->valno == ValNo)
- segments.erase(I);
- } while (I != E);
+ segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ return S.valno == ValNo;
+ }), end());
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
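The rewritten removeValNo is the standard erase-remove idiom: std::remove_if compacts the survivors to the front and a single erase chops the tail. In isolation:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> V = {1, 2, 3, 2, 4};
  int Dead = 2; // plays the role of ValNo
  V.erase(std::remove_if(V.begin(), V.end(),
                         [Dead](int X) { return X == Dead; }),
          V.end());
  for (int X : V)
    std::cout << X << ' '; // prints: 1 3 4
}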
@@ -598,6 +739,21 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
return V2;
}
+void LiveRange::flushSegmentSet() {
+ assert(segmentSet != nullptr && "segment set must have been created");
+ assert(
+ segments.empty() &&
+ "segment set can be used only initially before switching to the array");
+ segments.append(segmentSet->begin(), segmentSet->end());
+ segmentSet = nullptr;
+ verify();
+}
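flushSegmentSet is the hinge of a two-phase storage scheme: an ordered set absorbs out-of-order insertions cheaply while the range is being built, then everything moves to a flat vector for the dense queries done afterwards. A toy Range with int "segments" standing in for LiveRange::Segment:

#include <cassert>
#include <memory>
#include <set>
#include <vector>

struct Range {
  std::vector<int> Segments;                 // final, cache-friendly storage
  std::unique_ptr<std::set<int>> SegmentSet; // only during construction

  explicit Range(bool UseSet)
      : SegmentSet(UseSet ? new std::set<int> : nullptr) {}

  void add(int S) {
    if (SegmentSet)
      SegmentSet->insert(S); // O(log n), no mid-vector shifting
    else
      Segments.push_back(S);
  }

  void flushSegmentSet() {
    assert(SegmentSet && Segments.empty());
    Segments.assign(SegmentSet->begin(), SegmentSet->end());
    SegmentSet.reset();      // from here on, vector only
  }
};

int main() {
  Range R(true);
  R.add(3); R.add(1); R.add(2); // arrives out of order
  R.flushSegmentSet();
  assert((R.Segments == std::vector<int>{1, 2, 3}));
}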
+
+void LiveInterval::freeSubRange(SubRange *S) {
+ S->~SubRange();
+ // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
void LiveInterval::removeEmptySubRanges() {
SubRange **NextPtr = &SubRanges;
SubRange *I = *NextPtr;
@@ -609,12 +765,22 @@ void LiveInterval::removeEmptySubRanges() {
}
// Skip empty subranges until we find the first nonempty one.
do {
- I = I->Next;
+ SubRange *Next = I->Next;
+ freeSubRange(I);
+ I = Next;
} while (I != nullptr && I->empty());
*NextPtr = I;
}
}
+void LiveInterval::clearSubRanges() {
+ for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+ Next = I->Next;
+ freeSubRange(I);
+ }
+ SubRanges = nullptr;
+}
+
/// Helper function for constructMainRangeFromSubranges(): Search the CFG
/// backwards until we find a place covered by a LiveRange segment that actually
/// has a valno set.
@@ -650,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
SmallPtrSet<const MachineBasicBlock*, 5> Visited;
- for (LiveRange::Segment &S : LI.segments) {
- if (S.valno != nullptr)
- continue;
- // This can only happen at the begin of a basic block.
- assert(S.start.isBlock() && "valno should only be missing at block begin");
-
- Visited.clear();
- const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
- for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
- if (VNI != nullptr) {
- S.valno = VNI;
- break;
+
+ LiveRange::iterator OutIt;
+ VNInfo *PrevValNo = nullptr;
+ for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &S = *I;
+ // Determine final VNI if necessary.
+ if (S.valno == nullptr) {
+ // This can only happen at the begin of a basic block.
+ assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+ Visited.clear();
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+ if (VNI != nullptr) {
+ S.valno = VNI;
+ break;
+ }
}
+ assert(S.valno != nullptr && "could not determine valno");
+ }
+ // Merge with previous segment if it has the same VNI.
+ if (PrevValNo == S.valno && OutIt->end == S.start) {
+ OutIt->end = S.end;
+ } else {
+ // Didn't merge. Move OutIt to next segment.
+ if (PrevValNo == nullptr)
+ OutIt = LI.begin();
+ else
+ ++OutIt;
+
+ if (OutIt != I)
+ *OutIt = *I;
+ PrevValNo = S.valno;
}
- assert(S.valno != nullptr && "could not determine valno");
}
+ // If we merged some segments chop off the end.
+ ++OutIt;
+ LI.segments.erase(OutIt, LI.end());
}
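The second half of that loop is an in-place compaction: a trailing output iterator absorbs adjacent segments carrying the same value, and the leftover tail is erased once at the end. The same pattern on a toy Seg vector:

#include <iostream>
#include <iterator>
#include <vector>

struct Seg { int start, end, val; };

static void mergeAdjacent(std::vector<Seg> &Segs) {
  if (Segs.empty())
    return;
  auto Out = Segs.begin();
  for (auto I = std::next(Segs.begin()); I != Segs.end(); ++I) {
    if (Out->val == I->val && Out->end == I->start)
      Out->end = I->end; // merged into the previous output segment
    else
      *++Out = *I;       // didn't merge: advance Out and copy across
  }
  Segs.erase(std::next(Out), Segs.end()); // chop off the merged-away tail
}

int main() {
  std::vector<Seg> Segs = {{0, 2, 7}, {2, 5, 7}, {6, 8, 9}};
  mergeAdjacent(Segs);
  for (const Seg &S : Segs)
    std::cout << '[' << S.start << ',' << S.end << "):" << S.val << ' ';
  // prints: [0,5):7 [6,8):9
}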
void LiveInterval::constructMainRangeFromSubranges(
@@ -789,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges(
NeedVNIFixup = true;
}
+ // In rare cases we can produce adjacent segments with the same value
+ // number (if they come from different subranges, but happen to have
+ // the same defining instruction). VNIFixup will fix those cases.
+ if (!empty() && segments.back().end == Pos &&
+ segments.back().valno == VNI)
+ NeedVNIFixup = true;
CurrentSegment.start = Pos;
CurrentSegment.valno = VNI;
ConstructingSegment = true;
@@ -997,6 +1191,13 @@ static inline bool coalescable(const LiveRange::Segment &A,
void LiveRangeUpdater::add(LiveRange::Segment Seg) {
assert(LR && "Cannot add to a null destination");
+ // Fall back to the regular add method if the live range
+ // is using the segment set instead of the segment vector.
+ if (LR->segmentSet != nullptr) {
+ LR->addSegmentToSet(Seg);
+ return;
+ }
+
// Flush the state if Start moves backwards.
if (!LastStart.isValid() || LastStart > Seg.start) {
if (isDirty())
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 56f38b6..adca4cc 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -67,6 +67,13 @@ static cl::opt<bool> EnableSubRegLiveness(
"enable-subreg-liveness", cl::Hidden, cl::init(true),
cl::desc("Enable subregister liveness tracking."));
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+ "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use segment set for the computation of the live ranges of physregs."));
+}
+
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
@@ -192,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI);
+ LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
computeDeadValues(LI, nullptr);
}
@@ -268,6 +275,10 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
LRCalc->extendToUses(LR, Reg);
}
}
+
+ // Flush the segment set to the segment vector.
+ if (UseSegmentSetForPhysRegs)
+ LR.flushSegmentSet();
}
@@ -300,7 +311,8 @@ void LiveIntervals::computeLiveInRegUnits() {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
- LR = RegUnitRanges[Unit] = new LiveRange();
+      // Use the segment set to speed up initial computation of the live range.
+ LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
NewRanges.push_back(Unit);
}
VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
@@ -448,23 +460,34 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
for (auto VNI : LI.valnos) {
if (VNI->isUnused())
continue;
- LiveRange::iterator I = LI.FindSegmentContaining(VNI->def);
+ SlotIndex Def = VNI->def;
+ LiveRange::iterator I = LI.FindSegmentContaining(Def);
assert(I != LI.end() && "Missing segment for VNI");
- if (I->end != VNI->def.getDeadSlot())
+
+ // Is the register live before? Otherwise we may have to add a read-undef
+ // flag for subregister defs.
+ if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
+ if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ MI->addRegisterDefReadUndef(LI.reg);
+ }
+ }
+
+ if (I->end != Def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
VNI->markUnused();
LI.removeSegment(I);
- DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
PHIRemoved = true;
} else {
// This is a dead def. Make sure the instruction knows.
- MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
MI->addRegisterDead(LI.reg, TRI);
if (dead && MI->allDefsAreDead()) {
- DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
+ DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
}
}
@@ -609,15 +632,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
}
}
-void LiveIntervals::pruneValue(LiveInterval &LI, SlotIndex Kill,
- SmallVectorImpl<SlotIndex> *EndPoints) {
- pruneValue((LiveRange&)LI, Kill, EndPoints);
-
- for (LiveInterval::SubRange &SR : LI.subranges()) {
- pruneValue(SR, Kill, nullptr);
- }
-}
-
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
@@ -648,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
SRs.clear();
for (const LiveInterval::SubRange &SR : LI.subranges()) {
SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
@@ -686,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
goto CancelKill;
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
// Example:
@@ -1376,3 +1390,25 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
repairOldRegInRange(Begin, End, endIdx, LI, Reg);
}
}
+
+void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (LiveRange *LR = getCachedRegUnit(*Units))
+ if (VNInfo *VNI = LR->getVNInfoAt(Pos))
+ LR->removeValNo(VNI);
+ }
+}
+
+void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ VNInfo *VNI = LI.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ return;
+ LI.removeValNo(VNI);
+
+ // Also remove the value in subranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (VNInfo *SVNI = S.getVNInfoAt(Pos))
+ S.removeValNo(SVNI);
+ }
+ LI.removeEmptySubRanges();
+}
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 7efd941..eef7643 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -16,17 +16,23 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// \brief Remove all registers from the set that get clobbered by the register
/// mask.
-void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) {
+/// The clobbers set will be the list of live registers clobbered
+/// by the regmask.
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
+ SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
while (LRI != LiveRegs.end()) {
- if (MO.clobbersPhysReg(*LRI))
+ if (MO.clobbersPhysReg(*LRI)) {
+ if (Clobbers)
+ Clobbers->push_back(std::make_pair(*LRI, &MO));
LRI = LiveRegs.erase(LRI);
- else
+ } else
++LRI;
}
}
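The new signature threads an optional out-parameter: callers that don't care pass nullptr, callers that do receive exactly the elements that were dropped. A small sketch of the convention:

#include <iostream>
#include <vector>

static void removeEvens(std::vector<int> &Live,
                        std::vector<int> *Clobbers /*may be null*/) {
  auto I = Live.begin();
  while (I != Live.end()) {
    if (*I % 2 == 0) {
      if (Clobbers)
        Clobbers->push_back(*I); // record only when the caller asked
      I = Live.erase(I);
    } else
      ++I;
  }
}

int main() {
  std::vector<int> Live = {1, 2, 3, 4};
  std::vector<int> Clobbered;
  removeEvens(Live, &Clobbered);
  removeEvens(Live, nullptr); // second caller doesn't need the list
  std::cout << Live.size() << ' ' << Clobbered.size() << '\n'; // prints: 2 2
}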
@@ -44,7 +50,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
continue;
removeReg(Reg);
} else if (O->isRegMask())
- removeRegsInMask(*O);
+ removeRegsInMask(*O, nullptr);
}
// Add uses to the set.
@@ -62,8 +68,8 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// killed-uses, add defs. This is not the recommended way, because it depends
/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
-void LivePhysRegs::stepForward(const MachineInstr &MI) {
- SmallVector<unsigned, 4> Defs;
+void LivePhysRegs::stepForward(const MachineInstr &MI,
+ SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
if (O->isReg()) {
@@ -71,8 +77,9 @@ void LivePhysRegs::stepForward(const MachineInstr &MI) {
if (Reg == 0)
continue;
if (O->isDef()) {
- if (!O->isDead())
- Defs.push_back(Reg);
+ // Note, dead defs are still recorded. The caller should decide how to
+ // handle them.
+ Clobbers.push_back(std::make_pair(Reg, &*O));
} else {
if (!O->isKill())
continue;
@@ -80,12 +87,16 @@ void LivePhysRegs::stepForward(const MachineInstr &MI) {
removeReg(Reg);
}
} else if (O->isRegMask())
- removeRegsInMask(*O);
+ removeRegsInMask(*O, &Clobbers);
}
// Add defs to the set.
- for (unsigned i = 0, e = Defs.size(); i != e; ++i)
- addReg(Defs[i]);
+ for (auto Reg : Clobbers) {
+ // Skip dead defs. They shouldn't be added to the set.
+ if (Reg.second->isReg() && Reg.second->isDead())
+ continue;
+ addReg(Reg.first);
+ }
}
/// Print the currently live registers to OS.
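
With the new signatures, callers of stepForward() own the clobber list and see dead defs as well. A hedged sketch of the updated usage; the loop shape, the addLiveIns() seeding, and the variable names are assumptions:

    LivePhysRegs LiveRegs(TRI);
    LiveRegs.addLiveIns(&MBB);  // seed with the block's live-in list
    SmallVector<std::pair<unsigned, const MachineOperand *>, 4> Clobbers;
    for (MachineInstr &MI : MBB) {
      Clobbers.clear();
      LiveRegs.stepForward(MI, Clobbers);
      // Clobbers now lists every def of MI, dead defs and regmask
      // clobbers included; the live set itself already excludes dead defs.
    }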
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 1d46161..bb2877a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -43,17 +43,14 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LiveRange &LR, const MachineOperand &MO) {
const MachineInstr *MI = MO.getParent();
- SlotIndex DefIdx;
- if (MI->isPHI())
- DefIdx = Indexes.getMBBStartIdx(MI->getParent());
- else
- DefIdx = Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
// Create the def in LR. This may find an existing def.
LR.createDeadDef(DefIdx, Alloc);
}
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
assert(MRI && Indexes && "call reset() first");
// Step 1: Create minimal live segments for every definition of Reg.
@@ -66,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
continue;
unsigned SubReg = MO.getSubReg();
- if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
: MRI->getMaxLaneMaskForVReg(Reg);
@@ -222,23 +219,23 @@ void LiveRangeCalc::updateFromLiveIns() {
}
-void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) {
- assert(Kill.isValid() && "Invalid SlotIndex");
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+ assert(Use.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
- MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
- assert(KillMBB && "No MBB at Kill");
+ MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot());
+ assert(UseMBB && "No MBB at Use");
// Is there a def in the same MBB we can extend?
- if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
return;
- // Find the single reaching def, or determine if Kill is jointly dominated by
+ // Find the single reaching def, or determine if Use is jointly dominated by
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- if (findReachingDefs(LR, *KillMBB, Kill, PhysReg))
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
return;
// When there were multiple different values, we may need new PHIs.
@@ -257,12 +254,12 @@ void LiveRangeCalc::calculateValues() {
}
-bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
- SlotIndex Kill, unsigned PhysReg) {
- unsigned KillMBBNum = KillMBB.getNumber();
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+ SlotIndex Use, unsigned PhysReg) {
+ unsigned UseMBBNum = UseMBB.getNumber();
// Block numbers where LR should be live-in.
- SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
+ SmallVector<unsigned, 16> WorkList(1, UseMBBNum);
// Remember if we have seen more than one value.
bool UniqueVNI = true;
@@ -275,13 +272,19 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
#ifndef NDEBUG
if (MBB->pred_empty()) {
MBB->getParent()->verify();
+ errs() << "Use of " << PrintReg(PhysReg)
+ << " does not have a corresponding definition on every path:\n";
+ const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
+ if (MI != nullptr)
+ errs() << Use << " " << *MI;
llvm_unreachable("Use not jointly dominated by defs.");
}
if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
!MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
- errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ errs() << "The register " << PrintReg(PhysReg)
+ << " needs to be live in to BB#" << MBB->getNumber()
<< ", but is missing from the live-in list.\n";
llvm_unreachable("Invalid global physical register");
}
@@ -316,11 +319,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
}
// No, we need a live-in value for Pred as well
- if (Pred != &KillMBB)
+ if (Pred != &UseMBB)
WorkList.push_back(Pred->getNumber());
else
- // Loopback to KillMBB, so value is really live through.
- Kill = SlotIndex();
+ // Loopback to UseMBB, so value is really live through.
+ Use = SlotIndex();
}
}
@@ -338,9 +341,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
E = WorkList.end(); I != E; ++I) {
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(*I);
- // Trim the live range in KillMBB.
- if (*I == KillMBBNum && Kill.isValid())
- End = Kill;
+ // Trim the live range in UseMBB.
+ if (*I == UseMBBNum && Use.isValid())
+ End = Use;
else
Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr);
Updater.add(Start, End, TheVNI);
@@ -355,8 +358,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
addLiveInBlock(LR, DomTree->getNode(MBB));
- if (MBB == &KillMBB)
- LiveIn.back().Kill = Kill;
+ if (MBB == &UseMBB)
+ LiveIn.back().Kill = Use;
}
return false;
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 1b9099b..34d9953 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -101,17 +101,17 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
- /// of defs that can reach it.
+ /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can
+ /// reach it.
///
- /// If only one def can reach Kill, all paths from the def to kill are added
- /// to LI, and the function returns true.
+ /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB
+ /// are added to @p LR, and the function returns true.
///
- /// If multiple values can reach Kill, the blocks that need LI to be live in
- /// are added to the LiveIn array, and the function returns false.
+ /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
+ /// live in are added to the LiveIn array, and the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
SlotIndex Kill, unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
@@ -162,15 +162,14 @@ public:
// Modify existing live ranges.
//
- /// extend - Extend the live range of LI to reach Kill.
+ /// Extend the live range of @p LR to reach @p Use.
///
- /// The existing values in LI must be live so they jointly dominate Kill. If
- /// Kill is not dominated by a single existing value, PHI-defs are inserted
- /// as required to preserve SSA form. If Kill is known to be dominated by a
- /// single existing value, Alloc may be null.
+ /// The existing values in @p LR must be live so they jointly dominate @p Use.
+ /// If @p Use is not dominated by a single existing value, PHI-defs are
+ /// inserted as required to preserve SSA form.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0);
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0);
/// createDeadDefs - Create a dead def in LI for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
@@ -188,7 +187,7 @@ public:
/// Calculates liveness for the register specified in live interval @p LI.
/// Creates subregister live ranges as needed if subreg liveness tracking is
/// enabled.
- void calculate(LiveInterval &LI);
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
//===--------------------------------------------------------------------===//
// Low-level interface.
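
Subregister tracking is now an explicit parameter instead of being read from MRI inside calculate(), and the Kill terminology is gone from extend(). A sketch of driving the updated entry points; MF, MDT, UseIdx and the other names are assumptions:

    LRCalc.reset(&MF, LIS.getSlotIndexes(), MDT, &LIS.getVNInfoAllocator());
    // The caller now decides whether subregister ranges are created.
    LRCalc.calculate(LI, MRI->subRegLivenessEnabled());
    // Extend LR so it reaches the use; PHI-defs are inserted when no
    // single existing value dominates the slot.
    LRCalc.extend(LR, UseIdx.getRegSlot());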
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 47da205..27c57d5 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -196,7 +196,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
// We also need to make sure it is safe to move the load.
// Assume there are stores between DefMI and UseMI.
bool SawStore = true;
- if (!DefMI->isSafeToMove(&TII, nullptr, SawStore))
+ if (!DefMI->isSafeToMove(nullptr, SawStore))
return false;
DEBUG(dbgs() << "Try to fold single def: " << *DefMI
@@ -235,7 +235,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Use the same criteria as DeadMachineInstructionElim.
bool SawStore = false;
- if (!MI->isSafeToMove(&TII, nullptr, SawStore)) {
+ if (!MI->isSafeToMove(nullptr, SawStore)) {
DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
return;
}
@@ -256,15 +256,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Check if MI reads any unreserved physregs.
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
- else if (MOI->isDef()) {
- for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo());
- Units.isValid(); ++Units) {
- if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
- if (VNInfo *VNI = LR->getVNInfoAt(Idx))
- LR->removeValNo(VNI);
- }
- }
- }
+ else if (MOI->isDef())
+ LIS.removePhysRegDefAt(Reg, Idx);
continue;
}
LiveInterval &LI = LIS.getInterval(Reg);
@@ -280,21 +273,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Remove defined value.
if (MOI->isDef()) {
- if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
- if (TheDelegate)
- TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
- LI.removeValNo(VNI);
- if (LI.empty()) {
- RegsToErase.push_back(Reg);
- } else {
- // Also remove the value in subranges.
- for (LiveInterval::SubRange &S : LI.subranges()) {
- if (VNInfo *SVNI = S.getVNInfoAt(Idx))
- S.removeValNo(SVNI);
- }
- LI.removeEmptySubRanges();
- }
- }
+ if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ LIS.removeVRegDefAt(LI, Idx);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
}
}
@@ -416,7 +399,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
- if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+ if (MRI.recomputeRegClass(LI.reg))
DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 8a6ac25..5c9c679 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
assert(Slot >= 0 && "Spill slot index must be >= 0");
SS2IntervalMap::iterator I = S2IMap.find(Slot);
if (I == S2IMap.end()) {
- I = S2IMap.insert(I, std::make_pair(Slot,
- LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(
+ TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ .first;
S2RCMap.insert(std::make_pair(Slot, RC));
} else {
// Use the largest common subclass register class.
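
The emplace with std::piecewise_construct builds the LiveInterval directly inside the map node, presumably to avoid copying a temporary now that LiveInterval is expensive or impossible to copy. A self-contained sketch of the idiom with a stand-in type:

    #include <map>
    #include <tuple>

    struct Interval {                       // stand-in for LiveInterval
      Interval(unsigned Reg, float W) : Reg(Reg), Weight(W) {}
      Interval(const Interval &) = delete;  // non-copyable, as assumed here
      unsigned Reg;
      float Weight;
    };

    std::map<int, Interval> S2IMap;
    // Key and value are constructed in place; std::make_pair would need a
    // copy or move of Interval, which this type forbids.
    auto I = S2IMap.emplace(std::piecewise_construct,
                            std::forward_as_tuple(42),
                            std::forward_as_tuple(42u, 0.0f)).first;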
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index c4bca5f..11deb81 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index e8bf687..8378429 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
static inline bool
-lookupCandidateBaseReg(int64_t BaseOffset,
+lookupCandidateBaseReg(unsigned BaseReg,
+ int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
const MachineInstr *MI,
@@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset,
// Check if the relative offset from where the base register references
// to the target address is in range for the instruction.
int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
- return TRI->isFrameOffsetLegal(MI, Offset);
+ return TRI->isFrameOffsetLegal(MI, BaseReg, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// instruction itself will be taken into account by the target,
// so we don't have to adjust for it here when reusing a base
// register.
- if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
- LocalOffset, MI, TRI)) {
+ if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset,
+ FrameSizeAdjust, LocalOffset, MI,
+ TRI)) {
DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
// We found a register to reuse.
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
@@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// then don't bother creating it.
if (ref + 1 >= e ||
!lookupCandidateBaseReg(
- BaseOffset, FrameSizeAdjust,
+ BaseReg, BaseOffset, FrameSizeAdjust,
FrameReferenceInsns[ref + 1].getLocalOffset(),
FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
BaseOffset = PrevBaseOffset;
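
The candidate base register is now threaded through to the target hook, so a target can reject an offset that is legal for one base register but not another. A hedged sketch of the updated query; MI, BaseReg and the offset math mirror the code above, while the follow-up call is hypothetical:

    int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
    // Targets may consult BaseReg's register class when deciding whether
    // Offset fits the instruction's addressing mode.
    if (TRI->isFrameOffsetLegal(MI, BaseReg, Offset))
      reuseBaseReg(BaseReg, Offset);  // hypothetical caller-side follow-up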
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 3c73905..d5fdf8e 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -53,7 +53,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
- CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
}
@@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const {
if (getBasicBlock())
Name += getBasicBlock()->getName();
else
- Name += (Twine("BB") + Twine(getNumber())).str();
+ Name += ("BB" + Twine(getNumber())).str();
return Name;
}
@@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\t';
if (I->isInsideBundle())
OS << " * ";
- I->print(OS, &getParent()->getTarget());
+ I->print(OS);
}
// Print the successors of this block according to the CFG.
@@ -1129,21 +1129,19 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
/// instructions after (searching just for defs) MI.
MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
- unsigned Reg, MachineInstr *MI,
- unsigned Neighborhood) {
+ unsigned Reg, const_iterator Before,
+ unsigned Neighborhood) const {
unsigned N = Neighborhood;
- MachineBasicBlock *MBB = MI->getParent();
- // Start by searching backwards from MI, looking for kills, reads or defs.
-
- MachineBasicBlock::iterator I(MI);
+ // Start by searching backwards from Before, looking for kills, reads or defs.
+ const_iterator I(Before);
// If this is the first insn in the block, don't search backwards.
- if (I != MBB->begin()) {
+ if (I != begin()) {
do {
--I;
MachineOperandIteratorBase::PhysRegInfo Analysis =
- MIOperands(I).analyzePhysReg(Reg, TRI);
+ ConstMIOperands(I).analyzePhysReg(Reg, TRI);
if (Analysis.Defines)
// Outputs happen after inputs so they take precedence if both are
@@ -1158,15 +1156,15 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// Defined or read without a previous kill - live.
return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
- } while (I != MBB->begin() && --N > 0);
+ } while (I != begin() && --N > 0);
}
// Did we get to the start of the block?
- if (I == MBB->begin()) {
+ if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
RAI.isValid(); ++RAI) {
- if (MBB->isLiveIn(*RAI))
+ if (isLiveIn(*RAI))
return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
}
@@ -1175,13 +1173,13 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
N = Neighborhood;
- // Try searching forwards from MI, looking for reads or defs.
- I = MachineBasicBlock::iterator(MI);
+ // Try searching forwards from Before, looking for reads or defs.
+ I = const_iterator(Before);
// If this is the last insn in the block, don't search forwards.
- if (I != MBB->end()) {
- for (++I; I != MBB->end() && N > 0; ++I, --N) {
+ if (I != end()) {
+ for (++I; I != end() && N > 0; ++I, --N) {
MachineOperandIteratorBase::PhysRegInfo Analysis =
- MIOperands(I).analyzePhysReg(Reg, TRI);
+ ConstMIOperands(I).analyzePhysReg(Reg, TRI);
if (Analysis.ReadsOverlap)
// Used, therefore must have been live.
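
computeRegisterLiveness() is now const and anchored on an iterator rather than an instruction, so it can be asked about an arbitrary insertion point, including end(). A sketch of a typical query, where Reg and InsertPos are assumptions:

    MachineBasicBlock::LivenessQueryResult LQR =
        MBB.computeRegisterLiveness(TRI, Reg, InsertPos, /*Neighborhood=*/10);
    if (LQR == MachineBasicBlock::LQR_Dead) {
      // Reg is believed dead at InsertPos; it may be clobbered here.
    }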
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index aaa7d91..2969bad 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -40,13 +41,14 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "block-placement2"
+#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of unconditional branches");
@@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
-static cl::opt<unsigned>
-ExitBlockBias("block-placement-exit-block-bias",
- cl::desc("Block frequency percentage a loop exit block needs "
- "over the original exit to be considered the new exit."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> OutlineOptionalThreshold(
+ "outline-optional-threshold",
+ cl::desc("Don't outline optional branches that are a single block with an "
+ "instruction count below this threshold"),
+ cl::init(4), cl::Hidden);
namespace {
class BlockChain;
@@ -107,7 +121,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -144,19 +158,18 @@ public:
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
- BI != BE; ++BI) {
- Blocks.push_back(*BI);
- assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
- BlockToChain[*BI] = this;
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ BlockToChain[ChainBB] = this;
}
}
#ifndef NDEBUG
/// \brief Dump the blocks in this chain.
LLVM_DUMP_METHOD void dump() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- (*I)->dump();
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
}
#endif // NDEBUG
@@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the target's lowering info.
const TargetLoweringBase *TLI;
+ /// \brief A handle to the post dominator tree.
+ MachineDominatorTree *MDT;
+
+ /// \brief A set of blocks that are unavoidably executed, i.e. they dominate
+ /// all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
- void markChainSuccessors(BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *getFirstUnplacedBlock(
- MachineFunction &F,
- const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ selectBestCandidateBlock(BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
@@ -244,6 +262,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -252,12 +271,13 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
@@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber()
- << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS << "BB#" << BB->getNumber();
+ OS << " (derived from LLVM BB '" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
- for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
- CBI != CBE; ++CBI) {
+ for (MachineBasicBlock *MBB : Chain) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
// shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
- SE = (*CBI)->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
continue;
// This is a cross-chain edge that is within the loop, so decrement the
@@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors(
/// very hot successor edges.
///
/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
- MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
@@ -347,65 +364,85 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t WeightScale = 0;
uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
continue;
}
- if (*SI != *SuccChain.begin()) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ if (Succ != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+ UnavoidableBlocks.count(Succ) > 0) {
+ auto HasShortOptionalBranch = [&]() {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
+ return true;
+ }
+ return false;
+ };
+ if (!HasShortOptionalBranch())
+ return Succ;
+ }
+
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob) (CFG conflict)\n");
continue;
}
- // Make sure that a hot successor doesn't have a globally more important
- // predecessor.
- BlockFrequency CandidateEdgeFreq
- = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq =
+ MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
bool BadCFGConflict = false;
- for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
- PE = (*SI)->pred_end();
- PI != PE; ++PI) {
- if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
- BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
continue;
- BlockFrequency PredEdgeFreq
- = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
if (PredEdgeFreq >= CandidateEdgeFreq) {
BadCFGConflict = true;
break;
}
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob) (non-cold CFG conflict)\n");
continue;
}
}
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob)"
<< (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
if (BestSucc && BestWeight >= SuccWeight)
continue;
- BestSucc = *SI;
+ BestSucc = Succ;
BestWeight = SuccWeight;
}
return BestSucc;
@@ -430,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// some code complexity) into the loop below.
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
[&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
- for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
- WBE = WorkList.end();
- WBI != WBE; ++WBI) {
- BlockChain &SuccChain = *BlockToChain[*WBI];
+ for (MachineBasicBlock *MBB : WorkList) {
+ BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*WBI)
- << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
continue;
}
assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
- BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
continue;
- BestBlock = *WBI;
+ BestBlock = MBB;
BestFreq = CandidateFreq;
}
return BestBlock;
@@ -485,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB,
- BlockChain &Chain,
+ MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
@@ -513,8 +546,8 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
- BlockFilter);
+ BestSucc =
+ getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -527,8 +560,8 @@ void MachineBlockPlacement::buildChain(
// Zero out LoopPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
- << " to " << getBlockNum(BestSucc) << "\n");
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
+ << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
@@ -558,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
if (!LoopBlockSet.count(*HeaderChain.begin()))
return L.getHeader();
- DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
+ << "\n");
BlockFrequency BestPredFreq;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
- PE = L.getHeader()->pred_end();
- PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
+ for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
<< Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
@@ -598,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
return BestPred;
}
-
/// \brief Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -628,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
- DEBUG(dbgs() << "Finding best loop exit for: "
- << getBlockName(L.getHeader()) << "\n");
- for (MachineLoop::block_iterator I = L.block_begin(),
- E = L.block_end();
- I != E; ++I) {
- BlockChain &Chain = *BlockToChain[*I];
+ DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
+ << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
- // mid-way through an inner loop or a successor of an analyzable branch.
- if (*I != *std::prev(Chain.end()))
+ // mid-way through an inner loop or a successor of an unanalyzable branch.
+ if (MBB != *std::prev(Chain.end()))
continue;
// Now walk the successors. We need to establish whether this has a viable
@@ -650,59 +676,56 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// the MBPI analysis, we use the internal weights and manually compute the
// probabilities to avoid quadratic behavior.
uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
- for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
- SE = (*I)->succ_end();
- SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isLandingPad())
continue;
- if (*SI == *I)
+ if (Succ == MBB)
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (chain conflict)\n");
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
- if (LoopBlockSet.count(*SI)) {
- DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccWeight << ")\n");
HasLoopingSucc = true;
continue;
}
unsigned SuccLoopDepth = 0;
- if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
SuccLoopDepth = ExitLoop->getLoopDepth();
if (ExitLoop->contains(&L))
- BlocksExitingToOuterLoop.insert(*I);
+ BlocksExitingToOuterLoop.insert(MBB);
}
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
- BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " [L:" << SuccLoopDepth
- << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
// the layout.
BranchProbability Bias(100 - ExitBlockBias, 100);
- if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
+ if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
- ((*I)->isLayoutSuccessor(*SI) &&
+ (MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;
- ExitingBB = *I;
+ ExitingBB = MBB;
}
}
- // Restore the old exiting state, no viable looping successor was found.
if (!HasLoopingSucc) {
+ // Restore the old exiting state, no viable looping successor was found.
ExitingBB = OldExitingBB;
BestExitEdgeFreq = OldBestExitEdgeFreq;
continue;
@@ -738,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
MachineBasicBlock *Top = *LoopChain.begin();
bool ViableTopFallthrough = false;
- for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
- PE = Top->pred_end();
- PI != PE; ++PI) {
- BlockChain *PredChain = BlockToChain[*PI];
- if (!LoopBlockSet.count(*PI) &&
- (!PredChain || *PI == *std::prev(PredChain->end()))) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
ViableTopFallthrough = true;
break;
}
@@ -754,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
- for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
- SE = Bottom->succ_end();
- SI != SE; ++SI) {
- BlockChain *SuccChain = BlockToChain[*SI];
- if (!LoopBlockSet.count(*SI) &&
- (!SuccChain || *SI == *SuccChain->begin()))
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
return;
}
}
- BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
- ExitingBB);
+ BlockChain::iterator ExitIt =
+ std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -782,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *InnerLoop : L)
+ buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
@@ -809,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineLoop::block_iterator BI = L.block_begin(),
- BE = L.block_end();
- BI != BE; ++BI) {
- BlockChain &Chain = *BlockToChain[*BI];
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
continue;
++Chain.LoopPredecessors;
}
@@ -845,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
}
- for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
- BCI != BCE; ++BCI) {
- dbgs() << " ... " << getBlockName(*BCI) << "\n";
- if (!LoopBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.erase(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
dbgs() << "Loop chain contains a block not contained by the loop!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
}
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
- LBE = LoopBlockSet.end();
- LBI != LBE; ++LBI)
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*LBI) << "\n";
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
@@ -879,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = FI;
- BlockChain *Chain
- = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
for (;;) {
@@ -903,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+
// Build any loop-based chains.
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
- ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(F, *L);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = &*FI;
- BlockChain &Chain = *BlockToChain[BB];
+ for (MachineBasicBlock &MBB : F) {
+ BlockChain &Chain = *BlockToChain[&MBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain)
continue;
++Chain.LoopPredecessors;
}
@@ -944,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- FunctionBlockSet.insert(FI);
+ for (MachineBasicBlock &MBB : F)
+ FunctionBlockSet.insert(&MBB);
- for (BlockChain::iterator BCI = FunctionChain.begin(),
- BCE = FunctionChain.end();
- BCI != BCE; ++BCI)
- if (!FunctionBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
BadFunc = true;
dbgs() << "Function chain contains a block not in the function!\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
if (!FunctionBlockSet.empty()) {
BadFunc = true;
- for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
- FBE = FunctionBlockSet.end();
- FBI != FBE; ++FBI)
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
dbgs() << "Function contains blocks never placed into a chain!\n"
- << " Bad block: " << getBlockName(*FBI) << "\n";
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
}
assert(!BadFunc && "Detected problems with the block placement.");
});
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
- for (BlockChain::iterator BI = FunctionChain.begin(),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
- DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
- : " ... ")
- << getBlockName(*BI) << "\n");
- if (InsertPos != MachineFunction::iterator(*BI))
- F.splice(InsertPos, *BI);
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F.splice(InsertPos, ChainBB);
else
++InsertPos;
// Update the terminator of the previous block.
- if (BI == FunctionChain.begin())
+ if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
+ MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -993,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explicit "goto" instruction
- // before layout, and no longer fall-through it after layout; or
+ // before layout, and no longer fall-through it after layout; or
// o. just opposite.
- //
+ //
// AnalyzeBranch() may return erroneous value for FBB when these two
// situations take place. For the first scenario FBB is mistakenly set
// NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
// is mistakenly pointing to "*BI".
//
bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
PrevBB->updateTerminator();
needUpdateBr = false;
Cond.clear();
@@ -1022,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
<< getBlockName(PrevBB) << "\n");
DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
<< " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
needUpdateBr = true;
@@ -1043,33 +1064,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
- return; // Empty chain.
+ return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
- for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
// Don't align non-looping basic blocks. These are unlikely to execute
// enough times to matter in practice. Note that we'll still handle
// unnatural CFGs inside of a natural outer loop (the common case) and
// rotated loops.
- MachineLoop *L = MLI->getLoopFor(*BI);
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
if (!L)
continue;
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
- continue; // Don't care about loop alignment.
+ continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
// aligning it.
- BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
if (Freq < WeightedEntryFreq)
continue;
@@ -1082,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
- MachineBasicBlock *LayoutPred = *std::prev(BI);
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
- if (!LayoutPred->isSuccessor(*BI)) {
- (*BI)->setAlignment(Align);
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
continue;
}
@@ -1095,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb))
- (*BI)->setAlignment(Align);
+ ChainBB->setAlignment(Align);
}
}
@@ -1115,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
buildCFGChains(F);
@@ -1124,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- FI->setAlignment(AlignAllBlock);
+ for (MachineBasicBlock &MBB : F)
+ MBB.setAlignment(AlignAllBlock);
// We always return true as we have no way to track whether the final order
// differs from the original order.
@@ -1181,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
- Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
- : NumUncondBranches;
- Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
- : UncondBranchTakenFreq;
- for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
- SE = I->succ_end();
- SI != SE; ++SI) {
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
// Skip if this successor is a fallthrough.
- if (I->isLayoutSuccessor(*SI))
+ if (MBB.isLayoutSuccessor(Succ))
continue;
- BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
++NumBranches;
BranchTakenFreq += EdgeFreq.getFrequency();
}
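
Besides the range-for cleanups, this file renames the pass's debug/pass name from block-placement2 back to block-placement and adds two hidden knobs for outlining optional branches. A hedged example of exercising them; the llc invocation itself is an assumption, and -debug-only requires an assertions build:

    llc -outline-optional-branches -outline-optional-threshold=4 \
        -debug-only=block-placement input.ll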
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 2960408..87aaaa0 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -47,7 +48,7 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
initializeMachineCSEPass(*PassRegistry::getPassRegistry());
}
@@ -68,7 +69,7 @@ namespace {
}
private:
- const unsigned LookAheadLimit;
+ unsigned LookAheadLimit;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
typedef ScopedHashTable<MachineInstr*, unsigned,
@@ -580,8 +581,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
- MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
- MRI->clearKillFlags(CSEPairs[i].second);
+ unsigned OldReg = CSEPairs[i].first;
+ unsigned NewReg = CSEPairs[i].second;
+ // OldReg may have been unused but is used now, clear the Dead flag
+ MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
+ assert(Def != nullptr && "CSEd register has no unique definition?");
+ Def->clearRegisterDeads(NewReg);
+ // Replace with NewReg and clear kill flags which may be wrong now.
+ MRI->replaceRegWith(OldReg, NewReg);
+ MRI->clearKillFlags(NewReg);
}
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
@@ -708,5 +716,6 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
+ LookAheadLimit = TII->getMachineCSELookAheadLimit();
return PerformCSE(DT->getRootNode());
}
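
The hunk above replaces the hard-coded lookahead of 5 with a per-target hook queried at function setup (the hook named in the diff is TargetInstrInfo::getMachineCSELookAheadLimit). A small sketch of that pattern with hypothetical class names:

    #include <cstdio>

    struct TargetHooksBase {                 // stand-in for TargetInstrInfo
      virtual ~TargetHooksBase() {}
      virtual unsigned getLookAheadLimit() const { return 5; } // old default
    };

    struct MyTargetHooks : TargetHooksBase { // a target tuning the limit
      unsigned getLookAheadLimit() const override { return 10; }
    };

    int main() {
      MyTargetHooks TII;
      // The pass now reads the limit at runtime instead of fixing it in the
      // constructor, which is why the member can no longer be const.
      unsigned LookAheadLimit = TII.getLookAheadLimit();
      std::printf("lookahead = %u\n", LookAheadLimit);
      return 0;
    }
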
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 2931258..a4bc77e 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -45,7 +45,7 @@ class MachineCombiner : public MachineFunctionPass {
TargetSchedModel TSchedModel;
- /// OptSize - True if optimizing for code size.
+ /// True if optimizing for code size.
bool OptSize;
public:
@@ -109,7 +109,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
return DefInstr;
}
-/// getDepth - Computes depth of instructions in vector \InsInstr.
+/// Computes depth of instructions in vector \InsInstr.
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -125,14 +125,13 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVector<unsigned, 16> InstrDepth;
assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
- // Foreach instruction in in the new sequence compute the depth based on the
+ // For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
// are tracked in the InstrIdxForVirtReg map, the depth is looked up in InstrDepth.
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
- for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = InstrPtr->getOperand(i);
+ for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
continue;
@@ -169,8 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
return InstrDepth[NewRootIdx];
}
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
///
/// \param Root is a machine instruction that could be replaced by NewRoot.
/// It is used to compute a more accurate latency information for NewRoot in
@@ -187,8 +185,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
- for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = NewRoot->getOperand(i);
+ for (const MachineOperand &MO : NewRoot->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
continue;
@@ -211,12 +208,12 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
return NewRootLatency;
}
-/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// True when the new instruction sequence does not
/// lengthen the critical path. The DAGCombine code sequence ends in MI
/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
/// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
/// The slack is the number of cycles Root can be delayed before the critical
/// path becomes longer.
bool MachineCombiner::preservesCriticalPathLen(
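
The rewritten comment pins down the acceptance test as a single comparison. A worked sketch in standalone C++ (parameter names mirror the comment; the real pass derives these values from trace metrics):

    #include <cassert>

    static bool preservesCriticalPathLen(unsigned NewRootDepth,
                                         unsigned NewRootLatency,
                                         unsigned RootDepth, unsigned RootLatency,
                                         unsigned RootSlack) {
      // Accept the new sequence only if it cannot lengthen the critical path:
      // NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack
      return NewRootDepth + NewRootLatency <=
             RootDepth + RootLatency + RootSlack;
    }

    int main() {
      assert(preservesCriticalPathLen(3, 2, 3, 2, 0));  // same length: accept
      assert(!preservesCriticalPathLen(5, 3, 3, 2, 2)); // 8 > 7: reject
      return 0;
    }
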
@@ -264,8 +261,7 @@ void MachineCombiner::instr2instrSC(
InstrsSC.push_back(SC);
}
}
-/// preservesResourceLen - True when the new instructions do not increase
-/// resource length
+/// True when the new instructions do not increase resource length
bool MachineCombiner::preservesResourceLen(
MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -300,7 +296,7 @@ bool MachineCombiner::preservesResourceLen(
}
/// \returns true when new instruction sequence should be generated
-/// independent if it lenghtens critical path or not
+/// independent of whether it lengthens the critical path or not
bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
if (OptSize && (NewSize < OldSize))
return true;
@@ -309,7 +305,7 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
-/// combineInstructions - substitute a slow code sequence with a faster one by
+/// Substitute a slow code sequence with a faster one by
/// evaluating an instruction combining pattern.
/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
/// combining based on machine trace metrics. Only combine a sequence of
@@ -370,7 +366,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
continue;
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
- // the new sequence neither lenghten the critical path nor increases
+ // the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
(preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
@@ -406,8 +402,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo &STI =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
TII = STI.getInstrInfo();
TRI = STI.getRegisterInfo();
SchedModel = STI.getSchedModel();
@@ -416,8 +411,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = 0;
- OptSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 9611122..43c80b7 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -252,7 +252,11 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
report_fatal_error("MachineCopyPropagation should be run after"
" register allocation!");
- if (MO.isDef()) {
+ // Treat undef uses like defs.
+ // The backends are allowed to do whatever they want with an undef value,
+ // and we cannot be sure this register will not be rewritten, for instance
+ // to break some false dependencies for the hardware.
+ if (MO.isDef() || MO.isUndef()) {
Defs.push_back(Reg);
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
using namespace llvm;
@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
DT->print(OS);
}
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+ // Bail out early if there is nothing to do.
+ if (CriticalEdgesToSplit.empty())
+ return;
+
+ // For each element in CriticalEdgesToSplit, remember whether or not the element
+ // is the new immediate dominator of its successor. The mapping is done by
+ // index, i.e., the information for the ith element of CriticalEdgesToSplit is
+ // the ith element of IsNewIDom.
+ SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+ size_t Idx = 0;
+
+ // Collect all the dominance properties info, before invalidating
+ // the underlying DT.
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // Update dominator information.
+ MachineBasicBlock *Succ = Edge.ToBB;
+ MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+ for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+ if (PredBB == Edge.NewBB)
+ continue;
+ // If we are in this situation:
+ // FromBB1 FromBB2
+ // + +
+ // + + + +
+ // + + + +
+ // ... Split1 Split2 ...
+ // + +
+ // + +
+ // +
+ // Succ
+ // Instead of checking the dominance property with Split2, we check it with
+ // FromBB2 since Split2 is still unknown to the underlying DT structure.
+ if (NewBBs.count(PredBB)) {
+ assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+ "critical edge split has more "
+ "than one predecessor!");
+ PredBB = *PredBB->pred_begin();
+ }
+ if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ IsNewIDom[Idx] = false;
+ break;
+ }
+ }
+ ++Idx;
+ }
+
+ // Now, update DT with the collected dominance properties info.
+ Idx = 0;
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // We know FromBB dominates NewBB.
+ MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx])
+ DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ ++Idx;
+ }
+ NewBBs.clear();
+ CriticalEdgesToSplit.clear();
+}
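
applySplitCriticalEdges deliberately splits the work into a read pass over the still-valid tree and a write pass that mutates it. A toy sketch of that record-then-apply shape, with standard containers standing in for SmallBitVector and the DT API:

    #include <cassert>
    #include <vector>

    struct CriticalEdge { int FromBB, NewBB, ToBB; };

    int main() {
      std::vector<CriticalEdge> Edges = {{0, 10, 1}, {0, 11, 2}};
      std::vector<bool> IsNewIDom(Edges.size(), true);

      // Pass 1: answer all dominance queries before the tree changes. The
      // predicate here is a stand-in for the DT->dominates() walk above.
      for (unsigned Idx = 0; Idx != Edges.size(); ++Idx)
        IsNewIDom[Idx] = (Edges[Idx].ToBB != 2); // pretend block 2 has a
                                                 // predecessor it doesn't dominate
      // Pass 2: only now mutate the tree, consuming the recorded answers.
      for (unsigned Idx = 0; Idx != Edges.size(); ++Idx)
        if (IsNewIDom[Idx]) {
          // changeImmediateDominator(getNode(Edges[Idx].ToBB), NewDTNode)
          // would run here against the now-mutable tree.
        }
      assert(IsNewIDom[0] && !IsNewIDom[1]);
      return 0;
    }
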
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index 6b4cba6..8ec63f8 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
if (STI->getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(this);
@@ -67,17 +67,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
STI->getFrameLowering()->isStackRealignable(),
!F->hasFnAttribute("no-realign-stack"));
- if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(Fn->getAttributes().
- getStackAlignment(AttributeSet::FunctionIndex));
+ if (Fn->hasFnAttribute(Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
ConstantPool = new (Allocator) MachineConstantPool(TM);
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -383,7 +380,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getName().str() + "' function";
+ return ("CFG for '" + F->getName() + "' function").str();
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -462,7 +459,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
- const DataLayout *DL = getSubtarget().getDataLayout();
+ const DataLayout *DL = getTarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
@@ -471,14 +468,14 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.getOrCreateSymbol(Name);
}
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
- const DataLayout *DL = getSubtarget().getDataLayout();
- return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+ const DataLayout *DL = getTarget().getDataLayout();
+ return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Twine(getFunctionNumber())+"$pb");
}
@@ -587,21 +584,12 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
return -++NumFixedObjects;
}
-int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) {
- // Force the use of a frame pointer. The intention is that this intrinsic be
- // used in conjunction with unwind mechanisms that leak the frame pointer.
- setFrameAddressIsTaken(true);
- Size = RoundUpToAlignment(Size, StackAlignment);
- return CreateStackObject(Size, StackAlignment, false);
-}
-
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
assert(MBB && "MBB must be valid");
const MachineFunction *MF = MBB->getParent();
assert(MF && "MBB must be part of a MachineFunction");
- const TargetMachine &TM = MF->getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
BitVector BV(TRI->getNumRegs());
// Before CSI is calculated, no registers are considered pristine. They can be
@@ -612,8 +600,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
BV.set(*CSR);
- // The entry MBB always has all CSRs pristine.
- if (MBB == &MF->front())
+ // Each MBB before the save point has all CSRs pristine.
+ if (isBeforeSavePoint(*MF, *MBB))
return BV;
// On other MBBs the saved CSRs are not pristine.
@@ -625,6 +613,40 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+// Note: We could use some sort of caching mechanism, but we lack the ability
+// to know when the cache is invalid, i.e., when the CFG has changed.
+// Assuming we had that, we could simply compute the set of all MBBs
+// that are before the save point.
+bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF,
+ const MachineBasicBlock &MBB) const {
+ // Early exit if shrink-wrapping did not kick in.
+ if (!Save)
+ return &MBB == &MF.front();
+
+ // Starting from MBB, check if there is a path leading to Save that does
+ // not cross Restore.
+ SmallPtrSet<const MachineBasicBlock *, 8> Visited;
+ SmallVector<const MachineBasicBlock *, 8> WorkList;
+ WorkList.push_back(&MBB);
+ Visited.insert(&MBB);
+ do {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ // If we do not reach Restore and still reach Save, this
+ // means MBB is before Save.
+ if (CurBB == Save)
+ return true;
+ if (CurBB == Restore)
+ continue;
+ // Enqueue all the successors not already visited.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ } while (!WorkList.empty());
+ return false;
+}
+
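
The walk above is a plain worklist reachability check. A self-contained version over a toy CFG, with integer block ids and standard containers in place of the SmallVector/SmallPtrSet pair:

    #include <cassert>
    #include <set>
    #include <vector>

    using Graph = std::vector<std::vector<int>>; // successor lists by block id

    static bool isBeforeSavePoint(const Graph &G, int MBB, int Save, int Restore) {
      std::set<int> Visited = {MBB};
      std::vector<int> WorkList = {MBB};
      while (!WorkList.empty()) {
        int CurBB = WorkList.back();
        WorkList.pop_back();
        if (CurBB == Save)
          return true;  // reached Save without crossing Restore
        if (CurBB == Restore)
          continue;     // never search past the restore point
        for (int Succ : G[CurBB])
          if (Visited.insert(Succ).second)
            WorkList.push_back(Succ);
      }
      return false;
    }

    int main() {
      // 0 -> 1 (Save) -> 2 -> 3 (Restore) -> 4
      Graph G = {{1}, {2}, {3}, {4}, {}};
      assert(isBeforeSavePoint(G, 0, 1, 3));  // entry precedes the save point
      assert(!isBeforeSavePoint(G, 2, 1, 3)); // block 2 is inside the saved region
      return 0;
    }
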
unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -821,7 +843,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
void MachineConstantPoolValue::anchor() { }
const DataLayout *MachineConstantPool::getDataLayout() const {
- return TM.getSubtargetImpl()->getDataLayout();
+ return TM.getDataLayout();
}
Type *MachineConstantPoolEntry::getType() const {
@@ -843,13 +865,13 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
switch (getRelocationInfo()) {
default:
llvm_unreachable("Unknown section kind");
- case 2:
+ case Constant::GlobalRelocations:
Kind = SectionKind::getReadOnlyWithRel();
break;
- case 1:
+ case Constant::LocalRelocation:
Kind = SectionKind::getReadOnlyWithRelLocal();
break;
- case 0:
+ case Constant::NoRelocation:
switch (DL->getTypeAllocSize(getType())) {
case 4:
Kind = SectionKind::getMergeableConst4();
@@ -861,7 +883,7 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
Kind = SectionKind::getMergeableConst16();
break;
default:
- Kind = SectionKind::getMergeableConst();
+ Kind = SectionKind::getReadOnly();
break;
}
}
@@ -907,16 +929,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
else if (A->getType() != IntTy)
A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
if (isa<PointerType>(B->getType()))
B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
return A == B;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 2f076b6..aaf06a7 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -54,7 +53,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<IVUsers>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<StackProtector>();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 968ec2c..205032f 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -141,6 +141,28 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
Contents.CFP = FPImm;
}
+void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an external symbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_ExternalSymbol;
+ Contents.OffsetedInfo.Val.SymbolName = SymName;
+ setOffset(0); // Offset is always 0.
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an MCSymbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_MCSymbol;
+ Contents.Sym = Sym;
+}
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
@@ -276,17 +298,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
/// print - Print the specified machine operand.
///
-void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (!TM)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
- const TargetRegisterInfo *TRI =
- TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
-
+void MachineOperand::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
switch (getType()) {
case MachineOperand::MO_Register:
OS << PrintReg(getReg(), TRI, getSubReg());
@@ -308,8 +321,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (isUndef() && getSubReg())
OS << ",read-undef";
} else if (isImplicit()) {
- OS << "imp-use";
- NeedComma = true;
+ OS << "imp-use";
+ NeedComma = true;
}
if (isKill()) {
@@ -595,10 +608,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
- const DebugLoc dl, bool NoImp)
- : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0),
- Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) {
+ DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
+ AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
+ debugLoc(std::move(dl)) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -625,8 +638,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
Operands = MF.allocateOperandArray(CapOperands);
// Copy operands.
- for (unsigned i = 0; i != MI.getNumOperands(); ++i)
- addOperand(MF, MI.getOperand(i));
+ for (const MachineOperand &MO : MI.operands())
+ addOperand(MF, MO);
// Copy all the sensible flags.
setFlags(MI.Flags);
@@ -645,18 +658,18 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Operands[i].isReg())
- MRI.removeRegOperandFromUseList(&Operands[i]);
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.removeRegOperandFromUseList(&MO);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction to their respective use lists. This requires that the
/// operands not be on their use lists yet.
void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Operands[i].isReg())
- MRI.addRegOperandToUseList(&Operands[i]);
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.addRegOperandToUseList(&MO);
}
void MachineInstr::addOperand(const MachineOperand &Op) {
@@ -674,14 +687,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
if (MRI)
return MRI->moveOperands(Dst, Src, NumOps);
- // Here it would be convenient to call memmove, so that isn't allowed because
- // MachineOperand has a constructor and so isn't a POD type.
- if (Dst < Src)
- for (unsigned i = 0; i != NumOps; ++i)
- new (Dst + i) MachineOperand(Src[i]);
- else
- for (unsigned i = NumOps; i ; --i)
- new (Dst + i - 1) MachineOperand(Src[i - 1]);
+ // MachineOperand is a trivially copyable type so we can just use memmove.
+ std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
}
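
The replacement leans on MachineOperand now being trivially copyable. A sketch of why that licenses memmove over the two placement-new loops; Operand is a hypothetical stand-in, with a static_assert guarding the precondition:

    #include <cstring>
    #include <type_traits>

    struct Operand { int Kind; unsigned Reg; }; // stand-in for MachineOperand

    static_assert(std::is_trivially_copyable<Operand>::value,
                  "memmove is only valid for trivially copyable types");

    static void moveOperands(Operand *Dst, Operand *Src, unsigned NumOps) {
      // memmove copes with overlap in either direction, so the forward and
      // backward copy loops collapse into a single call.
      std::memmove(Dst, Src, NumOps * sizeof(Operand));
    }

    int main() {
      Operand Ops[3] = {{0, 1}, {0, 2}, {0, 3}};
      moveOperands(Ops, Ops + 1, 2); // overlapping shift-left by one slot
      return (Ops[0].Reg == 2 && Ops[1].Reg == 3) ? 0 : 1;
    }
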
/// addOperand - Add the specified operand to the instruction. If it is an
@@ -896,8 +903,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
}
// If DebugLoc does not match then two dbg.values are not identical.
if (isDebugValue())
- if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
- && getDebugLoc() != Other->getDebugLoc())
+ if (getDebugLoc() && Other->getDebugLoc() &&
+ getDebugLoc() != Other->getDebugLoc())
return false;
return true;
}
@@ -926,8 +933,7 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
MachineInstr *MI = (MachineInstr *)this;
MachineRegisterInfo &MRI = MF->getRegInfo();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -1330,8 +1336,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
/// clearKillInfo - Clears kill flags on all operands.
///
void MachineInstr::clearKillInfo() {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (MO.isReg() && MO.isUse())
MO.setIsKill(false);
}
@@ -1344,15 +1349,13 @@ void MachineInstr::substituteRegister(unsigned FromReg,
if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
if (SubIdx)
ToReg = RegInfo.getSubReg(ToReg, SubIdx);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.getReg() != FromReg)
continue;
MO.substPhysReg(ToReg, RegInfo);
}
} else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.getReg() != FromReg)
continue;
MO.substVirtReg(ToReg, SubIdx, RegInfo);
@@ -1363,9 +1366,7 @@ void MachineInstr::substituteRegister(unsigned FromReg,
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
-bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
- AliasAnalysis *AA,
- bool &SawStore) const {
+bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
// Ignore stuff that we obviously can't move.
//
// Treat volatile loads as stores. This is not strictly necessary for
@@ -1495,8 +1496,7 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
- for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
- const MachineOperand &MO = getOperand(i);
+ for (const MachineOperand &MO : operands()) {
if (!MO.isReg() || MO.isUse())
continue;
if (!MO.isDead())
@@ -1523,23 +1523,19 @@ void MachineInstr::dump() const {
#endif
}
-static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
- raw_ostream &CommentOS) {
- const LLVMContext &Ctx = MF->getFunction()->getContext();
- DL.print(Ctx, CommentOS);
-}
-
-void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
- bool SkipOpers) const {
- // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+ // We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
- if (!TM && MF)
- TM = &MF->getTarget();
- if (MF)
+ if (MF) {
MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
}
// Save a list of virtual registers.
@@ -1552,7 +1548,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
!getOperand(StartOp).isImplicit();
++StartOp) {
if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, TM);
+ getOperand(StartOp).print(OS, TRI);
unsigned Reg = getOperand(StartOp).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
@@ -1562,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " = ";
// Print the opcode name.
- if (TM && TM->getSubtargetImpl()->getInstrInfo())
- OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode());
+ if (TII)
+ OS << TII->getName(getOpcode());
else
OS << "UNKNOWN";
@@ -1579,7 +1575,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
// Print asm string.
OS << " ";
- getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI);
// Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1617,9 +1613,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (MRI->use_empty(Reg)) {
bool HasAliasLive = false;
- for (MCRegAliasIterator AI(
- Reg, TM->getSubtargetImpl()->getRegisterInfo(), true);
- AI.isValid(); ++AI) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
if (!MRI->use_empty(AliasReg)) {
HasAliasLive = true;
@@ -1645,17 +1639,13 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
}
if (isDebugValue() && MO.isMetadata()) {
// Pretty print DBG_VALUE instructions.
- const MDNode *MD = MO.getMetadata();
- DIDescriptor DI(MD);
- DIVariable DIV(MD);
-
- if (DI.isVariable() && !DIV.getName().empty())
- OS << "!\"" << DIV.getName() << '\"';
+ auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
+ if (DIV && !DIV->getName().empty())
+ OS << "!\"" << DIV->getName() << '\"';
else
- MO.print(OS, TM);
- } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
- OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName(
- MO.getImm());
+ MO.print(OS, TRI);
+ } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TRI->getSubRegIndexName(MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1672,11 +1662,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
unsigned RCID = 0;
if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
- if (TM) {
- const TargetRegisterInfo *TRI =
- TM->getSubtargetImpl()->getRegisterInfo();
- OS << ':'
- << TRI->getRegClassName(TRI->getRegClass(RCID));
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
@@ -1690,7 +1677,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
// Compute the index of the next operand descriptor.
AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
} else
- MO.print(OS, TM);
+ MO.print(OS, TRI);
}
// Briefly indicate whether any call clobbers were omitted.
@@ -1726,7 +1713,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+ OS << " " << TRI->getRegClassName(RC)
<< ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
@@ -1741,24 +1728,24 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
}
// Print debug location information.
- if (isDebugValue() && getOperand(e - 1).isMetadata()) {
+ if (isDebugValue() && getOperand(e - 2).isMetadata()) {
if (!HaveSemi) OS << ";";
- DIVariable DV(getOperand(e - 1).getMetadata());
- OS << " line no:" << DV.getLineNumber();
- if (MDNode *InlinedAt = DV.getInlinedAt()) {
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown() && MF) {
+ auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
+ OS << " line no:" << DV->getLine();
+ if (auto *InlinedAt = debugLoc->getInlinedAt()) {
+ DebugLoc InlinedAtDL(InlinedAt);
+ if (InlinedAtDL && MF) {
OS << " inlined @[ ";
- printDebugLoc(InlinedAtDL, MF, OS);
+ InlinedAtDL.print(OS);
OS << " ]";
}
}
if (isIndirectDebugValue())
OS << " indirect";
- } else if (!debugLoc.isUnknown() && MF) {
+ } else if (debugLoc && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
- printDebugLoc(debugLoc, MF, OS);
+ debugLoc.print(OS);
}
OS << '\n';
@@ -1827,8 +1814,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
RegInfo = nullptr;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned OpReg = MO.getReg();
@@ -1889,6 +1875,22 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
return true;
}
+void MachineInstr::clearRegisterDeads(unsigned Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+ continue;
+ MO.setIsDead(false);
+ }
+}
+
+void MachineInstr::addRegisterDefReadUndef(unsigned Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
+ continue;
+ MO.setIsUndef();
+ }
+}
+
void MachineInstr::addRegisterDefined(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -1896,8 +1898,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
if (MO)
return;
} else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = getOperand(i);
+ for (const MachineOperand &MO : operands()) {
if (MO.isReg() && MO.getReg() == Reg && MO.isDef() &&
MO.getSubReg() == 0)
return;
@@ -1911,8 +1912,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI) {
bool HasRegMask = false;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- MachineOperand &MO = getOperand(i);
+ for (MachineOperand &MO : operands()) {
if (MO.isRegMask()) {
HasRegMask = true;
continue;
@@ -1920,15 +1920,10 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- bool Dead = true;
- for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
- I != E; ++I)
- if (TRI.regsOverlap(*I, Reg)) {
- Dead = false;
- break;
- }
// If there are no uses, including partial uses, the def is dead.
- if (Dead) MO.setIsDead();
+ if (std::none_of(UsedRegs.begin(), UsedRegs.end(),
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ MO.setIsDead();
}
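
The rewrite above compresses a flag-and-break loop into one std::none_of query. The same shape in isolation, with plain unsigneds standing in for registers and an equality test standing in for regsOverlap:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    static bool regsOverlap(unsigned A, unsigned B) { return A == B; } // stand-in

    int main() {
      std::vector<unsigned> UsedRegs = {1, 2, 3};
      unsigned Reg = 4;
      // The def is dead exactly when no element of UsedRegs overlaps Reg.
      bool Dead = std::none_of(UsedRegs.begin(), UsedRegs.end(),
                               [&](unsigned Use) { return regsOverlap(Use, Reg); });
      assert(Dead);
      return 0;
    }
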
// This is a call with a register mask operand.
@@ -1945,8 +1940,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
SmallVector<size_t, 8> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index cb14a5c..3967a2f 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -10,10 +10,6 @@
// This pass performs loop invariant code motion on machine instructions. We
// attempt to remove as much code from the body of a loop as possible.
//
-// This pass does not attempt to throttle itself to limit register pressure.
-// The register allocation phases are expected to perform rematerialization
-// to recover when register pressure is high.
-//
// This pass is not intended to be a replacement or a complete alternative
// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
// constructs that are not exposed before lowering and instruction selection.
@@ -54,6 +50,12 @@ HoistCheapInsts("hoist-cheap-insts",
cl::desc("MachineLICM should hoist even cheap instructions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+ cl::desc("MachineLICM should sink instructions into "
+ "loops to avoid register spills"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -98,7 +100,7 @@ namespace {
SmallSet<unsigned, 32> RegSeen;
SmallVector<unsigned, 8> RegPressure;
- // Register pressure "limit" per register class. If the pressure
+ // Register pressure "limit" per register pressure set. If the pressure
// is higher than the limit, then it's considered high.
SmallVector<unsigned, 8> RegLimit;
@@ -208,7 +210,8 @@ namespace {
/// CanCauseHighRegPressure - Visit BBs from header to current BB,
/// check if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
+ bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
+ bool Cheap);
/// UpdateBackTraceRegPressure - Traverse the back trace from header to
/// the current block and update their register pressures to reflect the
@@ -243,21 +246,30 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
- /// getRegisterClassIDAndCost - For a given MI, register, and the operand
- /// index, return the ID and cost of its representative register class by
- /// reference.
- void getRegisterClassIDAndCost(const MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- unsigned &RCId, unsigned &RCCost) const;
+ /// SinkIntoLoop - Sink instructions into loops if profitable. This
+ /// especially tries to prevent register spills caused by register pressure
+ /// if there is little to no overhead moving instructions into loops.
+ void SinkIntoLoop();
/// InitRegPressure - Find all virtual register references that are liveout
/// of the preheader to initialize the starting "register pressure". Note
/// this does not count live through (livein but not used) registers.
void InitRegPressure(MachineBasicBlock *BB);
+ /// calcRegisterCost - Calculate the additional register pressure that the
+ /// registers used in MI cause.
+ ///
+ /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+ /// figure out which usages are live-ins.
+ /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
+ DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
+ bool ConsiderSeen,
+ bool ConsiderUnseenAsDef);
+
/// UpdateRegPressure - Update estimate of register pressure after the
/// specified instruction.
- void UpdateRegPressure(const MachineInstr *MI);
+ void UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef = false);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
@@ -343,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
- unsigned NumRC = TRI->getNumRegClasses();
- RegPressure.resize(NumRC);
+ unsigned NumRPS = TRI->getNumRegPressureSets();
+ RegPressure.resize(NumRPS);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- RegLimit.resize(NumRC);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+ RegLimit.resize(NumRPS);
+ for (unsigned i = 0, e = NumRPS; i != e; ++i)
+ RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
}
// Get our Loop information...
@@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
+
+ if (SinkInstsToAvoidSpills)
+ SinkIntoLoop();
}
}
@@ -693,6 +707,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
/// one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
SmallVector<MachineDomTreeNode*, 32> Scopes;
SmallVector<MachineDomTreeNode*, 8> WorkList;
DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
@@ -700,7 +718,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Perform a DFS walk to determine the order of visit.
WorkList.push_back(HeaderN);
- do {
+ while (!WorkList.empty()) {
MachineDomTreeNode *Node = WorkList.pop_back_val();
assert(Node && "Null dominator tree node?");
MachineBasicBlock *BB = Node->getBlock();
@@ -734,28 +752,21 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
ParentMap[Child] = Node;
WorkList.push_back(Child);
}
- } while (!WorkList.empty());
+ }
- if (Scopes.size() != 0) {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
- return;
+ if (Scopes.size() == 0)
+ return;
- // Compute registers which are livein into the loop headers.
- RegSeen.clear();
- BackTrace.clear();
- InitRegPressure(Preheader);
- }
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
// Now perform LICM.
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
MachineDomTreeNode *Node = Scopes[i];
MachineBasicBlock *MBB = Node->getBlock();
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
- continue;
-
EnterScope(MBB);
// Process the block
@@ -774,27 +785,57 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
-static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
- return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
-}
+void MachineLICM::SinkIntoLoop() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineInstr *, 8> Candidates;
+ for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+ I != Preheader->instr_end(); ++I) {
+ // We need to ensure that we can safely move this instruction into the loop.
+ // As such, it must not have side effects, e.g. those of a call.
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+ Candidates.push_back(I);
+ }
-/// getRegisterClassIDAndCost - For a given MI, register, and the operand
-/// index, return the ID and cost of its representative register class.
-void
-MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- unsigned &RCId, unsigned &RCCost) const {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- MVT VT = *RC->vt_begin();
- if (VT == MVT::Untyped) {
- RCId = RC->getID();
- RCCost = 1;
- } else {
- RCId = TLI->getRepRegClassFor(VT)->getID();
- RCCost = TLI->getRepRegClassCostFor(VT);
+ for (MachineInstr *I : Candidates) {
+ const MachineOperand &MO = I->getOperand(0);
+ if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+ continue;
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+ bool CanSink = true;
+ MachineBasicBlock *B = nullptr;
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ // FIXME: Come up with a proper cost model that estimates whether sinking
+ // the instruction (and thus possibly executing it on every loop
+ // iteration) is more expensive than keeping the value in a register.
+ // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ CanSink = false;
+ break;
+ }
+ if (!B) {
+ B = MI.getParent();
+ continue;
+ }
+ B = DT->findNearestCommonDominator(B, MI.getParent());
+ if (!B) {
+ CanSink = false;
+ break;
+ }
+ }
+ if (!CanSink || !B || B == Preheader)
+ continue;
+ B->splice(B->getFirstNonPHI(), Preheader, I);
}
}
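
The sinking decision above funnels every use block through findNearestCommonDominator. A toy version on a parent-array dominator tree shows the accumulation; all names here are illustrative, not the MachineDominatorTree API:

    #include <cassert>
    #include <set>
    #include <vector>

    // IDom[N] is the immediate dominator of block N; -1 marks the entry block.
    static int findNCD(const std::vector<int> &IDom, int A, int B) {
      std::set<int> Anc;
      for (int N = A; N != -1; N = IDom[N])
        Anc.insert(N);
      for (int N = B; N != -1; N = IDom[N])
        if (Anc.count(N))
          return N; // first dominator of B that also dominates A
      return -1;
    }

    int main() {
      std::vector<int> IDom = {-1, 0, 0, 1, 1}; // block 0 is the entry
      const int UseBlocks[] = {3, 4};           // blocks holding the uses
      int B = -1;
      for (int UseBlock : UseBlocks)
        B = (B == -1) ? UseBlock : findNCD(IDom, B, UseBlock);
      assert(B == 1); // sink the def to block 1, which dominates both uses
      return 0;
    }
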
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -812,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
InitRegPressure(*BB->pred_begin());
}
- for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
- MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- bool isNew = RegSeen.insert(Reg).second;
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (MO.isDef())
- RegPressure[RCId] += RCCost;
- else {
- bool isKill = isOperandKill(MO, MRI);
- if (isNew && !isKill)
- // Haven't seen this, it must be a livein.
- RegPressure[RCId] += RCCost;
- else if (!isNew && isKill)
- RegPressure[RCId] -= RCCost;
- }
- }
- }
+ for (const MachineInstr &MI : *BB)
+ UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}
/// UpdateRegPressure - Update estimate of register pressure after the
/// specified instruction.
-void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
- if (MI->isImplicitDef())
- return;
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef) {
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
+ for (const auto &RPIdAndCost : Cost) {
+ unsigned Class = RPIdAndCost.first;
+ if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ RegPressure[Class] = 0;
+ else
+ RegPressure[Class] += RPIdAndCost.second;
+ }
+}
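
UpdateRegPressure above folds signed per-set deltas into unsigned counters, clamping at zero. The numeric behavior in isolation, with std::map standing in for the DenseMap of pressure-set deltas:

    #include <cassert>
    #include <map>
    #include <vector>

    int main() {
      std::vector<unsigned> RegPressure = {4, 0};       // two pressure sets
      std::map<unsigned, int> Cost = {{0, -6}, {1, 2}}; // signed deltas
      for (const auto &RPIdAndCost : Cost) {
        unsigned Class = RPIdAndCost.first;
        if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
          RegPressure[Class] = 0; // kill of a large value: clamp, don't underflow
        else
          RegPressure[Class] += RPIdAndCost.second;
      }
      assert(RegPressure[0] == 0 && RegPressure[1] == 2);
      return 0;
    }
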
- SmallVector<unsigned, 4> Defs;
+DenseMap<unsigned, int>
+MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+ bool ConsiderUnseenAsDef) {
+ DenseMap<unsigned, int> Cost;
+ if (MI->isImplicitDef())
+ return Cost;
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
@@ -855,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg).second;
+ // FIXME: It seems bad to use RegSeen only for some of these calculations.
+ bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+ RegClassWeight W = TRI->getRegClassWeight(RC);
+ int RCCost = 0;
if (MO.isDef())
- Defs.push_back(Reg);
- else if (!isNew && isOperandKill(MO, MRI)) {
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (RCCost > RegPressure[RCId])
- RegPressure[RCId] = 0;
+ RCCost = W.RegWeight;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill && ConsiderUnseenAsDef)
+ // Haven't seen this, it must be a livein.
+ RCCost = W.RegWeight;
+ else if (!isNew && isKill)
+ RCCost = -W.RegWeight;
+ }
+ if (RCCost == 0)
+ continue;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ for (; *PS != -1; ++PS) {
+ if (Cost.find(*PS) == Cost.end())
+ Cost[*PS] = RCCost;
else
- RegPressure[RCId] -= RCCost;
+ Cost[*PS] += RCCost;
}
}
-
- unsigned Idx = 0;
- while (!Defs.empty()) {
- unsigned Reg = Defs.pop_back_val();
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
- RegPressure[RCId] += RCCost;
- ++Idx;
- }
+ return Cost;
}
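
calcRegisterCost spreads one register class's weight across every pressure set the class maps to. Stripped to its core, with the -1-terminated set list mirroring getRegClassPressureSets and std::map replacing DenseMap:

    #include <cassert>
    #include <map>

    int main() {
      const int PressureSets[] = {0, 2, -1}; // class belongs to sets 0 and 2
      int RCCost = 3;                        // signed: defs add, kills subtract
      std::map<unsigned, int> Cost;
      for (const int *PS = PressureSets; *PS != -1; ++PS)
        Cost[*PS] += RCCost;                 // operator[] zero-initializes new keys
      assert(Cost[0] == 3 && Cost[2] == 3 && Cost.count(1) == 0);
      return 0;
    }
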
/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
@@ -898,7 +934,7 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
- if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+ if (!I.isSafeToMove(AA, DontMoveAcrossStore))
return false;
// If it is load then check if it is guaranteed to execute by making sure that
@@ -1067,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
-bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
bool CheapInstr) {
- for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
- CI != CE; ++CI) {
- if (CI->second <= 0)
+ for (const auto &RPIdAndCost : Cost) {
+ if (RPIdAndCost.second <= 0)
continue;
- unsigned RCId = CI->first;
- unsigned Limit = RegLimit[RCId];
- int Cost = CI->second;
+ unsigned Class = RPIdAndCost.first;
+ int Limit = RegLimit[Class];
// Don't hoist cheap instructions if they would increase register pressure,
// even if we're under the limit.
if (CheapInstr && !HoistCheapInsts)
return true;
- for (unsigned i = BackTrace.size(); i != 0; --i) {
- SmallVectorImpl<unsigned> &RP = BackTrace[i-1];
- if (RP[RCId] + Cost >= Limit)
+ for (const auto &RP : BackTrace)
+ if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
return true;
- }
}
return false;
@@ -1097,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
/// current block and update their register pressures to reflect the effect
/// of hoisting MI from the current block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
- if (MI->isImplicitDef())
- return;
-
// First compute the 'cost' of the instruction, i.e. its contribution
// to register pressure.
- DenseMap<unsigned, int> Cost;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second += RCCost;
- else
- Cost.insert(std::make_pair(RCId, RCCost));
- } else if (isOperandKill(MO, MRI)) {
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second -= RCCost;
- else
- Cost.insert(std::make_pair(RCId, -RCCost));
- }
- }
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
// Update register pressure of blocks from loop header to current block.
- for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
- SmallVectorImpl<unsigned> &RP = BackTrace[i];
- for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
- CI != CE; ++CI) {
- unsigned RCId = CI->first;
- RP[RCId] += CI->second;
- }
- }
+ for (auto &RP : BackTrace)
+ for (const auto &RPIdAndCost : Cost)
+ RP[RPIdAndCost.first] += RPIdAndCost.second;
}
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
@@ -1171,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (TII->isTriviallyReMaterializable(&MI, AA))
return true;
- // Estimate register pressure to determine whether to LICM the instruction.
- // In low register pressure situation, we can be more aggressive about
- // hoisting. Also, favors hoisting long latency instructions even in
- // moderately high pressure situation.
- // Cheap instructions will only be hoisted if they don't increase register
- // pressure at all.
// FIXME: If there are long latency loop-invariant instructions inside the
// loop at this point, why didn't the optimizer's LICM hoist them?
- DenseMap<unsigned, int> Cost;
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
@@ -1187,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
-
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- DEBUG(dbgs() << "Hoist High Latency: " << MI);
- ++NumHighLatency;
- return true;
- }
- Cost[RCId] += RCCost;
- } else if (isOperandKill(MO, MRI)) {
- // Is a virtual register use is a kill, hoisting it out of the loop
- // may actually reduce register pressure or be register pressure
- // neutral.
- Cost[RCId] -= RCCost;
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
}
}
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favors hoisting long latency instructions even in
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
// Visit BBs from header to current BB, if hoisting this doesn't cause
// high register pressure, then it's safe to proceed.
if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 89054d4..ce6abdd 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index baad411..d9da7bc 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,10 +9,12 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -116,7 +118,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size()-1;
Entry.Fn = BB->getParent();
- MCSymbol *Result = Context.CreateTempSymbol();
+ MCSymbol *Result = Context.createTempSymbol();
Entry.Symbols = Result;
return Result;
}
@@ -276,6 +278,7 @@ bool MachineModuleInfo::doInitialization(Module &M) {
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
// Always emit some info, by default "no personality" info.
Personalities.push_back(nullptr);
+ PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
TheModule = nullptr;
@@ -398,7 +401,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
/// addLandingPad - Provide the label of a try LandingPad block.
///
MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
- MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+ MCSymbol *LandingPadLabel = Context.createTempSymbol();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.LandingPadLabel = LandingPadLabel;
return LandingPadLabel;
@@ -423,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
Personalities.push_back(Personality);
}
+void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad,
+ int State) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.WinEHState = State;
+}
+
/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
///
void MachineModuleInfo::
@@ -452,6 +461,25 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
LP.TypeIds.push_back(0);
}
+void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad,
+ const Function *Filter,
+ const BlockAddress *RecoverBA) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Filter;
+ Handler.RecoverBA = RecoverBA;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
+ const Function *Cleanup) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Cleanup;
+ Handler.RecoverBA = nullptr;
+ LP.SEHHandlers.push_back(Handler);
+}
+
/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
/// pads.
void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
@@ -546,9 +574,18 @@ try_next:;
/// getPersonality - Return the personality function for the current function.
const Function *MachineModuleInfo::getPersonality() const {
- // FIXME: Until PR1414 is fixed, we're using 1 personality function per
- // function
- return !LandingPads.empty() ? LandingPads[0].Personality : nullptr;
+ for (const LandingPadInfo &LPI : LandingPads)
+ if (LPI.Personality)
+ return LPI.Personality;
+ return nullptr;
+}
+
+EHPersonality MachineModuleInfo::getPersonalityType() {
+ if (PersonalityTypeCache == EHPersonality::Unknown) {
+ if (const Function *F = getPersonality())
+ PersonalityTypeCache = classifyEHPersonality(F);
+ }
+ return PersonalityTypeCache;
}
/// getPersonalityIndex - Return unique index for current personality
@@ -572,3 +609,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
// in the zero index.
return 0;
}
+
+const Function *MachineModuleInfo::getWinEHParent(const Function *F) const {
+ StringRef WinEHParentName =
+ F->getFnAttribute("wineh-parent").getValueAsString();
+ if (WinEHParentName.empty() || WinEHParentName == F->getName())
+ return F;
+ return F->getParent()->getFunction(WinEHParentName);
+}
+
+WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) {
+ auto &Ptr = FuncInfoMap[getWinEHParent(F)];
+ if (!Ptr)
+ Ptr.reset(new WinEHFuncInfo);
+ return *Ptr;
+}
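The additions above lean on two lazy-initialization idioms: getPersonalityType caches a classification on first query, and getWinEHFuncInfo default-constructs a map entry on first access. A standalone sketch with illustrative types, not the LLVM API:

#include <map>
#include <memory>
#include <string>

enum class Personality { Unknown, Itanium, MSVC };
struct FuncInfo { int State = 0; };

struct ModuleInfoSketch {
  Personality Cache = Personality::Unknown;
  std::map<std::string, std::unique_ptr<FuncInfo>> FuncInfoMap;

  Personality getPersonalityType(Personality Classified) {
    if (Cache == Personality::Unknown)
      Cache = Classified;          // classify once, reuse on later queries
    return Cache;
  }

  FuncInfo &getFuncInfo(const std::string &Key) {
    auto &Ptr = FuncInfoMap[Key];  // operator[] creates a null entry
    if (!Ptr)
      Ptr.reset(new FuncInfo);     // populate it exactly once
    return *Ptr;
  }
};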
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index a1c7e9f..22d519e 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) {
return LHSS->getName().compare(RHSS->getName());
}
-/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
-/// sorted order.
-MachineModuleInfoImpl::SymbolListTy
-MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
- MachineModuleInfoImpl::StubValueTy>&Map) {
+MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
+ DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
if (!List.empty())
qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+
+ Map.clear();
return List;
}
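getSortedStubs turns unspecified hash-map iteration order into a deterministic listing, and the new non-const signature also drains the map. A standalone sketch of the same pattern with standard containers (illustrative names):

#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

using Entry = std::pair<std::string, int>;

std::vector<Entry> sortedStubs(std::unordered_map<std::string, int> &Map) {
  std::vector<Entry> List(Map.begin(), Map.end());
  std::sort(List.begin(), List.end(),
            [](const Entry &L, const Entry &R) { return L.first < R.first; });
  Map.clear(); // the caller owns the entries now, mirroring the API change
  return List;
}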
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 86bb34b..278a8f2 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -61,11 +61,11 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
}
bool
-MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
+MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
- getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
+ getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF);
// Stop early if there is no room to grow.
if (NewRC == OldRC)
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 9fe23c5..44107d6 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -209,6 +209,11 @@ static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
useDefaultMachineSched);
+static cl::opt<bool> EnableMachineSched(
+ "enable-misched",
+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+ cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (EnableMachineSched.getNumOccurrences()) {
+ if (!EnableMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enableMachineScheduler())
+ return false;
+
DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
@@ -336,9 +347,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (skipOptnoneFunction(*mf.getFunction()))
return false;
- const TargetSubtargetInfo &ST =
- mf.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostMachineScheduler()) {
+ if (!mf.getSubtarget().enablePostMachineScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -934,8 +943,9 @@ updateScheduledPressure(const SUnit *SU,
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
if (NewMaxPressure[ID] >= Limit - 2) {
DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
- << NewMaxPressure[ID] << " > " << Limit << "(+ "
- << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+ << NewMaxPressure[ID]
+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
+ << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
}
}
}
@@ -1027,8 +1037,6 @@ void ScheduleDAGMILive::schedule() {
scheduleMI(SU, IsTopNode);
- updateQueues(SU, IsTopNode);
-
if (DFSResult) {
unsigned SubtreeID = DFSResult->getSubtreeID(SU);
if (!ScheduledTrees.test(SubtreeID)) {
@@ -1040,6 +1048,8 @@ void ScheduleDAGMILive::schedule() {
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
@@ -1434,12 +1444,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// Check if either the dest or source is local. If it's live across a back
// edge, it's not local. Note that if both vregs are live across the back
// edge, we cannot successfully constrain the copy without cyclic scheduling.
- unsigned LocalReg = DstReg;
- unsigned GlobalReg = SrcReg;
+ // If both the copy's source and dest are local live intervals, then we
+ // should treat the dest as the global for the purpose of adding
+ // constraints. This adds edges from source's other uses to the copy.
+ unsigned LocalReg = SrcReg;
+ unsigned GlobalReg = DstReg;
LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
- LocalReg = SrcReg;
- GlobalReg = DstReg;
+ LocalReg = DstReg;
+ GlobalReg = SrcReg;
LocalLI = &LIS->getInterval(LocalReg);
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
return;
@@ -2599,8 +2612,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
TryCand, Cand, PhysRegCopy))
return;
- // Avoid exceeding the target's limit. If signed PSetID is negative, it is
- // invalid; convert it to INT_MAX to give it lowest priority.
+ // Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
TryCand, Cand, RegExcess))
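The new -enable-misched handling asks getNumOccurrences() whether the flag was explicitly given: an explicit value always wins, otherwise the subtarget hook decides. A standalone sketch of that three-way logic, modeling the explicit flag as std::optional (illustrative, not the LLVM API):

#include <optional>

bool shouldRunScheduler(std::optional<bool> CommandLine, bool TargetDefault) {
  if (CommandLine)        // cl::opt reported getNumOccurrences() != 0
    return *CommandLine;  // explicit -enable-misched=<bool> wins
  return TargetDefault;   // otherwise defer to enableMachineScheduler()
}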
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 8337793..5f03390 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -70,6 +71,8 @@ namespace {
// will be split.
SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
+ SparseBitVector<> RegsToClearKillFlags;
+
public:
static char ID; // Pass identification
MachineSinking() : MachineFunctionPass(ID) {
@@ -287,6 +290,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
if (!MadeChange) break;
EverMadeChange = true;
}
+
+ // Now clear any kill flags for recorded registers.
+ for (auto I : RegsToClearKillFlags)
+ MRI->clearKillFlags(I);
+ RegsToClearKillFlags.clear();
+
return EverMadeChange;
}
@@ -643,7 +652,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
return false;
// Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ if (!MI->isSafeToMove(AA, SawStore))
return false;
// FIXME: This should include support for sinking instructions within the
@@ -656,7 +665,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
bool BreakPHIEdge = false;
MachineBasicBlock *ParentBlock = MI->getParent();
- MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock,
+ BreakPHIEdge);
// If there are no outputs, it must have side-effects.
if (!SuccToSinkTo)
@@ -684,7 +694,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// other code paths.
bool TryBreak = false;
bool store = true;
- if (!MI->isSafeToMove(TII, AA, store)) {
+ if (!MI->isSafeToMove(AA, store)) {
DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
TryBreak = true;
}
@@ -755,7 +765,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
- MI->clearKillInfo();
+ // Note that we have to clear the kill flags for any register this instruction
+ // uses as we may sink over another instruction which currently kills the
+ // used registers.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.isUse())
+ RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+ }
return true;
}
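Rather than wiping all kill flags on each sunk instruction, the pass now records used registers in a SparseBitVector and clears their kill flags once per function. A standalone sketch of the deferred clearing with standard containers (illustrative names):

#include <set>
#include <vector>

struct OperandSketch { unsigned Reg; bool IsUse; bool Kill; };

// While sinking: remember registers whose kill flags may now be stale.
void recordSunkUses(const std::vector<OperandSketch> &Ops,
                    std::set<unsigned> &ToClear) {
  for (const OperandSketch &Op : Ops)
    if (Op.IsUse)
      ToClear.insert(Op.Reg);
}

// After the whole function: one pass that drops the recorded flags.
void clearStaleKills(std::vector<OperandSketch> &AllOps,
                     const std::set<unsigned> &ToClear) {
  for (OperandSketch &Op : AllOps)
    if (ToClear.count(Op.Reg))
      Op.Kill = false;
}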
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 2cf87eb..e07250b 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -52,12 +52,11 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TII = MF->getSubtarget().getInstrInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
ProcResourceCycles.resize(MF->getNumBlockIDs() *
@@ -321,9 +320,7 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
unsigned CurCount = MTM.getResources(MBB)->InstrCount;
const MachineBasicBlock *Best = nullptr;
unsigned BestDepth = 0;
- for (MachineBasicBlock::const_pred_iterator
- I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
- const MachineBasicBlock *Pred = *I;
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
const MachineTraceMetrics::TraceBlockInfo *PredTBI =
getDepthResources(Pred);
// Ignore cycles that aren't natural loops.
@@ -345,9 +342,7 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
const MachineLoop *CurLoop = getLoopFor(MBB);
const MachineBasicBlock *Best = nullptr;
unsigned BestHeight = 0;
- for (MachineBasicBlock::const_succ_iterator
- I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
// Don't consider back-edges.
if (CurLoop && Succ == CurLoop->getHeader())
continue;
@@ -464,13 +459,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
// Run an upwards post-order search for the trace start.
Bounds.Downward = false;
Bounds.Visited.clear();
- typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
- for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : inverse_post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
- TBI.Pred = pickTracePred(*I);
+ TBI.Pred = pickTracePred(I);
DEBUG({
if (TBI.Pred)
dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
@@ -478,19 +471,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
dbgs() << "null\n";
});
// The trace leading to I is now known, compute the depth resources.
- computeDepthResources(*I);
+ computeDepthResources(I);
}
// Run a downwards post-order search for the trace end.
Bounds.Downward = true;
Bounds.Visited.clear();
- typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
- for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
- TBI.Succ = pickTraceSucc(*I);
+ TBI.Succ = pickTraceSucc(I);
DEBUG({
if (TBI.Succ)
dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
@@ -498,7 +489,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
dbgs() << "null\n";
});
// The trace leaving I is now known, compute the height resources.
- computeHeightResources(*I);
+ computeHeightResources(I);
}
}
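The loops above trade spelled-out iterator typedefs for range-based for over helpers such as inverse_post_order_ext. The enabling piece is a tiny adapter that gives any begin/end pair the range interface; LLVM ships an equivalent in llvm/ADT/iterator_range.h. A standalone sketch:

#include <vector>

template <typename It> struct RangeSketch {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

template <typename It> RangeSketch<It> makeRange(It B, It E) { return {B, E}; }

int sum(const std::vector<int> &V) {
  int S = 0;
  for (int X : makeRange(V.begin(), V.end())) // no iterator typedef needed
    S += X;
  return S;
}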
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 364e8e2..f5edcb7 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -55,16 +55,13 @@ namespace {
MachineVerifier(Pass *pass, const char *b) :
PASS(pass),
- Banner(b),
- OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+ Banner(b)
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
const char *Banner;
- const char *const OutFileName;
- raw_ostream *OS;
const MachineFunction *MF;
const TargetMachine *TM;
const TargetInstrInfo *TII;
@@ -277,22 +274,6 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
- raw_ostream *OutFile = nullptr;
- if (OutFileName) {
- std::error_code EC;
- OutFile = new raw_fd_ostream(OutFileName, EC,
- sys::fs::F_Append | sys::fs::F_Text);
- if (EC) {
- errs() << "Error opening '" << OutFileName << "': " << EC.message()
- << '\n';
- exit(1);
- }
-
- OS = OutFile;
- } else {
- OS = &errs();
- }
-
foundErrors = 0;
this->MF = &MF;
@@ -327,7 +308,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
- *OS << "Instruction: " << *MBBI;
+ errs() << "Instruction: " << *MBBI;
continue;
}
@@ -348,8 +329,18 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
} else if (!CurBundle)
report("No bundle header", MBBI);
visitMachineInstrBefore(MBBI);
- for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
- visitMachineOperand(&MBBI->getOperand(I), I);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ const MachineInstr &MI = *MBBI;
+ const MachineOperand &Op = MI.getOperand(I);
+ if (Op.getParent() != &MI) {
+ // Make sure to use the correct addOperand / RemoveOperand / ChangeTo
+ // functions when replacing operands of a MachineInstr.
+ report("Instruction has operand with wrong parent set", &MI);
+ }
+
+ visitMachineOperand(&Op, I);
+ }
+
visitMachineInstrAfter(MBBI);
// Was this the last bundled instruction?
@@ -363,9 +354,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
}
visitMachineFunctionAfter();
- if (OutFile)
- delete OutFile;
- else if (foundErrors)
+ if (foundErrors)
report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
// Clean up.
@@ -382,76 +371,76 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
- *OS << '\n';
+ errs() << '\n';
if (!foundErrors++) {
if (Banner)
- *OS << "# " << Banner << '\n';
- MF->print(*OS, Indexes);
+ errs() << "# " << Banner << '\n';
+ MF->print(errs(), Indexes);
}
- *OS << "*** Bad machine code: " << msg << " ***\n"
+ errs() << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: BB#" << MBB->getNumber()
+ errs() << "- basic block: BB#" << MBB->getNumber()
<< ' ' << MBB->getName()
<< " (" << (const void*)MBB << ')';
if (Indexes)
- *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ errs() << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
- *OS << '\n';
+ errs() << '\n';
}
void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
- *OS << "- instruction: ";
+ errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(MI))
- *OS << Indexes->getInstructionIndex(MI) << '\t';
- MI->print(*OS, TM);
+ errs() << Indexes->getInstructionIndex(MI) << '\t';
+ MI->print(errs(), TM);
}
void MachineVerifier::report(const char *msg,
const MachineOperand *MO, unsigned MONum) {
assert(MO);
report(msg, MO->getParent());
- *OS << "- operand " << MONum << ": ";
- MO->print(*OS, TM);
- *OS << "\n";
+ errs() << "- operand " << MONum << ": ";
+ MO->print(errs(), TRI);
+ errs() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
const LiveInterval &LI) {
report(msg, MF);
- *OS << "- interval: " << LI << '\n';
+ errs() << "- interval: " << LI << '\n';
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI) {
report(msg, MBB);
- *OS << "- interval: " << LI << '\n';
+ errs() << "- interval: " << LI << '\n';
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
const LiveRange &LR, unsigned Reg,
unsigned LaneMask) {
report(msg, MBB);
- *OS << "- liverange: " << LR << '\n';
- *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
+ errs() << "- liverange: " << LR << '\n';
+ errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
if (LaneMask != 0)
- *OS << "- lanemask: " << format("%04X\n", LaneMask);
+ errs() << "- lanemask: " << format("%04X\n", LaneMask);
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
const LiveRange &LR, unsigned Reg,
unsigned LaneMask) {
report(msg, MF);
- *OS << "- liverange: " << LR << '\n';
- *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
+ errs() << "- liverange: " << LR << '\n';
+ errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
if (LaneMask != 0)
- *OS << "- lanemask: " << format("%04X\n", LaneMask);
+ errs() << "- lanemask: " << format("%04X\n", LaneMask);
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -539,7 +528,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has successor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Preds.count(MBB)) {
report("Inconsistent CFG", MBB);
- *OS << "MBB is not in the predecessor list of the successor BB#"
+ errs() << "MBB is not in the predecessor list of the successor BB#"
<< (*I)->getNumber() << ".\n";
}
}
@@ -551,7 +540,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has predecessor that isn't part of the function.", MBB);
if (!MBBInfoMap[*I].Succs.count(MBB)) {
report("Inconsistent CFG", MBB);
- *OS << "MBB is not in the successor list of the predecessor BB#"
+ errs() << "MBB is not in the successor list of the predecessor BB#"
<< (*I)->getNumber() << ".\n";
}
}
@@ -726,7 +715,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
SlotIndex idx = Indexes->getInstructionIndex(MI);
if (!(idx > lastIndex)) {
report("Instruction index out of order", MI);
- *OS << "Last instruction was at " << lastIndex << '\n';
+ errs() << "Last instruction was at " << lastIndex << '\n';
}
lastIndex = idx;
}
@@ -739,7 +728,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
FirstTerminator = MI;
} else if (FirstTerminator) {
report("Non-terminator instruction after the first terminator", MI);
- *OS << "First terminator was:\t" << *FirstTerminator;
+ errs() << "First terminator was:\t" << *FirstTerminator;
}
}
@@ -760,7 +749,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
if (!isUInt<5>(MI->getOperand(1).getImm()))
report("Unknown asm flags", &MI->getOperand(1), 1);
- assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+ static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
unsigned OpNo = InlineAsm::MIOp_FirstOperand;
unsigned NumOps;
@@ -791,7 +780,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
if (MI->getNumOperands() < MCID.getNumOperands()) {
report("Too few operands", MI);
- *OS << MCID.getNumOperands() << " operands expected, but "
+ errs() << MCID.getNumOperands() << " operands expected, but "
<< MI->getNumOperands() << " given.\n";
}
@@ -921,7 +910,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
- *OS << TRI->getName(Reg) << " is not a "
+ errs() << TRI->getName(Reg) << " is not a "
<< TRI->getRegClassName(DRC) << " register.\n";
}
}
@@ -933,13 +922,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TRI->getSubClassWithSubReg(RC, SubIdx);
if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
- *OS << "Register class " << TRI->getRegClassName(RC)
+ errs() << "Register class " << TRI->getRegClassName(RC)
<< " does not support subreg index " << SubIdx << "\n";
return;
}
if (RC != SRC) {
report("Invalid register class for subregister index", MO, MONum);
- *OS << "Register class " << TRI->getRegClassName(RC)
+ errs() << "Register class " << TRI->getRegClassName(RC)
<< " does not fully support subreg index " << SubIdx << "\n";
return;
}
@@ -948,7 +937,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
- TRI->getLargestLegalSuperClass(RC);
+ TRI->getLargestLegalSuperClass(RC, *MF);
if (!SuperRC) {
report("No largest legal super class exists.", MO, MONum);
return;
@@ -961,7 +950,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
- *OS << "Expected a " << TRI->getRegClassName(DRC)
+ errs() << "Expected a " << TRI->getRegClassName(DRC)
<< " register, but got a " << TRI->getRegClassName(RC)
<< " register\n";
}
@@ -987,11 +976,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
- *OS << "Live stack: " << LI << '\n';
+ errs() << "Live stack: " << LI << '\n';
}
if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
- *OS << "Live stack: " << LI << '\n';
+ errs() << "Live stack: " << LI << '\n';
}
}
break;
@@ -1030,12 +1019,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveQueryResult LRQ = LR->Query(UseIdx);
if (!LRQ.valueIn()) {
report("No live segment at use", MO, MONum);
- *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
<< ' ' << *LR << '\n';
}
if (MO->isKill() && !LRQ.isKill()) {
report("Live range continues after kill flag", MO, MONum);
- *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
+ errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
}
}
}
@@ -1048,13 +1037,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveQueryResult LRQ = LI.Query(UseIdx);
if (!LRQ.valueIn()) {
report("No live segment at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ errs() << UseIdx << " is not live in " << LI << '\n';
}
// Check for extra kill flags.
// Note that we allow missing kill flags for now.
if (MO->isKill() && !LRQ.isKill()) {
report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ errs() << "Live range: " << LI << '\n';
}
} else {
report("Virtual register has no live interval", MO, MONum);
@@ -1077,6 +1066,25 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
}
+ // If there is an additional implicit use of a super-register we stop
+ // here. By definition we are fine if the super-register is not
+ // (completely) dead; if the complete super-register is dead we will
+ // get a report for its operand.
+ if (Bad) {
+ for (const MachineOperand &MOP : MI->uses()) {
+ if (!MOP.isReg())
+ continue;
+ if (!MOP.isImplicit())
+ continue;
+ for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
+ ++SubRegs) {
+ if (*SubRegs == Reg) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+ }
if (Bad)
report("Using an undefined physical register", MO, MONum);
} else if (MRI->def_empty(Reg)) {
@@ -1118,19 +1126,19 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << VNI->id << " is not defined at "
+ errs() << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
}
} else {
report("No live segment at def", MO, MONum);
- *OS << DefIdx << " is not live in " << LI << '\n';
+ errs() << DefIdx << " is not live in " << LI << '\n';
}
// Check that, if the dead def flag is present, LiveInts agree.
if (MO->isDead()) {
LiveQueryResult LRQ = LI.Query(DefIdx);
if (!LRQ.isDeadDef()) {
report("Live range continues after dead def flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ errs() << "Live range: " << LI << '\n';
}
}
} else {
@@ -1172,7 +1180,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
SlotIndex stop = Indexes->getMBBEndIdx(MBB);
if (!(stop > lastIndex)) {
report("Block ends before last instruction index", MBB);
- *OS << "Block ends at " << stop
+ errs() << "Block ends at " << stop
<< " last instruction was at " << lastIndex << '\n';
}
lastIndex = stop;
@@ -1274,7 +1282,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
if (!seen.count(*PrI)) {
report("Missing PHI operand", &BBI);
- *OS << "BB#" << (*PrI)->getNumber()
+ errs() << "BB#" << (*PrI)->getNumber()
<< " is a predecessor according to the CFG.\n";
}
}
@@ -1305,7 +1313,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
++I)
if (MInfo.regsKilled.count(*I)) {
report("Virtual register killed in block, but needed live out.", &MBB);
- *OS << "Virtual register " << PrintReg(*I)
+ errs() << "Virtual register " << PrintReg(*I)
<< " is used after the block.\n";
}
}
@@ -1337,13 +1345,13 @@ void MachineVerifier::verifyLiveVariables() {
if (MInfo.vregsRequired.count(Reg)) {
if (!VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block missing from AliveBlocks", &MBB);
- *OS << "Virtual register " << PrintReg(Reg)
+ errs() << "Virtual register " << PrintReg(Reg)
<< " must be live through the block.\n";
}
} else {
if (VI.AliveBlocks.test(MBB.getNumber())) {
report("LiveVariables: Block should not be in AliveBlocks", &MBB);
- *OS << "Virtual register " << PrintReg(Reg)
+ errs() << "Virtual register " << PrintReg(Reg)
<< " is not needed live through the block.\n";
}
}
@@ -1362,7 +1370,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!LiveInts->hasInterval(Reg)) {
report("Missing live interval for virtual register", MF);
- *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ errs() << PrintReg(Reg, TRI) << " still has defs or uses\n";
continue;
}
@@ -1388,13 +1396,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!DefVNI) {
report("Valno not live at def and not marked unused", MF, LR, Reg,
LaneMask);
- *OS << "Valno #" << VNI->id << '\n';
+ errs() << "Valno #" << VNI->id << '\n';
return;
}
if (DefVNI != VNI) {
report("Live segment at def has different valno", MF, LR, Reg, LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " where valno #" << DefVNI->id << " is live\n";
return;
}
@@ -1402,7 +1410,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
report("Invalid definition index", MF, LR, Reg, LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LR << '\n';
return;
}
@@ -1411,7 +1419,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
report("PHIDef value is not defined at MBB start", MBB, LR, Reg,
LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def
<< ", not at the beginning of BB#" << MBB->getNumber() << '\n';
}
return;
@@ -1421,7 +1429,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
report("No instruction at def index", MBB, LR, Reg, LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
return;
}
@@ -1449,7 +1457,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!hasDef) {
report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LR << '\n';
+ errs() << "Valno #" << VNI->id << " in " << LR << '\n';
}
// Early clobber defs begin at USE slots, but other defs must begin at
@@ -1458,12 +1466,12 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (!VNI->def.isEarlyClobber()) {
report("Early clobber def must be at an early-clobber slot", MBB, LR,
Reg, LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
} else if (!VNI->def.isRegister()) {
report("Non-PHI, non-early clobber def must be at a register slot",
MBB, LR, Reg, LaneMask);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
}
}
@@ -1477,32 +1485,32 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
report("Foreign valno in live segment", MF, LR, Reg, LaneMask);
- *OS << S << " has a bad valno\n";
+ errs() << S << " has a bad valno\n";
}
if (VNI->isUnused()) {
report("Live segment valno is marked unused", MF, LR, Reg, LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg,
LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
return;
}
@@ -1521,7 +1529,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!MI) {
report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg,
LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
return;
}
@@ -1529,7 +1537,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isBlock()) {
report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg,
LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
}
if (S.end.isDead()) {
@@ -1538,7 +1546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!SlotIndex::isSameInstr(S.start, S.end)) {
report("Live segment ending at dead slot spans instructions", EndMBB, LR,
Reg, LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
}
}
@@ -1549,7 +1557,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
report("Live segment ending at early clobber slot must be "
"redefined by an EC def in the same instruction", EndMBB, LR, Reg,
LaneMask);
- *OS << S << '\n';
+ errs() << S << '\n';
}
}
@@ -1575,10 +1583,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
- if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) {
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ !hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
- *OS << S << " in " << LR << '\n';
+ errs() << S << " in " << LR << '\n';
}
}
}
@@ -1619,7 +1628,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!PVNI) {
report("Register not marked live out of predecessor", *PI, LR, Reg,
LaneMask);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << '\n';
continue;
@@ -1629,7 +1638,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!IsPHI && PVNI != VNI) {
report("Different value live out of predecessor", *PI, LR, Reg,
LaneMask);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
+ errs() << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
@@ -1651,40 +1660,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- verifyLiveRange(LI, LI.reg);
-
unsigned Reg = LI.reg;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- unsigned Mask = 0;
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0)
- report("Lane masks of sub ranges overlap in live interval", MF, LI);
- if ((SR.LaneMask & ~MaxMask) != 0)
- report("Subrange lanemask is invalid", MF, LI);
- Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg, SR.LaneMask);
- if (!LI.covers(SR))
- report("A Subrange is not covered by the main range", MF, LI);
- }
- } else if (LI.hasSubRanges()) {
- report("subregister liveness only allowed for virtual registers", MF, LI);
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ verifyLiveRange(LI, Reg);
+
+ unsigned Mask = 0;
+ unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask) != 0)
+ report("Lane masks of sub ranges overlap in live interval", MF, LI);
+ if ((SR.LaneMask & ~MaxMask) != 0)
+ report("Subrange lanemask is invalid", MF, LI);
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ if (!LI.covers(SR))
+ report("A Subrange is not covered by the main range", MF, LI);
}
// Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF, LI);
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ errs() << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ errs() << ' ' << (*I)->id;
+ errs() << '\n';
}
}
}
@@ -1712,8 +1716,8 @@ namespace {
/// by a FrameDestroy <n>, stack adjustments are identical on all
/// CFG edges to a merge point, and the frame is destroyed at the end of a return block.
void MachineVerifier::verifyStackFrame() {
- int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
SmallVector<StackStateOfBB, 8> SPState;
SPState.resize(MF->getNumBlockIDs());
@@ -1764,7 +1768,7 @@ void MachineVerifier::verifyStackFrame() {
BBState.ExitValue;
if (BBState.ExitIsSetup && AbsSPAdj != Size) {
report("FrameDestroy <n> is after FrameSetup <m>", &I);
- *OS << "FrameDestroy <" << Size << "> is after FrameSetup <"
+ errs() << "FrameDestroy <" << Size << "> is after FrameSetup <"
<< AbsSPAdj << ">.\n";
}
BBState.ExitValue += Size;
@@ -1781,7 +1785,7 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
report("The exit stack state of a predecessor is inconsistent.", MBB);
- *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
+ errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
<< SPState[(*I)->getNumber()].ExitValue << ", "
<< SPState[(*I)->getNumber()].ExitIsSetup
<< "), while BB#" << MBB->getNumber() << " has entry state ("
@@ -1797,7 +1801,7 @@ void MachineVerifier::verifyStackFrame() {
(SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
report("The entry stack state of a successor is inconsistent.", MBB);
- *OS << "Successor BB#" << (*I)->getNumber() << " has entry state ("
+ errs() << "Successor BB#" << (*I)->getNumber() << " has entry state ("
<< SPState[(*I)->getNumber()].EntryValue << ", "
<< SPState[(*I)->getNumber()].EntryIsSetup
<< "), while BB#" << MBB->getNumber() << " has exit state ("
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
index 48db200..17654a6 100644
--- a/contrib/llvm/lib/CodeGen/OcamlGC.cpp
+++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
@@ -20,16 +20,15 @@
using namespace llvm;
namespace {
- class OcamlGC : public GCStrategy {
- public:
- OcamlGC();
- };
+class OcamlGC : public GCStrategy {
+public:
+ OcamlGC();
+};
}
-static GCRegistry::Add<OcamlGC>
-X("ocaml", "ocaml 3.10-compatible GC");
+static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC");
-void llvm::linkOcamlGC() { }
+void llvm::linkOcamlGC() {}
OcamlGC::OcamlGC() {
NeededSafePoints = 1 << GC::PostCall;
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index def2e3d..d514190 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
@@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
cl::Hidden, cl::desc("Split all critical edges during "
"PHI elimination"));
+static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
+ "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
+ cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
+
namespace {
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
@@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// there is a risk it may not be coalesced away.
//
// If the copy would be a kill, there is no need to split the edge.
- if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB);
+ if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
continue;
-
- DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
- << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
- << ": " << *BBI);
+ if (ShouldSplit) {
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+ }
// If Reg is not live-in to MBB, it means it must be live-in to some
// other PreMBB successor, and we can avoid the interference by splitting
@@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// is likely to be left after coalescing. If we are looking at a loop
// exiting edge, split it so we won't insert code in the loop, otherwise
// don't bother.
- bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+ ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB);
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
@@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Split unless this edge is entering CurLoop from an outer loop.
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
}
- if (!ShouldSplit)
+ if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
DEBUG(dbgs() << "Failed to split critical edge.\n");
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index e53e874..6902243 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -14,18 +14,16 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
@@ -54,11 +52,11 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
cl::desc("Disable Machine Common Subexpression Elimination"));
static cl::opt<cl::boolOrDefault>
-OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<cl::boolOrDefault> OptimizeRegAlloc(
+ "optimize-regalloc", cl::Hidden,
cl::desc("Enable optimized register allocation compilation path."));
-static cl::opt<cl::boolOrDefault>
-EnableMachineSched("enable-misched",
- cl::desc("Enable the machine instruction scheduling pass."));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -82,7 +80,9 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr));
+ cl::init(false),
+ cl::ZeroOrMore);
+
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
@@ -115,28 +115,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
return PassID;
}
-/// Allow Pass selection to be overriden by command line options. This supports
-/// flags with ternary conditions. TargetID is passed through by default. The
-/// pass is suppressed when the option is false. When the option is true, the
-/// StandardID is selected if the target provides no default.
-static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
- cl::boolOrDefault Override,
- AnalysisID StandardID) {
- switch (Override) {
- case cl::BOU_UNSET:
- return TargetID;
- case cl::BOU_TRUE:
- if (TargetID.isValid())
- return TargetID;
- if (StandardID == nullptr)
- report_fatal_error("Target cannot enable pass");
- return StandardID;
- case cl::BOU_FALSE:
- return IdentifyingPassPtr();
- }
- llvm_unreachable("Invalid command line option state");
-}
-
/// Allow standard passes to be disabled by the command line, regardless of who
/// is adding the pass.
///
@@ -181,9 +159,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
if (StandardID == &MachineCSEID)
return applyDisable(TargetID, DisableMachineCSE);
- if (StandardID == &MachineSchedulerID)
- return applyOverride(TargetID, EnableMachineSched, StandardID);
-
if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
return applyDisable(TargetID, DisablePostRAMachineLICM);
@@ -234,10 +209,10 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
- Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
- Impl(nullptr), Initialized(false), DisableVerify(false),
- EnableTailMerge(true) {
+ : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
+ Started(true), Stopped(false), AddingMachinePasses(false), TM(tm),
+ Impl(nullptr), Initialized(false), DisableVerify(false),
+ EnableTailMerge(true), EnableShrinkWrap(false) {
Impl = new PassConfigImpl();
@@ -248,11 +223,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
-
- // Temporarily disable experimental passes.
- const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
- if (!ST.useMachineScheduler())
- disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
@@ -408,10 +378,8 @@ void TargetPassConfig::addIRPasses() {
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
- if (!DisableVerify) {
+ if (!DisableVerify)
addPass(createVerifierPass());
- addPass(createDebugInfoVerifierPass());
- }
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
@@ -420,7 +388,10 @@ void TargetPassConfig::addIRPasses() {
addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
addPass(createGCLoweringPass());
+ addPass(createShadowStackGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -448,10 +419,15 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::ItaniumWinEH:
addPass(createDwarfEHPass(TM));
break;
- case ExceptionHandling::MSVC: // FIXME: Add preparation.
+ case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
+ addPass(createWinEHPass(TM));
+ addPass(createDwarfEHPass(TM));
+ break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -474,12 +450,6 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
- // Need to verify DebugInfo *before* creating the stack protector analysis.
- // It's a function pass, and verifying between it and its users causes a
- // crash.
- if (!DisableVerify)
- addPass(createDebugInfoVerifierPass());
-
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
@@ -557,6 +527,8 @@ void TargetPassConfig::addMachinePasses() {
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
+ if (getEnableShrinkWrap())
+ addPass(&ShrinkWrapID);
addPass(&PrologEpilogCodeInserterID);
/// Add passes that optimize machine instructions after register allocation.
@@ -632,6 +604,21 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
}
+bool TargetPassConfig::getEnableShrinkWrap() const {
+ switch (EnableShrinkWrapOpt) {
+ case cl::BOU_UNSET:
+ return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None;
+ // If EnableShrinkWrap is set, it takes precedence over whatever the
+ // target sets. The rationale is that we assume we want to test
+ // something related to shrink-wrapping.
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Invalid shrink-wrapping state");
+}
+
//===---------------------------------------------------------------------===//
/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
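getEnableShrinkWrap above is a typical cl::boolOrDefault ternary: an unset flag defers to the target and the optimization level, while an explicit value always wins. A standalone sketch (illustrative, not the LLVM API):

enum class BoolOrDefault { Unset, True, False };

bool enableShrinkWrap(BoolOrDefault Opt, bool TargetDefault, bool Optimizing) {
  switch (Opt) {
  case BoolOrDefault::Unset:
    return TargetDefault && Optimizing; // target choice, only when optimizing
  case BoolOrDefault::True:
    return true;                        // explicit request overrides target
  case BoolOrDefault::False:
    return false;
  }
  return false; // not reachable; keeps compilers quiet
}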
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 283d1f2..ebe05e3 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (ExtendLife && !ExtendedUses.empty())
// Extend the liveness of the extension result.
- std::copy(ExtendedUses.begin(), ExtendedUses.end(),
- std::back_inserter(Uses));
+ Uses.append(ExtendedUses.begin(), ExtendedUses.end());
// Now replace all uses.
bool Changed = false;
@@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
// => v0 = COPY v1
// Currently we haven't seen motivating example for that and we
// want to avoid untested code.
- NumRewrittenCopies += Changed == true;
+ NumRewrittenCopies += Changed;
return Changed;
}
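The Uses.append change is the usual replacement for std::copy with std::back_inserter; with standard containers the equivalent is a ranged insert. A minimal sketch:

#include <vector>

void extendUses(std::vector<int> &Uses, const std::vector<int> &Extended) {
  // One call, one potential reallocation, same element order.
  Uses.insert(Uses.end(), Extended.begin(), Extended.end());
}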
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 89e1d11..55f08e4 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -141,7 +141,7 @@ namespace {
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
- ~SchedulePostRATDList();
+ ~SchedulePostRATDList() override;
/// startBlock - Initialize register live-range state for scheduling in
/// this block.
@@ -282,9 +282,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST =
- Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+ if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
AntiDepMode, CriticalPathRCs))
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 61407fa..76583f0 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "PrologEpilogInserter.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -28,8 +27,10 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
@@ -48,6 +49,53 @@ using namespace llvm;
#define DEBUG_TYPE "pei"
+namespace {
+class PEI : public MachineFunctionPass {
+public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Save and Restore blocks of the current function.
+ MachineBasicBlock *SaveBlock;
+ SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateSets(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock *MBB);
+};
+} // namespace
+
char PEI::ID = 0;
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
@@ -85,20 +133,26 @@ bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
/// Compute the set of return blocks
void PEI::calculateSets(MachineFunction &Fn) {
- // Sets used to compute spill, restore placement sets.
- const std::vector<CalleeSavedInfo> &CSI =
- Fn.getFrameInfo()->getCalleeSavedInfo();
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
- // If no CSRs used, we are done.
- if (CSI.empty())
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) {
+ SaveBlock = MFI->getSavePoint();
+ assert(MFI->getRestorePoint() && "Both restore and save must be set");
+ RestoreBlocks.push_back(MFI->getRestorePoint());
return;
+ }
// Save refs to entry and return blocks.
- EntryBlock = Fn.begin();
+ SaveBlock = Fn.begin();
for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
MBB != E; ++MBB)
if (isReturnBlock(MBB))
- ReturnBlocks.push_back(MBB);
+ RestoreBlocks.push_back(MBB);
return;
}
@@ -178,7 +232,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
}
delete RS;
- ReturnBlocks.clear();
+ RestoreBlocks.clear();
return true;
}
@@ -194,12 +248,12 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
bool AdjustsStack = MFI->adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
- int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
// Early exit for targets which have no call frame setup/destroy pseudo
// instructions.
- if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
return;
std::vector<MachineBasicBlock::iterator> FrameSDOps;
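The opcode getters now return unsigned, so the missing-opcode sentinel is spelled ~0u rather than -1. Both denote the same all-ones bit pattern, as this small self-contained check shows:

    #include <cassert>

    int main() {
      // -1 converted to unsigned is the all-ones value, i.e. ~0u, so
      // `Opcode == ~0u` tests exactly what `Opcode == -1` did before.
      unsigned FrameSetupOpcode = static_cast<unsigned>(-1);
      assert(FrameSetupOpcode == ~0u);
      return 0;
    }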
@@ -324,6 +378,61 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
MFI->setCalleeSavedInfo(CSI);
}
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Visited will contain all the basic blocks that are in the region
+ // where the callee saved registers are alive:
+ // - Anything that is not Save or Restore -> LiveThrough.
+ // - Save -> LiveIn.
+ // - Restore -> LiveOut.
+ // The live-out is not attached to the block, so no need to keep
+ // Restore in this set.
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ SmallVector<MachineBasicBlock *, 8> WorkList;
+ MachineBasicBlock *Entry = &MF.front();
+ MachineBasicBlock *Save = MFI->getSavePoint();
+
+ if (!Save)
+ Save = Entry;
+
+ if (Entry != Save) {
+ WorkList.push_back(Entry);
+ Visited.insert(Entry);
+ }
+ Visited.insert(Save);
+
+ MachineBasicBlock *Restore = MFI->getRestorePoint();
+ if (Restore)
+ // By construction Restore cannot be visited, otherwise it
+ // means there exists a path to Restore that does not go
+ // through Save.
+ WorkList.push_back(Restore);
+
+ while (!WorkList.empty()) {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ if (CurBB == Save)
+ continue;
+ // Enqueue all the successors not already visited.
+ // Those are by construction either before Save or after Restore.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ }
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (MachineBasicBlock *MBB : Visited)
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(CSI[i].getReg());
+ }
+}
+
/// insertCSRSpillsAndRestores - Insert spill and restore code for
/// callee saved registers used in the function.
///
@@ -344,26 +453,22 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MachineBasicBlock::iterator I;
// Spill using target interface.
- I = EntryBlock->begin();
- if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Add the callee-saved register as live-in.
- // It's killed at the spill.
- EntryBlock->addLiveIn(CSI[i].getReg());
-
// Insert the spill to the stack frame.
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
RC, TRI);
}
}
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
// Restore using target interface.
- for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
- MachineBasicBlock *MBB = ReturnBlocks[ri];
+ for (MachineBasicBlock *MBB : RestoreBlocks) {
I = MBB->end();
- --I;
// Skip over all terminator instructions, which are part of the return
// sequence.
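The updateLiveness helper added above walks the CFG with an explicit worklist, marking every block in the region where the callee-saved registers must be declared live-in. A compilable sketch of that traversal pattern, using a hypothetical Block type in place of MachineBasicBlock:

    #include <set>
    #include <vector>

    struct Block { std::vector<Block *> Succs; }; // stand-in for MachineBasicBlock

    // Collect the blocks where the callee-saved registers are live, mirroring
    // updateLiveness: everything reachable from the entry without crossing
    // Save, plus everything reachable from Restore.
    static std::set<Block *> liveRegion(Block *Entry, Block *Save, Block *Restore) {
      std::set<Block *> Visited;
      std::vector<Block *> WorkList;
      if (!Save)
        Save = Entry;
      if (Entry != Save) {
        WorkList.push_back(Entry);
        Visited.insert(Entry);
      }
      Visited.insert(Save);
      if (Restore)
        WorkList.push_back(Restore);
      while (!WorkList.empty()) {
        Block *CurBB = WorkList.back();
        WorkList.pop_back();
        if (CurBB == Save) // the region past Save is handled by the spill itself
          continue;
        for (Block *Succ : CurBB->Succs)
          if (Visited.insert(Succ).second) // enqueue each successor only once
            WorkList.push_back(Succ);
      }
      return Visited;
    }

    int main() {
      Block A, B, C;
      A.Succs = {&B};
      B.Succs = {&C};
      // Save = B, Restore = C: only A and B end up in the region (C's
      // live-out is handled at the restore itself, as the comments note).
      return liveRegion(&A, &B, &C).size() == 2 ? 0 : 1;
    }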
@@ -495,7 +600,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
+ Offset = RoundUpToAlignment(Offset, Align);
MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
@@ -504,7 +609,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
+ Offset = RoundUpToAlignment(Offset, Align);
MFI->setObjectOffset(i, Offset);
Offset += MFI->getObjectSize(i);
@@ -537,7 +642,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = RoundUpToAlignment(Offset, Align);
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -656,8 +761,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ Offset = RoundUpToAlignment(Offset, StackAlign);
}
// Update frame info to pretend that this is part of the stack...
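All four alignment computations above now go through RoundUpToAlignment, which rounds Offset up to the next multiple of Align; the masked form replaced in the last hunk is the power-of-two special case of the same arithmetic. A small sketch of the equivalence (helper names are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Round Offset up to a multiple of Align by division...
    static uint64_t roundUpDiv(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }
    // ...or by masking, which additionally requires Align to be a power of two.
    static uint64_t roundUpMask(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }

    int main() {
      assert(roundUpDiv(13, 8) == 16 && roundUpMask(13, 8) == 16);
      assert(roundUpDiv(16, 8) == 16 && roundUpMask(16, 8) == 16); // already aligned
      return 0;
    }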
@@ -674,21 +778,18 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Add prologue to the function...
- TFI.emitPrologue(Fn);
+ TFI.emitPrologue(Fn, *SaveBlock);
- // Add epilogue to restore the callee-save registers in each exiting block
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- // If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn())
- TFI.emitEpilogue(Fn, *I);
- }
+ // Add epilogue to restore the callee-save registers in each exiting block.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ TFI.emitEpilogue(Fn, *RestoreBlock);
// Emit additional code that is required to support segmented stacks, if
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
if (Fn.shouldSplitStack())
- TFI.adjustForSegmentedStacks(Fn);
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
@@ -696,7 +797,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// different conditional check and another BIF for allocating more stack
// space.
if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
- TFI.adjustForHiPEPrologue(Fn);
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -706,6 +807,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
if (!TFI.needsFrameIndexResolution(Fn)) return;
+ MachineModuleInfo &MMI = Fn.getMMI();
+ const Function *F = Fn.getFunction();
+ const Function *ParentF = MMI.getWinEHParent(F);
+ unsigned FrameReg;
+ if (F == ParentF) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
+ // FIXME: This should be unconditional but we have bugs in the preparation
+ // pass.
+ if (FuncInfo.UnwindHelpFrameIdx != INT_MAX)
+ FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP(
+ Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg);
+ } else if (MMI.hasWinEHFuncInfo(F)) {
+ WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction());
+ auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F);
+ if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end())
+ FuncInfo.CatchHandlerParentFrameObjOffset[F] =
+ TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg);
+ }
+
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
@@ -744,8 +864,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
@@ -811,17 +931,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
continue;
}
- // Frame allocations are target independent. Simply swap the index with
- // the offset.
- if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) {
- assert(TFI->hasFP(Fn) && "frame alloc requires FP");
- MachineOperand &FI = MI->getOperand(i);
- unsigned Reg;
- int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg);
- FI.ChangeToImmediate(FrameOffset);
- continue;
- }
-
// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
// to insert more than one instruction. We need the register
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
deleted file mode 100644
index f88b8ef..0000000
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is responsible for finalizing the functions frame layout, saving
-// callee saved registers, and for emitting prolog & epilog code for the
-// function.
-//
-// This pass must be run after register allocation. After this pass is
-// executed, it is illegal to construct MO_FrameIndex operands.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
- class RegScavenger;
- class MachineBasicBlock;
-
- class PEI : public MachineFunctionPass {
- public:
- static char ID;
- PEI() : MachineFunctionPass(ID) {
- initializePEIPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
- /// frame indexes with appropriate references.
- ///
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- private:
- RegScavenger *RS;
-
- // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
- // stack frame indexes.
- unsigned MinCSFrameIndex, MaxCSFrameIndex;
-
- // Entry and return blocks of the current function.
- MachineBasicBlock* EntryBlock;
- SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
-
- // Flag to control whether to use the register scavenger to resolve
- // frame index materialization registers. Set according to
- // TRI->requiresFrameIndexScavenging() for the curren function.
- bool FrameIndexVirtualScavenging;
-
- void calculateSets(MachineFunction &Fn);
- void calculateCallsInformation(MachineFunction &Fn);
- void calculateCalleeSavedRegisters(MachineFunction &Fn);
- void insertCSRSpillsAndRestores(MachineFunction &Fn);
- void calculateFrameObjectOffsets(MachineFunction &Fn);
- void replaceFrameIndices(MachineFunction &Fn);
- void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
- int &SPAdj);
- void scavengeFrameVirtualRegs(MachineFunction &Fn);
- void insertPrologEpilogCode(MachineFunction &Fn);
-
- // Convenience for recognizing return blocks.
- bool isReturnBlock(MachineBasicBlock* MBB);
- };
-} // End llvm namespace
-#endif
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index 6b346f4..16ff48e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -27,6 +27,7 @@
#endif
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index c621414..fd3d4d7 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
const MDNode *Expr = DBG->getDebugExpression();
bool IsIndirect = DBG->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
- DebugLoc DL;
- if (MI == MBB->end()) {
- // If MI is at basic block end then use last instruction's location.
- MachineBasicBlock::iterator EI = MI;
- DL = (--EI)->getDebugLoc();
- } else
- DL = MI->getDebugLoc();
+ DebugLoc DL = DBG->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
MachineInstr *NewDV =
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(FI)
@@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() {
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
+ assert(
+ cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SS)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index edc3294..26f42c9 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
+ (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Allocating bottom up may allow many short LRGs to be assigned first
// to one of the cheap registers. This could be much faster for very
// large blocks on targets with many physical registers.
- Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
+ Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
}
- }
- else {
+ Prio |= RC.AllocationPriority << 24;
+ } else {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
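The new `Prio |= RC.AllocationPriority << 24;` packs the register class's allocation priority into the top byte of the 32-bit queue key, so harder-to-allocate classes are dequeued first while the instruction-distance heuristic still orders ranges within a class. A minimal sketch of the packing (names hypothetical):

    #include <cassert>
    #include <cstdint>

    // Priority occupies the top byte and dominates; distance breaks ties.
    static uint32_t makeKey(uint32_t Distance, uint8_t ClassPrio) {
      return (Distance & 0x00FFFFFFu) | (uint32_t(ClassPrio) << 24);
    }

    int main() {
      assert(makeKey(1000, 2) > makeKey(50000, 1)); // class priority wins
      assert(makeKey(60, 1) > makeKey(50, 1));      // distance breaks ties
      return 0;
    }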
@@ -1554,7 +1555,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
- const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(CurRC, *MF);
unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
// Split around every non-copy instruction if this split will relax
// the constraints on the virtual register.
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index eb7e563..eeff73d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -126,7 +126,12 @@ private:
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
/// \brief Constructs an initial graph.
- void initializeGraph(PBQPRAGraph &G);
+ void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
+
+ /// \brief Spill the given VReg.
+ void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ Spiller &VRegSpiller);
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
@@ -172,11 +177,41 @@ public:
class Interference : public PBQPRAConstraint {
private:
-private:
-
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
- typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
+ typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef DenseSet<IKey> DisjointAllowedRegsCache;
+ typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
+ typedef DenseSet<IEdgeKey> IEdgeCache;
+
+ bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ const DisjointAllowedRegsCache &D) const {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ if (NRegs == MRegs)
+ return false;
+
+ if (NRegs < MRegs)
+ return D.count(IKey(NRegs, MRegs)) > 0;
+
+ return D.count(IKey(MRegs, NRegs)) > 0;
+ }
+
+ void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ DisjointAllowedRegsCache &D) {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ assert(NRegs != MRegs && "AllowedRegs cannot be disjoint with itself");
+
+ if (NRegs < MRegs)
+ D.insert(IKey(NRegs, MRegs));
+ else
+ D.insert(IKey(MRegs, NRegs));
+ }
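Both helpers key their caches on an unordered pair of pointers, canonicalized by pointer order so that an (A, B) query and a (B, A) query hit the same entry. The pattern in isolation, as a compilable sketch:

    #include <cassert>
    #include <set>
    #include <utility>

    typedef std::pair<const void *, const void *> IKey;

    // Store the pair as (lo, hi), mirroring the ordering done above.
    static IKey canonicalKey(const void *A, const void *B) {
      return A < B ? IKey(A, B) : IKey(B, A);
    }

    int main() {
      int X, Y;
      std::set<IKey> Cache;
      Cache.insert(canonicalKey(&X, &Y));
      assert(Cache.count(canonicalKey(&Y, &X)) == 1); // order-insensitive hit
      return 0;
    }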
// Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
// for the fast interference graph construction algorithm. The last is there
@@ -244,6 +279,13 @@ public:
// and uniquing them.
IMatrixCache C;
+ // Finding an edge is expensive in the worst case (O(max_clique(G))), so
+ // locally cache the edges we have already seen.
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed-register pairs.
+ DisjointAllowedRegsCache D;
+
typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
decltype(&lowestStartPoint)> IntervalQueue;
@@ -287,14 +329,21 @@ public:
for (const auto &A : Active) {
PBQP::GraphBase::NodeId MId = getNodeId(A);
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
// Check that we haven't already added this edge
- // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
- // It might be better to replace this with a local bit-matrix.
- if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
continue;
// This is a new edge - add it to the graph.
- createInterferenceEdge(G, NId, MId, C);
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
}
// Finally, add Cur to the Active set.
@@ -304,35 +353,48 @@ public:
private:
- void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
- PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+ // Create an interference edge and add it to the graph, unless it is
+ // a null matrix, meaning the nodes' allowed registers do not have any
+ // interference. This case occurs frequently between integer and floating
+ // point registers, for example.
+ // Returns true iff the two nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
-
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
// Try looking the edge costs up in the IMatrixCache first.
- IMatrixKey K(&NRegs, &MRegs);
+ IKey K(&NRegs, &MRegs);
IMatrixCache::iterator I = C.find(K);
if (I != C.end()) {
G.addEdgeBypassingCostAllocator(NId, MId, I->second);
- return;
+ return true;
}
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
for (unsigned I = 0; I != NRegs.size(); ++I) {
unsigned PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
unsigned PRegM = MRegs[J];
- if (TRI.regsOverlap(PRegN, PRegM))
+ if (TRI.regsOverlap(PRegN, PRegM)) {
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
}
}
+ if (!NodesInterfere)
+ return false;
+
PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
}
};
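createInterferenceEdge builds a (|NRegs|+1) x (|MRegs|+1) cost matrix whose extra row and column 0 stand for the spill option, writes infinity wherever two candidate physregs overlap, and reports whether any entry became infinite. A toy standalone version with a made-up overlap relation:

    #include <limits>
    #include <vector>

    int main() {
      const double Inf = std::numeric_limits<double>::infinity();
      std::vector<unsigned> NRegs = {1, 3}, MRegs = {2, 4};
      // Stand-in for TRI.regsOverlap(): only regs 1 and 2 alias here.
      auto Overlap = [](unsigned A, unsigned B) {
        return (A == 1 && B == 2) || (A == 2 && B == 1);
      };
      std::vector<std::vector<double>> M(
          NRegs.size() + 1, std::vector<double>(MRegs.size() + 1, 0.0));
      bool NodesInterfere = false;
      for (unsigned I = 0; I != NRegs.size(); ++I)
        for (unsigned J = 0; J != MRegs.size(); ++J)
          if (Overlap(NRegs[I], MRegs[J])) {
            M[I + 1][J + 1] = Inf;   // forbid assigning aliasing regs together
            NodesInterfere = true;
          }
      // Here only M[1][1] is infinite, so the nodes interfere and the edge
      // is worth adding; an all-zero matrix would be skipped entirely.
      return NodesInterfere ? 0 : 1;
    }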
@@ -342,7 +404,7 @@ public:
void apply(PBQPRAGraph &G) override {
MachineFunction &MF = G.getMetadata().MF;
MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
- CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
// Scan the machine function and add a coalescing cost whenever CoalescerPair
// gives the Ok.
@@ -398,7 +460,7 @@ public:
}
PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
- G.setEdgeCosts(EId, std::move(Costs));
+ G.updateEdgeCosts(EId, std::move(Costs));
}
}
}
@@ -488,15 +550,21 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
return false;
}
-void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIS = G.getMetadata().LIS;
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ while (!Worklist.empty()) {
+ unsigned VReg = Worklist.back();
+ Worklist.pop_back();
- for (auto VReg : VRegsToAlloc) {
const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -531,6 +599,15 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
VRegAllowed.push_back(PReg);
}
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ continue;
+ }
+
PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
// Tweak cost of callee saved registers, as using them forces spilling and
@@ -547,14 +624,40 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
}
}
+void RegAllocPBQP::spillVReg(unsigned VReg,
+ SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
+ I != E; ++I) {
+ const LiveInterval &LI = LIS.getInterval(*I);
+ assert(!LI.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+}
+
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
const PBQP::Solution &Solution,
VirtRegMap &VRM,
Spiller &VRegSpiller) {
MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIS = G.getMetadata().LIS;
- const TargetRegisterInfo &TRI =
- *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
// Set to true if we have any spills
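The `(void)TRI;` above exists because TRI is only referenced inside DEBUG(), which compiles away in release builds; the cast keeps unused-variable warnings quiet. The idiom in isolation:

    #include <cassert>

    int main() {
      int OnlyUsedWhenAsserting = 42;
      (void)OnlyUsedWhenAsserting; // silences -Wunused-variable under NDEBUG
      assert(OnlyUsedWhenAsserting == 42);
      return 0;
    }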
@@ -576,28 +679,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
assert(PReg != 0 && "Invalid preg selected.");
VRM.assignVirt2Phys(VReg, PReg);
} else {
- VRegsToAlloc.erase(VReg);
- SmallVector<unsigned, 8> NewSpills;
- LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM);
- VRegSpiller.spill(LRE);
-
- DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
- << LRE.getParent().weight << ", New vregs: ");
-
- // Copy any newly inserted live intervals into the list of regs to
- // allocate.
- for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
- I != E; ++I) {
- LiveInterval &LI = LIS.getInterval(*I);
- assert(!LI.empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
- VRegsToAlloc.insert(LI.reg);
- }
-
- DEBUG(dbgs() << ")\n");
-
- // We need another round if spill intervals were added.
- AnotherRoundNeeded |= !LRE.empty();
+ // Spill VReg. If this introduces new intervals we'll need another round
+ // of allocation.
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ AnotherRoundNeeded |= !NewVRegs.empty();
}
}
@@ -670,7 +756,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// If there are non-empty intervals allocate them using pbqp.
if (!VRegsToAlloc.empty()) {
- const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl();
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
llvm::make_unique<PBQPRAConstraintList>();
ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
@@ -686,7 +772,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
- initializeGraph(G);
+ initializeGraph(G, VRM, *VRegSpiller);
ConstraintsRoot->apply(G);
#ifndef NDEBUG
@@ -699,7 +785,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
<< GraphFileName << "\"\n");
- G.dumpToStream(OS);
+ G.dump(OS);
}
#endif
@@ -719,6 +805,79 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+namespace {
+// A helper class for printing node and register info in a consistent way
+class PrintNodeInfo {
+public:
+ typedef PBQP::RegAlloc::PBQPRAGraph Graph;
+ typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId;
+
+ PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {}
+
+ void print(raw_ostream &OS) const {
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
+ OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
+ }
+
+private:
+ const Graph &G;
+ NodeId NId;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) {
+ PR.print(OS);
+ return OS;
+}
+} // anonymous namespace
+
+void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+ for (auto NId : nodeIds()) {
+ const Vector &Costs = getNodeCosts(NId);
+ assert(Costs.getLength() != 0 && "Empty vector in graph.");
+ OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
+ }
+ OS << '\n';
+
+ for (auto EId : edgeIds()) {
+ NodeId N1Id = getEdgeNode1Id(EId);
+ NodeId N2Id = getEdgeNode2Id(EId);
+ assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
+ const Matrix &M = getEdgeCosts(EId);
+ assert(M.getRows() != 0 && "No rows in matrix.");
+ assert(M.getCols() != 0 && "No cols in matrix.");
+ OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
+ OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
+ OS << M << '\n';
+ }
+}
+
+void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+
+void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
+ OS << "graph {\n";
+ for (auto NId : nodeIds()) {
+ OS << " node" << NId << " [ label=\""
+ << PrintNodeInfo(NId, *this) << "\\n"
+ << getNodeCosts(NId) << "\" ]\n";
+ }
+
+ OS << " edge [ len=" << nodeIds().size() << " ]\n";
+ for (auto EId : edgeIds()) {
+ OS << " node" << getEdgeNode1Id(EId)
+ << " -- node" << getEdgeNode2Id(EId)
+ << " [ label=\"";
+ const Matrix &EdgeCosts = getEdgeCosts(EId);
+ for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
+ OS << EdgeCosts.getRowAsVector(i) << "\\n";
+ }
+ OS << "\" ]\n";
+ }
+ OS << "}\n";
+}
+
FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
return new RegAllocPBQP(customPassID);
}
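For reference, printDot emits an undirected Graphviz graph: one `nodeN [ label=... ]` line per PBQP node, an `edge [ len=... ]` default, and one `nodeA -- nodeB` line per cost matrix. A tiny program producing the same shape of output for a made-up two-node graph (all labels illustrative, not the actual PBQP cost formatting):

    #include <cstdio>

    int main() {
      std::printf("graph {\n");
      std::printf("  node0 [ label=\"0 (RC:vreg0)\\n<node cost vector>\" ]\n");
      std::printf("  node1 [ label=\"1 (RC:vreg1)\\n<node cost vector>\" ]\n");
      std::printf("  edge [ len=2 ]\n");
      std::printf("  node0 -- node1 [ label=\"<matrix row 0>\\n<matrix row 1>\\n\" ]\n");
      std::printf("}\n");
      return 0;
    }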
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index ab33672..178fa18 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.NumRegs = StressRA;
// Check if RC is a proper sub-class.
- if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (const TargetRegisterClass *Super =
+ TRI->getLargestLegalSuperClass(RC, *MF))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
@@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
}
compute(RC);
unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
- return TRI->getRegPressureSetLimit(Idx)
- - TRI->getRegClassWeight(RC).RegWeight * NReserved;
+ return TRI->getRegPressureSetLimit(*MF, Idx) -
+ TRI->getRegClassWeight(RC).RegWeight * NReserved;
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index b8cae4a..ac7d98f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -58,12 +58,16 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
-// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(false), cl::Hidden);
+
+/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
-// Temporary flag to test global copy optimization.
+/// Temporary flag to test global copy optimization.
static cl::opt<cl::boolOrDefault>
EnableGlobalCopies("join-globalcopies",
cl::desc("Coalesce copies that span blocks (default=subtarget)"),
@@ -120,7 +124,7 @@ namespace {
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
- /// LiveRangeEdit callback.
+ /// LiveRangeEdit callback for eliminateDeadDefs().
void LRE_WillEraseInstruction(MachineInstr *MI) override;
/// Coalesce the LocalWorkList.
@@ -133,16 +137,15 @@ namespace {
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// Try to coalesce all copies in CurrList. Return
- /// true if any progress was made.
+ /// Tries to coalesce all copies in CurrList. Returns true if any progress
+ /// was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
- /// Attempt to join intervals corresponding to SrcReg/DstReg,
- /// which are the src/dst of the copy instruction CopyMI. This returns
- /// true if the copy was successfully coalesced away. If it is not
- /// currently possible to coalesce this interval, but it may be possible if
- /// other things get coalesced, then it returns true by reference in
- /// 'Again'.
+ /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
+ /// src/dst of the copy instruction CopyMI. This returns true if the copy
+ /// was successfully coalesced away. If it is not currently possible to
+ /// coalesce this interval, but it may be possible if other things get
+ /// coalesced, then it returns true by reference in 'Again'.
bool joinCopy(MachineInstr *TheCopy, bool &Again);
/// Attempt to join these two intervals. On failure, this
@@ -161,18 +164,20 @@ namespace {
/// LaneMask are split as necessary. @p LaneMask are the lanes that
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
- void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ /// @returns false if live range conflicts couldn't get resolved.
+ bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
- void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ /// @returns false if live range conflicts couldn't get resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask, const CoalescerPair &CP);
- /// We found a non-trivially-coalescable copy. If
- /// the source value number is defined by a copy from the destination reg
- /// see if we can merge these two destination reg valno# into a single
- /// value number, eliminating a copy.
+ /// We found a non-trivially-coalescable copy. If the source value number is
+ /// defined by a copy from the destination reg see if we can merge these two
+ /// destination reg valno# into a single value number, eliminating a copy.
+ /// This returns true if an interval was modified.
bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
/// Return true if there are definitions of IntB
@@ -184,28 +189,69 @@ namespace {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
+ /// This returns true if an interval was modified.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
/// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
- bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
+ bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
bool &IsDefCopy);
- /// Return true if a physreg copy should be joined.
+ /// Return true if a copy involving a physreg should be joined.
bool canJoinPhys(const CoalescerPair &CP);
- /// Replace all defs and uses of SrcReg to DstReg and
- /// update the subregister number if it is not zero. If DstReg is a
- /// physical register and the existing subregister number of the def / use
- /// being updated is not zero, make sure to set it to the correct physical
- /// subregister.
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
/// Handle copies of undef values.
+ /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists of checking that at least one of the
+ /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst cannot both be coalesced with Src.
+ /// Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
+ /// Check whether or not \p LI is composed of multiple connected
+ /// components and, if that is the case, fix that.
+ void splitNewRanges(LiveInterval *LI) {
+ ConnectedVNInfoEqClasses ConEQ(*LIS);
+ unsigned NumComps = ConEQ.Classify(LI);
+ if (NumComps <= 1)
+ return;
+ SmallVector<LiveInterval*, 8> NewComps(1, LI);
+ for (unsigned i = 1; i != NumComps; ++i) {
+ unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
+ NewComps.push_back(&LIS->createEmptyInterval(VReg));
+ }
+
+ ConEQ.Distribute(&NewComps[0], *MRI);
+ }
+
+ /// Wrapper method for \see LiveIntervals::shrinkToUses.
+ /// This method does the proper fixing of the live-ranges when the
+ /// aforementioned method returns true.
+ void shrinkToUses(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ if (LIS->shrinkToUses(LI, Dead))
+ // We may have created multiple connected components, split them.
+ splitNewRanges(LI);
+ }
+
public:
- static char ID; // Class identification, replacement for typeinfo
+ static char ID; ///< Class identification, replacement for typeinfo
RegisterCoalescer() : MachineFunctionPass(ID) {
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
}
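splitNewRanges above relies on ConnectedVNInfoEqClasses to detect when shrinking has left a live interval in several unconnected pieces, each of which must get its own vreg. The classification is an equivalence-class computation; a hedged union-find sketch of the idea, with all names hypothetical:

    #include <numeric>
    #include <vector>

    // Tiny union-find over value numbers: values joined by live segments end
    // up in one class, and more than one class means the interval must split.
    struct EqClasses {
      std::vector<unsigned> Parent;
      explicit EqClasses(unsigned N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0u);
      }
      unsigned find(unsigned X) {
        while (Parent[X] != X)
          X = Parent[X] = Parent[Parent[X]]; // path halving
        return X;
      }
      void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
      unsigned numClasses() {
        unsigned N = 0;
        for (unsigned I = 0; I != Parent.size(); ++I)
          N += (find(I) == I);
        return N;
      }
    };

    int main() {
      EqClasses EQ(3);  // three value numbers
      EQ.join(0, 1);    // connected by overlapping liveness
      return EQ.numClasses() == 2 ? 0 : 1; // {0,1} and {2}: split in two vregs
    }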
@@ -220,7 +266,7 @@ namespace {
/// Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
-} /// end anonymous namespace
+} // end anonymous namespace
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
@@ -254,11 +300,11 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
return true;
}
-// Return true if this block should be vacated by the coalescer to eliminate
-// branches. The important cases to handle in the coalescer are critical edges
-// split during phi elimination which contain only copies. Simple blocks that
-// contain non-branches should also be vacated, but this can be handled by an
-// earlier pass similar to early if-conversion.
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
static bool isSplitEdge(const MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
return false;
@@ -423,27 +469,11 @@ void RegisterCoalescer::eliminateDeadDefs() {
nullptr, this).eliminateDeadDefs(DeadDefs);
}
-// Callback from eliminateDeadDefs().
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
// MI may be in WorkList. Make sure we don't visit it.
ErasedInstrs.insert(MI);
}
-/// We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy. For example:
-///
-/// A3 = B0
-/// ...
-/// B1 = A3 <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-///
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
MachineInstr *CopyMI) {
assert(!CP.isPartial() && "This doesn't work for partial copies.");
@@ -455,6 +485,20 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ // We have a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a copy from B, see if we can
+ // merge these two pieces of B into a single value number, eliminating a copy.
+ // For example:
+ //
+ // A3 = B0
+ // ...
+ // B1 = A3 <- this copy
+ //
+ // In this case, B0 can be extended to where the B1 copy lives, allowing the
+ // B1 value number to be replaced with B0 (which simplifies the B
+ // liveinterval).
+
// BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
@@ -538,14 +582,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// will also add the isKill marker.
CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
if (AS->end == CopyIdx)
- LIS->shrinkToUses(&IntA);
+ shrinkToUses(&IntA);
++numExtends;
return true;
}
-/// Return true if there are definitions of IntB
-/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
VNInfo *AValNo,
@@ -585,29 +627,6 @@ static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
}
}
-/// We found a non-trivially-coalescable copy with
-/// IntA being the source and IntB being the dest, thus this defines a value
-/// number in IntB. If the source value number (in IntA) is defined by a
-/// commutable instruction and its other operand is coalesced to the copy dest
-/// register, see if we can transform the copy into a noop by commuting the
-/// definition. For example,
-///
-/// A3 = op A2 B0<kill>
-/// ...
-/// B1 = A3 <- this copy
-/// ...
-/// = op A3 <- more uses
-///
-/// ==>
-///
-/// B2 = op B0 A2<kill>
-/// ...
-/// B1 = B2 <- now an identity copy
-/// ...
-/// = op B2 <- more uses
-///
-/// This returns true if an interval was modified.
-///
bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *CopyMI) {
assert(!CP.isPhys());
@@ -617,6 +636,26 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ // We found a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a commutable instruction and
+ // its other operand is coalesced to the copy dest register, see if we can
+ // transform the copy into a noop by commuting the definition. For example,
+ //
+ // A3 = op A2 B0<kill>
+ // ...
+ // B1 = A3 <- this copy
+ // ...
+ // = op A3 <- more uses
+ //
+ // ==>
+ //
+ // B2 = op B0 A2<kill>
+ // ...
+ // B1 = B2 <- now an identity copy
+ // ...
+ // = op B2 <- more uses
+
// BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
@@ -745,15 +784,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
assert(DVNI->def == DefIdx);
- BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
for (LiveInterval::SubRange &S : IntB.subranges()) {
VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
if (!SubDVNI)
continue;
VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
assert(SubBValNo->def == CopyIdx);
- VNInfo *Merged = S.MergeValueNumberInto(SubBValNo, SubDVNI);
- Merged->def = CopyIdx;
+ S.MergeValueNumberInto(SubDVNI, SubBValNo);
}
ErasedInstrs.insert(UseMI);
@@ -809,7 +847,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
}
- SA.removeValNo(ASubValNo);
}
}
@@ -817,25 +854,30 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
- IntA.removeValNo(AValNo);
- // Remove valuenos in subranges (the A+B have subranges case has already been
- // handled above)
- if (!IntB.hasSubRanges()) {
- SlotIndex AIdx = CopyIdx.getRegSlot(true);
- for (LiveInterval::SubRange &SA : IntA.subranges()) {
- VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
- assert(ASubValNo != nullptr);
- SA.removeValNo(ASubValNo);
- }
- }
+ LIS->removeVRegDefAt(IntA, AValNo->def);
+
DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
return true;
}
-/// If the source of a copy is defined by a trivial
-/// computation, replace the copy by rematerialize the definition.
-bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
+/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
+/// defining a subregister.
+static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "This code cannot handle physreg aliasing");
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ // Return true if we define the full register or don't care about the value
+ // inside other subregisters.
+ if (Op.getSubReg() == 0 || Op.isUndef())
+ return true;
+ }
+ return false;
+}
+
+bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineInstr *CopyMI,
bool &IsDefCopy) {
IsDefCopy = false;
@@ -863,8 +905,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
+ if (!definesFullReg(*DefMI, SrcReg))
+ return false;
bool SawStore = false;
- if (!DefMI->isSafeToMove(TII, AA, SawStore))
+ if (!DefMI->isSafeToMove(AA, SawStore))
return false;
const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
@@ -911,6 +955,28 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI);
MachineInstr *NewMI = std::prev(MII);
+ // In a situation like the following:
+ // %vreg0:subreg = instr ; DefMI, subreg = DstIdx
+ // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
+ // instead of widening %vreg1 to the register class of %vreg0 simply do:
+ // %vreg1 = instr
+ const TargetRegisterClass *NewRC = CP.getNewRC();
+ if (DstIdx != 0) {
+ MachineOperand &DefMO = NewMI->getOperand(0);
+ if (DefMO.getSubReg() == DstIdx) {
+ assert(SrcIdx == 0 && CP.isFlipped()
+ && "Shouldn't have SrcIdx+DstIdx at this point");
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ const TargetRegisterClass *CommonRC =
+ TRI->getCommonSubClass(DefRC, DstRC);
+ if (CommonRC != nullptr) {
+ NewRC = CommonRC;
+ DstIdx = 0;
+ DefMO.setSubReg(0);
+ }
+ }
+ }
+
LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
CopyMI->eraseFromParent();
ErasedInstrs.insert(CopyMI);
@@ -922,23 +988,23 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
for (unsigned i = NewMI->getDesc().getNumOperands(),
e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
- if (MO.isReg()) {
- assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ if (MO.isReg() && MO.isDef()) {
+ assert(MO.isImplicit() && MO.isDead() &&
TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
NewMIImplDefs.push_back(MO.getReg());
}
}
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- const TargetRegisterClass *NewRC = CP.getNewRC();
unsigned NewIdx = NewMI->getOperand(0).getSubReg();
- if (NewIdx)
- NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
- else
- NewRC = TRI->getCommonSubClass(NewRC, DefRC);
-
- assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ if (DefRC != nullptr) {
+ if (NewIdx)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
+ else
+ NewRC = TRI->getCommonSubClass(NewRC, DefRC);
+ assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ }
MRI->setRegClass(DstReg, NewRC);
updateRegDefsUses(DstReg, DstReg, DstIdx);
@@ -1005,7 +1071,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
++NumReMats;
// The source interval can become smaller because we removed a use.
- LIS->shrinkToUses(&SrcInt, &DeadDefs);
+ shrinkToUses(&SrcInt, &DeadDefs);
if (!DeadDefs.empty()) {
// If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
// to describe DstReg instead.
@@ -1022,21 +1088,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
return true;
}
-static void removeUndefValue(LiveRange &LR, SlotIndex At)
-{
- VNInfo *VNInfo = LR.getVNInfoAt(At);
- assert(VNInfo != nullptr && SlotIndex::isSameInstr(VNInfo->def, At));
- LR.removeValNo(VNInfo);
-}
-
-/// ProcessImpicitDefs may leave some copies of <undef>
-/// values, it only removes local variables. When we have a copy like:
-///
-/// %vreg1 = COPY %vreg2<undef>
-///
-/// We delete the copy and remove the corresponding value number from %vreg1.
-/// Any uses of that value number are marked as <undef>.
bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only
+ // removes local variables. When we have a copy like:
+ //
+ // %vreg1 = COPY %vreg2<undef>
+ //
+ // We delete the copy and remove the corresponding value number from %vreg1.
+ // Any uses of that value number are marked as <undef>.
+
// Note that we do not query CoalescerPair here but redo isMoveInstr as the
// CoalescerPair may have a new register class with adjusted subreg indices
// at this point.
@@ -1061,22 +1121,25 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// Remove any DstReg segments starting at the instruction.
LiveInterval &DstLI = LIS->getInterval(DstReg);
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
SlotIndex RegIndex = Idx.getRegSlot();
- for (LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & DstMask) == 0)
- continue;
- removeUndefValue(SR, RegIndex);
-
- DstLI.removeEmptySubRanges();
- }
// Remove value or merge with previous one in case of a subregister def.
if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
- VNInfo *VNInfo = DstLI.getVNInfoAt(RegIndex);
- DstLI.MergeValueNumberInto(VNInfo, PrevVNI);
- } else {
- removeUndefValue(DstLI, RegIndex);
- }
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
+
+ // The affected subregister segments can be removed.
+ unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask) == 0)
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
// Mark uses as undef.
for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
@@ -1106,10 +1169,6 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
return true;
}
-/// Replace all defs and uses of SrcReg to DstReg and update the subregister
-/// number if it is not zero. If DstReg is a physical register and the existing
-/// subregister number of the def / use being updated is not zero, make sure to
-/// set it to the correct physical subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
@@ -1151,7 +1210,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) {
+ if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
@@ -1198,29 +1257,23 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
}
-/// Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
- /// Always join simple intervals that are defined by a single copy from a
- /// reserved register. This doesn't increase register pressure, so it is
- /// always beneficial.
+ // Always join simple intervals that are defined by a single copy from a
+ // reserved register. This doesn't increase register pressure, so it is
+ // always beneficial.
if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
- if (CP.isFlipped() && JoinVInt.containsOneValue())
+ if (JoinVInt.containsOneValue())
return true;
- DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
return false;
}
-/// Attempt to join intervals corresponding to SrcReg/DstReg,
-/// which are the src/dst of the copy instruction CopyMI. This returns true
-/// if the copy was successfully coalesced away. If it is not currently
-/// possible to coalesce this interval, but it may be possible if other
-/// things get coalesced, then it returns true by reference in 'Again'.
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
@@ -1399,7 +1452,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
if (ShrinkMainRange) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
- LIS->shrinkToUses(&LI);
+ shrinkToUses(&LI);
}
// SrcReg is guaranteed to be the register whose live interval that is
@@ -1407,7 +1460,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
LIS->removeInterval(CP.getSrcReg());
// Update regalloc hint.
- TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+ TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
@@ -1424,24 +1477,23 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return true;
}
-/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ unsigned DstReg = CP.getDstReg();
assert(CP.isPhys() && "Must be a physreg copy");
- assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(DstReg) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
- assert(CP.isFlipped() && RHS.containsOneValue() &&
- "Invalid join with reserved register");
+ assert(RHS.containsOneValue() && "Invalid join with reserved register");
// Optimization for reserved registers like ESP. We can only merge with a
- // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
+ // reserved physreg if RHS has a single value that is a copy of DstReg.
// The live range of the reserved register will look like a set of dead defs
// - we don't properly track the live range of reserved registers.
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
- for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
if (RHS.overlaps(LIS->getRegUnit(*UI))) {
DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
return false;
@@ -1453,7 +1505,46 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// defs are there.
// Delete the identity copy.
- MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ MachineInstr *CopyMI;
+ if (CP.isFlipped()) {
+ CopyMI = MRI->getVRegDef(RHS.reg);
+ } else {
+ if (!MRI->hasOneNonDBGUse(RHS.reg)) {
+ DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
+ return false;
+ }
+
+ MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
+ const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+
+ // We checked above that there are no interfering defs of the physical
+ // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
+ return false;
+ }
+ }
+
+ // We're going to remove the copy which defines a physical reserved
+ // register, so remove its valno, etc.
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at "
+ << CopyRegIdx << "\n");
+
+ LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+ // Create a new dead def at the new def location.
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ LiveRange &LR = LIS->getRegUnit(*UI);
+ LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
+ }
+ }
+
LIS->RemoveMachineInstrFromMaps(CopyMI);
CopyMI->eraseFromParent();
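
The new non-flipped branch hoists the physreg def from the copy up to the vreg def, so it must prove that no instruction in between reads the physical register. A simplified standalone model of that slot walk, with integer indices and a caller-supplied reads() predicate in place of SlotIndexes and readsRegister():

#include <functional>

// True when no instruction strictly between DefIdx and CopyIdx reads
// PhysReg, i.e. the dead def of PhysReg may be recreated at DefIdx.
static bool canHoistPhysRegDef(
    unsigned DefIdx, unsigned CopyIdx, unsigned PhysReg,
    const std::function<bool(unsigned, unsigned)> &reads) {
  for (unsigned SI = DefIdx + 1; SI != CopyIdx; ++SI)
    if (reads(SI, PhysReg)) // an interfering read blocks the transform
      return false;
  return true;
}
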
@@ -1538,11 +1629,12 @@ class JoinVals {
/// (Main) register we work on.
const unsigned Reg;
- // Reg (and therefore the values in this liverange) will end up as subregister
- // SubIdx in the coalesced register. Either CP.DstIdx or CP.SrcIdx.
+ /// Reg (and therefore the values in this liverange) will end up as
+ /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
+ /// CP.SrcIdx.
const unsigned SubIdx;
- // The LaneMask that this liverange will occupy the coalesced register. May be
- // smaller than the lanemask produced by SubIdx when merging subranges.
+ /// The LaneMask that this liverange will occupy in the coalesced register.
+ /// May be smaller than the lanemask produced by SubIdx when merging subranges.
const unsigned LaneMask;
/// This is true when joining sub register ranges, false when joining main
@@ -1551,7 +1643,7 @@ class JoinVals {
/// Whether the current LiveInterval tracks subregister liveness.
const bool TrackSubRegLiveness;
- // Values that will be present in the final live range.
+ /// Values that will be present in the final live range.
SmallVectorImpl<VNInfo*> &NewVNInfo;
const CoalescerPair &CP;
@@ -1559,75 +1651,75 @@ class JoinVals {
SlotIndexes *Indexes;
const TargetRegisterInfo *TRI;
- // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
- // This is suitable for passing to LiveInterval::join().
+ /// Value number assignments. Maps value numbers in LI to entries in
+ /// NewVNInfo. This is suitable for passing to LiveInterval::join().
SmallVector<int, 8> Assignments;
- // Conflict resolution for overlapping values.
+ /// Conflict resolution for overlapping values.
enum ConflictResolution {
- // No overlap, simply keep this value.
+ /// No overlap, simply keep this value.
CR_Keep,
- // Merge this value into OtherVNI and erase the defining instruction.
- // Used for IMPLICIT_DEF, coalescable copies, and copies from external
- // values.
+ /// Merge this value into OtherVNI and erase the defining instruction.
+ /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ /// values.
CR_Erase,
- // Merge this value into OtherVNI but keep the defining instruction.
- // This is for the special case where OtherVNI is defined by the same
- // instruction.
+ /// Merge this value into OtherVNI but keep the defining instruction.
+ /// This is for the special case where OtherVNI is defined by the same
+ /// instruction.
CR_Merge,
- // Keep this value, and have it replace OtherVNI where possible. This
- // complicates value mapping since OtherVNI maps to two different values
- // before and after this def.
- // Used when clobbering undefined or dead lanes.
+ /// Keep this value, and have it replace OtherVNI where possible. This
+ /// complicates value mapping since OtherVNI maps to two different values
+ /// before and after this def.
+ /// Used when clobbering undefined or dead lanes.
CR_Replace,
- // Unresolved conflict. Visit later when all values have been mapped.
+ /// Unresolved conflict. Visit later when all values have been mapped.
CR_Unresolved,
- // Unresolvable conflict. Abort the join.
+ /// Unresolvable conflict. Abort the join.
CR_Impossible
};
- // Per-value info for LI. The lane bit masks are all relative to the final
- // joined register, so they can be compared directly between SrcReg and
- // DstReg.
+ /// Per-value info for LI. The lane bit masks are all relative to the final
+ /// joined register, so they can be compared directly between SrcReg and
+ /// DstReg.
struct Val {
ConflictResolution Resolution;
- // Lanes written by this def, 0 for unanalyzed values.
+ /// Lanes written by this def, 0 for unanalyzed values.
unsigned WriteLanes;
- // Lanes with defined values in this register. Other lanes are undef and
- // safe to clobber.
+ /// Lanes with defined values in this register. Other lanes are undef and
+ /// safe to clobber.
unsigned ValidLanes;
- // Value in LI being redefined by this def.
+ /// Value in LI being redefined by this def.
VNInfo *RedefVNI;
- // Value in the other live range that overlaps this def, if any.
+ /// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
- // Is this value an IMPLICIT_DEF that can be erased?
- //
- // IMPLICIT_DEF values should only exist at the end of a basic block that
- // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
- // safely erased if they are overlapping a live value in the other live
- // interval.
- //
- // Weird control flow graphs and incomplete PHI handling in
- // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
- // longer live ranges. Such IMPLICIT_DEF values should be treated like
- // normal values.
+ /// Is this value an IMPLICIT_DEF that can be erased?
+ ///
+ /// IMPLICIT_DEF values should only exist at the end of a basic block that
+ /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ /// safely erased if they are overlapping a live value in the other live
+ /// interval.
+ ///
+ /// Weird control flow graphs and incomplete PHI handling in
+ /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ /// longer live ranges. Such IMPLICIT_DEF values should be treated like
+ /// normal values.
bool ErasableImplicitDef;
- // True when the live range of this value will be pruned because of an
- // overlapping CR_Replace value in the other live range.
+ /// True when the live range of this value will be pruned because of an
+ /// overlapping CR_Replace value in the other live range.
bool Pruned;
- // True once Pruned above has been computed.
+ /// True once Pruned above has been computed.
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
@@ -1637,17 +1729,61 @@ class JoinVals {
bool isAnalyzed() const { return WriteLanes != 0; }
};
- // One entry per value number in LI.
+ /// One entry per value number in LI.
SmallVector<Val, 8> Vals;
+ /// Compute the bitmask of lanes actually written by DefMI.
+ /// Set Redef if there are any partial register definitions that depend on the
+ /// previous value of the register.
unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+
+ /// Find the ultimate value that VNI was copied from.
std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+
bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
+
+ /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+ /// Return a conflict resolution when possible, but leave the hard cases as
+ /// CR_Unresolved.
+ /// Recursively calls computeAssignment() on this and Other, guaranteeing that
+ /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+ /// The recursion always goes upwards in the dominator tree, making loops
+ /// impossible.
ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+
+ /// Compute the value assignment for ValNo in LR.
+ /// This may be called recursively by analyzeValue(), but never for a ValNo on
+ /// the stack.
void computeAssignment(unsigned ValNo, JoinVals &Other);
+
+ /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
+ /// the extent of the tainted lanes in the block.
+ ///
+ /// Multiple values in Other.LR can be affected since partial redefinitions
+ /// can preserve previously tainted lanes.
+ ///
+ /// 1 %dst = VLOAD <-- Define all lanes in %dst
+ /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+ /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+ /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+ ///
+ /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+ /// entry to TaintedVals.
+ ///
+ /// Returns false if the tainted lanes extend beyond the basic block.
bool taintExtent(unsigned, unsigned, JoinVals&,
SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+
+ /// Return true if MI uses any of the given Lanes from Reg.
+ /// This does not include partial redefinitions of Reg.
bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+
+ /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
+ /// be pruned:
+ ///
+ /// %dst = COPY %src
+ /// %src = COPY %dst <-- This value to be pruned.
+ /// %dst = COPY %src <-- This value is a copy of a pruned value.
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
@@ -1675,9 +1811,9 @@ public:
void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
bool changeInstrs);
- // Removes subranges starting at copies that get removed. This sometimes
- // happens when undefined subranges are copied around. These ranges contain
- // no usefull information and can be removed.
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
/// Erase any machine instructions that have been coalesced away.
@@ -1687,14 +1823,14 @@ public:
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs);
+ /// Remove liverange defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
};
} // end anonymous namespace
-/// Compute the bitmask of lanes actually written by DefMI.
-/// Set Redef if there are any partial register definitions that depend on the
-/// previous value of the register.
unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
unsigned L = 0;
@@ -1709,7 +1845,6 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
return L;
}
-/// Find the ultimate value that VNI was copied from.
std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
const VNInfo *VNI) const {
unsigned Reg = this->Reg;
@@ -1770,13 +1905,6 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
return Orig0->def == Orig1->def && Reg0 == Reg1;
}
-/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
-/// Return a conflict resolution when possible, but leave the hard cases as
-/// CR_Unresolved.
-/// Recursively calls computeAssignment() on this and Other, guaranteeing that
-/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
-/// The recursion always goes upwards in the dominator tree, making loops
-/// impossible.
JoinVals::ConflictResolution
JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
@@ -1798,7 +1926,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.ValidLanes = V.WriteLanes = 1;
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
} else {
bool Redef = false;
V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -1995,9 +2127,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Unresolved;
}
-/// Compute the value assignment for ValNo in RI.
-/// This may be called recursively by analyzeValue(), but never for a ValNo on
-/// the stack.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
if (V.isAnalyzed()) {
@@ -2051,21 +2180,6 @@ bool JoinVals::mapValues(JoinVals &Other) {
return true;
}
-/// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
-/// the extent of the tainted lanes in the block.
-///
-/// Multiple values in Other.LR can be affected since partial redefinitions can
-/// preserve previously tainted lanes.
-///
-/// 1 %dst = VLOAD <-- Define all lanes in %dst
-/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
-/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
-/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
-///
-/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
-/// entry to TaintedVals.
-///
-/// Returns false if the tainted lanes extend beyond the basic block.
bool JoinVals::
taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
@@ -2106,8 +2220,6 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
return true;
}
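
The VLOAD/BAR example from the header comment can be replayed with plain masks: a partial redef removes taint only from the lanes it writes, which is why taintExtent tracks per-value (EndIndex, TaintedLanes) pairs instead of a single flag. A standalone rendition with hypothetical lane constants:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SSub0 = 0x1, SSub1 = 0x2;
  uint32_t Tainted = SSub0 | SSub1; // 1: %dst = VLOAD defines all lanes
  Tainted &= ~SSub1;                // 3: %dst:ssub1 = BAR writes only ssub1
  assert(Tainted == SSub0);         // ssub0 stays tainted up to the COPY
  return 0;
}
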
-/// Return true if MI uses any of the given Lanes from Reg.
-/// This does not include partial redefinitions of Reg.
bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
unsigned Lanes) const {
if (MI->isDebugValue())
@@ -2189,13 +2301,6 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
return true;
}
-// Determine if ValNo is a copy of a value number in LR or Other.LR that will
-// be pruned:
-//
-// %dst = COPY %src
-// %src = COPY %dst <-- This value to be pruned.
-// %dst = COPY %src <-- This value is a copy of a pruned value.
-//
bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
Val &V = Vals[ValNo];
if (V.Pruned || V.PrunedComputed)
@@ -2308,25 +2413,39 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
LI.removeEmptySubRanges();
}
+void JoinVals::removeImplicitDefs() {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+ continue;
+
+ VNInfo *VNI = LR.getValNumInfo(i);
+ VNI->markUnused();
+ LR.removeValNo(VNI);
+ }
+}
+
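
removeImplicitDefs fires only on the narrow intersection of three conditions; everything else is left for eraseInstrs(). The filter, distilled into a standalone predicate over a hypothetical mirror of Val:

enum Resolution { CR_Keep, CR_Erase, CR_Merge, CR_Replace };

struct ValModel {
  Resolution R;
  bool ErasableImplicitDef;
  bool Pruned;
};

// Only a kept-but-pruned erasable IMPLICIT_DEF is deleted here; this is
// the inverse of the `continue` condition in the loop above.
static bool shouldRemoveImplicitDef(const ValModel &V) {
  return V.R == CR_Keep && V.ErasableImplicitDef && V.Pruned;
}
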
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
SlotIndex Def = LR.getValNumInfo(i)->def;
switch (Vals[i].Resolution) {
- case CR_Keep:
+ case CR_Keep: {
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
// longer. The IMPLICIT_DEF instructions are only inserted by
// PHIElimination to guarantee that all PHI predecessors have a value.
if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
- // Remove value number i from LR. Note that this VNInfo is still present
- // in NewVNInfo, so it will appear as an unused value number in the final
- // joined interval.
- LR.getValNumInfo(i)->markUnused();
- LR.removeValNo(LR.getValNumInfo(i));
+ // Remove value number i from LR.
+ VNInfo *VNI = LR.getValNumInfo(i);
+ LR.removeValNo(VNI);
+ // Note that this VNInfo is reused and still referenced in NewVNInfo, so
+ // make it appear like an unused value number.
+ VNI->markUnused();
DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n');
// FALL THROUGH.
+ }
case CR_Erase: {
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
@@ -2349,7 +2468,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
-void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask,
const CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
@@ -2358,13 +2477,20 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
NewVNInfo, CP, LIS, TRI, true, true);
- /// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
- /// Conflicts should already be resolved so the mapping/resolution should
- /// always succeed.
- if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
- if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
+ // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
+ // We should be able to resolve all conflicts here as we could successfully do
+ // it on the main range already. There is, however, a problem when multiple
+ // ranges get mapped to the "overflow" lane mask bit which creates unexpected
+ // interferences.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
+ if (!LHSVals.resolveConflicts(RHSVals) ||
+ !RHSVals.resolveConflicts(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
// The merging algorithm in LiveInterval::join() can't handle conflicting
// value mappings, so we need to remove any live ranges that overlap a
@@ -2374,6 +2500,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
LHSVals.pruneValues(RHSVals, EndPoints, false);
RHSVals.pruneValues(LHSVals, EndPoints, false);
+ LHSVals.removeImplicitDefs();
+ RHSVals.removeImplicitDefs();
+
LRange.verify();
RRange.verify();
@@ -2383,16 +2512,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
if (EndPoints.empty())
- return;
+ return true;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LRange << '\n');
LIS->extendToIndices(LRange, EndPoints);
+ return true;
}
-void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
@@ -2420,7 +2550,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
CommonRange = &R;
}
LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+ if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
+ return false;
LaneMask &= ~RMask;
}
@@ -2428,13 +2559,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
+ return true;
}
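
mergeSubRangeInto peels the incoming lane mask against each existing subrange until only never-seen lanes remain, and those get a fresh subrange. A standalone model of the peeling, with plain uint32_t masks and a hypothetical Range record:

#include <cstdint>
#include <vector>

struct Range { uint32_t LaneMask; };

// Returns the lanes left after removing every overlap with existing
// subranges; a non-zero result means a new subrange must be created
// (the createSubRangeFrom call above).
static uint32_t peelCommonLanes(const std::vector<Range> &SubRanges,
                                uint32_t LaneMask) {
  for (const Range &R : SubRanges) {
    uint32_t Common = R.LaneMask & LaneMask;
    if (Common == 0)
      continue;              // disjoint lane sets, nothing to merge
    LaneMask &= ~R.LaneMask; // these lanes are merged into R instead
  }
  return LaneMask;
}
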
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
- bool TrackSubRegLiveness = MRI->tracksSubRegLiveness();
+ bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
TRI, false, TrackSubRegLiveness);
JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
@@ -2478,22 +2610,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
+ bool Abort = false;
if (!RHS.hasSubRanges()) {
unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
+ Abort = true;
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
+ Abort = true;
+ break;
+ }
}
}
+ if (Abort) {
+ // This shouldn't have happened :-(
+ // However we are aware of at least one existing problem where we
+ // can't merge subranges when multiple ranges end up in the
+ // "overflow bit" 32. As a workaround we drop all subregister ranges
+ // which means we lose some precision but are back to a well-defined
+ // state.
+ assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+ && "SubRange merge should only fail when merging into bit 32.");
+ DEBUG(dbgs() << "\tSubrange join aborted!\n");
+ LHS.clearSubRanges();
+ RHS.clearSubRanges();
+ } else {
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
-
- LHSVals.pruneSubRegValues(LHS, ShrinkMask);
- RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
}
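
The workaround exists because lane masks are 32-bit: on targets with more distinct lanes than mask bits, the excess lanes all get folded into the shared high bit, so two unrelated subregisters can carry identical masks and appear to conflict. A small standalone illustration with hypothetical masks:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Overflow = 0x80000000u; // catch-all bit from the assert above
  uint32_t SubRegA = Overflow; // hypothetical lane mask of one subregister
  uint32_t SubRegB = Overflow; // an unrelated subregister sharing the bit
  // Distinct lanes look like an overlap, which is the "unexpected
  // interference" that forces clearSubRanges() above.
  assert((SubRegA & SubRegB) != 0);
  return 0;
}
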
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2510,7 +2660,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
while (!ShrinkRegs.empty())
- LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+ shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
// Join RHS into LHS.
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
@@ -2532,13 +2682,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
return true;
}
-/// Attempt to join these two intervals. On failure, this returns false.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
namespace {
-// Information concerning MBB coalescing priority.
+/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
MachineBasicBlock *MBB;
unsigned Depth;
@@ -2549,10 +2698,10 @@ struct MBBPriorityInfo {
};
}
-// C-style comparator that sorts first based on the loop depth of the basic
-// block (the unsigned), and then on the MBB number.
-//
-// EnableGlobalCopies assumes that the primary sort key is loop depth.
+/// C-style comparator that sorts first based on the loop depth of the basic
+/// block (the unsigned), and then on the MBB number.
+///
+/// EnableGlobalCopies assumes that the primary sort key is loop depth.
static int compareMBBPriority(const MBBPriorityInfo *LHS,
const MBBPriorityInfo *RHS) {
// Deeper loops first
@@ -2592,8 +2741,6 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
|| LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
-// Try joining WorkList copies starting from index From.
-// Null out any successful joins.
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
@@ -2615,6 +2762,64 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
return Progress;
}
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+ const MachineRegisterInfo *MRI) {
+ assert(Copy.isCopyLike());
+ // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+ if (&MI != &Copy && MI.isCopyLike())
+ return false;
+ return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+ assert(Copy.isCopyLike());
+ if (!UseTerminalRule)
+ return false;
+ unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+ isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+ // Check if the destination of this copy has any other affinity.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ // If SrcReg is a physical register, the copy won't be coalesced.
+ // Ignoring it may have other side effect (like missing
+ // rematerialization). So keep it.
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ !isTerminalReg(DstReg, Copy, MRI))
+ return false;
+
+ // DstReg is a terminal node. Check if it interferes with any other
+ // copy involving SrcReg.
+ const MachineBasicBlock *OrigBB = Copy.getParent();
+ const LiveInterval &DstLI = LIS->getInterval(DstReg);
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+ // Technically we should check if the weight of the new copy is
+ // interesting compared to the other one and update the weight
+ // of the copies accordingly. However, this would only work if
+ // we would gather all the copies first then coalesce, whereas
+ // right now we interleave both actions.
+ // For now, just consider the copies that are in the same block.
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+ continue;
+ unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+ isMoveInstr(*TRI, &MI, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg);
+ if (OtherReg == SrcReg)
+ OtherReg = OtherSrcReg;
+ // Check if OtherReg is a non-terminal.
+ if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ isTerminalReg(OtherReg, MI, MRI))
+ continue;
+ // Check whether OtherReg interferes with DstReg.
+ if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+ DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
DEBUG(dbgs() << MBB->getName() << ":\n");
@@ -2623,6 +2828,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// yet, it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
if (JoinGlobalCopies) {
+ SmallVector<MachineInstr*, 2> LocalTerminals;
+ SmallVector<MachineInstr*, 2> GlobalTerminals;
// Coalesce copies bottom-up to coalesce local defs before local uses. They
// are not inherently easier to resolve, but slightly preferable until we
// have local live range splitting. In particular this is required by
@@ -2631,17 +2838,35 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
MII != E; ++MII) {
if (!MII->isCopyLike())
continue;
- if (isLocalCopy(&(*MII), LIS))
- LocalWorkList.push_back(&(*MII));
- else
- WorkList.push_back(&(*MII));
+ bool ApplyTerminalRule = applyTerminalRule(*MII);
+ if (isLocalCopy(&(*MII), LIS)) {
+ if (ApplyTerminalRule)
+ LocalTerminals.push_back(&(*MII));
+ else
+ LocalWorkList.push_back(&(*MII));
+ } else {
+ if (ApplyTerminalRule)
+ GlobalTerminals.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
}
+ // Append the copies evicted by the terminal rule at the end of the list.
+ LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+ WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
}
else {
+ SmallVector<MachineInstr*, 2> Terminals;
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII)
- if (MII->isCopyLike())
- WorkList.push_back(MII);
+ if (MII->isCopyLike()) {
+ if (applyTerminalRule(*MII))
+ Terminals.push_back(&(*MII));
+ else
+ WorkList.push_back(MII);
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ WorkList.append(Terminals.begin(), Terminals.end());
}
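
Both branches implement the same stable partition: copies caught by the terminal rule are deferred to the back of their respective worklist rather than skipped. A standalone model with a hypothetical Copy handle and predicate:

#include <functional>
#include <vector>

using Copy = void *; // stand-in for MachineInstr*

// Keep ordinary copies in discovery order and append terminal-rule
// copies afterwards, so they are only attempted once the cheaper,
// non-interfering joins have been tried.
static void partitionWorkList(const std::vector<Copy> &Copies,
                              const std::function<bool(Copy)> &isTerminal,
                              std::vector<Copy> &WorkList) {
  std::vector<Copy> Terminals;
  for (Copy C : Copies)
    (isTerminal(C) ? Terminals : WorkList).push_back(C);
  WorkList.insert(WorkList.end(), Terminals.begin(), Terminals.end());
}
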
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
@@ -2704,15 +2929,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &fn.getRegInfo();
TM = &fn.getTarget();
- TRI = TM->getSubtargetImpl()->getRegisterInfo();
- TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetSubtargetInfo &STI = fn.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
-
- const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
- JoinGlobalCopies = ST.useMachineScheduler();
+ JoinGlobalCopies = STI.enableJoinGlobalCopies();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
@@ -2744,7 +2968,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = InflateRegs[i];
if (MRI->reg_nodbg_empty(Reg))
continue;
- if (MRI->recomputeRegClass(Reg, *TM)) {
+ if (MRI->recomputeRegClass(Reg)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
<< TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
LiveInterval &LI = LIS->getInterval(Reg);
@@ -2754,9 +2978,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// remove the subranges.
LI.clearSubRanges();
} else {
+#ifndef NDEBUG
// If subranges are still supported, then the same subregs should still
// be supported.
-#ifndef NDEBUG
for (LiveInterval::SubRange &S : LI.subranges()) {
assert ((S.LaneMask & ~MaxMask) == 0);
}
@@ -2772,7 +2996,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// Implement the dump method.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
LIS->print(O, m);
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 9925efb..667783e 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
}
+namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
///
@@ -354,6 +355,7 @@ protected:
}
}
};
+} // namespace
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
@@ -748,9 +750,11 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
///
/// This assumes that the current LiveOut set is sufficient.
///
-/// FIXME: This is expensive for an on-the-fly query. We need to cache the
-/// result per-SUnit with enough information to adjust for the current
-/// scheduling position. But this works as a proof of concept.
+/// This is expensive for an on-the-fly query because it calls
+/// bumpUpwardPressure to recompute the pressure sets based on current
+/// liveness. This mainly exists to verify correctness, e.g. with
+/// -verify-misched. getUpwardPressureDelta is the fast version of this query
+/// that uses the per-SUnit cache of the PressureDiff.
void RegPressureTracker::
getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
RegPressureDelta &Delta,
@@ -807,10 +811,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
#endif
}
-/// This is a prototype of the fast version of querying register pressure that
-/// does not directly depend on current liveness. It's still slow because we
-/// recompute pressure change on-the-fly. This implementation only exists to
-/// prove correctness.
+/// This is the fast version of querying register pressure that does not
+/// directly depend on current liveness.
///
/// @param Delta captures information needed for heuristics.
///
@@ -948,6 +950,11 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
/// register units of that pressure set introduced by this instruction.
///
/// This assumes that the current LiveIn set is sufficient.
+///
+/// This is expensive for an on-the-fly query because it calls
+/// bumpDownwardPressure to recompute the pressure sets based on current
+/// liveness. We don't yet have a fast version of downward pressure tracking
+/// analogous to getUpwardPressureDelta.
void RegPressureTracker::
getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
ArrayRef<PressureChange> CriticalPSets,
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 6f8b337..76a7fef 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -36,8 +36,8 @@ static cl::opt<bool> StressSchedOpt(
void SchedulingPriorityQueue::anchor() { }
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
- : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()),
- TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf),
+ : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
+ TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
#ifndef NDEBUG
StressSched = StressSchedOpt;
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 409e704..c60c518 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -27,7 +28,6 @@
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/IR/Operator.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
@@ -51,18 +51,17 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
- bool IsPostRAFlag,
- bool RemoveKillFlags,
+ bool IsPostRAFlag, bool RemoveKillFlags,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
- IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
- CanHandleTerminators(false), FirstDbgValue(nullptr) {
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
+ CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
@@ -97,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
- SmallVectorImpl<Value *> &Objects) {
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
@@ -133,7 +133,8 @@ UnderlyingObjectsVector;
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
- UnderlyingObjectsVector &Objects) {
+ UnderlyingObjectsVector &Objects,
+ const DataLayout &DL) {
if (!MI->hasOneMemOperand() ||
(!(*MI->memoperands_begin())->getValue() &&
!(*MI->memoperands_begin())->getPseudoValue()) ||
@@ -142,6 +143,13 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
if (const PseudoSourceValue *PSV =
(*MI->memoperands_begin())->getPseudoValue()) {
+ // Functions that contain tail calls don't have unique PseudoSourceValue
+ // objects. Two PseudoSourceValues might refer to the same or overlapping
+ // locations. The client code calling this function assumes this is not the
+ // case. So return a conservative answer of no known object.
+ if (MFI->hasTailCall())
+ return;
+
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
@@ -157,12 +165,9 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
return;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
-
- for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
- I != IE; ++I) {
- V = *I;
+ getUnderlyingObjects(V, Objs, DL);
+ for (Value *V : Objs) {
if (!isIdentifiedObject(V)) {
Objects.clear();
return;
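
getUnderlyingObjectsForInstr's contract is all-or-nothing: one unidentifiable underlying object invalidates the whole answer, and callers treat the empty set as "may alias anything". The shape of that contract in a standalone sketch, with opaque Value handles and a caller-supplied predicate:

#include <functional>
#include <vector>

using Value = const void *; // opaque stand-in for llvm::Value*

// Returns the identified underlying objects, or an empty vector as soon
// as any object fails the test, mirroring the clear()-and-return above.
static std::vector<Value>
identifiedObjectsOrNothing(const std::vector<Value> &Objs,
                           const std::function<bool(Value)> &isIdentified) {
  for (Value V : Objs)
    if (!isIdentified(V))
      return {};
  return Objs;
}
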
@@ -253,7 +258,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
assert(MO.isDef() && "expect physreg def");
// Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
@@ -444,7 +449,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
int DefOp = Def->findRegisterDefOperandIdx(Reg);
dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
@@ -469,7 +474,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
// This MI might have either incomplete info, or known to be unsafe
// to deal with (i.e. volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
- const MachineFrameInfo *MFI) {
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL) {
if (!MI || MI->memoperands_empty())
return true;
// We purposefully do no check for hasOneMemOperand() here
@@ -492,11 +498,10 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
return true;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
- for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
- IE = Objs.end(); I != IE; ++I) {
+ getUnderlyingObjects(V, Objs, DL);
+ for (Value *V : Objs) {
// Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(*I))
+ if (!isIdentifiedObject(V))
return true;
}
@@ -509,7 +514,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
/// these two MIs be reordered during scheduling from memory dependency
/// point of view.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- MachineInstr *MIa,
+ const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -528,7 +533,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
return true;
- if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
return true;
// If we are dealing with two "normal" loads, we do not need an edge
@@ -580,10 +585,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// This recursive function iterates over chain deps of SUb looking for
/// "latest" node that needs a chain edge to SUa.
-static unsigned
-iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
- SmallPtrSetImpl<const SUnit*> &Visited) {
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+ SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSetImpl<const SUnit *> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
@@ -608,7 +613,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
// add that edge to the predecessors chain of SUb,
// and stop descending.
if (*Depth > 200 ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
@@ -618,7 +623,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
I != E; ++I)
if (I->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
return *Depth;
}
@@ -627,7 +632,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// checks whether SU can be aliasing any node dominated
/// by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+ std::set<SUnit *> &CheckList,
unsigned LatencyToLoad) {
if (!SU)
return;
@@ -639,7 +645,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
I != IE; ++I) {
if (SU == *I)
continue;
- if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
SDep Dep(SU, SDep::MayAliasMem);
Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
(*I)->addPred(Dep);
@@ -650,22 +656,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
JE = (*I)->Succs.end(); J != JE; ++J)
if (J->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SU, J->getSUnit(),
- ExitSU, &Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+ Visited);
}
}
/// Check whether two objects need a chain edge, if so, add it
/// otherwise remember the rejected SU.
-static inline
-void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb,
- std::set<SUnit *> &RejectList,
- unsigned TrueMemOrderLatency = 0,
- bool isNormalMemory = false) {
+static inline void addChainDependency(AliasAnalysis *AA,
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa,
+ SUnit *SUb, std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but remember the rejected node.
- if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
@@ -743,7 +749,7 @@ void ScheduleDAGInstrs::initSUnits() {
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs) {
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
@@ -884,7 +890,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
@@ -897,25 +903,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
- ChainLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes, ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- TrueMemOrderLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
@@ -929,7 +937,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPred(SDep(SU, SDep::Barrier));
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// Treat all other stores conservatively.
@@ -953,8 +961,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
// If we're not using AA, then we only need one store per object.
if (!AAForDep)
@@ -978,7 +986,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
@@ -987,23 +996,23 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
- // But we also should check dependent instructions for the
- // SU in question.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
}
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// A load with no underlying object. Depend on all
@@ -1011,8 +1020,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
@@ -1035,18 +1044,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
@@ -1080,22 +1091,65 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
}
}
+/// \brief If we change a kill flag on the bundle instruction implicit register
+/// operands, then we also need to propagate that to any instructions inside
+/// the bundle which had the same kill state.
+static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
+ bool NewKillState) {
+ if (MI->getOpcode() != TargetOpcode::BUNDLE)
+ return;
+
+ // Walk backwards from the last instruction in the bundle to the first.
+ // Once we set a kill flag on an instruction, we bail out, as otherwise we
+ // might set it on too many operands. We will clear as many flags as we
+ // can though.
+ MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ while (Begin != End) {
+ for (MIOperands MO(--End); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || Reg != MO->getReg())
+ continue;
+
+ // DEBUG_VALUE nodes do not contribute to code generation and should
+ // always be ignored. Failure to do so may result in trying to modify
+ // KILL flags on DEBUG_VALUE nodes, which is distressing.
+ if (MO->isDebug())
+ continue;
+
+ // If the register has the internal flag then it could be killing an
+ // internal def of the register. In this case, just skip. We only want
+ // to toggle the flag on operands visible outside the bundle.
+ if (MO->isInternalRead())
+ continue;
+
+ if (MO->isKill() == NewKillState)
+ continue;
+ MO->setIsKill(NewKillState);
+ if (NewKillState)
+ return;
+ }
+ }
+}
+
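
The asymmetry in toggleBundleKillFlag is deliberate: walking back to front, clearing may touch every matching operand, but setting stops at the first hit so only the last use of the register inside the bundle carries the kill. A simplified standalone model over a flat operand list (hypothetical Operand record; the debug and internal-read cases are omitted):

#include <vector>

struct Operand { unsigned Reg; bool IsUse; bool IsKill; };

// Visit operands from the bundle's end toward its start. Clearing visits
// every matching use; setting returns after the first one it marks.
static void toggleKills(std::vector<Operand> &Ops, unsigned Reg,
                        bool NewKillState) {
  for (auto I = Ops.rbegin(), E = Ops.rend(); I != E; ++I) {
    if (!I->IsUse || I->Reg != Reg || I->IsKill == NewKillState)
      continue;
    I->IsKill = NewKillState;
    if (NewKillState)
      return; // at most one kill per register per bundle
  }
}
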
bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
+ toggleBundleKillFlag(MI, MO.getReg(), true);
return false;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
+ toggleBundleKillFlag(MI, MO.getReg(), false);
return false;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
+ toggleBundleKillFlag(MI, MO.getReg(), false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
MachineInstrBuilder MIB(MF, MI);
@@ -1106,8 +1160,10 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
}
}
- if(AllDead)
+ if(AllDead) {
MO.setIsKill(true);
+ toggleBundleKillFlag(MI, MO.getReg(), true);
+ }
return false;
}
@@ -1180,6 +1236,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// Warning: toggleKillFlag may invalidate MO.
toggleKillFlag(MI, MO);
DEBUG(MI->dump());
+ DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
+ MachineBasicBlock::instr_iterator Begin = MI;
+ MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ while (++Begin != End)
+ DEBUG(Begin->dump());
+ });
}
killedRegs.set(Reg);
@@ -1214,7 +1276,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
+ SU->getInstr()->print(oss, /*SkipOpers=*/true);
return oss.str();
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1df61e4..2c2dc85 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -246,10 +246,11 @@ namespace {
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
- SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -267,6 +268,7 @@ namespace {
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
@@ -302,9 +304,16 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -327,6 +336,7 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -363,6 +373,28 @@ namespace {
/// chain (aliasing node).
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+      // The sequence number of this mem node; the lowest mem operand
+      // in the DAG starts at zero.
+ unsigned SequenceNum;
+ };
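The MemOpLink bookkeeping is easiest to see with a toy example. A minimal standalone sketch (plain C++, not part of the patch; the `Link` struct and `consecutive` helper are illustrative names, not LLVM API):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Stand-in for MemOpLink: candidates are sorted by OffsetFromBase to
    // verify that the stores really are consecutive in memory, while
    // SequenceNum records their order along the chain.
    struct Link { int64_t OffsetFromBase; unsigned SequenceNum; };

    bool consecutive(std::vector<Link> Stores, int64_t ElemSize) {
      std::sort(Stores.begin(), Stores.end(),
                [](const Link &A, const Link &B) {
                  return A.OffsetFromBase < B.OffsetFromBase;
                });
      for (size_t I = 1; I < Stores.size(); ++I)
        if (Stores[I].OffsetFromBase != Stores[I - 1].OffsetFromBase + ElemSize)
          return false; // a gap; cannot merge into one wide store
      return true;
    }

    int main() {
      std::vector<Link> S = {{8, 2}, {0, 0}, {4, 1}};
      return consecutive(S, 4) ? 0 : 1; // consecutive i32 stores -> mergeable
    }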
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumElem,
+ bool IsConstantSrc, bool UseVector);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -380,12 +412,9 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ auto *F = DAG.getMachineFunction().getFunction();
+ ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the work list
@@ -446,7 +475,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
@@ -566,7 +595,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
- return DAG.getConstantFP(V, Op.getValueType());
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
@@ -683,13 +712,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
EltVT.getSizeInBits() >= SplatBitSize);
}
-// \brief Returns the SDNode if it is a constant BuildVector or constant.
-static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if (BV && BV->isConstant())
- return BV;
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+// \brief Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
return nullptr;
}
@@ -735,10 +774,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
@@ -755,10 +794,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
}
if (N1.getOpcode() == Opc) {
- if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
+ if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
@@ -1309,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -1344,9 +1384,13 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
}
return SDValue();
}
@@ -1412,9 +1456,10 @@ SDValue DAGCombiner::combine(SDNode *N) {
SDNode *CSENode;
if (const BinaryWithFlagsSDNode *BinNode =
dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(
- N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
- BinNode->hasNoSignedWrap(), BinNode->isExact());
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ BinNode->Flags.hasNoUnsignedWrap(),
+ BinNode->Flags.hasNoSignedWrap(),
+ BinNode->Flags.hasExact());
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
@@ -1471,7 +1516,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
- // rededundant.
+ // redundant.
Changed = true;
break;
@@ -1500,7 +1545,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
SDValue Result;
- // If we've change things around then replace token factor.
+ // If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
@@ -1510,8 +1555,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
- // Don't add users to work list.
- return CombineTo(N, Result, false);
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
@@ -1534,17 +1582,37 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static bool isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+static bool isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+static bool isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
+/// ConstantSDNode pointer; otherwise return nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
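These predicates collapse a two-step pattern that recurs throughout this file; roughly (illustrative fragment only):

    // before: dyn_cast plus a separate value check
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
      if (C->isNullValue())
        /* fold */;
    // after: one call
    if (isNullConstant(V))
      /* fold */;

getAsNonOpaqueConstant additionally rejects opaque constants, which, as the gated folds below suggest, the combiner deliberately leaves unfolded.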
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1559,13 +1627,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
@@ -1576,22 +1647,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), VT),
+ N0C->getAPIntValue(), DL, VT),
N0.getOperand(1));
+ }
// reassociate add
- SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
- if (RADD.getNode())
+ if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
- cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
- cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
@@ -1651,34 +1721,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
- if (N1.getOpcode() == ISD::SHL &&
- N1.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
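A minimal standalone check of the identity behind this fold (plain C++, not LLVM code; unsigned arithmetic stands in for the DAG's two's-complement wraparound):

    #include <cassert>
    #include <cstdint>

    int main() {
      // x + ((0 - y) << n) == x - (y << n), modulo 2^32
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 0; Y < 64; ++Y)
          for (unsigned N = 0; N < 8; ++N)
            assert(X + ((0u - Y) << N) == X - (Y << N));
      return 0;
    }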
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
- ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
@@ -1699,7 +1762,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
@@ -1710,8 +1773,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
@@ -1721,11 +1782,13 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
@@ -1752,10 +1815,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
@@ -1773,25 +1836,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -1803,14 +1862,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
- if (N1C)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
- DAG.getConstant(-N1C->getAPIntValue(), VT));
+ if (N1C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
@@ -1822,10 +1885,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDLoc DL(N);
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- VT);
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
+ DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
@@ -1866,7 +1932,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- VT);
+ SDLoc(N), VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
@@ -1875,7 +1941,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
@@ -1886,8 +1952,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
@@ -1897,18 +1961,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1)
- return CombineTo(N, DAG.getConstant(0, VT),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
+ if (N0 == N1) {
+ SDLoc DL(N);
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL,
MVT::Glue));
+ }
// fold (subc x, 0) -> x + no borrow
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
@@ -1935,33 +2001,41 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
bool N0IsConst = false;
bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ bool N0IsOpaqueConst = false;
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
- ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
- : APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
- ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
- : APInt();
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst) {
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+ }
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
}
// fold (mul c1, c2) -> c1*c2
- if (N0IsConst && N1IsConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
+ if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
- // canonicalize constant to RHS
- if (N0IsConst && !N1IsConst)
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
@@ -1974,23 +2048,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(ConstValue1.logBase2(),
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
+ IsFullSplat) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ConstValue1.logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
+ IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT),
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(Log2Val,
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
@@ -2041,8 +2122,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
- if (RMUL.getNode())
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
return RMUL;
return SDValue();
@@ -2051,26 +2131,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (sdiv c1, c2) -> c1/c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getAPIntValue() == 1LL)
+ if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1C && N1C->isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
@@ -2080,8 +2161,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2SDivCheap())
@@ -2093,24 +2175,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
- DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
- DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+ DAG.getNode(ISD::SRL, DL, VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
+ SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
AddToWorklist(SRL.getNode());
AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
- DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
+ DAG.getConstant(lg2, DL,
+ getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
@@ -2118,10 +2202,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return SRA;
AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
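A standalone sketch of the expansion emitted above, for a 32-bit sdiv by a positive power of two (plain C++, not part of the patch; assumes the usual arithmetic right shift for signed values):

    #include <cassert>
    #include <cstdint>

    int32_t sdivPow2(int32_t X, unsigned Lg2) {
      int32_t Sgn = X >> 31;                              // splat the sign bit
      int32_t Srl = int32_t(uint32_t(Sgn) >> (32 - Lg2)); // X<0 ? 2^Lg2-1 : 0
      int32_t Add = X + Srl;                              // bias negative values
      return Add >> Lg2;                                  // rounds toward zero
    }

    int main() {
      for (int32_t X = -1024; X <= 1024; ++X)
        for (unsigned Lg2 = 1; Lg2 < 8; ++Lg2)
          assert(sdivPow2(X, Lg2) == X / (1 << Lg2));
      return 0;
    }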
- // if integer divide is expensive and we satisfy the requirements, emit an
+ // If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
@@ -2130,7 +2214,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2141,36 +2225,40 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (udiv c1, c2) -> c1/c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
+ SDLoc DL(N);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
- ADDVT));
+ DL, ADDVT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
}
@@ -2182,7 +2270,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2193,13 +2281,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
@@ -2224,7 +2314,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2235,27 +2325,33 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
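The fold rests on the usual power-of-two remainder identity; a minimal standalone check (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // x %u (1 << k) == x & ((1 << k) - 1)
      for (uint32_t X = 0; X < 4096; ++X)
        for (unsigned K = 0; K < 12; ++K)
          assert(X % (1u << K) == (X & ((1u << K) - 1)));
      return 0;
    }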
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ DAG.getNode(ISD::ADD, DL, VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
}
@@ -2277,7 +2373,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
@@ -2288,21 +2384,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhs x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
+ if (isOneConstant(N1)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ DL,
getShiftAmountTy(N0.getValueType())));
+ }
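The signed high half of x * 1 is just the sign of x, which the sra produces; a standalone check for i32 (plain C++, not part of the patch; assumes arithmetic right shift on signed types):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t X = -100000; X <= 100000; X += 7) {
        int32_t Hi = int32_t((int64_t(int32_t(X)) * 1) >> 32);
        assert(Hi == (int32_t(X) >> 31)); // (mulhs x, 1) == (sra x, 31)
      }
      return 0;
    }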
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
@@ -2315,7 +2413,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
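A standalone sketch of the widening itself for i32 (plain C++, not LLVM code): sign-extend, multiply in the double-width type, then shift the high half down.

    #include <cassert>
    #include <cstdint>

    int32_t mulhs32(int32_t A, int32_t B) {
      int64_t Wide = int64_t(A) * int64_t(B); // sign_extend + wide mul
      return int32_t(uint64_t(Wide) >> 32);   // srl by the narrow bit width
    }

    int main() {
      assert(mulhs32(INT32_MIN, 2) == -1);    // -2^32 has high half -1
      assert(mulhs32(0x40000000, 4) == 1);    // 2^32 has high half 1
      return 0;
    }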
@@ -2326,19 +2425,18 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhu x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhu x, 1) -> 0
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getConstant(0, N0.getValueType());
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
@@ -2351,7 +2449,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
@@ -2417,8 +2516,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+  // If the type twice as wide is legal, transform this into a wider
+  // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
@@ -2429,7 +2528,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -2447,8 +2547,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // If the type twice as wide is legal, transform the mulhu to a wider multiply
- // plus a shift.
+  // If the type twice as wide is legal, transform this into a wider
+  // multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
@@ -2459,7 +2559,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
@@ -2615,7 +2716,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// build vector of all zeros that might be illegal at this stage.
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
@@ -2636,7 +2737,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = N0->getOperand(0);
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
@@ -2657,19 +2758,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, SDLoc(LocReference), VT);
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ if (isAllOnesConstant(LR)) {
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ }
+ }
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL(N0);
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, DL, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(LocReference, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
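Two of the integer identities visitANDLike relies on, checked standalone (plain C++, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = -5; X <= 5; ++X) {
        // (X == 0 && Y == 0)  <=>  (X | Y) == 0
        for (int32_t Y = -5; Y <= 5; ++Y)
          assert(((X == 0) && (Y == 0)) == ((X | Y) == 0));
        // (X != 0 && X != -1)  <=>  (X + 1) >=u 2
        assert(((X != 0) && (X != -1)) == (uint32_t(X + 1) >= 2u));
      }
      return 0;
    }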
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -2677,13 +2881,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getConstant(
APInt::getNullValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getNullValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -2692,25 +2896,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N0;
}
- // fold (and x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// reassociate and
- SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
- if (RAND.getNode())
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
@@ -2840,117 +3044,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
- cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
- (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
- // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
-
- // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
- // fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
- // fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
@@ -3001,8 +3094,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
+ SDLoc DL(LN0);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
+ NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
Alignment = MinAlign(Alignment, PtrOff);
}
@@ -3022,33 +3116,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
- if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- APInt ADDC = ADDI->getAPIntValue();
- if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
- // immediate for an add, but it is legal if its top c2 bits are set,
- // transform the ADD so the immediate doesn't need to be materialized
- // in a register.
- if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- SRLI->getZExtValue());
- if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
- ADDC |= Mask;
- if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
- CombineTo(N0.getNode(), NewAdd);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- }
- }
- }
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
}
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -3159,9 +3280,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
- if (OpSizeInBits > 16)
- Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
- DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
return Res;
}
@@ -3299,33 +3423,125 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
- SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
- SDValue(Parts[0],0));
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
- DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value. \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ SDLoc(LocReference), VT);
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ SDLoc DL(LocReference);
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
}
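
As an aside (not part of the patch), the masked-or rewrite in visitORLike, (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C1|C2), is only sound when the MaskedValueIsZero preconditions hold: X has no set bits in C2 & ~C1, and Y none in C1 & ~C2. A minimal C++ sketch with masks chosen to satisfy that (all values are made up for the example):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t C1 = 0x0000FFFFu, C2 = 0xFFFF0000u; // LHSMask, RHSMask
    uint32_t X = 0x00001234u; // bits only inside C1
    uint32_t Y = 0x56780000u; // bits only inside C2
    // Preconditions the combine checks via MaskedValueIsZero.
    assert((X & (C2 & ~C1)) == 0 && (Y & (C1 & ~C2)) == 0);
    // (or (and X, C1), (and Y, C2)) == (and (or X, Y), C1|C2)
    assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)));
  }
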
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3339,13 +3555,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getConstant(
APInt::getAllOnesValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// Do not return N1, because an undef node may exist in N1.
return DAG.getConstant(
APInt::getAllOnesValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
@@ -3404,28 +3620,28 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
- }
// fold (or c1, c2) -> c1|c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (or x, -1) -> -1
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N1;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
if (BSwap.getNode())
@@ -3435,8 +3651,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
- if (ROR.getNode())
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
@@ -3444,86 +3659,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ N1C, C1))
return DAG.getNode(
ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
return SDValue();
}
}
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
- // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(1).getOpcode() == ISD::Constant &&
- N1.getOperand(1).getOpcode() == ISD::Constant &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- // We can only do this xform if we know that bits from X that are set in C2
- // but not in C1 are already zero. Likewise for Y.
- const APInt &LHSMask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- const APInt &RHSMask =
- cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
- if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
- DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
- }
- }
-
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
@@ -3751,7 +3900,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
}
return Rot.getNode();
@@ -3793,15 +3942,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -3812,27 +3958,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (xor x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (N0C && !N1C)
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ !isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// reassociate xor
- SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
- if (RXOR.getNode())
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -3853,18 +4002,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getNode()->hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
- DAG.getConstant(1, V.getValueType()));
+ SDLoc DL(N0);
+ V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
+ DAG.getConstant(1, DL, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
- if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ if (isOneConstant(N1) && VT == MVT::i1 &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
@@ -3876,7 +4026,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
- if (N1C && N1C->isAllOnesValue() &&
+ if (isAllOnesConstant(N1) &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
@@ -3897,21 +4047,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
if (N1C && N0.getOpcode() == ISD::XOR) {
- ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
- ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N00C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
+ if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getConstant(N1C->getAPIntValue() ^
- N00C->getAPIntValue(), VT));
- if (N01C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
+ N00C->getAPIntValue(), DL, VT));
+ }
+ if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getConstant(N1C->getAPIntValue() ^
- N01C->getAPIntValue(), VT));
+ N01C->getAPIntValue(), DL, VT));
+ }
}
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There is no value of x for which the result is zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
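
The equivalence the comment above walks through can be checked exhaustively for i16; a standalone sketch (RotL16 is an invented helper, not LLVM API):

  #include <cassert>
  #include <cstdint>

  static uint16_t RotL16(uint16_t V, unsigned N) {
    N &= 15; // rotate count modulo the bit width
    return (uint16_t)((V << N) | (V >> ((16 - N) & 15)));
  }

  int main() {
    for (unsigned X = 0; X < 16; ++X) {
      uint16_t Shl = (uint16_t)(1u << X);
      // xor with -1 is bitwise not; rotating ~1 left by X moves the
      // single zero bit to position X.
      assert((uint16_t)~Shl == RotL16((uint16_t)~1u, X));
    }
  }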
+
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
@@ -3929,10 +4106,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
- // We can't and shouldn't fold opaque constants.
- if (Amt->isOpaque())
- return SDValue();
-
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -3959,8 +4132,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
}
// We also require the RHS of the binop to be a non-opaque constant.
- ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
@@ -4013,14 +4186,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ if (!N01C->isOpaque()) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ SDLoc DL(N);
- return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
- DAG.getConstant(TruncC, TruncVT));
+ return DAG.getNode(ISD::AND, DL, TruncVT,
+ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+ DAG.getConstant(TruncC, DL, TruncVT));
+ }
}
}
@@ -4042,15 +4218,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
@@ -4064,7 +4239,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
@@ -4074,10 +4250,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl c1, c2) -> c1<<c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4087,11 +4264,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return N0;
// fold (shl undef, x) -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -4108,10 +4285,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
@@ -4131,12 +4309,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
if (c2 >= OpSizeInBits - InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
}
@@ -4154,8 +4333,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
+ SDLoc DL(N);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
+ NewOp0,
+ DAG.getConstant(c2, DL, CountVT));
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
@@ -4176,25 +4357,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue Shift;
if (c2 > c1) {
Mask = Mask.shl(c2 - c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
Mask = Mask.lshr(c1 - c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, N1.getValueType()));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
unsigned BitSize = VT.getScalarSizeInBits();
+ SDLoc DL(N);
SDValue HiBitsMask =
DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()), VT);
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ BitSize - N1C->getZExtValue()),
+ DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
HiBitsMask);
}
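
For orientation (editor's example, assuming the usual arithmetic right shift for signed integers): shifting right by c1 and back left by c1 just clears the low c1 bits, which is the AND with APInt::getHighBitsSet(BitSize, BitSize - c1) emitted above:

  #include <cassert>
  #include <cstdint>

  int main() {
    const unsigned C1 = 3;
    const uint32_t HiBitsMask = ~(((uint32_t)1 << C1) - 1); // high 29 bits set
    for (int32_t X : {-100, -1, 0, 5, 0x12345678}) {
      // Cast before the left shift to avoid shifting a negative value.
      int32_t SraShl = (int32_t)((uint32_t)(X >> C1) << C1);
      assert(SraShl == (int32_t)((uint32_t)X & HiBitsMask));
    }
  }
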
@@ -4210,7 +4396,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
return NewSHL;
@@ -4222,27 +4408,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
// fold (sra c1, c2) -> c1 >> c2 (arithmetic)
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (sra -1, x) -> -1
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return N0;
// fold (sra x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4270,8 +4456,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
if (Sum >= OpSizeInBits)
Sum = OpSizeInBits - 1;
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1.getValueType()));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, DL, N1.getValueType()));
}
}
@@ -4303,14 +4490,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDValue Amt = DAG.getConstant(ShiftAmt,
- getShiftAmountTy(N0.getOperand(0).getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
- N0.getOperand(0), Amt);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
- Shift);
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
- N->getValueType(0), Trunc);
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
}
}
}
@@ -4337,12 +4525,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT LargeVT = N0Op0.getValueType();
if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDLoc DL(N);
SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
N0Op0.getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
@@ -4356,7 +4545,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSRA = visitShiftByConstant(N, N1C);
if (NewSRA.getNode())
return NewSRA;
@@ -4368,24 +4557,24 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
N1C = isConstOrConstSplat(N1);
}
// fold (srl c1, c2) -> c1 >>u c2
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
@@ -4396,17 +4585,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N01C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
@@ -4422,12 +4612,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
// This is only valid if OpSizeInBits + c1 == the size of the inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, ShiftCountVT)));
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
}
}
@@ -4436,8 +4628,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
unsigned BitSize = N0.getScalarValueSizeInBits();
if (BitSize <= 64) {
uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, DL, VT));
}
}
@@ -4451,14 +4644,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
- SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
- DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
}
}
@@ -4477,12 +4673,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
- if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~KnownZero;
- if (UnknownBits == 0) return DAG.getConstant(1, VT);
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if ((UnknownBits & (UnknownBits - 1)) == 0) {
@@ -4494,13 +4690,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (ShAmt) {
- Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
- DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- Op, DAG.getConstant(1, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
}
}
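
Context for the ctlz reasoning above (illustration only): the guarded pattern is the (ctlz x) >> log2(bitwidth) idiom, which computes x == 0; when only one bit of x can be nonzero, that collapses to shifting the bit down and xoring with 1, as the hunk emits. A scalar sketch with an invented ctlz helper:

  #include <cassert>
  #include <cstdint>

  // Portable count-leading-zeros with Ctlz32(0) == 32.
  static unsigned Ctlz32(uint32_t X) {
    unsigned N = 0;
    for (uint32_t Bit = 0x80000000u; Bit && !(X & Bit); Bit >>= 1)
      ++N;
    return N;
  }

  int main() {
    // (ctlz x) >> 5 is 1 iff x == 0: only ctlz(0) == 32 has bit 5 set.
    assert((Ctlz32(0) >> 5) == 1);
    assert((Ctlz32(1) >> 5) == 0);
    assert((Ctlz32(0x80000000u) >> 5) == 0);
    // If only bit K of x can be nonzero, the test becomes ((x >> K) ^ 1).
    const unsigned K = 7;
    for (uint32_t X : {0u, 1u << K}) // the only two possible inputs
      assert((Ctlz32(X) >> 5) == (((X >> K) & 1u) ^ 1u));
  }
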
@@ -4517,7 +4716,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
@@ -4651,23 +4850,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
- // fold (select true, X, Y) -> X
- if (N0C && !N0C->isNullValue())
- return N1;
- // fold (select false, X, Y) -> Y
- if (N0C && N0C->isNullValue())
- return N2;
+ if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
+ // fold (select true, X, Y) -> X
+ // fold (select false, X, Y) -> Y
+ return !N0C->isNullValue() ? N1 : N2;
+ }
// fold (select C, 1, X) -> (or C, X)
- if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ if (VT == MVT::i1 && isOneConstant(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
// We can't do this reliably if integer-based booleans have different contents
@@ -4684,40 +4879,43 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
TLI.getBooleanContents(false, true) &&
TLI.getBooleanContents(false, false) ==
TargetLowering::ZeroOrOneBooleanContent)) &&
- N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ isNullConstant(N1) && isOneConstant(N2)) {
SDValue XORNode;
- if (VT == VT0)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
- N0, DAG.getConstant(1, VT0));
- XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
- N0, DAG.getConstant(1, VT0));
+ if (VT == VT0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT0,
+ N0, DAG.getConstant(1, DL, VT0));
+ }
+ SDLoc DL0(N0);
+ XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
+ N0, DAG.getConstant(1, DL0, VT0));
AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
}
// fold (select C, 0, X) -> (and (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
- if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ if (VT == MVT::i1 && isNullConstant(N2))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
- if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
- if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// If we can fold this based on the true/false value, do so.
@@ -4757,6 +4955,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
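
A scalar analogy for the select restructuring added above (illustration only): the and/or forms and the nested-select forms are interchangeable, which is why the combine can normalize in either direction depending on shouldNormalizeToSelectSequence:

  #include <cassert>

  int main() {
    for (bool C0 : {false, true})
      for (bool C1 : {false, true}) {
        const int X = 1, Y = 2;
        // select (and C0, C1), X, Y == select C0, (select C1, X, Y), Y
        assert(((C0 && C1) ? X : Y) == (C0 ? (C1 ? X : Y) : Y));
        // select (or C0, C1), X, Y == select C0, X, (select C1, X, Y)
        assert(((C0 || C1) ? X : Y) == (C0 ? X : (C1 ? X : Y)));
      }
  }
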
@@ -4832,6 +5093,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Data = MSC->getValue();
+ SDLoc DL(N);
+
+ // If the MSCATTER data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+
+ SDValue Chain = MSC->getChain();
+
+ EVT MemoryVT = MSC->getMemoryVT();
+ unsigned Alignment = MSC->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MSC->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MSC->getAAInfo(), MSC->getRanges());
+
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi };
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Level >= AfterLegalizeTypes)
@@ -4878,7 +5200,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
@@ -4887,10 +5209,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
@@ -4906,6 +5228,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDLoc DL(N);
+
+ // If the MGATHER result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MGT->getValue();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Chain = MGT->getChain();
+ EVT MemoryVT = MGT->getMemoryVT();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi };
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
+ MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result: switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
+
+ SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { GatherRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+}
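
To make the gather split concrete (editor's sketch; an array-level emulation, not SelectionDAG API): the mask, pass-through (Src0), and index vectors are each halved and two narrower gathers are issued, whose results concatenate to the original:

  #include <array>
  #include <cassert>
  #include <cstddef>

  // Scalar emulation of a masked gather over half of the vector lanes.
  template <size_t N, size_t Half>
  std::array<int, Half> GatherHalf(const int *Base,
                                   const std::array<int, N> &Index,
                                   const std::array<bool, N> &Mask,
                                   const std::array<int, N> &Src0,
                                   size_t First) {
    std::array<int, Half> Out{};
    for (size_t I = 0; I < Half; ++I) {
      size_t Lane = First + I;
      Out[I] = Mask[Lane] ? Base[Index[Lane]] : Src0[Lane];
    }
    return Out;
  }

  int main() {
    int Mem[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    std::array<int, 4> Index = {3, 0, 2, 1};
    std::array<bool, 4> Mask = {true, false, true, true};
    std::array<int, 4> Src0 = {-1, -2, -3, -4};
    // Lo gathers lanes 0..1, Hi gathers lanes 2..3; concatenating them
    // reproduces the full 4-lane gather.
    auto Lo = GatherHalf<4, 2>(Mem, Index, Mask, Src0, 0);
    auto Hi = GatherHalf<4, 2>(Mem, Index, Mask, Src0, 2);
    assert(Lo[0] == 13 && Lo[1] == -2 && Hi[0] == 12 && Hi[1] == 11);
  }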
+
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (Level >= AfterLegalizeTypes)
@@ -4953,7 +5352,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
@@ -4962,10 +5361,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
@@ -5021,7 +5420,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -5029,6 +5428,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
// If the VSELECT result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -5141,7 +5543,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
EVT VT = N->getValueType(0);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
@@ -5163,7 +5566,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
unsigned ShAmt = VTBits - EVTBits;
SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
+ unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
for (unsigned i=0; i != NumElts; ++i) {
@@ -5173,14 +5576,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
continue;
}
+ SDLoc DL(Op);
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- if (Opcode == ISD::SIGN_EXTEND)
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
else
Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
- SVT));
+ DL, SVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
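
For reference (illustration only, assuming the common arithmetic behavior of >> on negative int32_t): the shl-then-shift-back trick above sign- or zero-extends the low EVTBits of each constant within a VTBits-wide value:

  #include <cassert>
  #include <cstdint>

  int main() {
    const unsigned VTBits = 32, EVTBits = 16, ShAmt = VTBits - EVTBits;
    const uint32_t C = 0x8000u; // 16-bit constant with its sign bit set
    // C.shl(ShAmt).ashr(ShAmt): sign-extend the low 16 bits.
    uint32_t Sext = (uint32_t)((int32_t)(C << ShAmt) >> ShAmt);
    assert(Sext == 0xFFFF8000u);
    // C.shl(ShAmt).lshr(ShAmt): zero-extend the low 16 bits.
    uint32_t Zext = (C << ShAmt) >> ShAmt;
    assert(Zext == 0x00008000u);
  }
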
@@ -5271,6 +5675,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
}
}
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
+ !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
+ LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
+ Align, LN0->getAAInfo());
+
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated extloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
+ (ISD::NodeType)N->getOpcode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
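
A concrete instance of the comment at the top of CombineExtLoad (editor's sketch, scalar emulation rather than DAG nodes): for (v8i32 (sext (v8i16 (load x)))) with only v4i32 legal, SplitSrcVT is v4i16, so NumSplits is 2 and Stride is v4i16's store size, 8 bytes; the second sextload reads from x + 8:

  #include <cassert>
  #include <cstdint>

  int main() {
    int16_t Mem[8] = {-1, 2, -3, 4, -5, 6, -7, 8}; // the v8i16 in memory
    int32_t Wide[8];                               // the v8i32 result
    const unsigned NumSplits = 2, Lanes = 4;       // two v4i16->v4i32 loads
    const unsigned Stride = Lanes * sizeof(int16_t); // 8 bytes per split
    const char *Base = reinterpret_cast<const char *>(Mem);
    for (unsigned Idx = 0; Idx < NumSplits; ++Idx) {
      const int16_t *Part =
          reinterpret_cast<const int16_t *>(Base + Idx * Stride);
      for (unsigned L = 0; L < Lanes; ++L)        // one v4i32 sextload
        Wide[Idx * Lanes + L] = (int32_t)Part[L]; // sign-extend each lane
    }
    for (unsigned I = 0; I < 8; ++I)
      assert(Wide[I] == (int32_t)Mem[I]);
  }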
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5337,17 +5837,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
// fold (sext (load x)) -> (sext (truncate (sextload x)))
- // None of the supported targets knows how to perform load and sign extend
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -5364,6 +5865,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -5407,14 +5913,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5457,11 +5964,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDLoc DL(N);
SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- NegOne, DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
@@ -5473,7 +5981,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
- NegOne, DAG.getConstant(0, VT));
+ NegOne, DAG.getConstant(0, DL, VT));
}
}
}
@@ -5507,11 +6015,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
- ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
- if (COp0 && COp0->isNullValue())
+ if (isNullConstant(Op0))
Op = Op1;
- else if (COp1 && COp1->isNullValue())
+ else if (isNullConstant(Op1))
Op = Op0;
else
return false;
@@ -5622,22 +6128,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
- // None of the supported targets knows how to perform load and vector_zext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
@@ -5655,6 +6163,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
@@ -5677,14 +6190,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5722,19 +6236,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
// Only do this before legalize for now.
EVT EltVT = VT.getVectorElementType();
+ SDLoc DL(N);
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
- DAG.getConstant(1, EltVT));
+ DAG.getConstant(1, DL, EltVT));
if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSetCC(DL, VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
OneOps));
// If the desired elements are smaller or larger than the source
@@ -5747,18 +6262,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
@@ -5850,8 +6366,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
@@ -5934,9 +6451,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode())
return SCC;
@@ -5957,7 +6475,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
- return DAG.getConstant(NewVal, V.getValueType());
+ return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
case ISD::OR:
@@ -5972,7 +6490,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
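+      // Opaque constants are skipped deliberately; they exist precisely to
+      // keep such constants from being folded.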
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
@@ -6117,9 +6635,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
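+    // ShAmt is a bit offset and PtrOff a byte offset: e.g. ShAmt == 16 gives
+    // PtrOff == 2, so NewPtr below ends up two bytes past the base pointer.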
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
+ SDLoc DL(LN0);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType));
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -6148,11 +6667,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
+ SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
- Result = DAG.getConstant(0, VT);
+ Result = DAG.getConstant(0, DL, VT);
else
- Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
// Return the new loaded value.
@@ -6279,7 +6799,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- Op.getValueType()));
+ SDLoc(Op), Op.getValueType()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
@@ -6288,6 +6808,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -6297,7 +6831,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -6350,9 +6884,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
NVT, N0.getOperand(0));
+ SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- SDLoc(N), TrTy, V,
- DAG.getConstant(Index, IndexTy));
+ DL, TrTy, V,
+ DAG.getConstant(Index, DL, IndexTy));
}
}
@@ -6598,13 +7133,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
N0.getOperand(0));
AddToWorklist(NewConv.getNode());
+ SDLoc DL(N);
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- NewConv, DAG.getConstant(SignBit, VT));
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- NewConv, DAG.getConstant(~SignBit, VT));
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
@@ -6629,9 +7165,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
- X = DAG.getNode(ISD::SRL, SDLoc(X),
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
- DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
@@ -6639,13 +7177,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
- X, DAG.getConstant(SignBit, VT));
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
- Cst, DAG.getConstant(~SignBit, VT));
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
@@ -6659,6 +7197,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return CombineLD;
}
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+    // If an operand is a bitcast, peek through it if the value it casts from
+    // has the original VT. If an operand is UNDEF or a constant, just bitcast
+    // back to the original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0)->getValueType(0) == VT)
+ return SDValue(Op.getOperand(0));
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return SDValue();
+ };
+
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
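+    // e.g. a v2i64 shuffle mask <1,0> rescaled to v4i32 (MaskScale == 2)
+    // becomes <2,3,0,1>; undef (-1) lanes stay undef.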
+
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ }
+
return SDValue();
}
@@ -6727,6 +7310,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
+ SDLoc DL(BV);
+
// Okay, we know the src/dst types are both integers of differing types.
// Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
@@ -6753,16 +7338,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
- Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
- bool isS2V = ISD::isScalarToVector(BV);
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
@@ -6770,8 +7354,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
- for (unsigned j = 0; j != NumOutputsPerInput; ++j)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
@@ -6780,11 +7363,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
- // Simply turn this into a SCALAR_TO_VECTOR of the new type.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- Ops[0]);
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal = OpVal.lshr(DstBitSize);
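+      // trunc picks up the low DstBitSize bits first: e.g. an i32 constant
+      // 0x0A0B0C0D yields i8 pieces 0x0D,0x0C,0x0B,0x0A here, before any
+      // endianness fix-up below.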
}
@@ -6793,7 +7372,450 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+}
+
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+ }
+
+    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
+ if (N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+      // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+      // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse()))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+
+  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+    // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
+    // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+    // fold (fsub (fneg (fpext (fmul x, y))), z)
+    // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+    // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
+    // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ }
+
+ // More folding opportunities when target permits.
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+    //   -> (fma x, y, (fma u, v, (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
+ N21, N0));
+ }
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+      //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
@@ -6802,32 +7824,32 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
+ return DAG.getNode(ISD::FSUB, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
+ return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold lots of things.
@@ -6843,17 +7865,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, SDLoc(N), VT,
- N0.getOperand(1), N1));
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
@@ -6865,21 +7886,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
@@ -6889,20 +7907,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
@@ -6910,18 +7926,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT));
+ }
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT));
+ }
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
@@ -6929,81 +7947,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, VT));
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ }
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
-
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)), N1);
- }
-
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)), N0);
- }
- }
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
- }
+ SDValue Fused = visitFADDForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
@@ -7019,14 +7974,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -7049,7 +8003,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, VT);
+ return DAG.getConstantFP(0.0f, dl, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
@@ -7066,138 +8020,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// FSUB -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
- }
-
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
-
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
- }
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
- }
+ SDValue Fused = visitFSUBForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
@@ -7209,29 +8035,24 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- // Canonicalize vector constant to RHS.
- if (N0.getOpcode() == ISD::BUILD_VECTOR &&
- N1.getOpcode() != ISD::BUILD_VECTOR)
- if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
- if (BV0->isConstant())
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
- if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -7247,14 +8068,22 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
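+      // (Otherwise this reassociation could fire again on its own output and
+      // loop without making progress.)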
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+ }
}
}
@@ -7263,21 +8092,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// during an early run of DAGCombiner can prevent folding with fmuls
// inserted during lowering.
if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
- SDLoc SL(N);
- const SDValue Two = DAG.getConstantFP(2.0, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
@@ -7285,7 +8113,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
GetNegatedExpression(N1, DAG, LegalOperations));
}
@@ -7364,14 +8192,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, VT)));
+ N1, DAG.getConstantFP(1.0, dl, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, VT)));
+ N1, DAG.getConstantFP(-1.0, dl, VT)));
return SDValue();
@@ -7387,10 +8215,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
@@ -7412,8 +8239,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
@@ -7492,24 +8319,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SmallVector<SDNode *, 4> Users;
// Find all FDIV users of the same divisor.
- for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
- UE = N1.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = UI.getUse().getUser();
- if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
- Users.push_back(User);
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
+ Users.push_back(U);
}
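+    // e.g. a/d, b/d, c/d become r = 1.0/d; a*r, b*r, c*r: one division
+    // feeding three multiplies.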
if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
- for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
- if ((*I)->getOperand(0) != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
- (*I)->getOperand(0), Reciprocal);
- DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal);
+ DAG.ReplaceAllUsesWith(U, NewNode.getNode());
}
}
return SDValue();
@@ -7539,20 +8364,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
EVT VT = RV.getValueType();
- RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ SDLoc DL(N);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
AddToWorklist(RV.getNode());
// Unfortunately, RV is now NaN if the input was exactly 0.
// Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT),
N->getOperand(0), Zero, ISD::SETEQ);
AddToWorklist(ZeroCmp.getNode());
AddToWorklist(RV.getNode());
RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
+ DL, VT, ZeroCmp, Zero, RV);
return RV;
}
}
@@ -7611,12 +8437,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7638,11 +8463,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
@@ -7651,11 +8477,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
@@ -7664,12 +8491,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7691,39 +8517,82 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
return SDValue();
}
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
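+  // e.g. a round-trip i16 -> float -> i16 has ActualSize == 15 and float
+  // carries 24 bits of precision, so it folds to x; an i32 round-trip through
+  // float (31 value bits) does not fit and is left alone.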
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ if (SrcVT == VT)
+ return Src;
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
@@ -7742,11 +8611,18 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
- // This is a value preserving truncation if both round's are.
- bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
- N0.getNode()->getConstantOperandVal(1) == 1;
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(IsTrunc));
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ // If the first fp_round isn't a value preserving truncation, it might
+      // introduce a tie in the second fp_round that wouldn't occur in the
+      // single-step fp_round we want to fold to.
+      // In other words, double rounding isn't the same as rounding once.
+ // Also, this is a value preserving truncation iff both fp_round's are.
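+      // e.g. the first rounding can land exactly halfway between two values
+      // of the narrower type, so the second rounding's tie-break may disagree
+      // with a single direct rounding.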
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
@@ -7769,8 +8645,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
// fold (fp_round_inreg c1fp) -> c1fp
if (N0CFP && isTypeLegal(EVT)) {
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
- return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
+ SDLoc DL(N);
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
}
return SDValue();
@@ -7778,7 +8655,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
@@ -7787,9 +8663,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
@@ -7813,7 +8694,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -7823,11 +8705,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -7835,11 +8716,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
@@ -7847,11 +8727,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
@@ -7862,14 +8741,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// Constant fold FNEG.
- if (isa<ConstantFPSDNode>(N0))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
@@ -7893,8 +8767,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// For a scalar, just generate 0x80...
SignMask = APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
@@ -7927,7 +8802,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
@@ -7948,7 +8823,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
@@ -7964,13 +8839,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// fold (fabs c1) -> fabs(c1)
- if (isa<ConstantFPSDNode>(N0))
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -8000,8 +8870,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// For a scalar, just generate 0x7f...
SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
@@ -8071,13 +8942,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDLoc DL(N);
SDValue SetCC =
- DAG.getSetCC(SDLoc(N),
+ DAG.getSetCC(DL,
getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
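
The equivalence this combine relies on can be checked in isolation. A standalone sketch (not LLVM code): when C is a power of two, ((x & C) >> log2(C)) used as a branch condition agrees with the (x & C) != 0 setcc it is rewritten to.

    #include <cstdio>

    int main() {
      const unsigned C = 8; // power of two; logBase2(C) == 3
      for (unsigned x = 0; x < 16; ++x) {
        unsigned Shifted = (x & C) >> 3;
        unsigned SetCC = (x & C) != 0 ? 1u : 0u;
        if (Shifted != SetCC)
          std::printf("mismatch at x=%u\n", x);
      }
      std::printf("done\n"); // no mismatches are printed
      return 0;
    }
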
@@ -8130,12 +9002,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
- if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
- if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::XOR) {
- TheXor = Op0.getNode();
- Equal = true;
- }
+ if (isOneConstant(Op0) && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
EVT SetCCVT = N1.getValueType();
if (LegalTypes)
@@ -8193,11 +9064,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
- VT = Use->getValueType(0);
+ VT = LD->getMemoryVT();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
- VT = ST->getValue().getValueType();
+ VT = ST->getMemoryVT();
} else
return false;
@@ -8280,8 +9151,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
  // Don't create an indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
return false;
// Try turning it into a pre-indexed load / store except when:
@@ -8309,24 +9179,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (Use->isPredecessorOf(N))
+ if (Use.getUser()->isPredecessorOf(N))
continue;
- if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
- SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
- if (Op1.getNode() == BasePtr.getNode())
- std::swap(Op0, Op1);
- assert(Op0.getNode() == BasePtr.getNode() &&
- "Use of ADD/SUB but not an operand");
-
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
@@ -8338,7 +9209,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
break;
}
- OtherUses.push_back(Use);
+ OtherUses.push_back(Use.getUser());
}
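
The (UI.getOperandNo() + 1) & 1 expression fetches the other operand of the binary ADD/SUB user directly, replacing the old swap-and-assert dance. A trivial standalone illustration of the index arithmetic (not LLVM code):

    #include <cstdio>

    int main() {
      // For a two-operand node, (OperandNo + 1) & 1 flips 0 <-> 1, so it
      // is always the index of "the other" operand.
      for (unsigned OperandNo = 0; OperandNo != 2; ++OperandNo)
        std::printf("use is operand %u -> other operand is %u\n",
                    OperandNo, (OperandNo + 1) & 1);
      return 0;
    }
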
if (Swapped)
@@ -8431,12 +9302,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
+ SDLoc DL(OtherUses[i]);
+
// We can now generate the new expression.
- SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
- SDLoc(OtherUses[i]),
+ DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
@@ -8494,8 +9367,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
continue;
// Try turning it into a post-indexed load / store except when
@@ -8585,7 +9457,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
- Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
@@ -8686,7 +9558,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), Align, LD->getAAInfo());
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
@@ -8851,9 +9724,6 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- LoadedSlice(const LoadedSlice &LS)
- : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
-
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
@@ -8980,8 +9850,9 @@ struct LoadedSlice {
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
- BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
- DAG->getConstant(Offset, ArithType));
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
@@ -9336,7 +10207,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
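
The renamed helper still implements the same 0*1+0* test: leading zeros, the run of ones, and trailing zeros must account for all 64 bits. A standalone sketch of that arithmetic (not the LLVM helpers; __builtin_clzll/__builtin_ctzll are GCC/Clang stand-ins for countLeadingZeros/countTrailingZeros):

    #include <cstdint>
    #include <cstdio>

    static bool isContiguousRun(uint64_t NotMask) {
      if (NotMask == 0)
        return false;                    // all-zero mask, nothing to do
      int LZ = __builtin_clzll(NotMask); // leading zeros
      int TZ = __builtin_ctzll(NotMask); // trailing zeros
      uint64_t Run = NotMask >> TZ;
      int Ones = 0;
      while (Run & 1) {                  // countTrailingOnes
        ++Ones;
        Run >>= 1;
      }
      return Ones + TZ + LZ == 64;
    }

    int main() {
      std::printf("%d %d\n",
                  isContiguousRun(0x00FFFF00ULL),  // 1: one contiguous run
                  isContiguousRun(0x00FF0F00ULL)); // 0: two runs
      return 0;
    }
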
@@ -9387,10 +10258,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
- if (ByteShift)
- IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8,
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
+ }
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
@@ -9403,8 +10276,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
- Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
@@ -9486,8 +10360,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// The narrowing should be profitable, the load/store operation should be
// legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
- !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
- TLI.isNarrowingProfitable(VT, NewVT))) {
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
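
A standalone sketch of how the widened loop searches for a store type (not LLVM code; nextPowerOf2 is a local stand-in for llvm::NextPowerOf2, the next power of two strictly greater than its argument):

    #include <cstdint>
    #include <cstdio>

    static uint64_t nextPowerOf2(uint64_t A) {
      A |= (A >> 1); A |= (A >> 2); A |= (A >> 4);
      A |= (A >> 8); A |= (A >> 16); A |= (A >> 32);
      return A + 1;
    }

    int main() {
      // The loop above widens the candidate width through powers of two
      // until it finds one that is legal, profitable, and byte-exact.
      for (uint64_t NewBW = 8; NewBW < 64; NewBW = nextPowerOf2(NewBW))
        std::printf("%llu ", (unsigned long long)NewBW); // 8 16 32
      std::printf("\n");
      return 0;
    }
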
@@ -9517,7 +10392,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, Ptr.getValueType()));
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
@@ -9525,7 +10401,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->isInvariant(), NewAlign,
LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
- DAG.getConstant(NewImm, NewVT));
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff),
@@ -9599,6 +10476,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
@@ -9686,37 +10564,156 @@ struct BaseIndexOffset {
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
}
};
+} // namespace
-/// Holds a pointer to an LSBaseSDNode as well as information on where it
-/// is located in a sequence of memory operations connected by a chain.
-struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
- // Ptr to the mem node.
- LSBaseSDNode *MemNode;
- // Offset from the base ptr.
- int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
-};
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned LatestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
+ }
+
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ if (IsConstantSrc) {
+ // A vector store with a constant source implies that the constant is
+ // zero; we only handle merging stores of constant zeros because the zero
+ // can be materialized without a load.
+ // It may be beneficial to loosen this restriction to allow non-zero
+ // store merging.
+ StoredVal = DAG.getConstant(0, DL, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All of the operands of a BUILD_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the last store with the new store
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+    // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+
+ return true;
+}
+
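
The endian-aware packing loop above can be mimicked with host arithmetic. A standalone sketch (not part of this patch) that merges four consecutive i8 constant stores into one i32 store; Elts stands in for the ConstantSDNode values, and a little-endian host is assumed:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      const uint8_t Elts[4] = {0x11, 0x22, 0x33, 0x44};
      uint32_t StoreInt = 0;
      for (int i = 0; i < 4; ++i) {
        int Idx = 4 - 1 - i; // IsLE: walk from the highest element down
        StoreInt <<= 8;
        StoreInt |= Elts[Idx];
      }
      // One wide store writes the same bytes as the four narrow stores.
      uint8_t Mem[4];
      std::memcpy(Mem, &StoreInt, 4); // little-endian host assumed
      std::printf("%02x %02x %02x %02x\n", Mem[0], Mem[1], Mem[2], Mem[3]);
      // prints: 11 22 33 44
      return 0;
    }
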
+static bool allowableAlignment(const SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT EVTTy,
+ unsigned AS, unsigned Align) {
+ if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
+ return true;
+
+ Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+ return (Align >= ABIAlignment);
+}
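
Stripped of the LLVM types, the new helper encodes a two-part policy. A plain-arithmetic sketch (hypothetical signature, not the LLVM API):

    #include <cstdio>

    // An access is acceptable if the target tolerates misaligned accesses
    // of this type, or the given alignment meets the type's preferred
    // ABI alignment.
    static bool allowable(bool MisalignedOK, unsigned ABIAlign, unsigned Align) {
      return MisalignedOK || Align >= ABIAlign;
    }

    int main() {
      std::printf("%d %d %d\n",
                  allowable(true, 8, 1),    // 1: target doesn't care
                  allowable(false, 8, 4),   // 0: under-aligned
                  allowable(false, 8, 16)); // 1: over-aligned is fine
      return 0;
    }
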
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
return false;
// Perform an early exit check. Do not bother looking at stored values that
- // are not constants or loads.
+ // are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
- !IsLoadSrc)
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@@ -9761,10 +10758,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Check that the alignment is the same.
- if (Index->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Index->isVolatile() || Index->isIndexed())
break;
@@ -9778,11 +10771,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Index->getMemoryVT() != MemVT)
break;
- // We do not allow unaligned stores because we want to prevent overriding
- // stores.
- if (Index->getAlignment()*8 != MemVT.getSizeInBits())
- break;
-
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
@@ -9856,9 +10844,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// The node with the lowest store address.
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
// Store the constants into memory as one consecutive store.
- if (!IsLoadSrc) {
+ if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@@ -9878,27 +10868,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the constant store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
+ FirstStoreAlign)) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
- TargetLowering::TypePromoteInteger) {
+ } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
- LastLegalType = i+1;
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign)) {
+ LastLegalType = i + 1;
+ }
}
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty))
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
LastLegalVectorType = i + 1;
+ }
}
- // We only use vectors if the constant is known to be zero and the
- // function is not marked with the noimplicitfloat attribute.
- if (NonZero || NoVectors)
+
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if (NoVectors) {
LastLegalVectorType = 0;
+ } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
+ LastLegalVectorType,
+ FirstStoreAS)) {
+ LastLegalVectorType = 0;
+ }
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
@@ -9907,85 +10911,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
- // Make sure we have something to merge.
- if (NumElem < 2)
- return false;
-
- unsigned EarliestNodeUsed = 0;
- for (unsigned i=0; i < NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
- }
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
- SDLoc DL(StoreNodes[0].MemNode);
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecEltSrc) {
+ unsigned NumElem = 0;
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+      // Bail out if any stored values are not elements extracted from a vector.
+      // This restriction could be loosened: it should be possible to handle
+      // mixed sources, but load sources need more careful handling (see the
+      // block of code below that handles consecutive loads).
+ if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
- SDValue StoredVal;
- if (UseVector) {
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
- StoredVal = DAG.getConstant(0, Ty);
- } else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
-
- // Construct a single integer constant which is made of the smaller
- // constant inputs.
- bool IsLE = TLI.isLittleEndian();
- for (unsigned i = 0; i < NumElem ; ++i) {
- unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- SDValue Val = St->getValue();
- StoreInt<<=ElementSizeBytes*8;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt|=C->getAPIntValue().zext(StoreBW);
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
- } else {
- llvm_unreachable("Invalid constant element type");
- }
- }
-
- // Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
- }
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- false, false,
- FirstInChain->getAlignment());
-
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
+ NumElem = i + 1;
}
- return true;
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ false, true);
}
// Below we handle the case of multiple consecutive stores that
@@ -10007,10 +10960,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (!Ld->hasNUsesOfValue(1, 0))
break;
- // Check that the alignment is the same as the stores.
- if (Ld->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
@@ -10048,6 +10997,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
St->getAlignment() >= RequiredAlignment)
return false;
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. These variables hold the index in the store node
// array.
@@ -10056,7 +11009,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
unsigned LastLegalVectorType = 0;
unsigned LastLegalIntegerType = 0;
StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
    // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
@@ -10069,13 +11022,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
LastLegalVectorType = i + 1;
+ }
// Find a legal type for the integer store.
unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
@@ -10085,7 +11043,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
+ FirstLoadAlign))
LastLegalIntegerType = i+1;
}
}
@@ -10103,18 +11065,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (NumElem < 2)
return false;
- // The earliest Node in the DAG.
- unsigned EarliestNodeUsed = 0;
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
  // Decide whether it is better to use vectors or integers to load and
  // store to memory.
EVT JointMemOpVT;
@@ -10128,18 +11091,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
- FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(),
- false, false, false,
- FirstLoad->getAlignment());
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false,
- FirstInChain->getAlignment());
+ SDValue NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+
+ SDValue NewStore = DAG.getStore(
+ LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
// Replace one of the loads with the new load.
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
@@ -10154,12 +11112,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
- // Replace the first store with the new store.
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
@@ -10214,8 +11172,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), MVT::i32);
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
@@ -10224,8 +11184,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), MVT::i64);
+ getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
@@ -10236,8 +11197,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
@@ -10245,18 +11206,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
bool isNonTemporal = ST->isNonTemporal();
AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc DL(N);
+
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
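
The two-store split can be checked with host memory. A standalone sketch (not part of this patch), assuming a little-endian host so Lo lands at the lower address, matching the non-big-endian path above:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double D = 3.14159;
      uint64_t Val;
      std::memcpy(&Val, &D, 8);     // bitcast f64 -> i64
      uint32_t Lo = (uint32_t)(Val & 0xFFFFFFFF);
      uint32_t Hi = (uint32_t)(Val >> 32);
      uint8_t Mem[8];
      std::memcpy(Mem + 0, &Lo, 4); // store at Ptr
      std::memcpy(Mem + 4, &Hi, 4); // store at Ptr + 4
      double Back;
      std::memcpy(&Back, Mem, 8);
      std::printf("%g\n", Back);    // prints: 3.14159
      return 0;
    }
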
@@ -10268,11 +11231,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, SDLoc(N), Value,
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
}
}
@@ -10493,24 +11460,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
SDValue Offset;
EVT PtrType = NewPtr.getValueType();
MachinePointerInfo MPI;
+ SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- if (TLI.isBigEndian())
- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
- Offset = DAG.getConstant(PtrOff, PtrType);
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
+ Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
Offset = DAG.getNode(
- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
- if (TLI.isBigEndian())
- Offset = DAG.getNode(
- ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ ISD::MUL, DL, PtrType, Offset,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
MPI = OriginalLoad->getPointerInfo();
}
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -10620,8 +11583,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LegalOperations) {
EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
+ DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
}
}
@@ -10710,7 +11673,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, EltNo.getValueType());
+ EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
}
}
@@ -10800,7 +11763,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
+ DAG.getConstant(0, SDLoc(N), SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
@@ -10890,6 +11853,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
@@ -10914,12 +11882,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- SDValue V = reduceBuildVecExtToExtBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- V = reduceBuildVecConvertToConvertBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -10942,8 +11908,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (Op.getOpcode() == ISD::UNDEF) continue;
// See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
+ if (!VecIn2.getNode() && (isNullConstant(Op) ||
(Op.getOpcode() == ISD::ConstantFP &&
cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
UsesZeroVector = true;
@@ -11047,20 +12012,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
-
+
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
} else
return SDValue();
}
if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
- DAG.getConstantFP(0.0, VT);
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
+ DAG.getConstantFP(0.0, dl, VT);
else
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
@@ -11081,6 +12046,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return SDValue();
}
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+    // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = ScalarUndef;
+ else
+ Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+}
+
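
The net effect of the new fold can be verified at the byte level. A standalone sketch (not part of this patch): concatenating the v2i32 bitcasts of two i64 scalars yields the same bytes as bitcasting a v2i64 build_vector of the scalars.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t X = 0x1122334455667788ULL, Y = 0x99AABBCCDDEEFF00ULL;
      uint32_t Parts[4];
      std::memcpy(Parts + 0, &X, 8);  // bitcast i64 -> v2i32
      std::memcpy(Parts + 2, &Y, 8);
      uint8_t Concat[16], BV[16];
      std::memcpy(Concat, Parts, 16); // concat_vectors of the bitcasts
      std::memcpy(BV + 0, &X, 8);     // bitcast of build_vector(X, Y)
      std::memcpy(BV + 8, &Y, 8);
      std::printf("%s\n", std::memcmp(Concat, BV, 16) == 0 ? "equal" : "differ");
      return 0;                       // prints: equal
    }
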
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
@@ -11096,9 +12123,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- // Optimize concat_vectors where one of the vectors is undef.
- if (N->getNumOperands() == 2 &&
- N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.getOpcode() == ISD::UNDEF;
+ })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -11106,6 +12134,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
@@ -11122,39 +12159,61 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
}
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -11216,7 +12275,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
@@ -11347,7 +12406,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
-// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into a simpler shuffle followed by a
+// concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -11361,6 +12421,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ // Special case: shuffle(concat(A,B)) can be more efficiently represented
+ // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+ // half vector elements.
+ if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+ std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+ SVN->getMask().end(), [](int i) { return i == -1; })) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+ ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
@@ -11459,7 +12531,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If it is a splat, check if the argument vector is another splat or a
- // build_vector with all scalar elements the same.
+ // build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -11496,6 +12568,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
@@ -11516,6 +12600,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ }
+ }
+
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ // Peek through the bitcast only if there is one user.
+ SDValue BC0 = N0;
+ while (BC0.getOpcode() == ISD::BITCAST) {
+ if (!BC0.hasOneUse())
+ break;
+ BC0 = BC0.getOperand(0);
+ }
+
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
+
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
+ SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
+ return DAG.getNode(
+ ISD::BITCAST, SDLoc(N), VT,
+ DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
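
The mask scaling and merging above is self-contained arithmetic. A standalone sketch (not LLVM code) merging an outer v2i64 shuffle with an inner v4i32 shuffle seen through a bitcast:

    #include <cstdio>
    #include <vector>

    static std::vector<int> scaleMask(const std::vector<int> &Mask, int Scale) {
      std::vector<int> New;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          New.push_back(M < 0 ? -1 : Scale * M + s);
      return New;
    }

    int main() {
      std::vector<int> Inner = scaleMask({1, 0, 3, 2}, 1); // already i32-grained
      std::vector<int> Outer = scaleMask({1, -1}, 2);      // i64 lanes -> i32 pairs
      std::vector<int> Merged;
      for (int M : Outer)
        Merged.push_back(M < 0 ? -1 : Inner[M]);
      for (int M : Merged)
        std::printf("%d ", M); // prints: 3 2 -1 -1
      std::printf("\n");
      return 0;
    }
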
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
@@ -11543,8 +12739,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// The incoming shuffle must be of the same type as the result of the
@@ -11624,20 +12821,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Avoid introducing shuffles with illegal mask.
if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- // Compute the commuted shuffle mask and test again.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ ShuffleVectorSDNode::commuteMask(Mask);
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
-
+
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
@@ -11653,6 +12841,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE.
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+
+ // FIXME: We could support implicit truncation if the shuffle can be
+ // scaled to a smaller vector scalar type.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+ if (C0 && VT == InVec.getValueType() &&
+ VT.getScalarType() == InVal.getValueType()) {
+ SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+
+ if (TLI.isShuffleMaskLegal(NewMask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+ NewMask);
+ }
+ }
+
+ return SDValue();
+}
+
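
A standalone sketch of the rewrite (not LLVM code): scalar_to_vector of extract_vector_elt(V, 2) produces the same lanes as shuffling V with the mask <2, -1, -1, -1>, where -1 marks an undef lane.

    #include <cstdio>

    int main() {
      int V[4] = {10, 20, 30, 40};
      int Mask[4] = {2, -1, -1, -1};
      for (int i = 0; i != 4; ++i) {
        if (Mask[i] < 0)
          std::printf("u ");   // undef lane
        else
          std::printf("%d ", V[Mask[i]]);
      }
      std::printf("\n");       // prints: 30 u u u
      return 0;
    }
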
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N2 = N->getOperand(2);
@@ -11680,48 +12896,64 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
- SDLoc dl(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (!isa<ConstantSDNode>(Elt))
- return SDValue();
+ SDLoc dl(N);
- if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
- Indices.push_back(i);
- else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts+i);
- else
- return SDValue();
- }
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
- // Let's see if the target supports this vector_shuffle.
- EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
- return SDValue();
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (isAllOnesConstant(Elt))
+ Indices.push_back(i);
+ else if (isNullConstant(Elt))
+ Indices.push_back(NumElts+i);
+ else
+ return SDValue();
}
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, dl, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
return SDValue();
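A scalar model of the rewrite, using the mask from the doc comment above: lanes where the constant is all-ones come from V (index i), lanes where it is zero come from the zero vector (index NumElts + i). Illustrative C++:

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> andAsShuffle(const std::array<uint32_t, 4> &V) {
  const std::array<uint32_t, 4> Zero = {0, 0, 0, 0};
  const std::array<int, 4> Mask = {0, 4, 2, 4}; // <0,4,2,4> per the comment
  std::array<uint32_t, 4> Out;
  for (int i = 0; i != 4; ++i)
    Out[i] = (Mask[i] < 4) ? V[Mask[i]] : Zero[Mask[i] - 4];
  return Out; // == V & {0xffffffff, 0, 0xffffffff, 0}
}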
@@ -11734,8 +12966,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Shuffle = XformToShuffleWithZero(N);
- if (Shuffle.getNode()) return Shuffle;
+
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
@@ -11754,9 +12987,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// Can't fold divide by zero.
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
N->getOpcode() == ISD::FDIV) {
- if ((RHSOp.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
- (RHSOp.getOpcode() == ISD::ConstantFP &&
+ if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
break;
}
@@ -11813,38 +13044,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// Visit a unary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVUnaryOp only works on vectors!");
-
- SDValue N0 = N->getOperand(0);
-
- if (N0.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
-
- // Operand is a BUILD_VECTOR node, see if we can constant fold it.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() != N0.getNumOperands())
- return SDValue();
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
@@ -11881,6 +13080,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *NegZero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
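The scalar shape of the new fold: for X strictly below -0.0, IEEE-754 sqrt already yields NaN, so the compare and select add nothing (sqrt(-0.0) is -0.0, hence the strict less-than). Illustrative C++:

#include <cmath>

static double selectNaNOrSqrt(double X) {
  double NaN = std::nan("");
  // Both arms agree for every input, so this is just std::sqrt(X).
  return (X < -0.0) ? NaN : std::sqrt(X);
}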
@@ -11902,6 +13133,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
@@ -12003,20 +13237,17 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
- ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
- // fold select_cc true, x, y -> x
- if (SCCC && !SCCC->isNullValue())
- return N2;
- // fold select_cc false, x, y -> y
- if (SCCC && SCCC->isNullValue())
- return N3;
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !SCCC->isNullValue() ? N2 : N3;
+ }
// Check to see if we can simplify the select into an fabs node
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
@@ -12069,9 +13300,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Get the offsets to the 0 and 1 element of the array so that we can
// select between them.
- SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize);
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
@@ -12086,24 +13317,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
-
}
}
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
- if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- (N1C->isNullValue() || // (a < 0) ? b : 0
- (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ if (isNullConstant(N3) && CC == ISD::SETLT &&
+ (isNullConstant(N1) || // (a < 0) ? b : 0
+ (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
// single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits()-ShCtV-1;
- SDValue ShCt = DAG.getConstant(ShCtV,
+ ShCtV = XType.getSizeInBits() - ShCtV - 1;
+ SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
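The "gzip trick" in scalar form, plus the single-bit specialization this hunk touches (arithmetic right shift of signed values assumed, as C++20 guarantees):

#include <cstdint>

// (A < 0 ? B : 0): smear the sign bit into a mask, then AND.
static int32_t selectLtZero(int32_t A, int32_t B) {
  int32_t Mask = A >> 31; // -1 if A < 0, else 0
  return Mask & B;
}

// When B is a single bit (1 << K), one logical shift of the sign bit
// suffices, matching the "srl X, C2" form above with C2 == 31 - K.
static int32_t selectLtZeroBit(int32_t A, unsigned K) {
  uint32_t Bit = static_cast<uint32_t>(A) >> (31 - K); // sign bit lands at K
  return static_cast<int32_t>(Bit & (UINT32_C(1) << K));
}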
@@ -12119,7 +13349,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1,
+ SDLoc(N0),
getShiftAmountTy(N0.getValueType())));
AddToWorklist(Shift.getNode());
@@ -12139,23 +13370,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT &&
- N1C && N1C->isNullValue() &&
- N2C && N2C->isNullValue()) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
APInt AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(),
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is either
// all-ones, or zero.
SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth()-1,
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
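What the shl/sra pair computes, as a scalar sketch: move the tested bit into the sign position, smear it across the register, and the resulting all-ones-or-zero mask feeds the AND that completes the fold outside this hunk (C++20 shift semantics assumed; names illustrative):

#include <cstdint>

// ((X & (1u << BitPos)) == 0) ? 0 : Y
static int32_t bitTestSelect(int32_t X, unsigned BitPos, int32_t Y) {
  // Shift left by countLeadingZeros(1 << BitPos) == 31 - BitPos.
  int32_t Shl =
      static_cast<int32_t>(static_cast<uint32_t>(X) << (31 - BitPos));
  int32_t Mask = Shl >> 31; // all-ones if the bit was set, else zero
  return Mask & Y;
}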
@@ -12164,13 +13393,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents(N0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
- if (NotExtCompare && N2C->getAPIntValue() == 1)
+ if (NotExtCompare && N2C->isOne())
return SDValue();
// Get a SetCC of the condition
@@ -12198,13 +13427,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
AddToWorklist(SCC.getNode());
AddToWorklist(Temp.getNode());
- if (N2C->getAPIntValue() == 1)
+ if (N2C->isOne())
return Temp;
// shl setcc result by log2 n2c
return DAG.getNode(
ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
}
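With ZeroOrOneBooleanContent the setcc result is literally 0 or 1, so selecting a power of two is a shift of the flag itself; the isOne() checks above handle the degenerate shift-by-zero case. Scalar sketch:

#include <cstdint>

// (C ? 16 : 0) with a 0/1 boolean: shift the flag left by log2(16).
static uint32_t selectPow2(bool C) {
  return static_cast<uint32_t>(C) << 4;
}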
@@ -12212,7 +13441,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc is legal
// otherwise, go ahead with the folds.
- if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ if (0 && isNullConstant(N3) && isOneConstant(N2)) {
EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
@@ -12223,30 +13452,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
// fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
- if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ if (isNullConstant(N1) && CC == ISD::SETEQ &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::CTLZ, XType))) {
SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ SDLoc(Ctlz),
getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
- if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
- SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
- XType, DAG.getConstant(0, XType), N0);
- SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
+ if (isNullConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
+ XType, DAG.getConstant(0, DL, XType), N0);
+ SDValue NotN0 = DAG.getNOT(DL, N0, XType);
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
- if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
- SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
+ XType));
}
}
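Scalar versions of the three folds in this hunk, for 32 bits (std::countl_zero is the C++20 stand-in for CTLZ and, like it, is well defined at zero):

#include <bit>
#include <cstdint>

static uint32_t isZero(uint32_t X) {       // (seteq X, 0)
  return static_cast<uint32_t>(std::countl_zero(X)) >> 5; // clz(0) == 32
}

static uint32_t isPositive(int32_t X) {    // (setgt X, 0)
  uint32_t U = static_cast<uint32_t>(X);
  return ((0u - U) & ~U) >> 31; // sign bit of -X AND sign bit of ~X
}

static uint32_t isNonNegative(int32_t X) { // (setgt X, -1)
  return (static_cast<uint32_t>(X) >> 31) ^ 1u;
}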
@@ -12269,11 +13502,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
EVT XType = N0.getValueType();
if (SubC && SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
+ SDLoc DL(N0);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
+ SDValue Add = DAG.getNode(ISD::ADD, DL,
XType, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
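The sra/add/xor sequence being relocated here is the classic branchless absolute value. Scalar sketch (wraps on INT32_MIN exactly as the DAG form does; arithmetic right shift assumed):

#include <cstdint>

static int32_t absViaShift(int32_t X) {
  int32_t Sign = X >> 31;   // -1 if negative, else 0
  return (X + Sign) ^ Sign; // subtract 1 then complement when negative == -X
}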
@@ -12303,7 +13537,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
@@ -12323,7 +13557,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode *> Built;
@@ -12344,7 +13578,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
@@ -12374,7 +13608,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
// does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FPOne = DAG.getConstantFP(1.0, VT);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
AddToWorklist(Est.getNode());
@@ -12409,7 +13643,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
@@ -12445,8 +13679,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
- SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
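One step of each Newton-Raphson refinement driven by these loops, in scalar form (illustrative names; Est is the hardware estimate being improved):

// Reciprocal, converging to 1/A: E' = E + E*(1 - A*E); 1.0 (FPOne above)
// is the only constant required.
static float refineRecip(float A, float E) {
  return E + E * (1.0f - A * E);
}

// Reciprocal square root, converging to 1/sqrt(A): the two-constant form
// from BuildRsqrtNRTwoConst. The one-constant variant rewrites 0.5*A as
// (1.5*A - A) so that 1.5 is the only constant; both are algebraically
// E' = E * (1.5 - 0.5*A*E*E).
static float refineRsqrt(float A, float E) {
  return -0.5f * E * (-3.0f + A * E * E);
}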
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index c46539b..0351c33 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -44,6 +44,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -61,8 +62,8 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -424,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the second operand is a constant and handle it appropriately.
if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t Imm = CI->getZExtValue();
+ uint64_t Imm = CI->getSExtValue();
// Transform "sdiv exact X, 8" -> "sra X, 3".
if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
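Why the "exact" flag matters for the sra rewrite mentioned above: truncating signed division and arithmetic shift agree only when there is no remainder. Scalar sketch (C++20 arithmetic shift assumed):

#include <cassert>
#include <cstdint>

// sdiv exact X, 8 -> sra X, 3.
static int32_t sdivExactBy8(int32_t X) {
  assert(X % 8 == 0 && "'exact' promises no remainder");
  return X >> 3; // -24 >> 3 == -3 == -24 / 8, but -25 >> 3 == -4 != -25 / 8
}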
@@ -710,7 +711,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
CallingConv::ID CC = I->getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !I->getType()->isVoidTy();
- Value *Callee = I->getOperand(PatchPointOpers::TargetPos);
+ Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
// Get the real number of arguments participating in the call <numArgs>
assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
@@ -756,23 +757,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
- // Assume that the callee is a constant address or null pointer.
- // FIXME: handle function symbols in the future.
- uint64_t CalleeAddr;
- if (const auto *C = dyn_cast<IntToPtrInst>(Callee))
- CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
- else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
- if (C->getOpcode() == Instruction::IntToPtr)
- CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
- else
+ // Add the call target.
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else
llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
+ Ops.push_back(MachineOperand::CreateGA(GV, 0));
} else if (isa<ConstantPointerNull>(Callee))
- CalleeAddr = 0;
+ Ops.push_back(MachineOperand::CreateImm(0));
else
llvm_unreachable("Unsupported callee address.");
- Ops.push_back(MachineOperand::CreateImm(CalleeAddr));
-
// Adjust <numArgs> to account for any arguments that have been passed on
// the stack instead.
unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
@@ -801,7 +804,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return false;
// Push the register mask info.
- Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC)));
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
// Add scratch registers as implicit def and early clobber.
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
@@ -1077,12 +1081,17 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
+ case Intrinsic::eh_actions: {
+ unsigned ResultReg = getRegForValue(UndefValue::get(II->getType()));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
- DIVariable DIVar(DI->getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ assert(DI->getVariable() && "Missing variable");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
@@ -1122,6 +1131,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
false);
if (Op) {
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
if (Op->isReg()) {
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1146,6 +1157,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
@@ -1580,7 +1593,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()),
+ TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
@@ -1662,6 +1675,7 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
if (ResultReg)
return ResultReg;
unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ bool IsImmKill = true;
if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
// fast-isel, which would be very slow.
@@ -1670,9 +1684,15 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
if (!MaterialReg)
return 0;
+ // FIXME: If the materialized register here has no uses yet then this
+ // will be the first use and we should be able to mark it as killed.
+ // However, the local value area for materialising constant expressions
+ // grows down, not up, which means that any constant expressions we generate
+ // later which also use 'Imm' could be after this instruction and therefore
+ // after this kill.
+ IsImmKill = false;
}
- return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg,
- /*IsKill=*/true);
+ return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
}
unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 19aca6e..7b5b8c4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -31,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -78,12 +80,40 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
return ExtendKind;
}
+namespace {
+struct WinEHNumbering {
+ WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo),
+ CurrentBaseState(-1), NextState(0) {}
+
+ WinEHFuncInfo &FuncInfo;
+ int CurrentBaseState;
+ int NextState;
+
+ SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
+ SmallPtrSet<const Function *, 4> VisitedHandlers;
+
+ int currentEHNumber() const {
+ return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState();
+ }
+
+ void createUnwindMapEntry(int ToState, ActionHandler *AH);
+ void createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers);
+ void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
+ ImmutableCallSite CS);
+ void popUnmatchedActions(int FirstMismatch);
+ void calculateStateNumbers(const Function &F);
+ void findActionRootLPads(const Function &F);
+};
+}
+
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
Fn = &fn;
MF = &mf;
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
+ MachineModuleInfo &MMI = MF->getMMI();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -133,16 +163,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
ImmutableCallSite CS(I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(CS);
+ TLI->ParseConstraints(TRI, CS);
for (size_t I = 0, E = Ops.size(); I != E; ++I) {
TargetLowering::AsmOperandInfo &Op = Ops[I];
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -176,13 +207,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// during the initial isel pass through the IR so that it is done
// in a predictable order.
if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
- MachineModuleInfo &MMI = MF->getMMI();
- DIVariable DIVar(DI->getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (MMI.hasDebugInfo() &&
- DIVar &&
- !DI->getDebugLoc().isUnknown()) {
+ assert(DI->getVariable() && "Missing variable");
+ assert(DI->getDebugLoc() && "Missing location");
+ if (MMI.hasDebugInfo()) {
// Don't handle byval struct arguments or VLAs, for example.
// Non-byval arguments are handled here (they refer to the stack
// temporary alloca at this point).
@@ -249,9 +276,414 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Mark landing pad blocks.
- for (BB = Fn->begin(); BB != EB; ++BB)
- if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+ SmallVector<const LandingPadInst *, 4> LPads;
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+ if (BB->isLandingPad())
+ LPads.push_back(BB->getLandingPadInst());
+ }
+
+ // If this is an MSVC EH personality, we need to do a bit more work.
+ EHPersonality Personality = EHPersonality::Unknown;
+ if (!LPads.empty())
+ Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
+ if (!isMSVCEHPersonality(Personality))
+ return;
+
+ WinEHFuncInfo *EHInfo = nullptr;
+ if (Personality == EHPersonality::MSVC_Win64SEH) {
+ addSEHHandlersForLPads(LPads);
+ } else if (Personality == EHPersonality::MSVC_CXX) {
+ const Function *WinEHParentFn = MMI.getWinEHParent(&fn);
+ EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn);
+ if (EHInfo->LandingPadStateMap.empty()) {
+ WinEHNumbering Num(*EHInfo);
+ Num.findActionRootLPads(*WinEHParentFn);
+ // The VisitedHandlers list is used by both findActionRootLPads and
+ // calculateStateNumbers, but both functions need to visit all handlers.
+ Num.VisitedHandlers.clear();
+ Num.calculateStateNumbers(*WinEHParentFn);
+ // Pop everything on the handler stack.
+ // It may be necessary to call this more than once because a handler can
+ // be pushed on the stack as a result of clearing the stack.
+ while (!Num.HandlerStack.empty())
+ Num.processCallSite(None, ImmutableCallSite());
+ }
+
+ // Copy the state numbers to LandingPadInfo for the current function, which
+ // could be a handler or the parent.
+ for (const LandingPadInst *LP : LPads) {
+ MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
+ MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]);
+ }
+ }
+}
+
+void FunctionLoweringInfo::addSEHHandlersForLPads(
+ ArrayRef<const LandingPadInst *> LPads) {
+ MachineModuleInfo &MMI = MF->getMMI();
+
+ // Iterate over all landing pads with llvm.eh.actions calls.
+ for (const LandingPadInst *LP : LPads) {
+ const IntrinsicInst *ActionsCall =
+ dyn_cast<IntrinsicInst>(LP->getNextNode());
+ if (!ActionsCall ||
+ ActionsCall->getIntrinsicID() != Intrinsic::eh_actions)
+ continue;
+
+ // Parse the llvm.eh.actions call we found.
+ MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()];
+ SmallVector<std::unique_ptr<ActionHandler>, 4> Actions;
+ parseEHActions(ActionsCall, Actions);
+
+ // Iterate EH actions from most to least precedence, which means
+ // iterating in reverse.
+ for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) {
+ ActionHandler *Action = I->get();
+ if (auto *CH = dyn_cast<CatchHandler>(Action)) {
+ const auto *Filter =
+ dyn_cast<Function>(CH->getSelector()->stripPointerCasts());
+ assert((Filter || CH->getSelector()->isNullValue()) &&
+ "expected function or catch-all");
+ const auto *RecoverBA =
+ cast<BlockAddress>(CH->getHandlerBlockOrFunc());
+ MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA);
+ } else {
+ assert(isa<CleanupHandler>(Action));
+ const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc());
+ MMI.addSEHCleanupHandler(LPadMBB, Fini);
+ }
+ }
+ }
+}
+
+void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
+ WinEHUnwindMapEntry UME;
+ UME.ToState = ToState;
+ if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
+ UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
+ else
+ UME.Cleanup = nullptr;
+ FuncInfo.UnwindMap.push_back(UME);
+}
+
+void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
+ ArrayRef<CatchHandler *> Handlers) {
+ // See if we already have an entry for this set of handlers.
+ // This is using iterators rather than a range-based for loop because
+ // if we find the entry we're looking for we'll need the iterator to erase it.
+ int NumHandlers = Handlers.size();
+ auto I = FuncInfo.TryBlockMap.begin();
+ auto E = FuncInfo.TryBlockMap.end();
+ for ( ; I != E; ++I) {
+ auto &Entry = *I;
+ if (Entry.HandlerArray.size() != (size_t)NumHandlers)
+ continue;
+ int N;
+ for (N = 0; N < NumHandlers; ++N) {
+ if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc())
+ break; // breaks out of inner loop
+ }
+ // If all the handlers match, this is what we were looking for.
+ if (N == NumHandlers) {
+ break;
+ }
+ }
+
+ // If we found an existing entry for this set of handlers, extend the range
+ // but move the entry to the end of the map vector. The order of entries
+ // in the map is critical to the way that the runtime finds handlers.
+ // FIXME: Depending on what has happened with block ordering, this may
+ // incorrectly combine entries that should remain separate.
+ if (I != E) {
+ // Copy the existing entry.
+ WinEHTryBlockMapEntry Entry = *I;
+ Entry.TryLow = std::min(TryLow, Entry.TryLow);
+ Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
+ assert(Entry.TryLow <= Entry.TryHigh);
+ // Erase the old entry and add this one to the back.
+ FuncInfo.TryBlockMap.erase(I);
+ FuncInfo.TryBlockMap.push_back(Entry);
+ return;
+ }
+
+ // If we didn't find an entry, create a new one.
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (CatchHandler *CH : Handlers) {
+ WinEHHandlerType HT;
+ if (CH->getSelector()->isNullValue()) {
+ HT.Adjectives = 0x40;
+ HT.TypeDescriptor = nullptr;
+ } else {
+ auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
+ // Selectors are always pointers to GlobalVariables with 'struct' type.
+ // The struct has two fields, adjectives and a type descriptor.
+ auto *CS = cast<ConstantStruct>(GV->getInitializer());
+ HT.Adjectives =
+ cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
+ HT.TypeDescriptor =
+ cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
+ }
+ HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
+ HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
+ TBME.HandlerArray.push_back(HT);
+ }
+ FuncInfo.TryBlockMap.push_back(TBME);
+}
+
+static void print_name(const Value *V) {
+#ifndef NDEBUG
+ if (!V) {
+ DEBUG(dbgs() << "null");
+ return;
+ }
+
+ if (const auto *F = dyn_cast<Function>(V))
+ DEBUG(dbgs() << F->getName());
+ else
+ DEBUG(V->dump());
+#endif
+}
+
+void WinEHNumbering::processCallSite(
+ MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
+ ImmutableCallSite CS) {
+ DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
+ << ") for: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+
+ DEBUG(dbgs() << "HandlerStack: \n");
+ for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
+ DEBUG(dbgs() << " ");
+ print_name(HandlerStack[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Actions: \n");
+ for (int I = 0, E = Actions.size(); I < E; ++I) {
+ DEBUG(dbgs() << " ");
+ print_name(Actions[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << '\n');
+ }
+ int FirstMismatch = 0;
+ for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E;
+ ++FirstMismatch) {
+ if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
+ Actions[FirstMismatch]->getHandlerBlockOrFunc())
+ break;
+ }
+
+ // Remove unmatched actions from the stack and process their EH states.
+ popUnmatchedActions(FirstMismatch);
+
+ DEBUG(dbgs() << "Pushing actions for CallSite: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+
+ bool LastActionWasCatch = false;
+ const LandingPadInst *LastRootLPad = nullptr;
+ for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
+ // We can reuse eh states when pushing two catches for the same invoke.
+ bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
+ auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
+ // Various conditions can lead to a handler being popped from the
+ // stack and re-pushed later. That shouldn't create a new state.
+ // FIXME: Can code optimization lead to re-used handlers?
+ if (FuncInfo.HandlerEnclosedState.count(Handler)) {
+ // If we already assigned the state enclosed by this handler re-use it.
+ Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+ continue;
+ }
+ const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler];
+ if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
+ DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n");
+ Actions[I]->setEHState(currentEHNumber());
+ } else {
+ DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
+ print_name(Actions[I]->getHandlerBlockOrFunc());
+ DEBUG(dbgs() << ") with EH state " << NextState << "\n");
+ createUnwindMapEntry(currentEHNumber(), Actions[I].get());
+ DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
+ Actions[I]->setEHState(NextState);
+ NextState++;
+ }
+ HandlerStack.push_back(std::move(Actions[I]));
+ LastActionWasCatch = CurrActionIsCatch;
+ LastRootLPad = RootLPad;
+ }
+
+ // This is used to defer numbering states for a handler until after the
+ // last time it appears in an invoke action list.
+ if (CS.isInvoke()) {
+ for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
+ auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
+ if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction()))
+ continue;
+ FuncInfo.LastInvokeVisited[Handler] = true;
+ DEBUG(dbgs() << "Last invoke of ");
+ print_name(Handler);
+ DEBUG(dbgs() << " has been visited.\n");
+ }
+ }
+
+ DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
+ print_name(CS ? CS.getCalledValue() : nullptr);
+ DEBUG(dbgs() << '\n');
+}
+
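The FirstMismatch scan in processCallSite is a longest-common-prefix walk over the handler stack and the incoming action list. With plain values it is std::mismatch; sketch with ints standing in for the handler block-or-function pointers:

#include <algorithm>
#include <cstddef>
#include <vector>

static int firstMismatch(const std::vector<int> &Stack,
                         const std::vector<int> &Actions) {
  std::size_t N = std::min(Stack.size(), Actions.size());
  auto P = std::mismatch(Stack.begin(), Stack.begin() + N, Actions.begin());
  return static_cast<int>(P.first - Stack.begin());
}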
+void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
+ // Don't recurse while we are looping over the handler stack. Instead, defer
+ // the numbering of the catch handlers until we are done popping.
+ SmallVector<CatchHandler *, 4> PoppedCatches;
+ for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
+ std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
+ if (isa<CatchHandler>(Handler.get()))
+ PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
+ }
+
+ int TryHigh = NextState - 1;
+ int LastTryLowIdx = 0;
+ for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
+ CatchHandler *CH = PoppedCatches[I];
+ DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
+ if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
+ int TryLow = CH->getEHState();
+ auto Handlers =
+ makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
+ DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
+ for (size_t J = 0; J < Handlers.size(); ++J) {
+ DEBUG(dbgs() << ", ");
+ print_name(Handlers[J]->getHandlerBlockOrFunc());
+ }
+ DEBUG(dbgs() << ")\n");
+ createTryBlockMapEntry(TryLow, TryHigh, Handlers);
+ LastTryLowIdx = I + 1;
+ }
+ }
+
+ for (CatchHandler *CH : PoppedCatches) {
+ if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
+ if (FuncInfo.LastInvokeVisited[F]) {
+ DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
+ print_name(F);
+ DEBUG(dbgs() << '\n');
+ FuncInfo.HandlerBaseState[F] = NextState;
+ DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
+ << ", null)\n");
+ createUnwindMapEntry(currentEHNumber(), nullptr);
+ ++NextState;
+ calculateStateNumbers(*F);
+ }
+ else {
+ DEBUG(dbgs() << "Deferring handling of ");
+ print_name(F);
+ DEBUG(dbgs() << " until last invoke visited.\n");
+ }
+ }
+ delete CH;
+ }
+}
+
+void WinEHNumbering::calculateStateNumbers(const Function &F) {
+ auto I = VisitedHandlers.insert(&F);
+ if (!I.second)
+ return; // We've already visited this handler, don't renumber it.
+
+ int OldBaseState = CurrentBaseState;
+ if (FuncInfo.HandlerBaseState.count(&F)) {
+ CurrentBaseState = FuncInfo.HandlerBaseState[&F];
+ }
+
+ size_t SavedHandlerStackSize = HandlerStack.size();
+
+ DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->doesNotThrow())
+ continue;
+ processCallSite(None, CI);
+ }
+ const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ const LandingPadInst *LPI = II->getLandingPadInst();
+ auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
+ if (!ActionsCall)
+ continue;
+ assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
+ parseEHActions(ActionsCall, ActionList);
+ if (ActionList.empty())
+ continue;
+ processCallSite(ActionList, II);
+ ActionList.clear();
+ FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
+ DEBUG(dbgs() << "Assigning state " << currentEHNumber()
+ << " to landing pad at " << LPI->getParent()->getName()
+ << '\n');
+ }
+
+ // Pop any actions that were pushed on the stack for this function.
+ popUnmatchedActions(SavedHandlerStackSize);
+
+ DEBUG(dbgs() << "Assigning max state " << NextState - 1
+ << " to " << F.getName() << '\n');
+ FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
+
+ CurrentBaseState = OldBaseState;
+}
+
+// This function follows the same basic traversal as calculateStateNumbers
+// but it is necessary to identify the root landing pad associated
+// with each action before we start assigning state numbers.
+void WinEHNumbering::findActionRootLPads(const Function &F) {
+ auto I = VisitedHandlers.insert(&F);
+ if (!I.second)
+ return; // We've already visited this handler, don't revisit it.
+
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (const BasicBlock &BB : F) {
+ const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ const LandingPadInst *LPI = II->getLandingPadInst();
+ auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
+ if (!ActionsCall)
+ continue;
+
+ assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
+ parseEHActions(ActionsCall, ActionList);
+ if (ActionList.empty())
+ continue;
+ for (int I = 0, E = ActionList.size(); I < E; ++I) {
+ if (auto *Handler
+ = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
+ FuncInfo.LastInvoke[Handler] = II;
+ // Don't replace the root landing pad if we previously saw this
+ // handler in a different function.
+ if (FuncInfo.RootLPad.count(Handler) &&
+ FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
+ continue;
+ DEBUG(dbgs() << "Setting root lpad for ");
+ print_name(Handler);
+ DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
+ FuncInfo.RootLPad[Handler] = LPI;
+ }
+ }
+ // Walk the actions again and look for nested handlers. This has to
+ // happen after all of the actions have been processed in the current
+ // function.
+ for (int I = 0, E = ActionList.size(); I < E; ++I)
+ if (auto *Handler
+ = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
+ findActionRootLPads(*Handler);
+ ActionList.clear();
+ }
}
/// clear - Clear out all the function-specific state. This returns this
@@ -274,6 +706,7 @@ void FunctionLoweringInfo::clear() {
ByValArgFrameIndexMap.clear();
RegFixups.clear();
StatepointStackSlots.clear();
+ StatepointRelocatedValues.clear();
PreferredExtendType.clear();
}
@@ -460,8 +893,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
+ for (auto i : post_order(T)) {
if (i->isFloatingPointTy()) {
MMI->setUsesVAFloatArgument(true);
return;
@@ -471,60 +903,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
}
}
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
- MachineBasicBlock *MBB) {
- // Inform the MachineModuleInfo of the personality for this landing pad.
- const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
- assert(CE->getOpcode() == Instruction::BitCast &&
- isa<Function>(CE->getOperand(0)) &&
- "Personality should be a function");
- MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
-
- // Gather all the type infos for this landing pad and pass them along to
- // MachineModuleInfo.
- std::vector<const GlobalValue *> TyInfo;
- unsigned N = I.getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N) {
- TyInfo.reserve(N - FirstCatch);
- for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- if (!FilterLength) {
- // Cleanup.
- MMI->addCleanup(MBB);
- } else {
- // Filter.
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addFilterTypeInfo(MBB, TyInfo);
- TyInfo.clear();
- }
-
- N = i;
- }
- }
-
- if (N > 2) {
- TyInfo.reserve(N - 2);
- for (unsigned j = 2; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
- MMI->addCatchTypeInfo(MBB, TyInfo);
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a65f33e..7abc0c4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type);
}
}
@@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
@@ -951,6 +953,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
  // Remember the operand index of the group flags.
SmallVector<unsigned, 8> GroupIdx;
+ // Remember registers that are part of early-clobber defs.
+ SmallVector<unsigned, 8> ECRegs;
+
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -979,6 +984,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ ECRegs.push_back(Reg);
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
@@ -1004,6 +1010,19 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
+ // GCC inline assembly allows input operands to also be early-clobber
+ // output operands (so long as the operand is written only after it's
+ // used), but this does not match the semantics of our early-clobber flag.
+ // If an early-clobber operand register is also an input operand register,
+ // then remove the early-clobber flag.
+ for (unsigned Reg : ECRegs) {
+ if (MIB->readsRegister(Reg, TRI)) {
+ MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
+ assert(MO && "No def operand for clobbered register?");
+ MO->setIsEarlyClobber(false);
+ }
+ }
+
// Get the mdnode from the asm if it exists and add it to the instruction.
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e5473e3..7d98872 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -249,7 +249,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
if (!UseCP) {
assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
- return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(),
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl,
(VT == MVT::f64) ? MVT::i64 : MVT::i32);
}
@@ -331,7 +331,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Store = DAG.getTruncStore(Chain, dl,
Val, StackPtr, MachinePointerInfo(),
StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));
+ SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS));
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -385,7 +385,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount = DAG.getConstant(NumBits,
+ SDValue ShiftAmount = DAG.getConstant(NumBits, dl,
TLI.getShiftAmountTy(Val.getValueType()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
@@ -397,7 +397,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy(AS)));
+ DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS)));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
@@ -448,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy());
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
@@ -528,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(), LD->isInvariant(), Alignment,
LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
@@ -540,7 +540,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(), LD->isInvariant(), Alignment,
LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
@@ -549,7 +549,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
}
// aggregate the two parts
- SDValue ShiftAmount = DAG.getConstant(NumBits,
+ SDValue ShiftAmount = DAG.getConstant(NumBits, dl,
TLI.getShiftAmountTy(Hi.getValueType()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
@@ -596,7 +596,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3);
// Add the offset to the index.
unsigned EltSize = EltVT.getSizeInBits()/8;
- Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
+ DAG.getConstant(EltSize, dl, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
@@ -655,7 +656,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
TLI.isTypeLegal(MVT::i32)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
- MVT::i32);
+ SDLoc(CFP), MVT::i32);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment, AAInfo);
}
@@ -664,7 +665,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- zextOrTrunc(64), MVT::i64);
+ zextOrTrunc(64), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment, AAInfo);
}
@@ -673,15 +674,15 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
- const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
- SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
+ DAG.getConstant(4, dl, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
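A scalar model of this last OptimizeFloatStore case: when only 32-bit stores are legal, the f64 constant's bit pattern is split into two i32 halves, swapped for big-endian layouts (std::bit_cast is the C++20 stand-in for the APInt bitcast; names illustrative):

#include <bit>
#include <cstdint>
#include <utility>

static void storeF64AsTwoI32(double V, uint32_t *Dst, bool BigEndian) {
  uint64_t Bits = std::bit_cast<uint64_t>(V);
  uint32_t Lo = static_cast<uint32_t>(Bits);       // IntVal.trunc(32)
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // IntVal.lshr(32).trunc(32)
  if (BigEndian)
    std::swap(Lo, Hi);
  Dst[0] = Lo; // store at Ptr
  Dst[1] = Hi; // store at Ptr + 4
}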
@@ -731,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -792,9 +793,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(RoundWidth,
+ DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
@@ -805,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(ExtraWidth,
+ DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment,
@@ -814,7 +816,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
@@ -843,7 +846,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
}
case TargetLowering::Custom: {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode())
+ if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
return;
}
@@ -1004,7 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal, isInvariant,
@@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
+ DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1033,7 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
@@ -1047,7 +1052,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
+ DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1240,12 +1245,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
- case ISD::SADDO:
- case ISD::SSUBO:
- case ISD::UADDO:
- case ISD::USUBO:
- case ISD::SMULO:
- case ISD::UMULO:
case ISD::FPOWI:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
@@ -1437,18 +1436,32 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
unsigned EltSize =
Vec.getValueType().getVectorElementType().getSizeInBits()/8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, Idx.getValueType()));
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ SDValue NewLoad;
+
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(),
- false, false, false, 0);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ NewLoad = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(), false, false, false, 0);
+
+ // Replace the chain going out of the store with the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle, though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
}
SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
@@ -1476,7 +1489,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
Vec.getValueType().getVectorElementType().getSizeInBits()/8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, Idx.getValueType()));
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
@@ -1513,7 +1526,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
// If the destination vector element type is narrower than the source
@@ -1575,7 +1588,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
- DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
+ DAG.getConstant(ByteOffset, dl,
+ LoadPtr.getValueType()));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
false, false, false, 0);
@@ -1585,13 +1599,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
if (BitShift)
SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
- DAG.getConstant(BitShift,
+ DAG.getConstant(BitShift, dl,
TLI.getShiftAmountTy(SignBit.getValueType())));
}
}
// Now get the sign bit proper, by seeing whether the value is negative.
SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
- SignBit, DAG.getConstant(0, SignBit.getValueType()),
+ SignBit,
+ DAG.getConstant(0, dl, SignBit.getValueType()),
ISD::SETLT);
// Get the absolute value of the result.
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
@@ -1616,8 +1631,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
- SDLoc(Node));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
@@ -1628,12 +1642,11 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, VT));
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue(),
- SDLoc(Node));
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
Results.push_back(Tmp1);
Results.push_back(Tmp2);
@@ -2404,7 +2417,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
// word offset constant for Hi/Lo address computation
- SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType());
+ SDValue WordOff = DAG.getConstant(sizeof(int), dl,
+ StackSlot.getValueType());
// set up Hi and Lo (into buffer) address based on endian
SDValue Hi = StackSlot;
SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
@@ -2416,7 +2430,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0Mapped;
if (isSigned) {
// constant used to invert sign bit (signed to unsigned mapping)
- SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
} else {
Op0Mapped = Op0;
@@ -2426,7 +2440,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Op0Mapped, Lo, MachinePointerInfo(),
false, false, 0);
// initial hi portion of constructed double
- SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
// store the hi of the constructed double - biased exponent
SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
MachinePointerInfo(),
@@ -2438,7 +2452,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
- MVT::f64);
+ dl, MVT::f64);
// subtract the bias
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
// final result
@@ -2449,7 +2463,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Result = Sub;
} else if (DestVT.bitsLT(MVT::f64)) {
Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
} else if (DestVT.bitsGT(MVT::f64)) {
Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
}
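The constants in this hunk implement the classic exponent trick for i32-to-f64: store the integer into the low word of a buffer whose high word is 0x43300000 (so the buffer reads as 2^52 plus the integer), then subtract the matching bias. A hedged host-side sketch, assuming IEEE-754 doubles and a little-endian uint64_t view; names are illustrative:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Pack the (possibly sign-flipped) integer into the mantissa of 2^52,
    // then subtract 2^52 (unsigned) or 2^52 + 2^31 (signed remapping).
    static double u32ToF64ViaBias(uint32_t X, bool IsSigned) {
      uint32_t Mapped = IsSigned ? (X ^ 0x80000000u) : X;
      uint64_t Bits = (uint64_t{0x43300000u} << 32) | Mapped;
      double D, Bias;
      std::memcpy(&D, &Bits, sizeof(D));
      uint64_t BiasBits = IsSigned ? 0x4330000080000000ULL
                                   : 0x4330000000000000ULL;
      std::memcpy(&Bias, &BiasBits, sizeof(Bias));
      return D - Bias;
    }

    int main() {
      assert(u32ToF64ViaBias(42u, false) == 42.0);
      assert(u32ToF64ViaBias(static_cast<uint32_t>(-7), true) == -7.0);
    }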
@@ -2465,15 +2479,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
SDValue TwoP52 =
- DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
SDValue TwoP84PlusTwoP52 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl,
+ MVT::f64);
SDValue TwoP84 =
- DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+ DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64);
SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, dl, MVT::i64));
SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
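The 2^52/2^84 magic constants here extend the same trick to i64-to-f64: each 32-bit half is packed exactly into a double's mantissa, then recombined. The bitcast/subtract/add tail falls outside the shown diff context, so the sketch below reconstructs the full sequence under that assumption:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static double bitsToF64(uint64_t B) {
      double D;
      std::memcpy(&D, &B, sizeof(D));
      return D;
    }

    // LoFlt == 2^52 + Lo and HiFlt == 2^84 + 2^32*Hi are both exact;
    // subtracting (2^84 + 2^52) and adding leaves one rounding at the end.
    static double u64ToF64(uint64_t X) {
      uint64_t Lo = X & 0xFFFFFFFFu;
      uint64_t Hi = X >> 32;
      double LoFlt = bitsToF64(Lo | 0x4330000000000000ULL);
      double HiFlt = bitsToF64(Hi | 0x4530000000000000ULL);
      double HiSub = HiFlt - bitsToF64(0x4530000000100000ULL);
      return LoFlt + HiSub; // (2^52 + Lo) + (2^32*Hi - 2^52) == X
    }

    int main() {
      assert(u64ToF64(0) == 0.0);
      assert(u64ToF64(1ULL << 40) == 1099511627776.0);
    }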
@@ -2492,9 +2507,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
SDValue ShiftConst =
- DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+ DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
@@ -2506,47 +2521,52 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// thing most of the time. This would be a good candidate for a
// pseudo-op, or, even better, for whole-function isel.
SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
}
// Otherwise, implement the fully general conversion.
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64));
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
- DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ DAG.getConstant(UINT64_C(0x800), dl, MVT::i64));
SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
- SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2,
+ DAG.getConstant(UINT64_C(0), dl, MVT::i64),
+ ISD::SETNE);
SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
- SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
- ISD::SETUGE);
+ SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0,
+ DAG.getConstant(UINT64_C(0x0020000000000000), dl,
+ MVT::i64),
+ ISD::SETUGE);
SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
- DAG.getConstant(32, SHVT));
+ DAG.getConstant(32, dl, SHVT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
SDValue TwoP32 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl,
+ MVT::f64);
SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, Op0.getValueType()),
+ Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
ISD::SETLT);
- SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl),
+ Four = DAG.getIntPtrConstant(4, dl);
SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
SignSet, Four, Zero);
@@ -2681,34 +2701,41 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
- Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
case MVT::i32:
- Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(0xFF0000, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
case MVT::i64:
- Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
- Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
- Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
- Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
- Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
- Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
- Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
- Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
- Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
- Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
+ DAG.getConstant(255ULL<<48, dl, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
+ DAG.getConstant(255ULL<<40, dl, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
+ DAG.getConstant(255ULL<<32, dl, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL<<8 , dl, VT));
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
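For reference, the i32 path of this BSWAP expansion in plain C++, with the same Tmp1..Tmp4 dataflow as the nodes above:

    #include <cassert>
    #include <cstdint>

    // Four shifts, two masks, three ORs move each byte into place.
    static uint32_t bswap32(uint32_t Op) {
      uint32_t Tmp4 = Op << 24;
      uint32_t Tmp3 = (Op << 8) & 0xFF0000u;
      uint32_t Tmp2 = (Op >> 8) & 0xFF00u;
      uint32_t Tmp1 = Op >> 24;
      return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
    }

    int main() { assert(bswap32(0x12345678u) == 0x78563412u); }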
@@ -2735,34 +2762,38 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT);
- SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT);
- SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT);
- SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT);
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)),
+ dl, VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)),
+ dl, VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)),
+ dl, VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)),
+ dl, VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(1, ShVT)),
+ DAG.getConstant(1, dl, ShVT)),
Mask55));
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
Op = DAG.getNode(ISD::ADD, dl, VT,
DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(2, ShVT)),
+ DAG.getConstant(2, dl, ShVT)),
Mask33));
// v = (v + (v >> 4)) & 0x0F0F0F0F...
Op = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(4, ShVT))),
+ DAG.getConstant(4, dl, ShVT))),
Mask0F);
// v = (v * 0x01010101...) >> (Len - 8)
Op = DAG.getNode(ISD::SRL, dl, VT,
DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, ShVT));
+ DAG.getConstant(Len - 8, dl, ShVT));
return Op;
}
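This is the SWAR population count; the 32-bit scalar form of the exact node sequence above (Len == 32):

    #include <cassert>
    #include <cstdint>

    static uint32_t ctpop32(uint32_t V) {
      V = V - ((V >> 1) & 0x55555555u);                 // per-pair counts
      V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // per-nibble counts
      V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // per-byte counts
      return (V * 0x01010101u) >> 24;                   // sum bytes; Len-8 == 24
    }

    int main() { assert(ctpop32(0xF0F00FFFu) == 20); }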
@@ -2783,7 +2814,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
- SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
}
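The loop smears the leading one bit downward, so every bit below it ends up set and CTLZ(Op) becomes CTPOP(~Op). A scalar sketch, using the GCC/Clang popcount builtin as a stand-in for the CTPOP node:

    #include <cassert>
    #include <cstdint>

    static uint32_t ctlz32(uint32_t Op) {
      for (unsigned i = 0; (1u << i) <= 16; ++i) // len/2 == 16 for 32 bits
        Op |= Op >> (1u << i);
      return __builtin_popcount(~Op);
    }

    int main() { assert(ctlz32(1u << 20) == 11); }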
@@ -2802,12 +2833,12 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getConstant(1, VT)));
+ DAG.getConstant(1, dl, VT)));
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
return DAG.getNode(ISD::SUB, dl, VT,
- DAG.getConstant(VT.getSizeInBits(), VT),
+ DAG.getConstant(VT.getSizeInBits(), dl, VT),
DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
}
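Here ~Op & (Op - 1) keeps exactly the bits below the lowest set bit, so its population count is the trailing-zero count; Op == 0 yields all-ones, i.e. the full bit width, which matches the CTTZ convention. Scalar form:

    #include <cassert>
    #include <cstdint>

    static uint32_t cttz32(uint32_t Op) {
      uint32_t Tmp3 = ~Op & (Op - 1u);
      return __builtin_popcount(Tmp3);
    }

    int main() {
      assert(cttz32(0x000008F0u) == 4);
      assert(cttz32(0u) == 32);
    }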
@@ -2817,132 +2848,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
- }
- break;
- }
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
@@ -2967,10 +2874,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::FRAME_TO_ARGS_OFFSET:
- Results.push_back(DAG.getConstant(0, Node->getValueType(0)));
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
break;
case ISD::FLT_ROUNDS_:
- Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
+ Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
break;
case ISD::EH_RETURN:
case ISD::EH_LABEL:
@@ -2984,7 +2891,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::EH_SJLJ_SETJMP:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
- Results.push_back(DAG.getConstant(0, MVT::i32));
+ Results.push_back(DAG.getConstant(0, dl, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
case ISD::ATOMIC_FENCE: {
@@ -3005,7 +2912,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
- SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
+ SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
@@ -3081,10 +2988,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UNDEF: {
EVT VT = Node->getValueType(0);
if (VT.isInteger())
- Results.push_back(DAG.getConstant(0, VT));
+ Results.push_back(DAG.getConstant(0, dl, VT));
else {
assert(VT.isFloatingPoint() && "Unknown value type!");
- Results.push_back(DAG.getConstantFP(0, VT));
+ Results.push_back(DAG.getConstantFP(0, dl, VT));
}
break;
}
@@ -3123,7 +3030,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
Node->getOperand(0), ShiftCst);
Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
@@ -3161,7 +3068,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
- Tmp1 = DAG.getConstantFP(apf, VT);
+ Tmp1 = DAG.getConstantFP(apf, dl, VT);
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
Node->getOperand(0),
Tmp1, ISD::SETLT);
@@ -3170,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getNode(ISD::FSUB, dl, VT,
Node->getOperand(0), Tmp1));
False = DAG.getNode(ISD::XOR, dl, NVT, False,
- DAG.getConstant(x, NVT));
+ DAG.getConstant(x, dl, NVT));
Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
Results.push_back(Tmp1);
break;
@@ -3191,11 +3098,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1,
+ DAG.getConstant(Align - 1, dl,
VAList.getValueType()));
VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align,
+ DAG.getConstant(-(int64_t)Align, dl,
VAList.getValueType()));
}
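The VAList adjustment above is the standard align-up idiom: add (Align - 1), then mask with -(int64_t)Align. In scalar form:

    #include <cassert>
    #include <cstdint>

    // Align must be a power of two, as the assert in the hunk requires.
    static uint64_t alignUp(uint64_t VAList, uint64_t Align) {
      return (VAList + Align - 1) &
             static_cast<uint64_t>(-static_cast<int64_t>(Align));
    }

    int main() { assert(alignUp(13, 8) == 16 && alignUp(16, 8) == 16); }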
@@ -3203,6 +3110,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(TLI.getDataLayout()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
+ dl,
VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
@@ -3317,11 +3225,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
Op0,
- DAG.getConstant(Idx, TLI.getVectorIdxTy())));
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
Op1,
- DAG.getConstant(Idx - NumElems,
+ DAG.getConstant(Idx - NumElems, dl,
TLI.getVectorIdxTy())));
}
@@ -3336,7 +3244,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
// 1 -> Hi
Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
- DAG.getConstant(OpTy.getSizeInBits()/2,
+ DAG.getConstant(OpTy.getSizeInBits()/2, dl,
TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
} else {
@@ -3374,7 +3282,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::FNEG:
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
- Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
Node->getOperand(0));
Results.push_back(Tmp1);
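Worth noting why the expansion subtracts from -0.0 rather than +0.0: it preserves IEEE sign-of-zero, since (+0.0) - (+0.0) rounds to +0.0 under round-to-nearest and would get FNEG(+0.0) wrong. A tiny check:

    #include <cassert>
    #include <cmath>

    static double fnegExpanded(double X) { return -0.0 - X; }

    int main() {
      assert(fnegExpanded(4.0) == -4.0);
      assert(std::signbit(fnegExpanded(0.0))); // FNEG(+0.0) == -0.0
    }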
@@ -3383,7 +3291,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
- Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getConstantFP(0.0, dl, VT);
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
Tmp1, Tmp2, ISD::SETUGT);
Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
@@ -3391,6 +3299,26 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode Pred;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: Pred = ISD::SETGT; break;
+ case ISD::SMIN: Pred = ISD::SETLT; break;
+ case ISD::UMAX: Pred = ISD::SETUGT; break;
+ case ISD::UMIN: Pred = ISD::SETULT; break;
+ }
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred);
+ Results.push_back(Tmp1);
+ break;
+ }
+
case ISD::FMINNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
@@ -3519,6 +3447,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
@@ -3545,6 +3476,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FP_TO_FP16: {
+ if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
+ break;
+ }
+ }
+
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
@@ -3579,8 +3525,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
- Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
+ VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
}
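The XOR/ADD fallback is two's-complement negation: A - B == A + (~B + 1). Scalar form of the sequence above:

    #include <cassert>
    #include <cstdint>

    static uint32_t subViaXorAdd(uint32_t A, uint32_t B) {
      uint32_t NotB = B ^ 0xFFFFFFFFu; // XOR with the all-ones value
      return A + (NotB + 1u);          // ADD 1, then ADD the minuend
    }

    int main() { assert(subViaXorAdd(10u, 17u) == static_cast<uint32_t>(-7)); }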
@@ -3696,7 +3643,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
- SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(),
+ SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl,
TLI.getShiftAmountTy(HalfType));
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
@@ -3721,7 +3668,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT ResultType = Node->getValueType(1);
EVT OType = getSetCCResultType(Node->getValueType(0));
- SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
// LHSSign -> LHS >= 0
// RHSSign -> RHS >= 0
@@ -3787,9 +3734,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
} else {
// We can fall back to a libcall with an illegal type for the MUL if we
// have a libcall big enough.
@@ -3810,9 +3757,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// part.
unsigned LoSize = VT.getSizeInBits();
SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy()));
SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy()));
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not
@@ -3821,9 +3770,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
// Ret is a node with an illegal type. Because such things are not
// generally permitted during this phase of legalization, make sure the
// node has no more uses. The above EXTRACT_ELEMENT nodes should have been
@@ -3833,14 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (isSigned) {
- Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl,
TLI.getShiftAmountTy(BottomHalf.getValueType()));
Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
ISD::SETNE);
} else {
TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
- DAG.getConstant(0, VT), ISD::SETNE);
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
}
Results.push_back(BottomHalf);
Results.push_back(TopHalf);
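The widened multiply detects overflow by comparing the top half of the product against the sign-fill of the bottom half (the SRA by VT.getSizeInBits() - 1 above) in the signed case, or against zero in the unsigned case. A 32-bit scalar sketch of the signed path:

    #include <cassert>
    #include <cstdint>

    static bool smulo32(int32_t L, int32_t R, int32_t &Out) {
      int64_t Wide = int64_t{L} * int64_t{R};
      int32_t Bottom = static_cast<int32_t>(Wide);
      int32_t Top = static_cast<int32_t>(Wide >> 32);
      Out = Bottom;
      return Top != (Bottom >> 31); // arithmetic shift, as in the DAG
    }

    int main() {
      int32_t V;
      assert(!smulo32(46341, 46340, V)); // 2147441940 fits in i32
      assert(smulo32(46341, 46341, V));  // 2147488281 overflows
    }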
@@ -3851,7 +3800,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
- DAG.getConstant(PairTy.getSizeInBits()/2,
+ DAG.getConstant(PairTy.getSizeInBits()/2, dl,
TLI.getShiftAmountTy(PairTy)));
Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
break;
@@ -3866,7 +3815,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
} else {
Tmp1 = DAG.getSelectCC(dl, Tmp1,
- DAG.getConstant(0, Tmp1.getValueType()),
+ DAG.getConstant(0, dl, Tmp1.getValueType()),
Tmp2, Tmp3, ISD::SETNE);
}
Results.push_back(Tmp1);
@@ -3882,8 +3831,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
- Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(),
- Index, DAG.getConstant(EntrySize, Index.getValueType()));
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
Index, Table);
@@ -3917,10 +3866,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// We test only the i1 bit. Skip the AND if UNDEF.
Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
- DAG.getConstant(1, Tmp2.getValueType()));
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
DAG.getCondCode(ISD::SETNE), Tmp3,
- DAG.getConstant(0, Tmp3.getValueType()),
+ DAG.getConstant(0, dl, Tmp3.getValueType()),
Node->getOperand(2));
}
Results.push_back(Tmp1);
@@ -3962,7 +3911,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ DAG.getConstant(TrueValue, dl, VT),
+ DAG.getConstant(0, dl, VT),
Tmp3);
Results.push_back(Tmp1);
break;
@@ -4030,7 +3980,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
Tmp1, Tmp2, Tmp3, Tmp4, CC);
} else {
- Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType());
CC = DAG.getCondCode(ISD::SETNE);
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Tmp4, CC);
@@ -4061,7 +4011,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
Tmp4, Tmp2, Tmp3, Node->getOperand(4));
} else {
- Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
Tmp4 = DAG.getCondCode(ISD::SETNE);
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
Tmp2, Tmp3, Node->getOperand(4));
@@ -4085,12 +4035,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
VT.getScalarType(),
- Node->getOperand(0), DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
+ Node->getOperand(0),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
VT.getScalarType(),
- Node->getOperand(1), DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
+ Node->getOperand(1),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
@@ -4125,6 +4075,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::SETCC) {
OVT = Node->getOperand(0).getSimpleValueType();
}
+ if (Node->getOpcode() == ISD::BR_CC)
+ OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3;
@@ -4142,16 +4094,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
if (Node->getOpcode() == ISD::CTTZ) {
// FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT),
- Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
+ Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT),
ISD::SETEQ);
Tmp1 = DAG.getSelect(dl, NVT, Tmp2,
- DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
+ DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1);
} else if (Node->getOpcode() == ISD::CTLZ ||
Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
- OVT.getSizeInBits(), NVT));
+ OVT.getSizeInBits(), dl, NVT));
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
@@ -4160,7 +4112,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
- DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT)));
Results.push_back(Tmp1);
break;
}
@@ -4250,7 +4203,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
else
Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Tmp1);
break;
}
@@ -4280,27 +4233,74 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::BR_CC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(1))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
+ Node->getOperand(0), Node->getOperand(1),
+ Tmp1, Tmp2, Node->getOperand(4)));
+ break;
+ }
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FCOPYSIGN:
case ISD::FPOW: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp3, DAG.getIntPtrConstant(0)));
+ Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
}
- case ISD::FLOG2:
- case ISD::FEXP2:
+ case ISD::FMA: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
+ DAG.getIntPtrConstant(0, dl)));
+ break;
+ }
+ case ISD::FPOWI: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0, dl)));
+ break;
+ }
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FTRUNC:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
case ISD::FLOG:
- case ISD::FEXP: {
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
- Tmp2, DAG.getIntPtrConstant(0)));
+ Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b596715..37fdf44 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -131,7 +131,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) {
- return DAG.getConstant(N->getValueAPF().bitcastToAPInt(),
+ return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N),
TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
}
@@ -149,8 +149,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
// Mask = ~(1 << (Size-1))
APInt API = APInt::getAllOnesValue(Size);
- API.clearBit(Size-1);
- SDValue Mask = DAG.getConstant(API, NVT);
+ API.clearBit(Size - 1);
+ SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
}
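Soft-float fabs reduces to clearing the MSB of the bit pattern; an f32 sketch of the mask built above:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static float fabsSoft(float X) {
      uint32_t B;
      std::memcpy(&B, &X, sizeof(B));
      B &= ~(1u << 31); // Mask = getAllOnesValue(); clearBit(Size - 1)
      float F;
      std::memcpy(&F, &B, sizeof(F));
      return F;
    }

    int main() { assert(fabsSoft(-2.25f) == 2.25f); }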
@@ -218,8 +218,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
unsigned RSize = RVT.getSizeInBits();
// First get the sign bit of second operand.
- SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT),
- DAG.getConstant(RSize - 1,
+ SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
TLI.getShiftAmountTy(RVT)));
SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
@@ -227,21 +227,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
if (SizeDiff > 0) {
SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit,
- DAG.getConstant(SizeDiff,
+ DAG.getConstant(SizeDiff, dl,
TLI.getShiftAmountTy(SignBit.getValueType())));
SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
} else if (SizeDiff < 0) {
SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit,
- DAG.getConstant(-SizeDiff,
+ DAG.getConstant(-SizeDiff, dl,
TLI.getShiftAmountTy(SignBit.getValueType())));
}
// Clear the sign bit of the first operand.
- SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT),
- DAG.getConstant(LSize - 1,
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
TLI.getShiftAmountTy(LVT)));
- Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
// Or the value with the sign bit.
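The shift/mask dance reduces to sign-bit surgery on the integer bit patterns. An f32 sketch of the same-width case (SizeDiff == 0; helper name illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Take the sign bit of RHS, clear the sign bit of LHS, OR them together.
    static float copysignSoft(float LHS, float RHS) {
      uint32_t LB, RB;
      std::memcpy(&LB, &LHS, sizeof(LB));
      std::memcpy(&RB, &RHS, sizeof(RB));
      uint32_t SignBit = RB & (1u << 31);     // (1 << (RSize - 1)) & RHS
      uint32_t Res = (LB & ((1u << 31) - 1u)) // Mask clears the LHS sign
                     | SignBit;
      float F;
      std::memcpy(&F, &Res, sizeof(F));
      return F;
    }

    int main() { assert(copysignSoft(3.5f, -0.0f) == -3.5f); }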
@@ -386,8 +386,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
- SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
@@ -395,7 +396,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, 2, false, dl).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -755,7 +756,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -794,7 +795,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -837,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
if (ST->isTruncatingStore())
// Do an FP_ROUND followed by a non-truncating store.
Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
- Val, DAG.getIntPtrConstant(0)));
+ Val, DAG.getIntPtrConstant(0, dl)));
else
Val = GetSoftenedFloat(Val);
@@ -927,12 +928,13 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ SDLoc dl(N);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(integerPartWidth, C.getRawData()[1])),
- NVT);
+ dl, NVT);
Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(integerPartWidth, C.getRawData()[0])),
- NVT);
+ dl, NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
@@ -1136,9 +1138,10 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0));
+ SDLoc dl(N);
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(NVT.getSizeInBits(), 0)), NVT);
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
@@ -1262,7 +1265,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
// The low part is zero.
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(NVT.getSizeInBits(), 0)), NVT);
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
// Modified the chain - switch anything that used the old chain to use the
// new one.
@@ -1287,7 +1290,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
MVT::i32, Src);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(NVT.getSizeInBits(), 0)), NVT);
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1335,8 +1338,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
APInt(128, Parts)),
- MVT::ppcf128));
- Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT),
+ dl, MVT::ppcf128));
+ Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
Lo, Hi, ISD::SETLT);
GetPairElements(Lo, Lo, Hi);
}
@@ -1436,7 +1439,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -1479,7 +1482,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
N->getOperand(0), DAG.getValueType(MVT::f64));
Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
}
@@ -1499,7 +1502,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+ SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
@@ -1509,7 +1512,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
MVT::ppcf128,
N->getOperand(0),
Tmp)),
- DAG.getConstant(0x80000000, MVT::i32)),
+ DAG.getConstant(0x80000000, dl,
+ MVT::i32)),
DAG.getNode(ISD::FP_TO_SINT, dl,
MVT::i32, N->getOperand(0)),
ISD::SETGE);
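The select above builds f64-to-u32 from signed conversions: values at or above 2^31 are biased down into signed range first, then the sign bit is XOR-ed back in. Scalar form:

    #include <cassert>
    #include <cstdint>

    static uint32_t fpToU32(double X) {
      const double TwoE31 = 2147483648.0; // bits 0x41e0000000000000
      if (X >= TwoE31)
        return static_cast<uint32_t>(static_cast<int32_t>(X - TwoE31)) ^
               0x80000000u;
      return static_cast<uint32_t>(static_cast<int32_t>(X));
    }

    int main() {
      assert(fpToU32(7.0) == 7u);
      assert(fpToU32(3000000000.0) == 3000000000u);
    }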
@@ -1529,7 +1533,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -1579,3 +1583,420 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
ST->getMemoryVT(), ST->getMemOperand());
}
+
+//===----------------------------------------------------------------------===//
+// Float Operand Promotion
+//===----------------------------------------------------------------------===//
+//
+
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ return ISD::FP16_TO_FP;
+ } else if (RetVT == MVT::f16) {
+ return ISD::FP_TO_FP16;
+ }
+
+ report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ SDValue R = SDValue();
+
+ // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a promotion-requiring floating point result, need to be legalized
+ // to use the promoted float operand. Nodes that produce at least one
+ // promotion-requiring floating point result have their operands legalized as
+ // a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
+ case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+ }
+
+ if (R.getNode())
+ ReplaceValueWith(SDValue(N, 0), R);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op->getValueType(0);
+
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ assert (IVT == N->getValueType(0) && "Bitcast to type of different size");
+
+ SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+ EVT PromotedVT = Promoted->getValueType(0);
+
+ // Convert the promoted float value to the desired IVT.
+ return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT,
+ Promoted);
+}
+
+// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
+// PromoteFloatRes_FCOPYSIGN.
+SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
+ assert (OpNo == 1 && "Only Operand 1 must need promotion here");
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), Op1);
+}
+
+// Convert the promoted float value to the desired integer type
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ // Desired VT is same as promoted type. Use promoted float directly.
+ if (VT == Op->getValueType(0))
+ return Op;
+
+ // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison. The true- and false-
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ SDValue LHS = GetPromotedFloat(N->getOperand(0));
+ SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ LHS, RHS, N->getOperand(2), N->getOperand(3),
+ N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values using the original
+// condition code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+}
+
+// Lower the promoted float to an integer value of the same size and
+// construct a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc DL(N);
+
+ SDValue Promoted = GetPromotedFloat(Val);
+ EVT VT = ST->getOperand(1)->getValueType(0);
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
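+  // Convert the promoted float value back to the bit pattern of the original
+  // type, i.e. an integer of the same size, before storing.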
+  SDValue NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT),
+                               DL, IVT, Promoted);
+
+ return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
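+// PromoteFloatResult - Promote the result of a node to the larger floating
+// point type selected by the target, dispatching on the opcode, and record
+// the promoted value with SetPromotedFloat.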
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ // These opcodes cannot appear if promotion of FP16 is done in the backend
+ // instead of Clang
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ default:
+ llvm_unreachable("Do not know how to promote this operator's result!");
+
+ case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
+
+  case ISD::FMA:        // FMA is handled the same way as FMAD
+ case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
+
+ case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
+
+ case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
+ case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
+  }
+
+ if (R.getNode())
+ SetPromotedFloat(SDValue(N, ResNo), R);
+}
+
+// Bitcast from i16 to f16: convert the i16 to an f32 value instead.
+// At this point, it is not possible to determine whether the bitcast value is
+// eventually stored to memory or promoted to f32 or to a floating point type
+// at a higher precision. Some of these cases are handled by the FP_EXTEND and
+// STORE promotion handlers.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT,
+ N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL,
+ IVT);
+
+ // Convert the Constant to the desired FP type
+  // FIXME: We might be able to do the conversion at compile time and
+  // eliminate it from the object code.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C);
+}
+
+// If the Index operand is a constant, try to redirect the extract operation to
+// the correct legalized vector. If not, bit-convert the input vector to the
+// equivalent integer vector. Extract the element as a (bit-cast) integer
+// value and convert it to the promoted type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc DL(N);
+
+ // If the index is constant, try to extract the value from the legalized
+ // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ switch (getTypeAction(VecVT)) {
+ default: break;
+ case TargetLowering::TypeScalarizeVector: {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeWidenVector: {
+ Vec = GetWidenedVector(Vec);
+ SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeSplitVector: {
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ SDValue Res;
+ if (IdxVal < LoElts)
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx);
+ else
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, DL,
+ Idx.getValueType()));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+    }
+ }
+
+ // Bit-convert the input vector to the equivalent integer vector
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ EVT IVT = NewOp.getValueType().getVectorElementType();
+
+  // Extract the element as a (bit-cast) integer value
+ SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT,
+ NewOp, N->getOperand(1));
+
+ // Convert the element to the desired FP type
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal);
+}
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result
+// needs promotion, so does the argument X. Note that Y, if needed, will be
+// handled during operand promotion.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Unary operation where the result and the operand have PromoteFloat type
+// action. Construct a new SDNode with the promoted float value of the old
+// operand.
+SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op);
+}
+
+// Binary operations where the result and both operands have PromoteFloat type
+// action. Construct a new SDNode with the promoted float values of the old
+// operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
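+// Promote all three operands of the fused multiply-add and rebuild the node
+// at the promoted type.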
+SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ SDValue Op2 = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2);
+}
+
+// Promote the Float (first) operand and retain the Integer (second) operand
+SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = Op->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ // Round promoted float to desired precision
+ SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op);
+ // Promote it back to the legal output type
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
+}
+
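+// Promote a floating point load by loading an integer value of the same bit
+// width and converting the loaded bit pattern to the promoted float type.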
+SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+
+ // Load the value as an integer value with the same number of bits
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+                             IVT, SDLoc(N), L->getChain(), L->getBasePtr(),
+                             L->getOffset(), L->getPointerInfo(), IVT,
+                             L->isVolatile(), L->isNonTemporal(), false,
+                             L->getAlignment(), L->getAAInfo());
+  // Legalize the chain result by replacing uses of the old value chain with
+  // the new one.
+ ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+ // Convert the integer value to the desired FP type
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
+}
+
+// Construct a new SELECT node with the promoted true- and false- values.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+ N->getOperand(0), TrueVal, FalseVal);
+}
+
+// Construct a new SELECT_CC node with the promoted true- and false- values.
+// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
+ N->getOperand(4));
+}
+
+// Construct an SDNode that transforms the SINT or UINT operand to the promoted
+// float type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a4e44cc..eeaebf78 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -71,6 +71,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
@@ -251,6 +255,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypePromoteFloat: {
+ // Convert the promoted float by hand.
+ if (NOutVT.bitsEq(NInVT)) {
+ SDValue PromotedOp = GetPromotedFloat(InOp);
+ SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
+ return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc,
+ DAG.getValueType(OutVT));
+ }
+ break;
+ }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
break;
@@ -297,7 +311,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
+ DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -345,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
ISD::SUB, dl, NVT, Op,
- DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(),
+ DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
NVT));
}
@@ -366,7 +380,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
}
return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
@@ -723,9 +737,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
- DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits(),
+ DL));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
- DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ DAG.getConstant(0, DL, Hi.getValueType()),
+ ISD::SETNE);
} else {
// Signed overflow occurred if the high part does not sign extend the low.
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
@@ -784,7 +800,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
// Shift it to the right position and "or" it in.
Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
- DAG.getConstant(i * RegVT.getSizeInBits(),
+                       DAG.getConstant(i * RegVT.getSizeInBits(), dl,
TLI.getPointerTy()));
Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
}
@@ -852,6 +868,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -977,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
SDLoc dl(N);
Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
- DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+ DAG.getConstant(OVT.getSizeInBits(), dl,
+ TLI.getPointerTy()));
return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
}
@@ -1116,7 +1134,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
- assert(OpNo == 2 && "Only know how to promote the mask!");
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
@@ -1127,7 +1144,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ if (!TLI.isTypeLegal(MaskVT))
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
else {
@@ -1147,7 +1165,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
@@ -1323,92 +1341,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- }
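+  // Map the atomic opcode and memory type to the corresponding __sync_*
+  // libcall.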
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
@@ -1417,12 +1351,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- assert(Amt && "Expected zero shifts to be already optimized away.");
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Though Amt shouldn't usually be 0, it's possible, e.g. when legalization
+  // has split a vector shift, like this: <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
@@ -1430,13 +1371,13 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
if (N->getOpcode() == ISD::SHL) {
if (Amt > VTBits) {
- Lo = Hi = DAG.getConstant(0, NVT);
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt > NVTBits) {
- Lo = DAG.getConstant(0, NVT);
+ Lo = DAG.getConstant(0, DL, NVT);
Hi = DAG.getNode(ISD::SHL, DL,
- NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
+ NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
} else if (Amt == NVTBits) {
- Lo = DAG.getConstant(0, NVT);
+ Lo = DAG.getConstant(0, DL, NVT);
Hi = InL;
} else if (Amt == 1 &&
TLI.isOperationLegalOrCustom(ISD::ADDC,
@@ -1448,34 +1389,34 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps);
} else {
- Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
Hi = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(Amt, ShTy)),
+ DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(NVTBits-Amt, ShTy)));
+ DAG.getConstant(NVTBits - Amt, DL, ShTy)));
}
return;
}
if (N->getOpcode() == ISD::SRL) {
if (Amt > VTBits) {
- Lo = DAG.getConstant(0, NVT);
- Hi = DAG.getConstant(0, NVT);
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt > NVTBits) {
Lo = DAG.getNode(ISD::SRL, DL,
- NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
- Hi = DAG.getConstant(0, NVT);
+ NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
- Hi = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, DL, NVT);
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(Amt, ShTy)),
+ DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ DAG.getConstant(NVTBits - Amt, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
return;
}
@@ -1483,23 +1424,23 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt > VTBits) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt > NVTBits) {
Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(Amt-NVTBits, ShTy));
+                      DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getConstant(Amt, ShTy)),
+ DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
+ DAG.getConstant(NVTBits - Amt, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
}
@@ -1535,21 +1476,21 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (KnownOne.intersects(HighBitMask)) {
// Mask out the high bit, which we know is set.
Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
- DAG.getConstant(~HighBitMask, ShTy));
+ DAG.getConstant(~HighBitMask, dl, ShTy));
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
- Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
return true;
case ISD::SRL:
- Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
case ISD::SRA:
Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
}
@@ -1562,7 +1503,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
// shift if x is zero. We can use XOR here because x is known to be smaller
// than 32.
SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
unsigned Op1, Op2;
switch (N->getOpcode()) {
@@ -1578,7 +1519,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Use a little trick to get the bits that move from Lo to Hi. First
// shift by one bit.
- SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
// Then compute the remaining shift with amount-1.
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
@@ -1609,11 +1550,14 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
- SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, NVBitsNode, ISD::SETULT);
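+  // A shift amount of zero would make the "short" path shift by NVTBits
+  // (via AmtLack), which is undefined on some targets; test for it so the
+  // unshifted input can be selected instead.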
+ SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, DAG.getConstant(0, dl, ShTy),
+ ISD::SETEQ);
SDValue LoS, HiS, LoL, HiL;
switch (N->getOpcode()) {
@@ -1623,16 +1567,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
HiS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
- // FIXME: If Amt is zero, the following shift generates an undefined result
- // on some architectures.
DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
// Long: ShAmt >= NVTBits
- LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
- Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ Hi = DAG.getSelect(dl, NVT, isZero, InH,
+ DAG.getSelect(dl, NVT, isShort, HiS, HiL));
return true;
case ISD::SRL:
// Short: ShAmt < NVTBits
@@ -1644,10 +1587,11 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
- HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
- Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRA:
@@ -1655,16 +1599,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
- // FIXME: If Amt is zero, the following shift generates an undefined result
- // on some architectures.
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
- DAG.getConstant(NVTBits-1, ShTy));
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
- Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
}
@@ -1706,18 +1649,50 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
return;
}
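+  // If the target supports the carry-producing UADDO/USUBO nodes, use them:
+  // the overflow bit of the low-half operation is the carry (or borrow) into
+  // the high half, adjusted below for the target's boolean representation.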
+ bool hasOVF =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::UADDO : ISD::USUBO,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (hasOVF) {
+ SDVTList VTList = DAG.getVTList(NVT, NVT);
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+ int RevOpc;
+ if (N->getOpcode() == ISD::ADD) {
+ RevOpc = ISD::SUB;
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ } else {
+ RevOpc = ISD::ADD;
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ }
+ SDValue OVF = Lo.getValue(1);
+
+ switch (BoolType) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
+ // Fallthrough
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
+ }
+ return;
+ }
+
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
ISD::SETULT);
SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
- DAG.getConstant(1, NVT), Carry1);
+ DAG.getConstant(1, dl, NVT), Carry1);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
@@ -1726,8 +1701,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
@@ -1824,7 +1799,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+ DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy()));
}
}
@@ -1844,7 +1819,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
} else {
Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part must be zero, make it explicit.
- Hi = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, dl, NVT);
}
}
@@ -1864,8 +1839,9 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
const APInt &Cst = Constant->getAPIntValue();
bool IsTarget = Constant->isTargetOpcode();
bool IsOpaque = Constant->isOpaque();
- Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque);
- Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget,
+ SDLoc dl(N);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
IsOpaque);
}
@@ -1877,15 +1853,16 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
EVT NVT = Lo.getValueType();
SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
- DAG.getConstant(0, NVT), ISD::SETNE);
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
- DAG.getConstant(NVT.getSizeInBits(), NVT)));
- Hi = DAG.getConstant(0, NVT);
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
@@ -1896,7 +1873,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
- Hi = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
@@ -1907,22 +1884,27 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
EVT NVT = Lo.getValueType();
SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
- DAG.getConstant(0, NVT), ISD::SETNE);
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
- DAG.getConstant(NVT.getSizeInBits(), NVT)));
- Hi = DAG.getConstant(0, NVT);
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+
SDValue Op = N->getOperand(0);
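+  // If the operand required float promotion, use the promoted value so a
+  // libcall is selected for a source type the runtime supports.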
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
@@ -1934,7 +1916,11 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+
SDValue Op = N->getOperand(0);
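+  // As for FP_TO_SINT, use the promoted operand so the libcall matches a
+  // supported source type.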
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
@@ -1980,10 +1966,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// lo part.
unsigned LoSize = Lo.getValueType().getSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy()));
} else if (ExtType == ISD::ZEXTLOAD) {
// The high part is just a zero.
- Hi = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, dl, NVT);
} else {
assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
// The high part is undefined.
@@ -2002,7 +1988,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal, isInvariant,
@@ -2029,7 +2015,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -2046,12 +2032,12 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Transfer low bits from the bottom of Hi to the top of Lo.
Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(ExcessBits,
+ DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy())));
// Move high bits to the right position in Hi.
Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl,
NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy()));
}
}
@@ -2127,7 +2113,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
//
EVT OType = Node->getValueType(1);
- SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
@@ -2148,6 +2134,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
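+  // If the target can lower SDIVREM directly, expand through it and take the
+  // quotient (result value 0).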
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
@@ -2160,7 +2153,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
@@ -2276,7 +2268,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
// The high part is obtained by SRA'ing all but one of the bits of low part.
unsigned LoSize = NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy()));
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
@@ -2310,7 +2302,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// The high part gets the sign extension from the lo-part. This handles
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
- DAG.getConstant(Hi.getValueType().getSizeInBits()-1,
+ DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl,
TLI.getPointerTy()));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
@@ -2327,6 +2319,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
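+  // If the target can lower SDIVREM directly, expand through it and take the
+  // remainder (result value 1).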
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
@@ -2339,7 +2338,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
@@ -2350,7 +2348,8 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
Hi = DAG.getNode(ISD::SRL, dl,
N->getOperand(0).getValueType(), N->getOperand(0),
- DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy()));
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ TLI.getPointerTy()));
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
@@ -2392,14 +2391,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
SDValue NotZero = DAG.getSelect(dl, VT, isZero,
- DAG.getConstant(1, VT), RHS);
+ DAG.getConstant(1, dl, VT), RHS);
SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
ISD::SETNE);
Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
- DAG.getConstant(0, N->getValueType(1)),
+ DAG.getConstant(0, dl, N->getValueType(1)),
Overflow);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
@@ -2422,7 +2421,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
- DAG.getConstant(0, PtrVT), Temp,
+ DAG.getConstant(0, dl, PtrVT), Temp,
MachinePointerInfo(), false, false, 0);
TargetLowering::ArgListTy Args;
@@ -2457,7 +2456,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
- DAG.getConstant(0, PtrVT),
+ DAG.getConstant(0, dl, PtrVT),
ISD::SETNE);
// Use the overflow from the libcall everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
@@ -2467,6 +2466,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
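+  // Prefer a custom UDIVREM lowering over a libcall; the quotient is result
+  // value 0.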
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
@@ -2479,7 +2485,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
@@ -2487,6 +2492,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
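+  // Prefer a custom UDIVREM lowering over a libcall; the remainder is result
+  // value 1.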
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
@@ -2499,7 +2511,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
@@ -2511,7 +2522,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
if (Op.getValueType().bitsLE(NVT)) {
// The low part is zero extension of the input (degenerates to a copy).
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
- Hi = DAG.getConstant(0, NVT); // The high part is just a zero.
+ Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
@@ -2536,7 +2547,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDLoc dl(N);
EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
- SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
@@ -2637,7 +2648,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
return;
}
@@ -2726,7 +2737,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -2744,7 +2755,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
- NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -2839,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, isVolatile, isNonTemporal,
@@ -2861,11 +2872,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer high bits from the top of Lo to the bottom of Hi.
Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy()));
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits,
+ DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy())));
}
@@ -2875,7 +2886,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -2931,7 +2942,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
GetExpandedInteger(Op, Lo, Hi);
SDValue SignSet = DAG.getSetCC(dl,
getSetCCResultType(Hi.getValueType()),
- Hi, DAG.getConstant(0, Hi.getValueType()),
+ Hi,
+ DAG.getConstant(0, dl, Hi.getValueType()),
ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
@@ -2940,8 +2952,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
TLI.getPointerTy());
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
- SDValue Zero = DAG.getIntPtrConstant(0);
- SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl);
+ SDValue Four = DAG.getIntPtrConstant(4, dl);
if (TLI.isBigEndian()) std::swap(Zero, Four);
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
Zero, Four);
@@ -2999,7 +3011,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
// Extract the element from the original vector.
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
- BaseIdx, DAG.getConstant(i, BaseIdx.getValueType()));
+ BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
InVT.getVectorElementType(), N->getOperand(0), Index);
@@ -3017,17 +3029,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- NewMask.push_back(SV->getMaskElt(i));
- }
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
- return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
@@ -3097,7 +3105,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDValue Op = N->getOperand(i);
for (unsigned j = 0; j < NumElem; ++j) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- InElemTy, Op, DAG.getConstant(j,
+ InElemTy, Op, DAG.getConstant(j, dl,
TLI.getVectorIdxTy()));
Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
}
@@ -3135,6 +3143,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
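+// Promote the vector operand, extract the subvector at the promoted element
+// type, and truncate the elements back to the original type.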
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ MVT InVT = V0.getValueType().getSimpleVT();
+ MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+  SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0,
+                            N->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
unsigned NumElems = N->getNumOperands();
@@ -3153,7 +3171,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
- Incoming, DAG.getConstant(i, TLI.getVectorIdxTy()));
+ Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index ebf6b28..9c29769 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -259,6 +259,10 @@ bool DAGTypeLegalizer::run() {
WidenVectorResult(N, i);
Changed = true;
goto NodeDone;
+ case TargetLowering::TypePromoteFloat:
+ PromoteFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
}
}
@@ -308,6 +312,10 @@ ScanOperands:
NeedsReanalyzing = WidenVectorOperand(N, i);
Changed = true;
break;
+ case TargetLowering::TypePromoteFloat:
+ NeedsReanalyzing = PromoteFloatOperand(N, i);
+ Changed = true;
+ break;
}
break;
}
@@ -753,6 +761,17 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
OpEntry = Result;
}
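+// SetPromotedFloat - Remember that Op's promoted form is Result, analyzing
+// the new value first in case it in turn needs legalization.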
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedFloats[Op];
+ assert(!OpEntry.getNode() && "Node is already promoted!");
+ OpEntry = Result;
+}
+
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
@@ -978,9 +997,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
SDLoc dl(Pair);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
}
SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
@@ -993,7 +1012,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
- DAG.getConstant(EltSize, Index.getValueType()));
+ DAG.getConstant(EltSize, dl, Index.getValueType()));
return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
}
@@ -1010,7 +1029,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
- DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+ DAG.getConstant(LVT.getSizeInBits(), dlHi,
+ TLI.getPointerTy()));
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
@@ -1096,7 +1116,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
- DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy()));
+ DAG.getConstant(LoVT.getSizeInBits(), dl,
+ TLI.getPointerTy()));
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cef3fc9..2f27789 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -93,6 +93,11 @@ private:
/// the same size, this map indicates the converted value to use.
SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+ /// PromotedFloats - For floating point nodes that have a smaller precision
+ /// than the smallest supported precision, this map indicates what promoted
+ /// value to use.
+ SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
+
/// ExpandedFloats - For float nodes that need to be expanded this map
/// indicates which operands are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
@@ -273,6 +278,7 @@ private:
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
@@ -499,6 +505,44 @@ private:
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, SDLoc dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetPromotedFloat(SDValue Op) {
+ SDValue &PromotedOp = PromotedFloats[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedFloat(SDValue Op, SDValue Result);
+
+ void PromoteFloatResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatRes_BITCAST(SDNode *N);
+ SDValue PromoteFloatRes_BinOp(SDNode *N);
+ SDValue PromoteFloatRes_ConstantFP(SDNode *N);
+ SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue PromoteFloatRes_FMAD(SDNode *N);
+ SDValue PromoteFloatRes_FPOWI(SDNode *N);
+ SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+ SDValue PromoteFloatRes_LOAD(SDNode *N);
+ SDValue PromoteFloatRes_SELECT(SDNode *N);
+ SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
+ SDValue PromoteFloatRes_UnaryOp(SDNode *N);
+ SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+
+ bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
@@ -582,6 +626,7 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -593,14 +638,16 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
- SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 38829b6..330c31c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -50,6 +50,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
+ case TargetLowering::TypePromoteFloat:
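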
+ llvm_unreachable("Bitcast of a promotion-needing float should never need"
+ "expansion");
case TargetLowering::TypeSoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
@@ -117,7 +120,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SmallVector<SDValue, 8> Vals;
for (unsigned i = 0; i < NumElems; ++i)
Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
- CastInOp, DAG.getConstant(i,
+ CastInOp, DAG.getConstant(i, dl,
TLI.getVectorIdxTy())));
// Build Lo, Hi pair by pairing extracted elements if needed.
@@ -170,7 +173,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize,
+ DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
// Load the second half from the stack slot.
@@ -235,7 +238,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- DAG.getConstant(1, Idx.getValueType()));
+ DAG.getConstant(1, dl, Idx.getValueType()));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
if (TLI.isBigEndian())
@@ -267,7 +270,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal, isInvariant,
@@ -436,7 +439,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
Idx = DAG.getNode(ISD::ADD, dl,
Idx.getValueType(), Idx,
- DAG.getConstant(1, Idx.getValueType()));
+ DAG.getConstant(1, dl, Idx.getValueType()));
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
// Convert the new vector to the old vector type.
@@ -485,7 +488,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
isVolatile, isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3a8c276..c06227b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -196,6 +196,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
+ bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -207,6 +208,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ if (Lowered == Result)
+ return TranslateLegalizeResults(Op, Lowered);
Changed = true;
if (Lowered->getNumValues() != Op->getNumValues()) {
// This expanded to something other than the load. Assume the
@@ -232,16 +235,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
- case TargetLowering::Custom:
- Changed = true;
- return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Custom: {
+ SDValue Lowered = TLI.LowerOperation(Result, DAG);
+ Changed = Lowered != Result;
+ return TranslateLegalizeResults(Op, Lowered);
+ }
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
- }
+ } else if (Op.getOpcode() == ISD::MSCATTER)
+ HasVectorValue = true;
- bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
@@ -317,6 +322,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -326,6 +335,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UINT_TO_FP:
QueryType = Node->getOperand(0).getValueType();
break;
+ case ISD::MSCATTER:
+ QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+ break;
}
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
@@ -375,8 +387,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
// There are currently two cases of vector promotion:
// 1) Bitcasting a vector of integers to a different type to a vector of the
- // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64.
- // 2) Extending a vector of floats to a vector of the same number oflarger
+ // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number of larger
// floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
@@ -403,7 +415,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
else
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
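
For the float case (case 2 in the comment above), legalization extends the operands to the promoted type, performs the operation there, and rounds back; the IntPtrConstant(0) handed to FP_ROUND is the "value may change when rounding" flag. One lane, modelled with scalars as an illustration rather than the DAG API:

    // One lane of case 2: operate at the promoted width, round back.
    float promotedFAddLane(float AExt, float BExt) {
      // AExt/BExt stand for f16 lane values already FP_EXTENDed to f32.
      float Wide = AExt + BExt; // FADD performed at the promoted type (v4f32)
      return Wide;              // FP_ROUND f32 -> f16 happens on the way out
    }
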
@@ -512,7 +524,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment(),
+ LD->isInvariant(),
+ MinAlign(LD->getAlignment(), Offset),
LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
@@ -524,13 +537,15 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(LoadBytes, BasePTR.getValueType()));
+ DAG.getConstant(LoadBytes, dl,
+ BasePTR.getValueType()));
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
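
The MinAlign changes in this hunk matter for the correctness of the reported alignment: a scalarized load at byte Offset from a base aligned to A can only promise the greatest power of two dividing both. For reference, llvm::MinAlign in Support/MathExtras.h is essentially the lowest set bit of A|B; a self-contained sketch (reimplemented here only to show the boundary cases):

    #include <cassert>
    #include <cstdint>

    // Greatest power of two dividing both operands: the lowest set bit of A|B.
    uint64_t minAlign(uint64_t A, uint64_t B) { return (A | B) & (1 + ~(A | B)); }

    int main() {
      // A 16-byte aligned base, accessed at byte offset 4: only 4-byte
      // alignment can be promised for the piece, which the patch now reports.
      assert(minAlign(16, 4) == 4);
      assert(minAlign(16, 0) == 16); // offset 0 keeps the full alignment
    }
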
@@ -538,7 +553,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
// Extract bits, pack and extend/trunc them into destination type.
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
- SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
unsigned BitOffset = 0;
unsigned WideIdx = 0;
@@ -548,7 +563,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue Lo, Hi, ShAmt;
if (BitOffset < WideBits) {
- ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+ ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT));
Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
}
@@ -558,7 +573,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
WideIdx++;
BitOffset -= WideBits;
if (BitOffset > 0) {
- ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
+ ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl,
TLI.getShiftAmountTy(WideVT));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
@@ -577,7 +592,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
break;
case ISD::SEXTLOAD:
- ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+ ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl,
TLI.getShiftAmountTy(WideVT));
Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
@@ -595,10 +610,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment(), LD->getAAInfo());
+ MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, BasePTR.getValueType()));
+ DAG.getConstant(Stride, dl, BasePTR.getValueType()));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -649,15 +664,16 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
+ AAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, BasePTR.getValueType()));
+ DAG.getConstant(Stride, dl, BasePTR.getValueType()));
Stores.push_back(Store);
}
@@ -727,8 +743,9 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
EVT BitTy = MaskTy.getScalarType();
Mask = DAG.getSelect(DL, BitTy, Mask,
- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
- DAG.getConstant(0, BitTy));
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
+ BitTy),
+ DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all-one or all zero.
SmallVector<SDValue, 8> Ops(NumElem, Mask);
@@ -741,7 +758,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
@@ -763,7 +780,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
unsigned BW = VT.getScalarType().getSizeInBits();
unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
- SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
Op = Op.getOperand(0);
Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
@@ -810,7 +827,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// without full scalarization than the sign extension does.
unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
- SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
return DAG.getNode(ISD::SRA, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
ShiftAmount);
@@ -829,7 +846,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
// Build up a zero vector to blend into this one.
EVT SrcScalarVT = SrcVT.getScalarType();
- SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT);
+ SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT);
SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
@@ -910,7 +927,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
@@ -933,16 +950,16 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
"Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
unsigned BW = SVT.getSizeInBits();
- SDValue HalfWord = DAG.getConstant(BW/2, VT);
+ SDValue HalfWord = DAG.getConstant(BW/2, DL, VT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
- SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType());
+ SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
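
The expansion relies on an arithmetic identity: split the BW-bit unsigned value into two halves that are each exactly representable, convert each half (signed conversion is now safe because the halves are small), and recombine as HI * 2^(BW/2) + LO. A scalar model for 32-bit elements, with double standing in for the vector FP type:

    #include <cassert>
    #include <cstdint>

    // Scalar model of the ExpandUINT_TO_FLOAT lowering for BW = 32.
    double uintToFP32(uint32_t X) {
      uint32_t Hi = X >> 16;        // SRL by HalfWord
      uint32_t Lo = X & 0x0000FFFF; // AND with HalfWordMask
      const double TwoHW = 65536.0; // 2^(BW/2), the TWOHW constant
      return double(int32_t(Hi)) * TwoHW + double(int32_t(Lo));
    }

    int main() {
      assert(uintToFP32(0xFFFFFFFFu) == 4294967295.0);
      assert(uintToFP32(0u) == 0.0);
    }
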
@@ -961,8 +978,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
- SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ SDLoc DL(Op);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
Zero, Op.getOperand(0));
}
return DAG.UnrollVectorOp(Op.getNode());
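
The FNEG expansion uses -0.0 rather than 0.0 as the FSUB left operand because only the former flips the sign of a zero input under IEEE-754 default rounding. A minimal check:

    #include <cassert>
    #include <cmath>

    int main() {
      float X = 0.0f;
      float Neg = -0.0f - X;  // what ExpandFNEG builds per element
      float Wrong = 0.0f - X; // 0.0 - 0.0 == +0.0, losing the sign flip
      assert(std::signbit(Neg));
      assert(!std::signbit(Wrong));
    }
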
@@ -978,16 +996,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), EltVT),
- DAG.getConstant(0, EltVT));
+ (EltVT.getSizeInBits()), dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 63671f7..445e882 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -252,7 +252,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
} else {
EVT VT = OpVT.getVectorElementType();
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
}
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
@@ -308,7 +308,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
// Vector read from all ones, scalar expects a single 1 so mask.
Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
- Cond, DAG.getConstant(1, CondVT));
+ Cond, DAG.getConstant(1, SDLoc(N), CondVT));
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
assert(VecBool == TargetLowering::UndefinedBooleanContent ||
@@ -385,9 +385,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
} else {
EVT VT = OpVT.getVectorElementType();
LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
}
// Turn it into a scalar SETCC.
@@ -600,6 +600,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
+ case ISD::MGATHER:
+ SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+ break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
@@ -668,6 +671,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UREM:
case ISD::SREM:
case ISD::FREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
@@ -723,6 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeWidenVector:
@@ -810,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
- DAG.getConstant(IdxVal + LoVT.getVectorNumElements(),
+ DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
TLI.getVectorIdxTy()));
}
@@ -844,7 +852,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
StackPtr =
DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -891,7 +899,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo.getValueType(), Lo, Elt, Idx);
else
Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
- DAG.getConstant(IdxVal - LoNumElts,
+ DAG.getConstant(IdxVal - LoNumElts, dl,
TLI.getVectorIdxTy()));
return;
}
@@ -923,7 +931,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -966,7 +975,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
@@ -1021,7 +1030,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1043,6 +1052,54 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
}
+void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Mask = MGT->getMask();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+
+ SDValue IndexHi, IndexLo;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
+ MMO);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+}
+
+
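
SplitVecRes_MGATHER above halves the result, mask, pass-through and index vectors, emits two gathers against the same base pointer, and ties their chains back together with a TokenFactor. Per lane, each half computes the following scalar reference semantics (illustrative names, not the DAG API):

    #include <cstddef>

    // Scalar model of one masked-gather lane; splitting the node just runs
    // this loop over two half-length Mask/Index/Src0 slices.
    void maskedGatherRef(const int *Base, const long *Index, const bool *Mask,
                         const int *Src0, int *Dst, std::size_t N) {
      for (std::size_t i = 0; i < N; ++i)
        Dst[i] = Mask[i] ? Base[Index[i]] : Src0[i]; // masked-off lanes keep Src0
    }
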
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -1236,8 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Extract the vector element by hand.
SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input], DAG.getConstant(Idx,
- TLI.getVectorIdxTy())));
+ Inputs[Input],
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy())));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
@@ -1293,7 +1351,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1301,21 +1361,37 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
+ case ISD::MSCATTER:
+ Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- case ISD::FTRUNC:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
Res = SplitVecOp_UnaryOp(N);
break;
}
@@ -1420,7 +1496,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
- DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+ DAG.getConstant(IdxVal - LoElts, dl,
+ Idx.getValueType()));
}
}
@@ -1441,7 +1518,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (IdxVal < LoElts)
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
- DAG.getConstant(IdxVal - LoElts,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
Idx.getValueType())), 0);
}
@@ -1462,6 +1539,68 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT, false, false, false, 0);
}
+SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
+ unsigned OpNo) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue Mask = MGT->getMask();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl);
+
+ SDValue IndexHi, IndexLo;
+ if (Index.getNode())
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+ else
+ IndexLo = IndexHi = Index;
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
+ OpsLo, MMO);
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
+
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
+ OpsHi, MMO);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
+ Hi);
+ ReplaceValueWith(SDValue(MGT, 0), Res);
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue Ch = N->getChain();
@@ -1497,7 +1636,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -1507,11 +1646,64 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore());
-
// Build a factor node to remember that this store is independent of the
// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Index = N->getIndex();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ GetSplitVector(Data, DataLo, DataHi);
+ SDValue MaskLo, MaskHi;
+ GetSplitVector(Mask, MaskLo, MaskHi);
+
+ SDValue PtrLo, PtrHi;
+  if (Ptr.getValueType().isVector()) // scatter from a vector of pointers
+ std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL);
+ else
+ PtrLo = PtrHi = Ptr;
+
+ SDValue IndexHi, IndexLo;
+ if (Index.getNode())
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+ else
+ IndexLo = IndexHi = Index;
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo};
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi};
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1544,7 +1736,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
@@ -1573,7 +1765,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
- Op, DAG.getConstant(i, TLI.getVectorIdxTy())));
+ Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy())));
}
}
@@ -1581,7 +1773,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
}
-SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
@@ -1603,6 +1795,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
+ bool IsFloat = OutVT.isFloatingPoint();
+
// Widening should have already made sure this is a power-two vector
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
@@ -1621,11 +1815,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
SDValue InLoVec, InHiVec;
std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
- EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfElementVT = IsFloat ?
+ EVT::getFloatingPointVT(InElementSize/2) :
+ EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -1634,7 +1830,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+ return IsFloat ?
+ DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
+ DAG.getTargetConstant(0, DL, TLI.getPointerTy())) :
+ DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
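
SplitVecOp_TruncateHelper implements the two-step narrowing described in the comments: truncate the split halves to half the element width, CONCAT_VECTORS them into a (hopefully legal) intermediate, then perform one final TRUNCATE, or FP_ROUND for the float case this patch adds. A scalar model of the v8i8 = trunc v8i32 shape, assuming v8i16 is the legal intermediate:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Two-step truncate: go to half the element width first, then to the
    // final width, mirroring trunc(v8i32 -> v8i16 -> v8i8).
    std::vector<uint8_t> twoStepTrunc(const std::vector<uint32_t> &In) {
      std::vector<uint16_t> Inter(In.size());
      for (std::size_t i = 0; i < In.size(); ++i)
        Inter[i] = uint16_t(In[i]);   // first TRUNCATE, half the element size
      std::vector<uint8_t> Out(In.size());
      for (std::size_t i = 0; i < In.size(); ++i)
        Out[i] = uint8_t(Inter[i]);   // final TRUNCATE to the result type
      return Out;
    }

    int main() { assert(twoStepTrunc({0x12345678})[0] == 0x78); }
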
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
@@ -1865,9 +2064,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy()));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy()));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
Idx += NumElts;
CurNumElts -= NumElts;
@@ -1880,11 +2081,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1, DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
+ InOp1,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy()));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2, DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
+ InOp2,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy()));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2);
}
@@ -1922,8 +2125,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx], DAG.getConstant(i,
- TLI.getVectorIdxTy()));
+ ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
@@ -2009,9 +2212,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
if (InVTNumElts % WidenNumElts == 0) {
- SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
- InOp, DAG.getConstant(0,
- TLI.getVectorIdxTy()));
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
@@ -2026,7 +2228,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned i;
for (i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy()));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
@@ -2114,6 +2316,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
case TargetLowering::TypeScalarizeVector:
@@ -2252,7 +2455,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, TLI.getVectorIdxTy()));
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy()));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -2310,7 +2513,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
if (InVTNumElts % WidenNumElts == 0) {
// Extract the input and convert the shorten input vector.
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
}
@@ -2326,7 +2529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
unsigned i;
for (i=0; i < MinElts; ++i) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy()));
Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
}
@@ -2369,7 +2572,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
unsigned i;
for (i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy()));
+ DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy()));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -2432,7 +2635,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
@@ -2553,6 +2756,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(InVT.isVector() && "can not widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we also need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -2662,10 +2875,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
DAG.getUNDEF(FixedVT), InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
else
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
break;
}
}
@@ -2710,7 +2923,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(Opcode, dl, EltVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, TLI.getVectorIdxTy())));
+ DAG.getConstant(i, dl,
+ TLI.getVectorIdxTy())));
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
@@ -2731,7 +2945,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
}
}
@@ -2759,7 +2973,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, TLI.getVectorIdxTy()));
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy()));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
@@ -2814,7 +3028,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
@@ -2849,8 +3063,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
- ResVT, WideSETCC, DAG.getConstant(0,
- TLI.getVectorIdxTy()));
+ ResVT, WideSETCC,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
return PromoteTargetBoolean(CC, N->getValueType(0));
}
@@ -2888,7 +3102,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2944,7 +3161,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
LdTy = NewLdTy;
}
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
- DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy()));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
@@ -3015,7 +3232,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, BasePtr.getValueType()));
+ DAG.getConstant(Increment, dl, BasePtr.getValueType()));
SDValue L;
if (LdWidth < NewVTWidth) {
@@ -3141,7 +3358,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr,
- DAG.getConstant(Offset,
+ DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
@@ -3192,7 +3409,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy()));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
@@ -3201,7 +3419,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
Offset += Increment;
Idx += NumVTElts;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, BasePtr.getValueType()));
+ DAG.getConstant(Increment, dl,
+ BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store
@@ -3212,7 +3431,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
- DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
@@ -3220,7 +3439,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, BasePtr.getValueType()));
+ DAG.getConstant(Increment, dl,
+ BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
@@ -3258,7 +3478,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align,
@@ -3266,10 +3486,11 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getConstant(Offset,
- BasePtr.getValueType()));
+ BasePtr,
+ DAG.getConstant(Offset, dl,
+ BasePtr.getValueType()));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
@@ -3306,7 +3527,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -3315,7 +3536,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index db38b76..6303422 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TRI = STI.getRegisterInfo();
TLI = IS->TLI;
TII = STI.getInstrInfo();
- ResourcesModel = TII->CreateTargetScheduleState(STI);
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
  // do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
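
The reset() call implies ResourcesModel is now held by a smart pointer rather than an owning raw pointer, so the manual delete (and the dead dump() code removed below) can go away. The pattern, sketched with stand-in types rather than the real scheduler classes:

    #include <memory>

    struct ScheduleState { /* ... */ };

    struct Queue {
      std::unique_ptr<ScheduleState> ResourcesModel;
      void init(ScheduleState *FromFactory) {
        ResourcesModel.reset(FromFactory); // take ownership of the factory result
      }
      // No explicit delete needed; ~Queue() releases ResourcesModel.
    };
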
@@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) {
Queue.pop_back();
}
-
-
-#ifdef NDEBUG
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
- ResourcePriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index bce69d7..c27f8de 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -35,7 +35,6 @@ public:
FRAMEIX = 2 // value is contents of a stack location
};
private:
- enum DbgValueKind kind;
union {
struct {
SDNode *Node; // valid for expressions
@@ -46,17 +45,18 @@ private:
} u;
MDNode *Var;
MDNode *Expr;
- bool IsIndirect;
uint64_t Offset;
DebugLoc DL;
unsigned Order;
- bool Invalid;
+ enum DbgValueKind kind;
+ bool IsIndirect;
+ bool Invalid = false;
+
public:
// Constructor for non-constants.
SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
uint64_t off, DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
@@ -65,8 +65,7 @@ public:
// Constructor for constants.
SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) {
kind = CONST;
u.Const = C;
}
@@ -74,8 +73,7 @@ public:
// Constructor for frame indices.
SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
unsigned O)
- : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
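
The SDDbgValue member reordering groups the enum and bool fields after the pointer- and 64-bit-sized members so the small fields can share one padding slot. The effect, illustrated with stand-in structs on a typical LP64 ABI (sizes here are 40 vs. 24 bytes):

    #include <cstdint>

    struct Bad  { uint8_t A; uint64_t X; uint8_t B; uint64_t Y; uint8_t C; };
    struct Good { uint64_t X; uint64_t Y; uint8_t A; uint8_t B; uint8_t C; };

    static_assert(sizeof(Good) < sizeof(Bad), "reordering removes padding");
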
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 5222de1..fd0fa31 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -173,7 +173,7 @@ public:
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
- ~ScheduleDAGRRList() {
+ ~ScheduleDAGRRList() override {
delete HazardRec;
delete AvailableQueue;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f2b18fc..3853ada 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
// Helper for AddGlue to clone node operands.
-static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
- SmallVectorImpl<EVT> &VTs,
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
SDValue ExtraOper = SDValue()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
- Ops.push_back(N->getOperand(I));
-
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
if (ExtraOper.getNode())
Ops.push_back(ExtraOper);
@@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
- SmallVector<EVT, 4> VTs;
SDNode *GlueDestNode = Glue.getNode();
// Don't add glue from a node to itself.
@@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
- for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
+ SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
if (AddGlue)
VTs.push_back(MVT::Glue);
@@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
!N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
"expected an unused glue value");
- SmallVector<EVT, 4> VTs;
- for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
- VTs.push_back(N->getValueType(I));
-
- CloneNodeWithValues(N, DAG, VTs);
+ CloneNodeWithValues(N, DAG,
+ makeArrayRef(N->value_begin(), N->getNumValues() - 1));
}
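
These ScheduleDAGSDNodes cleanups replace hand-written push_back loops with iterator-range construction: SmallVector's range constructor over op_begin()/op_end() and value_begin()/value_end(), plus makeArrayRef to take a prefix of the value list. The same pattern with std::vector:

    #include <vector>

    // One range construction instead of a copy loop; llvm::SmallVector and
    // llvm::makeArrayRef follow the same iterator-pair convention.
    std::vector<int> copyOps(const int *Begin, const int *End) {
      return std::vector<int>(Begin, End);
    }
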
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 2cd1f4b..6351fa2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -44,7 +44,7 @@ namespace llvm {
explicit ScheduleDAGSDNodes(MachineFunction &mf);
- virtual ~ScheduleDAGSDNodes() {}
+ ~ScheduleDAGSDNodes() override {}
/// Run - perform scheduling.
///
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 418b58e..eee4a4b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -76,7 +76,7 @@ public:
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
- ~ScheduleDAGVLIW() {
+ ~ScheduleDAGVLIW() override {
delete HazardRec;
delete AvailableQueue;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f75d5f4..efd4bd9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -49,6 +49,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
+#include <utility>
using namespace llvm;
@@ -196,6 +197,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
return true;
}
+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantFPSDNode or undef.
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
@@ -499,8 +516,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::SUB:
case ISD::SHL: {
const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
- AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(),
- BinNode->hasNoSignedWrap(), BinNode->isExact());
+ AddBinaryNodeIDCustom(
+ ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(),
+ BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -860,7 +878,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
return Node;
}
@@ -878,7 +896,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
return Node;
}
@@ -895,7 +913,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
return Node;
}
@@ -947,9 +965,9 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
if (isBinOpWithFlags(Opcode)) {
BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
- FN->setHasNoUnsignedWrap(nuw);
- FN->setHasNoSignedWrap(nsw);
- FN->setIsExact(exact);
+ FN->Flags.setNoUnsignedWrap(nuw);
+ FN->Flags.setNoSignedWrap(nsw);
+ FN->Flags.setExact(exact);
return FN;
}
@@ -959,6 +977,40 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
return N;
}
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
+ "debug location. Use another overload.");
+ }
+ }
+ return N;
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ DebugLoc DL, void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ default: break; // Process only regular (non-target) constant nodes.
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ // Erase the debug location from the node if the node is used at several
+ // different places. Do not propagate one location to all uses, as that
+ // leads to incorrect debug info.
+ if (N->getDebugLoc() != DL)
+ N->setDebugLoc(DebugLoc());
+ break;
+ }
+ }
+ return N;
+}
+
void SelectionDAG::clear() {
allnodes_clear();
OperandAllocator.Reset();
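The second overload above changes the CSE contract for Constant/ConstantFP
nodes. A hedged sketch of the observable behaviour, assuming only the
overloads introduced in this hunk:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include <cassert>
  using namespace llvm;

  // Requesting the same constant from two locations yields one CSE'd node;
  // if the locations differ, its DebugLoc is cleared rather than wrongly
  // attributing every use to the first request.
  static void illustrateConstantCSE(SelectionDAG &DAG, SDLoc L1, SDLoc L2) {
    SDValue A = DAG.getConstant(42, L1, MVT::i32);
    SDValue B = DAG.getConstant(42, L2, MVT::i32);
    assert(A.getNode() == B.getNode() && "constants are CSE'd");
    // If L1 != L2, the shared node now carries the empty DebugLoc.
  }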
@@ -1014,7 +1066,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
- getConstant(Imm, Op.getValueType()));
+ getConstant(Imm, DL, Op.getValueType()));
}
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
@@ -1052,7 +1104,7 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
@@ -1062,31 +1114,33 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
switch (TLI->getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
- TrueValue = getConstant(1, VT);
+ TrueValue = getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
VT);
break;
}
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
-SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) {
+SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT,
+ bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
- return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO);
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO)
+SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT,
+ bool isO)
{
- return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO);
+ return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
- bool isO) {
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
+ bool isT, bool isO) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
@@ -1125,7 +1179,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
- .trunc(ViaEltSizeInBits),
+ .trunc(ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
}
@@ -1160,12 +1214,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
ID.AddBoolean(isO);
void *IP = nullptr;
SDNode *N = nullptr;
- if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
+ N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(),
+ EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1179,16 +1234,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
return Result;
}
-SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
- return getConstant(Val, TLI->getPointerTy(), isTarget);
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) {
+ return getConstant(Val, DL, TLI->getPointerTy(), isTarget);
}
-
-SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
- return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget);
+SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT,
+ bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
}
-SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
+SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT,
+ bool isTarget){
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
EVT EltVT = VT.getScalarType();
@@ -1202,12 +1258,13 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
- if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
+ N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(),
+ EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1216,25 +1273,25 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
if (VT.isVector()) {
SmallVector<SDValue, 8> Ops;
Ops.assign(VT.getVectorNumElements(), Result);
- // FIXME SDLoc info might be appropriate here
Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
}
return Result;
}
-SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
+SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT,
+ bool isTarget) {
EVT EltVT = VT.getScalarType();
if (EltVT==MVT::f32)
- return getConstantFP(APFloat((float)Val), VT, isTarget);
+ return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
else if (EltVT==MVT::f64)
- return getConstantFP(APFloat(Val), VT, isTarget);
+ return getConstantFP(APFloat(Val), DL, VT, isTarget);
else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
- return getConstantFP(apf, VT, isTarget);
+ return getConstantFP(apf, DL, VT, isTarget);
} else
llvm_unreachable("Unsupported type in getConstantFP");
}
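Every getConstant/getConstantFP caller now names the SDLoc at which the
constant is materialized. A minimal migration sketch (function and values
hypothetical):

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static SDValue maskLowByte(SelectionDAG &DAG, SDLoc DL, SDValue X) {
    EVT VT = X.getValueType();
    // Before this change: DAG.getConstant(0xFF, VT)
    SDValue Mask = DAG.getConstant(0xFF, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, X, Mask);
  }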
@@ -1264,7 +1321,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
ID.AddInteger(TargetFlags);
ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(),
@@ -1281,7 +1338,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(FI);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
@@ -1300,7 +1357,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
@@ -1326,7 +1383,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
@@ -1353,7 +1410,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
@@ -1371,7 +1428,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
@@ -1386,7 +1443,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
ID.AddPointer(MBB);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
@@ -1446,13 +1503,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
// N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
std::swap(N1, N2);
- int NElts = M.size();
- for (int i = 0; i != NElts; ++i) {
- if (M[i] >= NElts)
- M[i] -= NElts;
- else if (M[i] >= 0)
- M[i] += NElts;
- }
+ ShuffleVectorSDNode::commuteMask(M);
}
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
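ShuffleVectorSDNode::commuteMask is the relocated form of the loop deleted
above; in miniature, as a hypothetical free function:

  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // After swapping the two shuffle inputs, indices into the old LHS must
  // point into the new RHS and vice versa; negative (undef) lanes stay put.
  static void commuteMaskRef(SmallVectorImpl<int> &Mask) {
    int NElts = Mask.size();
    for (int &M : Mask) {
      if (M >= NElts)
        M -= NElts; // was RHS, now LHS
      else if (M >= 0)
        M += NElts; // was LHS, now RHS
    }
  }
  // e.g. with NElts = 4, mask <0, 5, -1, 2> becomes <4, 1, -1, 6>.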
@@ -1484,6 +1535,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < (int)NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
@@ -1513,9 +1592,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return getUNDEF(VT);
// If Identity shuffle return that node.
- bool Identity = true;
+ bool Identity = true, AllSame = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
@@ -1537,18 +1617,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (Splat && Splat.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (V.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements())
+ if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+ EVT BuildVT = BV->getValueType(0);
+ SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
}
}
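A worked instance of BlendSplat, with hypothetical values and 4 lanes:

  // N1   = BUILD_VECTOR <x, x, undef, x>   (splat of x, lane 2 undef)
  // mask = <2, 0, 3, 1>, Offset = 0
  // lane 0 reads the undef source lane 2    -> mask element becomes -1
  // lane 1 reads x; own lane 1 is defined   -> rewritten to 1
  // lane 2 reads x; own lane 2 is undef     -> left as 3
  // lane 3 reads x; own lane 3 is defined   -> rewritten to 3
  // result: <-1, 1, 3, 3>, which later phases can often blend cheaply.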
@@ -1559,7 +1656,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
ID.AddInteger(MaskVec[i]);
void* IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
return SDValue(E, 0);
// Allocate the mask array for the node out of the BumpPtrAllocator, since
@@ -1579,19 +1676,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
-
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = SV.getMaskElt(i);
- if (Idx >= 0) {
- if (Idx < (int)NumElems)
- Idx += NumElems;
- else
- Idx -= NumElems;
- }
- MaskVec.push_back(Idx);
- }
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
@@ -1612,7 +1698,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
void* IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
return SDValue(E, 0);
CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
@@ -1628,7 +1714,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
ID.AddInteger(RegNo);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
@@ -1642,7 +1728,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
ID.AddPointer(RegMask);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
@@ -1657,7 +1743,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
@@ -1680,7 +1766,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
@@ -1699,7 +1785,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
ID.AddPointer(V);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
@@ -1715,7 +1801,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
ID.AddPointer(MD);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
@@ -1734,7 +1820,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
ID.AddInteger(DestAS);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(),
@@ -1791,13 +1877,14 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETFALSE2: return getConstant(0, dl, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
TargetLowering::BooleanContent Cnt =
TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
+ VT);
}
case ISD::SETOEQ:
@@ -1821,16 +1908,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getConstant(C1 == C2, VT);
- case ISD::SETNE: return getConstant(C1 != C2, VT);
- case ISD::SETULT: return getConstant(C1.ult(C2), VT);
- case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
- case ISD::SETULE: return getConstant(C1.ule(C2), VT);
- case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
- case ISD::SETLT: return getConstant(C1.slt(C2), VT);
- case ISD::SETGT: return getConstant(C1.sgt(C2), VT);
- case ISD::SETLE: return getConstant(C1.sle(C2), VT);
- case ISD::SETGE: return getConstant(C1.sge(C2), VT);
+ case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
}
}
}
@@ -1842,41 +1929,41 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
- case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, VT);
+ R==APFloat::cmpLessThan, dl, VT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
- case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
- case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, VT);
+ R==APFloat::cmpEqual, dl, VT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
// fall through
case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, VT);
- case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT);
- case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT);
+ R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, VT);
- case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+ R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, VT);
+ R==APFloat::cmpLessThan, dl, VT);
case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, VT);
- case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
- case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+ R==APFloat::cmpUnordered, dl, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
}
} else {
// Ensure that the constant occurs on the RHS.
@@ -2323,6 +2410,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
+ case ISD::EXTRACT_ELEMENT: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ const unsigned Index =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+ // Remove low part of known bits mask
+ KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+ KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+ // Remove high part of known bit mask
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
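The EXTRACT_ELEMENT case above slices a wider known-bits mask down to one
element. The same surgery in isolation, with a worked value:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // getHiBits shifts away the Index * BitWidth low bits that belong to the
  // lower elements; trunc then drops everything above our element.
  static APInt knownBitsOfElement(const APInt &Known, unsigned Index,
                                  unsigned BitWidth) {
    APInt Shifted = Known.getHiBits(Known.getBitWidth() - Index * BitWidth);
    return Shifted.trunc(BitWidth);
  }
  // e.g. Known = 0xFFFF000000000000 (i64), Index = 1, BitWidth = 32
  //      yields 0xFFFF0000 for the high element.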
@@ -2522,6 +2624,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueType().getSizeInBits();
+ const int Items =
+ Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+ // Get the reverse index (counting from the big end): Op1 indexes elements
+ // from the little end, but the sign bits live at the big end.
+ const int rIndex = Items - 1 -
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // If the sign portion ends in our element, the subtraction gives the
+ // correct result; otherwise it is negative or > bitwidth, so clamp it.
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
}
// If we are looking at the loaded value of the SDNode.
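A worked instance of the clamp above: an i64 with KnownSign = 40 split into
two i32 halves (Items = 2, BitWidth = 32):

  // high half, Op1 = 1 -> rIndex = 0 -> min(40 - 0*32, 32) = 32 sign bits
  // low half,  Op1 = 0 -> rIndex = 1 -> max(40 - 1*32, 0)  =  8 sign bits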
@@ -2643,7 +2760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), None);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(),
@@ -2666,12 +2783,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT,
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT,
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
@@ -2680,29 +2797,29 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
- return getConstantFP(apf, VT);
+ return getConstantFP(apf, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
break;
case ISD::BSWAP:
- return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(),
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTPOP:
- return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(),
+ return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(),
+ return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(),
+ return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
}
}
@@ -2713,26 +2830,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
switch (Opcode) {
case ISD::FNEG:
V.changeSign();
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
case ISD::FABS:
V.clearSign();
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
case ISD::FCEIL: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
break;
}
case ISD::FTRUNC: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
break;
}
case ISD::FFLOOR: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
break;
}
case ISD::FP_EXTEND: {
@@ -2741,7 +2858,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
@@ -2755,20 +2872,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
APInt api(VT.getSizeInBits(), x);
- return getConstant(api, VT);
+ return getConstant(api, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT);
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
}
}
- // Constant fold unary operations with a vector integer operand.
+ // Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
if (BV->isConstant()) {
switch (Opcode) {
@@ -2776,18 +2893,55 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
+ EVT SVT = VT.getScalarType();
+ EVT InVT = BV->getValueType(0);
+ EVT InSVT = InVT.getScalarType();
+
+ // Find a legal integer scalar type for constant promotion, and
+ // ensure that its scalar size is at least as large as the source's.
+ EVT LegalSVT = SVT;
+ if (SVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT);
+ if (LegalSVT.bitsLT(SVT)) break;
+ }
+
+ // Let the above scalar folding handle the folding of each element.
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue OpN = BV->getOperand(i);
- // Let the above scalar folding handle the conversion of each
- // element.
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
- OpN);
+ EVT OpVT = OpN.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (OpVT.isInteger() && OpVT.bitsGT(InSVT))
+ OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN);
+
+ OpN = getNode(Opcode, DL, SVT, OpN);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN);
+
+ if (OpN.getOpcode() != ISD::UNDEF &&
+ OpN.getOpcode() != ISD::Constant &&
+ OpN.getOpcode() != ISD::ConstantFP)
+ break;
Ops.push_back(OpN);
}
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ if (Ops.size() == VT.getVectorNumElements())
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ break;
}
}
}
@@ -2825,7 +2979,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
- return getConstant(0, VT);
+ return getConstant(0, DL, VT);
break;
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2842,7 +2996,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
- return getConstant(0, VT);
+ return getConstant(0, DL, VT);
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2941,7 +3095,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
@@ -2956,7 +3110,54 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
+ switch (Opcode) {
+ case ISD::ADD: return std::make_pair(C1 + C2, true);
+ case ISD::SUB: return std::make_pair(C1 - C2, true);
+ case ISD::MUL: return std::make_pair(C1 * C2, true);
+ case ISD::AND: return std::make_pair(C1 & C2, true);
+ case ISD::OR: return std::make_pair(C1 | C2, true);
+ case ISD::XOR: return std::make_pair(C1 ^ C2, true);
+ case ISD::SHL: return std::make_pair(C1 << C2, true);
+ case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
+ case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
+ case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
+ case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.udiv(C2), true);
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.urem(C2), true);
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.sdiv(C2), true);
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.srem(C2), true);
+ }
+ return std::make_pair(APInt(1, 0), false);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
+ const ConstantSDNode *Cst1,
+ const ConstantSDNode *Cst2) {
+ if (Cst1->isOpaque() || Cst2->isOpaque())
+ return SDValue();
+
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
+ Cst2->getAPIntValue());
+ if (!Folded.second)
+ return SDValue();
+ return getConstant(Folded.first, DL, VT);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
SDNode *Cst1, SDNode *Cst2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
@@ -2964,116 +3165,59 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
- SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
- SmallVector<SDValue, 4> Outputs;
- EVT SVT = VT.getScalarType();
+ // Handle the case of two scalars.
+ if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
+ if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
+ if (SDValue Folded =
+ FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) {
+ if (!VT.isVector())
+ return Folded;
+ // We may have a vector type but a scalar result. Create a splat
+ // of the folded scalar.
+ SmallVector<SDValue, 4> Outputs(VT.getVectorNumElements(), Folded);
+ // Build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
+ } else {
+ return SDValue();
+ }
+ }
+ }
- ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
- ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
- if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque()))
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
return SDValue();
- if (Scalar1 && Scalar2)
- // Scalar instruction.
- Inputs.push_back(std::make_pair(Scalar1, Scalar2));
- else {
- // For vectors extract each constant element into Inputs so we can constant
- // fold them individually.
- BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV1 || !BV2)
- return SDValue();
-
- assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
-
- for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
- ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
- if (!V1 || !V2) // Not a constant, bail.
- return SDValue();
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
- if (V1->isOpaque() || V2->isOpaque())
- return SDValue();
-
- // Avoid BUILD_VECTOR nodes that perform implicit truncation.
- // FIXME: This is valid and could be handled by truncating the APInts.
- if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
- return SDValue();
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 4> Outputs;
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
- Inputs.push_back(std::make_pair(V1, V2));
- }
- }
+ if (V1->isOpaque() || V2->isOpaque())
+ return SDValue();
- // We have a number of constant values, constant fold them element by element.
- for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
- const APInt &C1 = Inputs[I].first->getAPIntValue();
- const APInt &C2 = Inputs[I].second->getAPIntValue();
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
- switch (Opcode) {
- case ISD::ADD:
- Outputs.push_back(getConstant(C1 + C2, SVT));
- break;
- case ISD::SUB:
- Outputs.push_back(getConstant(C1 - C2, SVT));
- break;
- case ISD::MUL:
- Outputs.push_back(getConstant(C1 * C2, SVT));
- break;
- case ISD::UDIV:
- if (!C2.getBoolValue())
- return SDValue();
- Outputs.push_back(getConstant(C1.udiv(C2), SVT));
- break;
- case ISD::UREM:
- if (!C2.getBoolValue())
- return SDValue();
- Outputs.push_back(getConstant(C1.urem(C2), SVT));
- break;
- case ISD::SDIV:
- if (!C2.getBoolValue())
- return SDValue();
- Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
- break;
- case ISD::SREM:
- if (!C2.getBoolValue())
- return SDValue();
- Outputs.push_back(getConstant(C1.srem(C2), SVT));
- break;
- case ISD::AND:
- Outputs.push_back(getConstant(C1 & C2, SVT));
- break;
- case ISD::OR:
- Outputs.push_back(getConstant(C1 | C2, SVT));
- break;
- case ISD::XOR:
- Outputs.push_back(getConstant(C1 ^ C2, SVT));
- break;
- case ISD::SHL:
- Outputs.push_back(getConstant(C1 << C2, SVT));
- break;
- case ISD::SRL:
- Outputs.push_back(getConstant(C1.lshr(C2), SVT));
- break;
- case ISD::SRA:
- Outputs.push_back(getConstant(C1.ashr(C2), SVT));
- break;
- case ISD::ROTL:
- Outputs.push_back(getConstant(C1.rotl(C2), SVT));
- break;
- case ISD::ROTR:
- Outputs.push_back(getConstant(C1.rotr(C2), SVT));
- break;
- default:
+ // Fold one vector element.
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(),
+ V2->getAPIntValue());
+ if (!Folded.second)
return SDValue();
- }
+ Outputs.push_back(getConstant(Folded.first, DL, SVT));
}
- assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() &&
- "Expected a scalar or vector!"));
-
- // Handle the scalar case first.
- if (!VT.isVector())
- return Outputs.back();
+ assert(VT.getVectorNumElements() == Outputs.size() &&
+ "Vector size mismatch!");
// We may have a vector type but a scalar result. Create a splat.
Outputs.resize(VT.getVectorNumElements(), Outputs.back());
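FoldValue reports foldability through the bool half of its pair, which is how
the division and remainder cases bail out on a zero divisor. The contract in
isolation, for one opcode:

  #include "llvm/ADT/APInt.h"
  #include <utility>
  using namespace llvm;

  static std::pair<APInt, bool> foldUDiv(const APInt &C1, const APInt &C2) {
    if (!C2.getBoolValue()) // x udiv 0 is not foldable
      return std::make_pair(APInt(1, 0), false);
    return std::make_pair(C1.udiv(C2), true);
  }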
@@ -3109,6 +3253,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+
+ // BUILD_VECTOR requires all inputs to be of the same type; find the
+ // widest type and extend them all.
+ EVT SVT = VT.getScalarType();
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT.bitsGT(VT.getScalarType()))
+ for (SDValue &Op : Elts)
+ Op = TLI->isZExtFree(Op.getValueType(), SVT)
+ ? getZExtOrTrunc(Op, DL, SVT)
+ : getSExtOrTrunc(Op, DL, SVT);
+
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
break;
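The widening rule added above, extracted into a hypothetical helper; note that
getZExtOrTrunc/getSExtOrTrunc are no-ops when the operand already has the
chosen type:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  static void widenBuildVectorOps(SelectionDAG &DAG, const TargetLowering &TLI,
                                  SDLoc DL, SmallVectorImpl<SDValue> &Elts,
                                  EVT SVT) {
    // Pick the widest scalar type present among the operands.
    for (SDValue Op : Elts)
      SVT = SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT;
    // Bring every operand to it, preferring zext when the target says it
    // is free.
    for (SDValue &Op : Elts)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, DL, SVT)
               : DAG.getSExtOrTrunc(Op, DL, SVT);
  }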
@@ -3273,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
+ auto SignExtendInReg = [&](APInt Val) {
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth() - FromBits;
+ Val = Val.ashr(Val.getBitWidth() - FromBits);
+ return getConstant(Val, DL, VT.getScalarType());
+ };
+
if (N1C) {
APInt Val = N1C->getAPIntValue();
- unsigned FromBits = EVT.getScalarType().getSizeInBits();
- Val <<= Val.getBitWidth()-FromBits;
- Val = Val.ashr(Val.getBitWidth()-FromBits);
- return getConstant(Val, VT);
+ return SignExtendInReg(Val);
+ }
+ if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+ SmallVector<SDValue, 8> Ops;
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue Op = N1.getOperand(i);
+ if (Op.getValueType() != VT.getScalarType()) break;
+ if (Op.getOpcode() == ISD::UNDEF) {
+ Ops.push_back(Op);
+ continue;
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) {
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val));
+ continue;
+ }
+ break;
+ }
+ if (Ops.size() == VT.getVectorNumElements())
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
break;
}
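The SignExtendInReg lambda above in miniature, with a worked value (assumes
0 < FromBits <= Val.getBitWidth(), as the callers guarantee):

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // Shift the interesting bits to the top, then arithmetic-shift them back:
  // sign-extending 0xAB from i8 inside an i32 yields 0xFFFFFFAB.
  static APInt signExtendInReg(APInt Val, unsigned FromBits) {
    Val <<= Val.getBitWidth() - FromBits;
    return Val.ashr(Val.getBitWidth() - FromBits);
  }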
@@ -3287,6 +3466,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
+ if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
// expanding copies of large vectors from registers.
if (N2C &&
@@ -3296,7 +3479,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),
- getConstant(N2C->getZExtValue() % Factor,
+ getConstant(N2C->getZExtValue() % Factor, DL,
N2.getValueType()));
}
@@ -3353,7 +3536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
- return getConstant(ShiftedVal.trunc(ElementSize), VT);
+ return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
case ISD::EXTRACT_SUBVECTOR: {
@@ -3384,7 +3567,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
// Perform trivial constant folding.
if (SDValue SV =
- FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()))
+ FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
// Canonicalize constant to RHS if commutative.
@@ -3409,35 +3592,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::FADD:
s = V1.add(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s != APFloat::opInvalidOp)
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
break;
case ISD::FMUL:
s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
break;
case ISD::FDIV:
s = V1.divide(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
}
break;
case ISD::FREM :
s = V1.mod(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
}
break;
case ISD::FCOPYSIGN:
V1.copySign(V2);
- return getConstantFP(V1, VT);
+ return getConstantFP(V1, DL, VT);
default: break;
}
}
@@ -3449,7 +3632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, VT);
+ return getConstantFP(V, DL, VT);
}
}
@@ -3474,7 +3657,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
- return getConstant(0, VT); // fold op(undef, arg2) -> 0
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
// For vectors, we can't easily build an all zero vector, just return
// the LHS.
return N2;
@@ -3489,7 +3672,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
- return getConstant(0, VT);
+ return getConstant(0, DL, VT);
// fallthrough
case ISD::ADD:
case ISD::ADDC:
@@ -3513,13 +3696,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
- return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
// For vectors, we can't easily build an all zero vector, just return
// the LHS.
return N1;
case ISD::OR:
if (!VT.isVector())
- return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
// For vectors, we can't easily build an all one vector, just return
// the LHS.
return N1;
@@ -3539,14 +3722,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (BinOpHasFlags)
AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
CSEMap.InsertNode(N, IP);
} else {
-
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
@@ -3569,8 +3751,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
- return getConstantFP(V1, VT);
+ if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
+ return getConstantFP(V1, DL, VT);
}
break;
}
@@ -3643,7 +3825,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
@@ -3705,16 +3887,32 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
- return DAG.getConstant(Val, VT);
- return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
+ return DAG.getConstant(Val, dl, VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
+ VT);
}
- Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, dl, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value);
+ if (VT != Value.getValueType()) {
+ assert(VT.getVectorElementType() == Value.getValueType() &&
+ "value type should be one vector element here");
+ SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps);
}
return Value;
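getMemsetValue splats the fill byte with a single multiplication. The trick in
isolation:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // 0xAB * 0x01010101 == 0xABABABAB: multiplying by the 0x01... magic value
  // repeats the byte across the whole integer.
  static APInt splatByte(uint8_t Byte, unsigned NumBits) {
    APInt Val(NumBits, Byte);
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    return Val * Magic;
  }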
@@ -3728,15 +3926,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, dl, VT);
else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, dl, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
- EltVT, NumElts)));
+ DAG.getConstant(0, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
} else
llvm_unreachable("Expected type!");
}
@@ -3759,7 +3958,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
// of a load, then it is cost effective to turn the load into the immediate.
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
- return DAG.getConstant(Val, VT);
+ return DAG.getConstant(Val, dl, VT);
return SDValue(nullptr, 0);
}
@@ -3769,7 +3968,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl,
SelectionDAG &DAG) {
EVT VT = Base.getValueType();
return DAG.getNode(ISD::ADD, dl,
- VT, Base, DAG.getConstant(Offset, VT));
+ VT, Base, DAG.getConstant(Offset, dl, VT));
}
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
@@ -3918,9 +4117,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize =
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4033,8 +4230,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4128,8 +4324,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -4198,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
+ bool isTailCall, MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4219,11 +4414,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
@@ -4254,15 +4451,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4285,10 +4483,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
@@ -4307,15 +4507,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4337,10 +4538,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
- Size, Align, isVol, DstPtrInfo);
- if (Result.getNode())
- return Result;
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
// Emit a library call.
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
@@ -4362,7 +4565,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -4379,7 +4583,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
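
The repeated switch from CSEMap.FindNodeOrInsertPos(ID, IP) to FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP) threads the debug location through CSE lookups. A simplified standalone model of the apparent intent (an assumption, not the LLVM implementation): when a structurally identical node is reused at a different source location, its location is dropped rather than letting one statement's line number leak into every use:

#include <cstdint>
#include <unordered_map>

struct Node { uint64_t Hash; int DebugLine; }; // DebugLine 0 means "unknown"

Node *findOrInsert(std::unordered_map<uint64_t, Node> &CSE, uint64_t Hash,
                   int DebugLine) {
  auto It = CSE.find(Hash);
  if (It != CSE.end()) {
    if (It->second.DebugLine != DebugLine)
      It->second.DebugLine = 0; // reused at another location: drop it
    return &It->second;
  }
  return &CSE.emplace(Hash, Node{Hash, DebugLine}).first->second;
}
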
@@ -4584,7 +4788,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4685,10 +4889,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
- "Cannot use trunc store to convert to or from a vector!");
+ "Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
- "Cannot use trunc store to change the number of vector elements!");
+ "Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
@@ -4706,7 +4910,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4814,7 +5018,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4883,7 +5087,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4909,7 +5113,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
return SDValue(E, 0);
SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
@@ -4938,7 +5142,7 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4965,7 +5169,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
@@ -4977,11 +5181,60 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
return SDValue(N, 0);
}
+SDValue
+SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ MaskedGatherSDNode *N =
+ new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
+ Ops, VTs, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N =
+ new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(),
+ Ops, VTs, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
unsigned Align) {
- SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}
@@ -5041,7 +5294,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
@@ -5096,7 +5349,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
return SDValue(E, 0);
if (NumOps == 1) {
@@ -5340,17 +5593,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
- // Check to see if there is no change.
- bool AnyChange = false;
- for (unsigned i = 0; i != NumOps; ++i) {
- if (Ops[i] != N->getOperand(i)) {
- AnyChange = true;
- break;
- }
- }
-
- // No operands changed, just return the input node.
- if (!AnyChange) return N;
+ // If no operands changed just return the input node.
+ if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
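
The UpdateNodeOperands hunk above replaces a hand-rolled AnyChange loop with std::equal; a standalone equivalent showing the two are interchangeable (the surrounding code has already asserted both ranges have the same length, which std::equal's two-iterator form requires):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Ops{1, 2, 3};
  std::vector<int> NodeOps{1, 2, 3};
  // True exactly when no operand differs — what the deleted loop computed.
  bool Unchanged =
      Ops.empty() || std::equal(Ops.begin(), Ops.end(), NodeOps.begin());
  assert(Unchanged);
}
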
@@ -5498,8 +5743,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) &&
- (OLoc.getDebugLoc() != NLoc)) {
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -5531,7 +5775,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
- if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP))
return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
}
@@ -5737,7 +5981,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, OpsArray);
IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
}
}
@@ -5769,7 +6013,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
SDValue
SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,
SDValue Operand) {
- SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
VT, Operand, SRIdxVal);
return SDValue(Subreg, 0);
@@ -5780,7 +6024,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,
SDValue
SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
SDValue Operand, SDValue Subreg) {
- SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32);
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
VT, Operand, Subreg, SRIdxVal);
return SDValue(Result, 0);
@@ -5797,7 +6041,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
if (isBinOpWithFlags(Opcode))
AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
void *IP = nullptr;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))
return E;
}
return nullptr;
@@ -5809,21 +6053,28 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
/// Constant
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O);
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
}
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O);
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
namespace {
@@ -6489,7 +6740,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
Operand,
- getConstant(i, TLI->getVectorIdxTy()));
+ getConstant(i, dl, TLI->getVectorIdxTy()));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
@@ -6595,8 +6846,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI->getDataLayout());
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ *TLI->getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
@@ -6652,9 +6903,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
"More vector elements requested than available!");
SDValue Lo, Hi;
Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
- getConstant(0, TLI->getVectorIdxTy()));
+ getConstant(0, DL, TLI->getVectorIdxTy()));
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
- getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy()));
+ getConstant(LoVT.getVectorNumElements(), DL,
+ TLI->getVectorIdxTy()));
return std::make_pair(Lo, Hi);
}
@@ -6670,7 +6922,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op,
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Op, getConstant(i, IdxTy)));
+ Op, getConstant(i, SL, IdxTy)));
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe9e442..85303d2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -20,8 +20,8 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -57,7 +58,6 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueType().getSizeInBits(),
+ DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
TLI.getPointerTy()));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
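
The ANY_EXTEND / SHL / ZERO_EXTEND / OR sequence above reassembles one wide integer from two narrower parts; the same computation on plain integers:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Lo = 0xDDCCBBAA, Hi = 0x11223344;
  // Extend Hi, shift it past Lo's bits, zero-extend Lo, then OR together.
  uint64_t Val = ((uint64_t)Hi << 32) | (uint64_t)Lo;
  assert(Val == 0x11223344DDCCBBAAULL);
}
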
@@ -208,7 +208,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getTargetConstant(1, TLI.getPointerTy()));
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -301,7 +301,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
}
// Vector/Vector bitcast.
@@ -425,7 +425,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
- DAG.getIntPtrConstant(RoundBits));
+ DAG.getIntPtrConstant(RoundBits, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
if (TLI.isBigEndian())
@@ -452,9 +452,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
SDValue &Part1 = Parts[i+StepSize/2];
Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
- ThisVT, Part0, DAG.getIntPtrConstant(1));
+ ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
- ThisVT, Part0, DAG.getIntPtrConstant(0));
+ ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
if (ThisBits == PartBits && ThisVT != PartVT) {
Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
@@ -493,7 +493,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- ElementVT, Val, DAG.getConstant(i,
+ ElementVT, Val, DAG.getConstant(i, DL,
TLI.getVectorIdxTy())));
for (unsigned i = ValueVT.getVectorNumElements(),
@@ -520,7 +520,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy()));
+ PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy()));
bool Smaller = ValueVT.bitsLE(PartVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
@@ -550,12 +551,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
if (IntermediateVT.isVector())
Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates),
+ DAG.getConstant(i * (NumElements / NumIntermediates), DL,
TLI.getVectorIdxTy()));
else
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
IntermediateVT, Val,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy()));
}
// Split the intermediate operands into legal parts.
@@ -576,93 +577,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
}
}
-namespace {
- /// RegsForValue - This struct represents the registers (physical or virtual)
- /// that a particular set of values is assigned, and the type information
- /// about the value. The most common situation is to represent one value at a
- /// time, but struct or array values are handled element-wise as multiple
- /// values. The splitting of aggregates is performed recursively, so that we
- /// never have aggregate-typed registers. The values at this point do not
- /// necessarily have legal types, so each value may require one or more
- /// registers of some legal type.
- ///
- struct RegsForValue {
- /// ValueVTs - The value types of the values, which may not be legal, and
- /// may need be promoted or synthesized from one or more registers.
- ///
- SmallVector<EVT, 4> ValueVTs;
+RegsForValue::RegsForValue() {}
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
- ///
- /// With virtual registers, the contents of RegVTs is redundant with TLI's
- /// getRegisterType member function, however when with physical registers
- /// it is necessary to have a separate record of the types.
- ///
- SmallVector<MVT, 4> RegVTs;
-
- /// Regs - This list holds the registers assigned to the values.
- /// Each legal or promoted value requires one register, and each
- /// expanded value requires multiple registers.
- ///
- SmallVector<unsigned, 4> Regs;
-
- RegsForValue() {}
-
- RegsForValue(const SmallVector<unsigned, 4> &regs,
- MVT regvt, EVT valuevt)
- : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-
- RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, Type *Ty) {
- ComputeValueVTs(tli, Ty, ValueVTs);
-
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
- for (unsigned i = 0; i != NumRegs; ++i)
- Regs.push_back(Reg + i);
- RegVTs.push_back(RegisterVT);
- Reg += NumRegs;
- }
- }
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+ EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- /// append - Add the specified values to this one.
- void append(const RegsForValue &RHS) {
- ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
- RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
- Regs.append(RHS.Regs.begin(), RHS.Regs.end());
- }
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
- SDLoc dl,
- SDValue &Chain, SDValue *Flag,
- const Value *V = nullptr) const;
-
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
- /// specified value into the registers specified by this object. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- void
- getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
- SDValue *Flag, const Value *V,
- ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
-
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind,
- bool HasMatching, unsigned MatchingIdx,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
- };
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
}
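
The constructor above assigns each (possibly illegal) value one or more consecutive registers. A standalone sketch of that bookkeeping with made-up numbers (first register 100, an i64 needing two parts and an i32 needing one — illustrative values, not target queries):

#include <cstdio>
#include <vector>

int main() {
  unsigned Reg = 100;
  std::vector<unsigned> NumRegsPerValue = {2, 1}; // i64 as two parts, then i32
  std::vector<unsigned> Regs;
  for (unsigned NumRegs : NumRegsPerValue) {
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i); // consecutive registers for one value
    Reg += NumRegs;
  }
  for (unsigned R : Regs)
    std::printf("%%%u ", R); // %100 %101 %102
  std::printf("\n");
}
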
/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
@@ -721,7 +654,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// The current value is a zero.
// Explicitly express that as it would be easier for
// optimizations to kick in.
- Parts[i] = DAG.getConstant(0, RegisterVT);
+ Parts[i] = DAG.getConstant(0, dl, RegisterVT);
continue;
}
@@ -823,7 +756,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
/// operand list. This adds the code marker and includes the number of
/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
- unsigned MatchingIdx,
+ unsigned MatchingIdx, SDLoc dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -843,7 +776,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
}
- SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
@@ -869,7 +802,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getSubtarget().getDataLayout();
+ DL = DAG.getTarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -997,14 +930,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
const DbgValueInst *DI = DDI.getDI();
DebugLoc dl = DDI.getdl();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- MDNode *Variable = DI->getVariable();
- MDNode *Expr = DI->getExpression();
+ DILocalVariable *Variable = DI->getVariable();
+ DIExpression *Expr = DI->getExpression();
+ assert(Variable->isValidLocationForIntrinsic(dl) &&
+ "Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect,
Val)) {
SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
IsIndirect, Offset, dl, DbgSDNodeOrder);
@@ -1016,6 +951,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
}
}
+/// getCopyFromRegs - If there was a virtual register allocated for the value V,
+/// emit CopyFromReg of the specified type Ty. Return an empty SDValue() otherwise.

+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue Result;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
+ Ty);
+ SDValue Chain = DAG.getEntryNode();
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ resolveDanglingDebugInfo(V, Result);
+ }
+
+ return Result;
+}
+
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
@@ -1026,15 +979,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If there's a virtual register allocated and initialized for this
// value, use it.
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
- if (It != FuncInfo.ValueMap.end()) {
- unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
- V->getType());
- SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
- resolveDanglingDebugInfo(V, N);
- return N;
+ SDValue copyFromReg = getCopyFromRegs(V, V->getType());
+ if (copyFromReg.getNode()) {
+ return copyFromReg;
}
// Otherwise create a new SDValue and remember it.
@@ -1044,6 +991,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
return Val;
}
+// Return true if an SDValue exists for the given Value.
+bool SelectionDAGBuilder::findValue(const Value *V) const {
+ return (NodeMap.find(V) != NodeMap.end()) ||
+ (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
+}
+
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
@@ -1067,18 +1020,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
EVT VT = TLI.getValueType(V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return DAG.getConstant(*CI, VT);
+ return DAG.getConstant(*CI, getCurSDLoc(), VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, TLI.getPointerTy(AS));
+ return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return DAG.getConstantFP(*CFP, VT);
+ return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return DAG.getUNDEF(VT);
@@ -1138,9 +1091,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
- Constants[i] = DAG.getConstantFP(0, EltVT);
+ Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
- Constants[i] = DAG.getConstant(0, EltVT);
+ Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
}
return DAG.getMergeValues(Constants, getCurSDLoc());
@@ -1164,9 +1117,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
SDValue Op;
if (EltVT.isFloatingPoint())
- Op = DAG.getConstantFP(0, EltVT);
+ Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
- Op = DAG.getConstant(0, EltVT);
+ Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
Ops.assign(NumElements, Op);
}
@@ -1223,7 +1176,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i != NumValues; ++i) {
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
RetPtr.getValueType(), RetPtr,
- DAG.getIntPtrConstant(Offsets[i]));
+ DAG.getIntPtrConstant(Offsets[i],
+ getCurSDLoc()));
Chains[i] =
DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@@ -1573,19 +1527,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = BrMBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
@@ -1674,7 +1622,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
Cond = CondLHS;
else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
CB.CC == ISD::SETEQ) {
- SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+ SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
@@ -1682,19 +1630,19 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
- const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
- Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
- VT, CmpOp, DAG.getConstant(Low, VT));
+ VT, CmpOp, DAG.getConstant(Low, dl, VT));
Cond = DAG.getSetCC(dl, MVT::i1, SUB,
- DAG.getConstant(High-Low, VT), ISD::SETULE);
+ DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
}
}
@@ -1705,18 +1653,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
- if (CB.TrueBB == NextBlock) {
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
- SDValue True = DAG.getConstant(1, Cond.getValueType());
+ SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
}
@@ -1752,13 +1693,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
// Subtract the lowest switch case value from the value being switched on and
// conditional branch to default mbb if the result is greater than the
// difference between smallest and largest cases.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
- DAG.getConstant(JTH.First, VT));
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(JTH.First, dl, VT));
// The SDNode we just created, which holds the value being switched on minus
// the smallest case value, needs to be copied to a virtual register so it
@@ -1766,10 +1709,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncating.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy());
+ SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy());
unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
- SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1777,24 +1720,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// for the switch statement if the value being switched on exceeds the largest
// case in the switch.
SDValue CMP =
- DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT);
+ DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT),
+ ISD::SETUGT);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
-
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
- SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
- if (JT.MBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond,
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
DAG.setRoot(BrCond);
@@ -1824,6 +1761,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
SDValue Guard;
+ SDLoc dl = getCurSDLoc();
// If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
// guard value from the virtual register holding the value. Otherwise, emit a
@@ -1831,34 +1769,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
unsigned GuardReg = SPD.getGuardReg();
if (GuardReg && TLI.useLoadStackGuardNode())
- Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg,
+ Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg,
PtrTy);
else
- Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
GuardPtr, MachinePointerInfo(IRGuard, 0),
true, false, false, Align);
- SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
StackSlotPtr,
MachinePointerInfo::getFixedStack(FI),
true, false, false, Align);
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
SDValue Cmp =
- DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
+ DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
Sub.getValueType()),
- Sub, DAG.getConstant(0, VT), ISD::SETNE);
+ Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
- SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, StackSlot.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
- SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(),
+ SDValue Br = DAG.getNode(ISD::BR, dl,
MVT::Other, BrCond,
DAG.getBasicBlock(SPD.getSuccessMBB()));
@@ -1886,18 +1824,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
// Subtract the minimum value
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
- DAG.getConstant(B.First, VT));
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(B.First, dl, VT));
// Check range
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue RangeCmp =
- DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT);
+ DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
// Determine the type of the test operands.
bool UsePtrType = false;
@@ -1914,32 +1854,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
if (UsePtrType) {
VT = TLI.getPointerTy();
- Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT);
+ Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
B.RegVT = VT.getSimpleVT();
B.Reg = FuncInfo.CreateReg(B.RegVT);
- SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
- B.Reg, Sub);
-
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
addSuccessorWithWeight(SwitchBB, B.Default);
addSuccessorWithWeight(SwitchBB, MBB);
- SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, RangeCmp,
DAG.getBasicBlock(B.Default));
- if (MBB != NextBlock)
- BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
+ BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
DAG.getBasicBlock(MBB));
DAG.setRoot(BrRange);
@@ -1952,34 +1885,33 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
- SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
- Reg, VT);
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
- unsigned PopCount = CountPopulation_64(B.Mask);
+ unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
- getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ);
+ dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
- getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
- DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE);
+ dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE);
} else {
// Make desired shift
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT,
- DAG.getConstant(1, VT), ShiftOp);
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
+ DAG.getConstant(1, dl, VT), ShiftOp);
// Emit bit tests and jumps
- SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(),
- VT, SwitchVal, DAG.getConstant(B.Mask, VT));
- Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp,
- DAG.getConstant(0, VT), ISD::SETNE);
+ SDValue AndOp = DAG.getNode(ISD::AND, dl,
+ VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
+ Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
}
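
A standalone reading of the three bit-test strategies chosen above, under my assumption (not stated in the hunk) that BB.Range is High - Low, so candidate bit positions run from 0 through Range inclusive:

#include <bit>
#include <cassert>
#include <cstdint>

bool bitTest(uint64_t Mask, unsigned Range, unsigned B) {
  unsigned PopCount = std::popcount(Mask);
  if (PopCount == 1)      // one set bit: compare shift amounts directly
    return B == unsigned(std::countr_zero(Mask));
  if (PopCount == Range)  // exactly one clear bit among the positions
    return B != unsigned(std::countr_one(Mask));
  return ((uint64_t{1} << B) & Mask) != 0; // general: shift, AND, test
}

int main() {
  // Mask 0b1011 over Range 3: the only non-member position is bit 2.
  for (unsigned B = 0; B <= 3; ++B)
    assert(bitTest(0b1011, 3, B) == (B != 2));
}
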
// The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
@@ -1987,19 +1919,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
- SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
- if (NextMBB != NextBlock)
- BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd,
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
+ BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
DAG.setRoot(BrAnd);
@@ -2027,13 +1953,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I, LandingPad);
break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ break;
}
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
- CopyToExportRegsIfNeeded(&I);
+ // We already took care of the exported value for the statepoint instruction
+ // during the call to LowerStatepoint.
+ if (!isStatepoint(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
// Update successor info
addSuccessorWithWeight(InvokeMBB, Return);
@@ -2065,622 +1998,86 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
return;
SmallVector<EVT, 2> ValueVTs;
+ SDLoc dl = getCurSDLoc();
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
- Ops[0] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
- getCurSDLoc(), ValueVTs[0]);
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ dl, ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy());
+ }
Ops[1] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
- getCurSDLoc(), ValueVTs[1]);
+ dl, ValueVTs[1]);
// Merge into one.
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
-/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
-/// small case ranges).
-bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock *Default,
- MachineBasicBlock *SwitchBB) {
- // Size is the number of Cases represented by this range.
- size_t Size = CR.Range.second - CR.Range.first;
- if (Size > 3)
- return false;
-
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = CR.CaseBB;
-
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // If any two of the cases has the same destination, and if one value
- // is the same as the other, but has one bit unset that the other has set,
- // use bit manipulation to do two compares at once. For example:
- // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
- // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
- // TODO: Handle cases where CR.CaseBB != SwitchBB.
- if (Size == 2 && CR.CaseBB == SwitchBB) {
- Case &Small = *CR.Range.first;
- Case &Big = *(CR.Range.second-1);
-
- if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
- const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
- const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
-
- // Check that there is only one bit different.
- if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
- (SmallValue | BigValue) == BigValue) {
- // Isolate the common bit.
- APInt CommonBit = BigValue & ~SmallValue;
- assert((SmallValue | CommonBit) == BigValue &&
- CommonBit.countPopulation() == 1 && "Not a common bit?");
-
- SDValue CondLHS = getValue(SV);
- EVT VT = CondLHS.getValueType();
- SDLoc DL = getCurSDLoc();
-
- SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
- DAG.getConstant(CommonBit, VT));
- SDValue Cond = DAG.getSetCC(DL, MVT::i1,
- Or, DAG.getConstant(BigValue, VT),
- ISD::SETEQ);
-
- // Update successor info.
- // Both Small and Big will jump to Small.BB, so we sum up the weights.
- addSuccessorWithWeight(SwitchBB, Small.BB,
- Small.ExtraWeight + Big.ExtraWeight);
- addSuccessorWithWeight(SwitchBB, Default,
- // The default destination is the first successor in IR.
- BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
-
- // Insert the true branch.
- SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
- getControlRoot(), Cond,
- DAG.getBasicBlock(Small.BB));
-
- // Insert the false branch.
- BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
- DAG.getBasicBlock(Default));
-
- DAG.setRoot(BrCond);
- return true;
- }
- }
- }
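
The deleted block above folds two equality cases with the same destination into one compare when the case values differ in a single bit — "if (X == 6 || X == 4)" becomes "if ((X|2) == 6)". A standalone check of that identity:

#include <bit>
#include <cassert>
#include <cstdint>

bool matchesEither(uint64_t X, uint64_t Small, uint64_t Big) {
  // Precondition from the deleted code: Big has exactly one extra set bit.
  assert(std::popcount(Big) == std::popcount(Small) + 1 &&
         (Small | Big) == Big);
  uint64_t CommonBit = Big & ~Small; // isolate the single differing bit
  return (X | CommonBit) == Big;     // one compare instead of two
}

int main() {
  for (uint64_t X = 0; X < 16; ++X)
    assert(matchesEither(X, 4, 6) == (X == 4 || X == 6));
}
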
-
- // Order cases by weight so the most likely case will be checked first.
- uint32_t UnhandledWeights = 0;
- if (BPI) {
- for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
- uint32_t IWeight = I->ExtraWeight;
- UnhandledWeights += IWeight;
- for (CaseItr J = CR.Range.first; J < I; ++J) {
- uint32_t JWeight = J->ExtraWeight;
- if (IWeight > JWeight)
- std::swap(*I, *J);
- }
- }
- }
- // Rearrange the case blocks so that the last one falls through if possible.
- Case &BackCase = *(CR.Range.second-1);
- if (Size > 1 &&
- NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
- // The last case block won't fall through into 'NextBlock' if we emit the
- // branches in this order. See if rearranging a case value would help.
- // We start at the bottom as it's the case with the least weight.
- for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
- if (I->BB == NextBlock) {
- std::swap(*I, BackCase);
- break;
- }
- }
-
- // Create a CaseBlock record representing a conditional branch to
- // the Case's target mbb if the value being switched on SV is equal
- // to C.
- MachineBasicBlock *CurBlock = CR.CaseBB;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
- MachineBasicBlock *FallThrough;
- if (I != E-1) {
- FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
- CurMF->insert(BBI, FallThrough);
-
- // Put SV in a virtual register to make it available from the new blocks.
- ExportFromCurrentBlock(SV);
- } else {
- // If the last case doesn't match, go to the default block.
- FallThrough = Default;
- }
-
- const Value *RHS, *LHS, *MHS;
- ISD::CondCode CC;
- if (I->High == I->Low) {
- // This is just small small case range :) containing exactly 1 case
- CC = ISD::SETEQ;
- LHS = SV; RHS = I->High; MHS = nullptr;
- } else {
- CC = ISD::SETLE;
- LHS = I->Low; MHS = SV; RHS = I->High;
- }
-
- // The false weight should be sum of all un-handled cases.
- UnhandledWeights -= I->ExtraWeight;
- CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
- /* me */ CurBlock,
- /* trueweight */ I->ExtraWeight,
- /* falseweight */ UnhandledWeights);
-
- // If emitting the first comparison, just call visitSwitchCase to emit the
- // code into the current block. Otherwise, push the CaseBlock onto the
- // vector to be later processed by SDISel, and insert the node's MBB
- // before the next MBB.
- if (CurBlock == SwitchBB)
- visitSwitchCase(CB, SwitchBB);
- else
- SwitchCases.push_back(CB);
-
- CurBlock = FallThrough;
- }
-
- return true;
-}
-
-static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
-}
-
-static APInt ComputeRange(const APInt &First, const APInt &Last) {
- uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
- return (LastExt - FirstExt + 1ULL);
-}
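
ComputeRange sign-extends to one extra bit before subtracting so that Last - First + 1 cannot overflow. The same trick on fixed-width integers, with __int128 (a GCC/Clang extension) standing in for APInt's widening:

#include <cassert>
#include <cstdint>

static __int128 computeRange(int64_t First, int64_t Last) {
  return (__int128)Last - (__int128)First + 1; // widened: cannot overflow
}

int main() {
  assert(computeRange(INT64_MIN, INT64_MAX) == ((__int128)1 << 64));
}
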
-
-/// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
- CaseRecVector &WorkList,
- const Value *SV,
- MachineBasicBlock *Default,
- MachineBasicBlock *SwitchBB) {
- Case& FrontCase = *CR.Range.first;
- Case& BackCase = *(CR.Range.second-1);
-
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
-
- APInt TSize(First.getBitWidth(), 0);
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
- TSize += I->size();
+unsigned
+SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadBB) {
+ SDValue Chain = getControlRoot();
+ SDLoc dl = getCurSDLoc();
+ // Get the typeid that we will dispatch on later.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
- return false;
-
- APInt Range = ComputeRange(First, Last);
- // The density is TSize / Range. Require at least 40%.
- // It should not be possible for IntTSize to saturate for sane code, but make
- // sure we handle Range saturation correctly.
- uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
- uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
- if (IntTSize * 10 < IntRange * 4)
- return false;
-
- DEBUG(dbgs() << "Lowering jump table\n"
- << "First entry: " << First << ". Last entry: " << Last << '\n'
- << "Range: " << Range << ". Size: " << TSize << ".\n\n");
-
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // Figure out which block is immediately after the current one.
- MachineFunction::iterator BBI = CR.CaseBB;
- ++BBI;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV);
+ SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel);
+
+ // Branch to the main landing pad block.
+ MachineBasicBlock *ClauseMBB = FuncInfo.MBB;
+ ClauseMBB->addSuccessor(LPadBB);
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain,
+ DAG.getBasicBlock(LPadBB)));
+ return VReg;
+}
+
+void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
+#ifndef NDEBUG
+ for (const CaseCluster &CC : Clusters)
+ assert(CC.Low == CC.High && "Input clusters must be single-case");
+#endif
- const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
- // Create a new basic block to hold the code for loading the address
- // of the jump table, and jumping to it. Update successor information;
- // we will either branch to the default case for the switch, or the jump
- // table.
- MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
- CurMF->insert(BBI, JumpTableBB);
-
- addSuccessorWithWeight(CR.CaseBB, Default);
- addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
-
- // Build a vector of destination BBs, corresponding to each target
- // of the jump table. If the value of the jump table slot corresponds to
- // a case statement, push the case's BB onto the vector, otherwise, push
- // the default BB.
- std::vector<MachineBasicBlock*> DestBBs;
- APInt TEI = First;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
- const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
- const APInt &High = cast<ConstantInt>(I->High)->getValue();
-
- if (Low.sle(TEI) && TEI.sle(High)) {
- DestBBs.push_back(I->BB);
- if (TEI==High)
- ++I;
+ std::sort(Clusters.begin(), Clusters.end(),
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Merge adjacent clusters with the same destination.
+ const unsigned N = Clusters.size();
+ unsigned DstIndex = 0;
+ for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
+ CaseCluster &CC = Clusters[SrcIndex];
+ const ConstantInt *CaseVal = CC.Low;
+ MachineBasicBlock *Succ = CC.MBB;
+
+ if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
+ (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Clusters[DstIndex - 1].High = CaseVal;
+ Clusters[DstIndex - 1].Weight += CC.Weight;
+ assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!");
} else {
- DestBBs.push_back(Default);
+ std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
+ sizeof(Clusters[SrcIndex]));
}
}
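
A standalone model of the merge loop just added: sorted single-value cases that target the same block and are numerically adjacent collapse into one [Low, High] cluster, compacted in place. The final truncation to DstIndex elements falls past the end of the visible hunk, so the resize below is an assumption made to keep the model complete:

#include <cassert>
#include <vector>

struct Cluster { int Low, High, MBB, Weight; };

void rangeify(std::vector<Cluster> &Clusters) {
  unsigned DstIndex = 0;
  for (unsigned SrcIndex = 0; SrcIndex < Clusters.size(); ++SrcIndex) {
    Cluster &CC = Clusters[SrcIndex];
    if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == CC.MBB &&
        CC.Low - Clusters[DstIndex - 1].High == 1) {
      Clusters[DstIndex - 1].High = CC.High;      // neighbour, same successor:
      Clusters[DstIndex - 1].Weight += CC.Weight; // grow the previous cluster
    } else {
      Clusters[DstIndex++] = CC;                  // keep as a new cluster
    }
  }
  Clusters.resize(DstIndex);
}

int main() {
  // Cases 1,2,3 -> block 0; case 5 -> block 1.
  std::vector<Cluster> C{{1, 1, 0, 1}, {2, 2, 0, 1}, {3, 3, 0, 1}, {5, 5, 1, 1}};
  rangeify(C);
  assert(C.size() == 2 && C[0].Low == 1 && C[0].High == 3 && C[0].Weight == 3);
}
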
-
- // Calculate weight for each unique destination in CR.
- DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
- if (FuncInfo.BPI)
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(I->BB);
- if (Itr != DestWeights.end())
- Itr->second += I->ExtraWeight;
- else
- DestWeights[I->BB] = I->ExtraWeight;
- }
-
- // Update successor info. Add one edge to each unique successor.
- BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
- for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
- E = DestBBs.end(); I != E; ++I) {
- if (!SuccsHandled[(*I)->getNumber()]) {
- SuccsHandled[(*I)->getNumber()] = true;
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(*I);
- addSuccessorWithWeight(JumpTableBB, *I,
- Itr != DestWeights.end() ? Itr->second : 0);
- }
- }
-
- // Create a jump table index for this jump table.
- unsigned JTEncoding = TLI.getJumpTableEncoding();
- unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
- ->createJumpTableIndex(DestBBs);
-
- // Set the jump table information so that we can codegen it as a second
- // MachineBasicBlock
- JumpTable JT(-1U, JTI, JumpTableBB, Default);
- JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
- if (CR.CaseBB == SwitchBB)
- visitJumpTableHeader(JT, JTH, SwitchBB);
-
- JTCases.push_back(JumpTableBlock(JTH, JT));
- return true;
-}
-
-/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
-/// 2 subtrees.
-bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* SwitchBB) {
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // Figure out which block is immediately after the current one.
- MachineFunction::iterator BBI = CR.CaseBB;
- ++BBI;
-
- Case& FrontCase = *CR.Range.first;
- Case& BackCase = *(CR.Range.second-1);
- const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
- // Size is the number of Cases represented by this range.
- unsigned Size = CR.Range.second - CR.Range.first;
-
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
- double FMetric = 0;
- CaseItr Pivot = CR.Range.first + Size/2;
-
- // Select optimal pivot, maximizing sum density of LHS and RHS. This will
-  // (heuristically) allow us to emit JumpTables later.
- APInt TSize(First.getBitWidth(), 0);
- for (CaseItr I = CR.Range.first, E = CR.Range.second;
- I!=E; ++I)
- TSize += I->size();
-
- APInt LSize = FrontCase.size();
- APInt RSize = TSize-LSize;
- DEBUG(dbgs() << "Selecting best pivot: \n"
- << "First: " << First << ", Last: " << Last <<'\n'
- << "LSize: " << LSize << ", RSize: " << RSize << '\n');
- for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
- J!=E; ++I, ++J) {
- const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
- APInt Range = ComputeRange(LEnd, RBegin);
- assert((Range - 2ULL).isNonNegative() &&
- "Invalid case distance");
- // Use volatile double here to avoid excess precision issues on some hosts,
- // e.g. that use 80-bit X87 registers.
- volatile double LDensity =
- (double)LSize.roundToDouble() /
- (LEnd - First + 1ULL).roundToDouble();
- volatile double RDensity =
- (double)RSize.roundToDouble() /
- (Last - RBegin + 1ULL).roundToDouble();
- volatile double Metric = Range.logBase2()*(LDensity+RDensity);
- // Should always split in some non-trivial place
- DEBUG(dbgs() <<"=>Step\n"
- << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
- << "LDensity: " << LDensity
- << ", RDensity: " << RDensity << '\n'
- << "Metric: " << Metric << '\n');
- if (FMetric < Metric) {
- Pivot = J;
- FMetric = Metric;
- DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
- }
-
- LSize += J->size();
- RSize -= J->size();
- }
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (areJTsAllowed(TLI)) {
- // If our case is dense we *really* should handle it earlier!
- assert((FMetric > 0) && "Should handle dense range earlier!");
- } else {
- Pivot = CR.Range.first + Size/2;
- }
-
- CaseRange LHSR(CR.Range.first, Pivot);
- CaseRange RHSR(Pivot, CR.Range.second);
- const Constant *C = Pivot->Low;
- MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
-
- // We know that we branch to the LHS if the Value being switched on is
- // less than the Pivot value, C. We use this to optimize our binary
- // tree a bit, by recognizing that if SV is greater than or equal to the
- // LHS's Case Value, and that Case Value is exactly one less than the
- // Pivot's Value, then we can branch directly to the LHS's Target,
- // rather than creating a leaf node for it.
- if ((LHSR.second - LHSR.first) == 1 &&
- LHSR.first->High == CR.GE &&
- cast<ConstantInt>(C)->getValue() ==
- (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
- TrueBB = LHSR.first->BB;
- } else {
- TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
- CurMF->insert(BBI, TrueBB);
- WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
-
- // Put SV in a virtual register to make it available from the new blocks.
- ExportFromCurrentBlock(SV);
- }
-
- // Similar to the optimization above, if the Value being switched on is
- // known to be less than the Constant CR.LT, and the current Case Value
- // is CR.LT - 1, then we can branch directly to the target block for
- // the current Case Value, rather than emitting a RHS leaf node for it.
- if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
- cast<ConstantInt>(RHSR.first->Low)->getValue() ==
- (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
- FalseBB = RHSR.first->BB;
- } else {
- FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
- CurMF->insert(BBI, FalseBB);
- WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
-
- // Put SV in a virtual register to make it available from the new blocks.
- ExportFromCurrentBlock(SV);
- }
-
- // Create a CaseBlock record representing a conditional branch to
- // the LHS node if the value being switched on SV is less than C.
-  // Otherwise, branch to the RHS node.
- CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);
-
- if (CR.CaseBB == SwitchBB)
- visitSwitchCase(CB, SwitchBB);
- else
- SwitchCases.push_back(CB);
-
- return true;
-}
-
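
For reference, the pivot heuristic being deleted above is easier to see outside
the APInt/DAG machinery. The following is a minimal stand-alone sketch, not
LLVM code: Cluster, selectPivot, and the plain uint64_t case values are
inventions for illustration, and std::log2 stands in for APInt::logBase2
(which is a floor log2).

    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Minimal stand-in for one sorted, non-overlapping case range.
    struct Cluster { uint64_t Low, High; };

    // Pick the split maximizing log2(gap) * (left density + right density),
    // mirroring the deleted heuristic. Density = (# case values) / (span).
    // Adjacent clusters are assumed separated by a gap of at least 2,
    // matching the "Invalid case distance" assert above.
    static size_t selectPivot(const std::vector<Cluster> &Cs) {
      size_t Pivot = Cs.size() / 2;  // fallback when no metric wins
      double BestMetric = 0;
      uint64_t First = Cs.front().Low, Last = Cs.back().High;
      uint64_t TotalSize = 0;
      for (const Cluster &C : Cs)
        TotalSize += C.High - C.Low + 1;

      uint64_t LSize = Cs.front().High - Cs.front().Low + 1;
      uint64_t RSize = TotalSize - LSize;
      for (size_t J = 1; J != Cs.size(); ++J) {
        uint64_t LEnd = Cs[J - 1].High, RBegin = Cs[J].Low;
        double LDensity = double(LSize) / double(LEnd - First + 1);
        double RDensity = double(RSize) / double(Last - RBegin + 1);
        double Metric = std::log2(double(RBegin - LEnd)) * (LDensity + RDensity);
        if (Metric > BestMetric) { BestMetric = Metric; Pivot = J; }
        uint64_t JSize = Cs[J].High - Cs[J].Low + 1;
        LSize += JSize;
        RSize -= JSize;
      }
      return Pivot;  // cases [0, Pivot) go to the LHS subtree, the rest to RHS
    }
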
-/// handleBitTestsSwitchCase - if the current case range has few destinations
-/// and spans less than the machine word bitwidth, encode the case range into
-/// a series of masks and emit bit tests with these masks.
-bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock* SwitchBB) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PTy = TLI.getPointerTy();
- unsigned IntPtrBits = PTy.getSizeInBits();
-
- Case& FrontCase = *CR.Range.first;
- Case& BackCase = *(CR.Range.second-1);
-
- // Get the MachineFunction which holds the current MBB. This is used when
- // inserting any additional MBBs necessary to represent the switch.
- MachineFunction *CurMF = FuncInfo.MF;
-
- // If target does not have legal shift left, do not emit bit tests at all.
- if (!TLI.isOperationLegal(ISD::SHL, PTy))
- return false;
-
- size_t numCmps = 0;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
-    // A single case counts as one comparison; a case range counts as two.
- numCmps += (I->Low == I->High ? 1 : 2);
- }
-
- // Count unique destinations
- SmallSet<MachineBasicBlock*, 4> Dests;
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
- Dests.insert(I->BB);
- if (Dests.size() > 3)
-      // Don't bother with the code below if there are too many unique destinations.
- return false;
- }
- DEBUG(dbgs() << "Total number of unique destinations: "
- << Dests.size() << '\n'
- << "Total number of comparisons: " << numCmps << '\n');
-
- // Compute span of values.
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
- APInt cmpRange = maxValue - minValue;
-
- DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
- << "Low bound: " << minValue << '\n'
- << "High bound: " << maxValue << '\n');
-
- if (cmpRange.uge(IntPtrBits) ||
- (!(Dests.size() == 1 && numCmps >= 3) &&
- !(Dests.size() == 2 && numCmps >= 5) &&
- !(Dests.size() >= 3 && numCmps >= 6)))
- return false;
-
- DEBUG(dbgs() << "Emitting bit tests\n");
- APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
-
- // Optimize the case where all the case values fit in a
- // word without having to subtract minValue. In this case,
- // we can optimize away the subtraction.
- if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
- cmpRange = maxValue;
- } else {
- lowBound = minValue;
- }
-
- CaseBitsVector CasesBits;
- unsigned i, count = 0;
-
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
- MachineBasicBlock* Dest = I->BB;
- for (i = 0; i < count; ++i)
- if (Dest == CasesBits[i].BB)
- break;
-
- if (i == count) {
-      assert((count < 3) && "Too many destinations to test!");
- CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
- count++;
- }
-
- const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
- const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
-
- uint64_t lo = (lowValue - lowBound).getZExtValue();
- uint64_t hi = (highValue - lowBound).getZExtValue();
- CasesBits[i].ExtraWeight += I->ExtraWeight;
-
- for (uint64_t j = lo; j <= hi; j++) {
- CasesBits[i].Mask |= 1ULL << j;
- CasesBits[i].Bits++;
- }
-
- }
- std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
-
- BitTestInfo BTC;
-
- // Figure out which block is immediately after the current one.
- MachineFunction::iterator BBI = CR.CaseBB;
- ++BBI;
-
- const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
- DEBUG(dbgs() << "Cases:\n");
- for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
- DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
- << ", Bits: " << CasesBits[i].Bits
- << ", BB: " << CasesBits[i].BB << '\n');
-
- MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
- CurMF->insert(BBI, CaseBB);
- BTC.push_back(BitTestCase(CasesBits[i].Mask,
- CaseBB,
- CasesBits[i].BB, CasesBits[i].ExtraWeight));
-
- // Put SV in a virtual register to make it available from the new blocks.
- ExportFromCurrentBlock(SV);
- }
-
- BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, MVT::Other, (CR.CaseBB == SwitchBB),
- CR.CaseBB, Default, std::move(BTC));
-
- if (CR.CaseBB == SwitchBB)
- visitBitTestHeader(BTB, SwitchBB);
-
- BitTestCases.push_back(std::move(BTB));
-
- return true;
-}
-
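
The bit-test lowering deleted above rebases all case values against lowBound
and turns each destination into a word-sized mask, so the dispatch becomes a
shift, an AND, and a branch per destination. A small sketch of the mask
construction; maskForRange is a hypothetical helper name, and the whole value
span is assumed to fit in a 64-bit word:

    #include <cstdint>

    // After rebasing against lowBound, each destination gets a mask with one
    // bit per matching value, and the emitted dispatch is essentially
    //   if ((1ULL << (X - lowBound)) & Mask) goto Dest;
    static uint64_t maskForRange(uint64_t lo, uint64_t hi) {
      uint64_t Mask = 0;
      for (uint64_t j = lo; j <= hi; ++j)  // same loop as the deleted code
        Mask |= 1ULL << j;
      return Mask;
    }
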
-/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
-void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // Start with "simple" cases.
- for (SwitchInst::ConstCaseIt i : SI.cases()) {
- const BasicBlock *SuccBB = i.getCaseSuccessor();
- MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
-
- uint32_t ExtraWeight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
-
- Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
- SMBB, ExtraWeight));
- }
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
-  // Merge cases into clusters.
- if (Cases.size() >= 2)
- // Must recompute end() each iteration because it may be
- // invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
- J != Cases.end(); ) {
- const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
- const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
- MachineBasicBlock* nextBB = J->BB;
- MachineBasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
- I->High = J->High;
- I->ExtraWeight += J->ExtraWeight;
- J = Cases.erase(J);
- } else {
- I = J++;
- }
- }
-
- DEBUG({
- size_t numCmps = 0;
- for (auto &I : Cases)
- // A range counts double, since it requires two compares.
- numCmps += I.Low != I.High ? 2 : 1;
-
- dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << '\n';
- });
+ Clusters.resize(DstIndex);
}
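
The replacement code above sorts the clusters and compacts them in place. A
stand-alone sketch of the same sort-and-merge, using a simplified Cluster type
invented for illustration (in the real code every incoming cluster is still a
single case value, so Low == High at this point):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Cluster {      // simplified stand-in for CaseCluster
      int64_t Low, High;  // inclusive case-value range
      int Dest;           // stand-in for the successor MachineBasicBlock
      uint32_t Weight;
    };

    static void sortAndMerge(std::vector<Cluster> &Clusters) {
      std::sort(Clusters.begin(), Clusters.end(),
                [](const Cluster &a, const Cluster &b) { return a.Low < b.Low; });
      size_t DstIndex = 0;
      for (size_t SrcIndex = 0; SrcIndex < Clusters.size(); ++SrcIndex) {
        const Cluster &CC = Clusters[SrcIndex];
        if (DstIndex != 0 && Clusters[DstIndex - 1].Dest == CC.Dest &&
            CC.Low - Clusters[DstIndex - 1].High == 1) {
          // Same successor and numerically adjacent: extend the previous one.
          Clusters[DstIndex - 1].High = CC.High;
          Clusters[DstIndex - 1].Weight += CC.Weight;
        } else {
          Clusters[DstIndex++] = CC;  // keep as a new cluster, compacting in place
        }
      }
      Clusters.resize(DstIndex);
    }
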
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
@@ -2696,96 +2093,6 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
BitTestCases[i].Parent = Last;
}
-void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
- MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
-
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- if (SwitchMBB + 1 != FuncInfo.MF->end())
- NextBlock = SwitchMBB + 1;
-
-
- // Create a vector of Cases, sorted so that we can efficiently create a binary
- // search tree from them.
- CaseVector Cases;
- Clusterify(Cases, SI);
-
- // Get the default destination MBB.
- MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
-
- if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
- !Cases.empty()) {
- // Replace an unreachable default destination with the most popular case
- // destination.
- DenseMap<const BasicBlock *, unsigned> Popularity;
- unsigned MaxPop = 0;
- const BasicBlock *MaxBB = nullptr;
- for (auto I : SI.cases()) {
- const BasicBlock *BB = I.getCaseSuccessor();
- if (++Popularity[BB] > MaxPop) {
- MaxPop = Popularity[BB];
- MaxBB = BB;
- }
- }
-
- // Set new default.
- assert(MaxPop > 0);
- assert(MaxBB);
- Default = FuncInfo.MBBMap[MaxBB];
-
- // Remove cases that were pointing to the destination that is now the default.
- Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
- [&](const Case &C) { return C.BB == Default; }),
- Cases.end());
- }
-
- // If there is only the default destination, go there directly.
- if (Cases.empty()) {
- // Update machine-CFG edges.
- SwitchMBB->addSuccessor(Default);
-
- // If this is not a fall-through branch, emit the branch.
- if (Default != NextBlock) {
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
- getControlRoot(), DAG.getBasicBlock(Default)));
- }
- return;
- }
-
- // Get the Value to be switched on.
- const Value *SV = SI.getCondition();
-
- // Push the initial CaseRec onto the worklist
- CaseRecVector WorkList;
- WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr,
- CaseRange(Cases.begin(),Cases.end())));
-
- while (!WorkList.empty()) {
- // Grab a record representing a case range to process off the worklist
- CaseRec CR = WorkList.back();
- WorkList.pop_back();
-
- if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
- continue;
-
- // If the range has few cases (two or less) emit a series of specific
- // tests.
- if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
- continue;
-
- // If the switch has more than N blocks, and is at least 40% dense, and the
- // target supports indirect branches, then emit a jump table rather than
- // lowering the switch to a binary tree of conditional branches.
-    // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries().
- if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
- continue;
-
- // Emit binary tree. We need to pick a pivot, and push left and right ranges
-    // onto the worklist. Leaves are handled via a handleSmallSwitchRange() call.
- handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
- }
-}
-
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
@@ -2950,19 +2257,47 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));
- SDValue TrueVal = getValue(I.getOperand(1));
- SDValue FalseVal = getValue(I.getOperand(2));
+ SDValue LHSVal = getValue(I.getOperand(1));
+ SDValue RHSVal = getValue(I.getOperand(2));
+ auto BaseOps = {Cond};
ISD::NodeType OpCode = Cond.getValueType().isVector() ?
ISD::VSELECT : ISD::SELECT;
- for (unsigned i = 0; i != NumValues; ++i)
+ // Min/max matching is only viable if all output VTs are the same.
+ if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
+ Value *LHS, *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPF) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ default: break;
+ }
+
+ EVT VT = ValueVTs[0];
+ LLVMContext &Ctx = *DAG.getContext();
+ auto &TLI = DAG.getTargetLoweringInfo();
+ while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
+ VT = TLI.getTypeToTransformTo(Ctx, VT);
+
+ if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ RHSVal = getValue(RHS);
+ BaseOps = {};
+ }
+ }
+
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
- TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
- Cond,
- SDValue(TrueVal.getNode(),
- TrueVal.getResNo() + i),
- SDValue(FalseVal.getNode(),
- FalseVal.getResNo() + i));
+ LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
+ Ops);
+ }
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
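
The new block in visitSelect recognizes compare-plus-select shapes as min/max
and emits a single node when the target supports it. In source terms, the four
SPF_* flavors handled above correspond to these patterns (a sketch;
matchSelectPattern also handles swapped operands and inverted predicates):

    // The four flavors, written as the C++ they typically come from:
    unsigned umax(unsigned a, unsigned b) { return a > b ? a : b; } // SPF_UMAX
    unsigned umin(unsigned a, unsigned b) { return a < b ? a : b; } // SPF_UMIN
    int      smax(int a, int b)           { return a > b ? a : b; } // SPF_SMAX
    int      smin(int a, int b)           { return a < b ? a : b; } // SPF_SMIN
    // When ISD::SMIN etc. are legal or custom for the (possibly split) value
    // type, the compare+select pair collapses into the single min/max node.
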
@@ -2994,10 +2329,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) {
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(I.getType());
- setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N,
- DAG.getTargetConstant(0, TLI.getPointerTy())));
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
+ DAG.getTargetConstant(0, dl, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
@@ -3053,19 +2389,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
if (DestVT != N.getValueType())
- setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
+ setValue(&I, DAG.getNode(ISD::BITCAST, dl,
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
  // is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
- setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false,
+ setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
/*isOpaque*/true));
else
setValue(&I, N); // noop cast.
@@ -3243,10 +2580,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
- else
+ else {
+ SDLoc dl = getCurSDLoc();
Src = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src,
- DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy()));
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
+ DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy()));
+ }
}
// Calculate new mask.
@@ -3273,6 +2612,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// to insert and build vector.
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy();
+ SDLoc dl = getCurSDLoc();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
@@ -3284,14 +2624,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
- EltVT, Src, DAG.getConstant(Idx, IdxVT));
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ EltVT, Src, DAG.getConstant(Idx, dl, IdxVT));
}
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops));
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
@@ -3383,6 +2723,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Type *Ty = Op0->getType()->getScalarType();
unsigned AS = Ty->getPointerAddressSpace();
SDValue N = getValue(Op0);
+ SDLoc dl = getCurSDLoc();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3392,8 +2733,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
- DAG.getConstant(Offset, N.getValueType()));
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
+ DAG.getConstant(Offset, dl, N.getValueType()));
}
Ty = StTy->getElementType(Field);
@@ -3408,8 +2749,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
- SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
+ SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy);
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
continue;
}
@@ -3418,24 +2759,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
- IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType());
+ IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
if (ElementSize != 1) {
if (ElementSize.isPowerOf2()) {
unsigned Amt = ElementSize.logBase2();
- IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(),
+ IdxN = DAG.getNode(ISD::SHL, dl,
N.getValueType(), IdxN,
- DAG.getConstant(Amt, IdxN.getValueType()));
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(),
+ SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
}
- N = DAG.getNode(ISD::ADD, getCurSDLoc(),
+ N = DAG.getNode(ISD::ADD, dl,
N.getValueType(), N, IdxN);
}
}
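
The loop above linearizes a GEP into plain address arithmetic: constant
struct-field offsets are folded in directly, and array indices are scaled by
the element size, with a shift used when that size is a power of two. A scalar
model of one step (gepAddress is a hypothetical name; __builtin_ctzll is the
GCC/Clang builtin):

    #include <cstdint>

    static uintptr_t gepAddress(uintptr_t Base, uint64_t FieldOffset,
                                int64_t Index, uint64_t ElementSize) {
      uintptr_t N = Base + FieldOffset;  // struct-field offset (ISD::ADD)
      if (ElementSize == 1) {
        N += uintptr_t(Index);
      } else if ((ElementSize & (ElementSize - 1)) == 0) {
        unsigned Amt = __builtin_ctzll(ElementSize);  // log2 of the size
        N += uintptr_t(Index) << Amt;                 // ISD::SHL path
      } else {
        N += uintptr_t(Index) * ElementSize;          // ISD::MUL path
      }
      return N;
    }
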
@@ -3449,6 +2790,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
+ SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
@@ -3460,11 +2802,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
EVT IntPtr = TLI.getPointerTy();
if (AllocSize.getValueType() != IntPtr)
- AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr);
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
- AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr,
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
AllocSize,
- DAG.getConstant(TySize, IntPtr));
+ DAG.getConstant(TySize, dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -3476,18 +2818,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Round the size of the allocation up to the stack alignment size
// by add SA-1 to the size.
- AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(),
+ AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign-1));
+ DAG.getIntPtrConstant(StackAlign - 1, dl));
// Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(),
+ AllocSize = DAG.getNode(ISD::AND, dl,
AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
+ dl));
- SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
- SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
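
The ADD/AND pair emitted above is the standard align-up idiom for a
power-of-two alignment. As plain integer code:

    #include <cstdint>

    // Round Size up to a multiple of StackAlign (a power of two), exactly
    // the ADD + AND node pair above:
    static uint64_t alignUp(uint64_t Size, uint64_t StackAlign) {
      return (Size + StackAlign - 1) & ~(StackAlign - 1);
    }
    // e.g. alignUp(13, 16) == 16 and alignUp(32, 16) == 32.
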
@@ -3535,8 +2878,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getRoot();
}
+ SDLoc dl = getCurSDLoc();
+
if (isVolatile)
- Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
@@ -3552,15 +2897,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
    // (MaxParallelChains should always remain as a failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
- SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(),
+ SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
- DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
+ DAG.getConstant(Offsets[i], dl, PtrVT));
+ SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
isNonTemporal, isInvariant, Alignment, AAInfo,
Ranges);
@@ -3570,7 +2915,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
if (!ConstantMemory) {
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
@@ -3578,7 +2923,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
PendingLoads.push_back(Chain);
}
- setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Values));
}
@@ -3610,6 +2955,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
unsigned Alignment = I.getAlignment();
+ SDLoc dl = getCurSDLoc();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -3618,21 +2964,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
- SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr,
- DAG.getConstant(Offsets[i], PtrVT));
- SDValue St = DAG.getStore(Root, getCurSDLoc(),
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], dl, PtrVT));
+ SDValue St = DAG.getStore(Root, dl,
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
isVolatile, isNonTemporal, Alignment, AAInfo);
Chains[ChainI] = St;
}
- SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
@@ -3664,6 +3010,94 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
setValue(&I, StoreNode);
}
+// Gather/scatter receive a vector of pointers.
+// This vector of pointers may be representable as a base pointer + a vector
+// of indices; whether it is depends on the GEP and on the instruction
+// preceding the GEP that calculates the indices.
+static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+ SelectionDAGBuilder* SDB) {
+
+  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!Gep || Gep->getNumOperands() > 2)
+ return false;
+ ShuffleVectorInst *ShuffleInst =
+ dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
+ if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
+ cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
+ Instruction::InsertElement)
+ return false;
+
+ Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+
+ SelectionDAG& DAG = SDB->DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Check whether Ptr has already been lowered in the current basic block;
+  // if not, look for the shuffle instruction instead.
+ if (SDB->findValue(Ptr))
+ Base = SDB->getValue(Ptr);
+ else if (SDB->findValue(ShuffleInst)) {
+ SDValue ShuffleNode = SDB->getValue(ShuffleInst);
+ SDLoc sdl = ShuffleNode;
+ Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl,
+ ShuffleNode.getValueType().getScalarType(), ShuffleNode,
+ DAG.getConstant(0, sdl, TLI.getVectorIdxTy()));
+ SDB->setValue(Ptr, Base);
+ }
+ else
+ return false;
+
+ Value *IndexVal = Gep->getOperand(1);
+ if (SDB->findValue(IndexVal)) {
+ Index = SDB->getValue(IndexVal);
+
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ IndexVal = Sext->getOperand(0);
+ if (SDB->findValue(IndexVal))
+ Index = SDB->getValue(IndexVal);
+ }
+ return true;
+ }
+ return false;
+}
+
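
getUniformBase succeeds when the vector of pointers is really one scalar base
splat across all lanes (insertelement + zero-mask shufflevector feeding the
GEP) plus a vector of indices. That shape typically comes from source like the
sketch below; the function name is invented, and whether a vectorizer actually
emits @llvm.masked.gather for it depends on the target:

    // Hypothetical source whose vectorized form yields the pattern above:
    // one scalar base pointer (splat across lanes) indexed by offsets.
    void gatherExample(const double *Base, const long *Ind, double *Out, int n) {
      for (int i = 0; i < n; ++i)
        Out[i] = Base[Ind[i]];  // may become @llvm.masked.gather when vectorized
    }
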
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+ Value *Ptr = I.getArgOperand(1);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ SDValue Base;
+ SDValue Index;
+ Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+
+ Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ if (!UniformBase) {
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy());
+ Index = getValue(Ptr);
+ }
+ SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
+ SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
+ Ops, MMO);
+ DAG.setRoot(Scatter);
+ setValue(&I, Scatter);
+}
+
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
@@ -3705,6 +3139,59 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
setValue(&I, Load);
}
+void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ Value *Ptr = I.getArgOperand(0);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SDValue Root = DAG.getRoot();
+ SDValue Base;
+ SDValue Index;
+ Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool ConstantMemory = false;
+ if (UniformBase && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(BasePtr,
+ AA->getTypeStoreSize(I.getType()),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ }
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ if (!UniformBase) {
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy());
+ Index = getValue(Ptr);
+ }
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index };
+ SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
+ Ops, MMO);
+
+ SDValue OutChain = Gather.getValue(1);
+ if (!ConstantMemory)
+ PendingLoads.push_back(OutChain);
+ setValue(&I, Gather);
+}
+
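
For symmetry, the scatter-side source shape; when getUniformBase fails, the
code above falls back to a zero base and passes the whole pointer vector as
the index operand. Again a hypothetical example, not guaranteed vectorizer
output:

    // Scatter-side counterpart of the gather sketch above.
    void scatterExample(double *Base, const long *Ind, const double *In, int n) {
      for (int i = 0; i < n; ++i)
        Base[Ind[i]] = In[i];  // may become @llvm.masked.scatter when vectorized
    }
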
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -3769,8 +3256,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
- Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+ Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy());
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -3859,7 +3346,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
@@ -3919,9 +3407,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
static SDValue
GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
- DAG.getConstant(0x007fffff, MVT::i32));
+ DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
- DAG.getConstant(0x3f800000, MVT::i32));
+ DAG.getConstant(0x3f800000, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
@@ -3934,21 +3422,108 @@ static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
SDLoc dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
- DAG.getConstant(0x7f800000, MVT::i32));
+ DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
- DAG.getConstant(23, TLI.getPointerTy()));
+ DAG.getConstant(23, dl, TLI.getPointerTy()));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
- DAG.getConstant(127, MVT::i32));
+ DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
/// getF32Constant - Get 32-bit floating point constant.
static SDValue
-getF32Constant(SelectionDAG &DAG, unsigned Flt) {
- return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
MVT::f32);
}
+static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
+ SelectionDAG &DAG) {
+  // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234, dl));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000, dl));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
+
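
The helper factored out above computes 2^t0 as 2^IntegerPart * P(frac), where
P is one of three minimax polynomials, and applies the 2^IntegerPart factor by
adding IntegerPart << 23 directly into the float's exponent bits. A host-side
model of the 6-bit-precision path (exp2Limited is an invented name; the
decimal coefficients are the decoded 0x3e814304/0x3f3c50c8/0x3f7f5e7e
constants from the comments above):

    #include <cstdint>
    #include <cstring>

    static float exp2Limited(float t0) {
      int32_t IntegerPartOfX = (int32_t)t0;    // ISD::FP_TO_SINT
      float X = t0 - (float)IntegerPartOfX;    // fractional part
      // 2^X ~= 0.997535578f + (0.735607626f + 0.252464424f * X) * X
      float TwoToFrac = 0.997535578f + (0.735607626f + 0.252464424f * X) * X;
      // Apply 2^IntegerPartOfX in the integer domain: add IntegerPartOfX << 23
      // straight into the float's bit pattern (the ADD after the BITCAST).
      uint32_t Bits;
      std::memcpy(&Bits, &TwoToFrac, sizeof(Bits));
      Bits += (uint32_t)IntegerPartOfX << 23;
      std::memcpy(&TwoToFrac, &Bits, sizeof(TwoToFrac));
      return TwoToFrac;
    }
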
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
@@ -3960,92 +3535,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// final result:
//
// #define LOG2OFe 1.4426950f
- // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ // t0 = Op * LOG2OFe
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
- getF32Constant(DAG, 0x3fb8aa3b));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFracPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // 0.000107046256 error, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- //
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ getF32Constant(DAG, 0x3fb8aa3b, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
@@ -4063,7 +3556,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// Scale the exponent by log(2) [0.69314718f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
- getF32Constant(DAG, 0x3f317218));
+ getF32Constant(DAG, 0x3f317218, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
@@ -4079,12 +3572,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbe74c456));
+ getF32Constant(DAG, 0xbe74c456, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3fb3a2b1));
+ getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f949a29));
+ getF32Constant(DAG, 0x3f949a29, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -4096,18 +3589,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbd67b6d6));
+ getF32Constant(DAG, 0xbd67b6d6, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3ee4f4b8));
+ getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fbc278b));
+ getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x40348e95));
+ getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3fdef31a));
+ getF32Constant(DAG, 0x3fdef31a, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -4121,24 +3614,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbc91e5ac));
+ getF32Constant(DAG, 0xbc91e5ac, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3e4350aa));
+ getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f60d3e3));
+ getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x4011cdf0));
+ getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x406cfd1c));
+ getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x408797cb));
+ getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4006dcab));
+ getF32Constant(DAG, 0x4006dcab, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
@@ -4173,12 +3666,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbeb08fe0));
+ getF32Constant(DAG, 0xbeb08fe0, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x40019463));
+ getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fd6633d));
+ getF32Constant(DAG, 0x3fd6633d, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -4190,18 +3683,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbda7262e));
+ getF32Constant(DAG, 0xbda7262e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3f25280b));
+ getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x4007b923));
+ getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x40823e2f));
+ getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x4020d29c));
+ getF32Constant(DAG, 0x4020d29c, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -4216,24 +3709,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbcd2769e));
+ getF32Constant(DAG, 0xbcd2769e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3e8ce0b9));
+ getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fa22ae7));
+ getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x40525723));
+ getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x40aaf200));
+ getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x40c39dad));
+ getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4042902c));
+ getF32Constant(DAG, 0x4042902c, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
@@ -4254,7 +3747,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
- getF32Constant(DAG, 0x3e9a209a));
+ getF32Constant(DAG, 0x3e9a209a, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
@@ -4270,12 +3763,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0xbdd49a13));
+ getF32Constant(DAG, 0xbdd49a13, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3f1c0789));
+ getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f011300));
+ getF32Constant(DAG, 0x3f011300, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -4286,15 +3779,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.00019228036, which is better than 12 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3d431f31));
+ getF32Constant(DAG, 0x3d431f31, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3ea21fb2));
+ getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f6ae232));
+ getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f25f7c3));
+ getF32Constant(DAG, 0x3f25f7c3, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -4307,21 +3800,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
//
// error 0.0000037995730, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3c5d51ce));
+ getF32Constant(DAG, 0x3c5d51ce, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
- getF32Constant(DAG, 0x3e00685a));
+ getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3efb6798));
+ getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f88d192));
+ getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3fc4316c));
+ getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3f57ce70));
+ getF32Constant(DAG, 0x3f57ce70, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
@@ -4336,91 +3829,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (Op.getValueType() == MVT::f32 &&
- LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
- TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
- }
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
@@ -4444,90 +3854,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
// final result:
//
// #define LOG2OF10 3.3219281f
- // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ // t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
- getF32Constant(DAG, 0x40549a78));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // twoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ getF32Constant(DAG, 0x40549a78, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
@@ -4549,14 +3879,13 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
// powi(x, 0) -> 1.0
if (Val == 0)
- return DAG.getConstantFP(1.0, LHS.getValueType());
+ return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
+ if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
- CountPopulation_32(Val)+Log2_32(Val) < 7) {
+ countPopulation(Val) + Log2_32(Val) < 7) {
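+      // e.g. Val == 15 (0b1111): countPopulation(15) + Log2_32(15) == 4 + 3
+      // == 7, which is not < 7, so the expansion is skipped at -Os.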
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
@@ -4579,7 +3908,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
// If the original was negative, invert the result, producing 1/(x*x*x).
if (RHSC->getSExtValue() < 0)
Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
- DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+ DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
return Res;
}
}
@@ -4609,11 +3938,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
-bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
- MDNode *Variable,
- MDNode *Expr, int64_t Offset,
- bool IsIndirect,
- const SDValue &N) {
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+ const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+ DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4622,8 +3949,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Ignore inlined function arguments here.
- DIVariable DV(Variable);
- if (DV.isInlinedFnArgument(MF.getFunction()))
+ //
+ // FIXME: Should we be checking DL->inlinedAt() to determine this?
+ if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
return false;
Optional<MachineOperand> Op;
@@ -4664,13 +3992,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
if (!Op)
return false;
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
if (Op->isReg())
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect, Op->getReg(), Offset, Variable, Expr));
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ Op->getReg(), Offset, Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addOperand(*Op)
.addImm(Offset)
.addMetadata(Variable)
@@ -4715,16 +4045,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
+ SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(I.getType());
- setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
+ Res = DAG.getNode(ISD::READ_REGISTER, sdl,
+ DAG.getVTList(VT, MVT::Other), Chain, RegName);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
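+    // The read carries a chain result so it stays ordered with respect to
+    // other side-effecting nodes such as a write_register.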
return nullptr;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
- SDValue Chain = getValue(RegValue).getOperand(0);
+ SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
@@ -4736,6 +4070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4750,12 +4085,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memset: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4768,11 +4107,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- MachinePointerInfo(I.getArgOperand(0))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)));
+ updateDAGForMaybeTailCall(MS);
return nullptr;
}
case Intrinsic::memmove: {
+ // FIXME: this definition of "user defined address space" is x86-specific
// Assert for address < 256 since we support only user defined address
// spaces.
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
@@ -4787,20 +4129,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Align)
Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
- DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1))));
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MM);
return nullptr;
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- MDNode *Variable = DI.getVariable();
- MDNode *Expression = DI.getExpression();
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
const Value *Address = DI.getAddress();
- DIVariable DIVar(Variable);
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!Address || !DIVar) {
+ assert(Variable && "Missing variable");
+ if (!Address) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
}
@@ -4821,9 +4163,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter =
- (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
- isa<Argument>(Address));
+ bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address);
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
@@ -4836,7 +4177,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
else {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
return nullptr;
}
} else if (AI)
@@ -4853,7 +4195,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
N)) {
        // If the variable is pinned by an alloca in a dominating bb then
// use StaticAllocaMap.
@@ -4876,14 +4218,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
- DIVariable DIVar(DI.getVariable());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgValueInst should be either null or a DIVariable.");
- if (!DIVar)
- return nullptr;
+ assert(DI.getVariable() && "Missing variable");
- MDNode *Variable = DI.getVariable();
- MDNode *Expression = DI.getExpression();
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
@@ -4904,7 +4242,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
- if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset,
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
IsIndirect, N)) {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
IsIndirect, Offset, dl, SDNodeOrder);
@@ -4943,7 +4281,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
- Res = DAG.getConstant(TypeID, MVT::i32);
+ Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return nullptr;
}
@@ -4969,7 +4307,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
CfaArg.getValueType()),
CfaArg);
SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
- DAG.getConstant(0, TLI.getPointerTy()));
+ DAG.getConstant(0, sdl, TLI.getPointerTy()));
setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return nullptr;
@@ -5008,9 +4346,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ case Intrinsic::masked_gather:
+ visitMaskedGather(I);
+ return nullptr;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return nullptr;
+ case Intrinsic::masked_scatter:
+ visitMaskedScatter(I);
+ return nullptr;
case Intrinsic::masked_store:
visitMaskedStore(I);
return nullptr;
@@ -5063,44 +4407,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// We must do this early because v2i32 is not a legal type.
SDValue ShOps[2];
ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
EVT DestVT = TLI.getValueType(I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
- DAG.getConstant(NewIntrinsic, MVT::i32),
+ DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
getValue(I.getArgOperand(0)), ShAmt);
setValue(&I, Res);
return nullptr;
}
- case Intrinsic::x86_avx_vinsertf128_pd_256:
- case Intrinsic::x86_avx_vinsertf128_ps_256:
- case Intrinsic::x86_avx_vinsertf128_si_256:
- case Intrinsic::x86_avx2_vinserti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
- ElVT.getVectorNumElements();
- Res =
- DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
- case Intrinsic::x86_avx_vextractf128_pd_256:
- case Intrinsic::x86_avx_vextractf128_ps_256:
- case Intrinsic::x86_avx_vextractf128_si_256:
- case Intrinsic::x86_avx2_vextracti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
- DestVT.getVectorNumElements();
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -5238,7 +4554,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
getValue(I.getArgOperand(0)),
- DAG.getTargetConstant(0, MVT::i32))));
+ DAG.getTargetConstant(0, sdl,
+ MVT::i32))));
return nullptr;
case Intrinsic::convert_from_fp16:
setValue(&I,
@@ -5366,9 +4683,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
if (CI->isZero())
- Res = DAG.getConstant(-1ULL, Ty);
+ Res = DAG.getConstant(-1ULL, sdl, Ty);
else
- Res = DAG.getConstant(0, Ty);
+ Res = DAG.getConstant(0, sdl, Ty);
setValue(&I, Res);
return nullptr;
@@ -5498,7 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -5547,6 +4864,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
+ case Intrinsic::eh_actions:
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return nullptr;
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -5565,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::experimental_gc_result_int:
case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr: {
+ case Intrinsic::experimental_gc_result_ptr:
+ case Intrinsic::experimental_gc_result: {
visitGCResult(I);
return nullptr;
}
@@ -5576,45 +4897,49 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
- case Intrinsic::frameallocate: {
+ case Intrinsic::frameescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Do the allocation and map it as a normal value.
- // FIXME: Maybe we should add this to the alloca map so that we don't have
- // to register allocate it?
- uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
- int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size);
- MVT PtrVT = TLI.getPointerTy(0);
- SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT);
- setValue(&I, FIVal);
-
- // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is
- // the same on all targets.
- MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
- TII->get(TargetOpcode::FRAME_ALLOC))
- .addSym(FrameAllocSym)
- .addFrameIndex(Alloc);
+ // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+ AllocaInst *Slot = cast<AllocaInst>(Arg);
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(MF.getName()), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::FRAME_ALLOC))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
return nullptr;
}
case Intrinsic::framerecover: {
- // i8* @llvm.framerecover(i8* %fn, i8* %fp)
+ // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
MVT PtrVT = TLI.getPointerTy(0);
// Get the symbol that defines the frame offset.
- Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
// Create a TargetExternalSymbol for the label to avoid any target lowering
// that would make this PC relative.
StringRef Name = FrameAllocSym->getName();
- assert(Name.size() == strlen(Name.data()) && "not null terminated");
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
@@ -5627,6 +4952,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ case Intrinsic::eh_begincatch:
+ case Intrinsic::eh_endcatch:
+ llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+ case Intrinsic::eh_exceptioncode: {
+ unsigned Reg = TLI.getExceptionPointerRegister();
+ assert(Reg && "cannot get exception code on this platform");
+ MVT PtrVT = TLI.getPointerTy();
+ const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+ unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
+ SDValue N =
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ setValue(&I, N);
+ return nullptr;
+ }
}
}
@@ -5639,7 +4979,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
if (LandingPad) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
- BeginLabel = MMI.getContext().CreateTempSymbol();
+ BeginLabel = MMI.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
@@ -5659,9 +4999,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
CLI.setChain(getRoot());
}
-
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
- std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
@@ -5683,7 +5022,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
if (LandingPad) {
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
- MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
@@ -5766,9 +5105,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (const Constant *LoadCst =
- ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
- Builder.DL))
+ if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
+ const_cast<Constant *>(LoadInput), *Builder.DL))
return Builder.getValue(LoadCst);
}
@@ -5829,7 +5167,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
- setValue(&I, DAG.getConstant(0, CallVT));
+ setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
@@ -6112,7 +5450,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
}
- if (unsigned IID = F->getIntrinsicID()) {
+ if (Intrinsic::ID IID = F->getIntrinsicID()) {
RenameFn = visitIntrinsicCall(I, IID);
if (!RenameFn)
return;
@@ -6364,9 +5702,10 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
+ OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
@@ -6462,8 +5801,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfoVector ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI.ParseConstraints(CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS);
bool hasMemory = false;
@@ -6555,12 +5894,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
@@ -6687,7 +6027,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
}
- AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(),
TLI.getPointerTy()));
// Loop over all of the inputs, copying the operand values into the
@@ -6707,10 +6047,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Memory output, or 'other' output (e.g. 'X' constraint).
assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
- TLI.getPointerTy()));
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
+ MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
}
@@ -6745,7 +6091,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
.AddInlineAsmOperands(OpInfo.isEarlyClobber
? InlineAsm::Kind_RegDefEarlyClobber
: InlineAsm::Kind_RegDef,
- false, 0, DAG, AsmNodeOperands);
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
break;
}
case InlineAsm::isInput: {
@@ -6800,11 +6146,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
}
+ SDLoc dl = getCurSDLoc();
        // Use the produced MatchedRegs object to copy the operand value into
        // the registers chosen for the matched operand.
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(),
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
Chain, &Flag, CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
- true, OpInfo.getMatchedOperand(),
+ true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
break;
}
@@ -6814,9 +6161,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(),
TLI.getPointerTy()));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
@@ -6843,6 +6191,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ getCurSDLoc(),
TLI.getPointerTy()));
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
@@ -6853,10 +6202,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI.getPointerTy()));
+ getCurSDLoc(),
+ MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
@@ -6884,11 +6240,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
- OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(),
+ SDLoc dl = getCurSDLoc();
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
Chain, &Flag, CS.getInstruction());
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
- DAG, AsmNodeOperands);
+ dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber: {
@@ -6896,7 +6254,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
- false, 0, DAG,
+ false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
}
@@ -7020,7 +6378,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- bool UseVoidTy,
+ Type *ReturnTy,
MachineBasicBlock *LandingPad,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
@@ -7041,10 +6399,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
Args.push_back(Entry);
}
- Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
+ .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
return lowerInvokable(CLI, LandingPad);
@@ -7068,15 +6425,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
- SmallVectorImpl<SDValue> &Ops,
+ SDLoc DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
SDValue OpVal = Builder.getValue(CS.getArgument(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
- Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
Ops.push_back(
- Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(
@@ -7098,7 +6455,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledValue());
- NullPtr = DAG.getIntPtrConstant(0, true);
+ NullPtr = DAG.getIntPtrConstant(0, DL, true);
  // The stackmap intrinsic only records the live variables (the arguments
  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
@@ -7116,13 +6473,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
+ MVT::i32));
// Push live variables for the stack map.
- addStackMapLiveVars(&CI, 2, Ops, *this);
+ addStackMapLiveVars(&CI, 2, DL, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
@@ -7161,7 +6519,17 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
CallingConv::ID CC = CS.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !CS->getType()->isVoidTy();
- SDValue Callee = getValue(CS->getOperand(2)); // <target>
+ SDLoc dl = getCurSDLoc();
+ SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+
+ // Handle immediate and symbolic callees.
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
+ Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
+ /*isTarget=*/true);
+ else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
+ SDLoc(SymbolicCallee),
+ SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
@@ -7175,8 +6543,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ Type *ReturnTy =
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
+ lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
LandingPad, true);
SDNode *CallEnd = Result.second.getNode();
@@ -7196,26 +6566,24 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
+ MVT::i32));
- // Assume that the Callee is a constant address.
- // FIXME: handle function symbols in the future.
- Ops.push_back(
- DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
- /*isTarget=*/true));
+ // Add the callee.
+ Ops.push_back(Callee);
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
- Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
// Add the calling convention
- Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
@@ -7225,11 +6593,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
- for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
- Ops.push_back(*i);
+ Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
- addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this);
+ addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
// Push the register mask info.
if (HasGlue)
@@ -7262,7 +6629,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Replace the target specific call node with a PATCHPOINT node.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
- getCurSDLoc(), NodeTys, Ops);
+ dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
@@ -7529,7 +6896,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
- CLI.DAG.getConstant(Offsets[i], PtrVT));
+ CLI.DAG.getConstant(Offsets[i], CLI.DL,
+ PtrVT));
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
@@ -7849,7 +7217,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
assert(i == InVals.size() && "Argument register count mismatch!");
// Finally, if the target has anything special to do, allow it to do so.
- // FIXME: this should insert code into the DAG!
EmitFunctionEntryCode();
}
@@ -7866,8 +7233,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- // Check successor nodes' PHI nodes that expect a constant to be available
- // from this block.
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
@@ -7954,3 +7321,816 @@ AddSuccessorMBB(const BasicBlock *BB,
SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
return SuccMBB;
}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I = MBB;
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return I;
+}
+
+/// During lowering new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+ // If the node is null, we do have a tail call.
+ if (MaybeTC.getNode() != nullptr)
+ DAG.setRoot(MaybeTC);
+ else
+ HasTailCall = true;
+}
+
+bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
+ unsigned *TotalCases, unsigned First,
+ unsigned Last) {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
+
+ APInt LowCase = Clusters[First].Low->getValue();
+ APInt HighCase = Clusters[Last].High->getValue();
+ assert(LowCase.getBitWidth() == HighCase.getBitWidth());
+
+ // FIXME: A range of consecutive cases has 100% density, but only requires one
+ // comparison to lower. We should discriminate against such consecutive ranges
+ // in jump tables.
+
+ uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
+ uint64_t Range = Diff + 1;
+
+ uint64_t NumCases =
+ TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
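+  // Integer-arithmetic form of NumCases / Range >= MinJumpTableDensity%: the
+  // cases must cover at least that fraction of the candidate table's range.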
+ return NumCases * 100 >= Range * MinJumpTableDensity;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+}
+
+bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ CaseCluster &JTCluster) {
+ assert(First <= Last);
+
+ uint32_t Weight = 0;
+ unsigned NumCmps = 0;
+ std::vector<MachineBasicBlock*> Table;
+ DenseMap<MachineBasicBlock*, uint32_t> JTWeights;
+ for (unsigned I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Weight += Clusters[I].Weight;
+ assert(Weight >= Clusters[I].Weight && "Weight overflow!");
+ APInt Low = Clusters[I].Low->getValue();
+ APInt High = Clusters[I].High->getValue();
+ NumCmps += (Low == High) ? 1 : 2;
+ if (I != First) {
+ // Fill the gap between this and the previous cluster.
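+      // e.g. for cases {1, 2, 5} the slots for 3 and 4 point at DefaultMBB.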
+ APInt PreviousHigh = Clusters[I - 1].High->getValue();
+ assert(PreviousHigh.slt(Low));
+ uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
+ for (uint64_t J = 0; J < Gap; J++)
+ Table.push_back(DefaultMBB);
+ }
+ uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
+ for (uint64_t J = 0; J < ClusterSize; ++J)
+ Table.push_back(Clusters[I].MBB);
+ JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
+ }
+
+ unsigned NumDests = JTWeights.size();
+ if (isSuitableForBitTests(NumDests, NumCmps,
+ Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue())) {
+ // Clusters[First..Last] should be lowered as bit tests instead.
+ return false;
+ }
+
+ // Create the MBB that will load from and jump through the table.
+ // Note: We create it here, but it's not inserted into the function yet.
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *JumpTableMBB =
+ CurMF->CreateMachineBasicBlock(SI->getParent());
+
+ // Add successors. Note: use table order for determinism.
+ SmallPtrSet<MachineBasicBlock *, 8> Done;
+ for (MachineBasicBlock *Succ : Table) {
+ if (Done.count(Succ))
+ continue;
+ addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
+ Done.insert(Succ);
+ }
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
+ ->createJumpTableIndex(Table);
+
+ // Set up the jump table info.
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTableHeader JTH(Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), SI->getCondition(),
+ nullptr, false);
+ JTCases.push_back(JumpTableBlock(JTH, JT));
+
+ JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+ JTCases.size() - 1, Weight);
+ return true;
+}
+
+void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB) {
+#ifndef NDEBUG
+ // Clusters must be non-empty, sorted, and only contain Range clusters.
+ assert(!Clusters.empty());
+ for (CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range);
+ for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+ assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!areJTsAllowed(TLI))
+ return;
+
+ const int64_t N = Clusters.size();
+ const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
+
+ // Split Clusters into minimum number of dense partitions. The algorithm uses
+ // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
+ // for the Case Statement'" (1994), but builds the MinPartitions array in
+ // reverse order to make it easier to reconstruct the partitions in ascending
+ // order. In the choice between two optimal partitionings, it picks the one
+ // which yields more jump tables.
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
+ SmallVector<unsigned, 8> NumTables(N);
+ // TotalCases[i]: Total nbr of cases in Clusters[0..i].
+ SmallVector<unsigned, 8> TotalCases(N);
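+  // The partitioning is reconstructed below by following LastElement: e.g. if
+  // LastElement[0] == 2, the first partition is Clusters[0..2] and the next
+  // one starts at index 3.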
+
+ for (unsigned i = 0; i < N; ++i) {
+ APInt Hi = Clusters[i].High->getValue();
+ APInt Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
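+  // TotalCases is now an inclusive prefix sum, so isDense can count the cases
+  // in any Clusters[First..Last] range with a single subtraction.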
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ assert(MinJumpTableSize > 1);
+ NumTables[N - 1] = 0;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ NumTables[i] = NumTables[i + 1];
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ if (isDense(Clusters, &TotalCases[0], i, j)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ bool IsTable = j - i + 1 >= MinJumpTableSize;
+ unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
+
+ // If this j leads to fewer partitions, or same number of partitions
+ // with more lookup tables, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] ||
+ (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ NumTables[i] = Tables;
+ }
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing some with jump tables in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(Last >= First);
+ assert(DstIndex <= First);
+ unsigned NumClusters = Last - First + 1;
+
+ CaseCluster JTCluster;
+ if (NumClusters >= MinJumpTableSize &&
+ buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ Clusters[DstIndex++] = JTCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
+ // FIXME: Using the pointer type doesn't seem ideal.
+ uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
+ uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
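+  // e.g. with 64-bit pointers, any span of at most 64 consecutive values fits.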
+ return Range <= BW;
+}
+
+bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
+ unsigned NumCmps,
+ const APInt &Low,
+ const APInt &High) {
+ // FIXME: I don't think NumCmps is the correct metric: a single case and a
+ // range of cases both require only one branch to lower. Just looking at the
+ // number of clusters and destinations should be enough to decide whether to
+ // build bit tests.
+
+ // To lower a range with bit tests, the range must fit the bitwidth of a
+ // machine word.
+ if (!rangeFitsInWord(Low, High))
+ return false;
+
+ // Decide whether it's profitable to lower this range with bit tests. Each
+ // destination requires a bit test and branch, and there is an overall range
+ // check branch. For a small number of clusters, separate comparisons might be
+ // cheaper, and for many destinations, splitting the range might be better.
+ return (NumDests == 1 && NumCmps >= 3) ||
+ (NumDests == 2 && NumCmps >= 5) ||
+ (NumDests == 3 && NumCmps >= 6);
+}
+
+bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ CaseCluster &BTCluster) {
+ assert(First <= Last);
+ if (First == Last)
+ return false;
+
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ unsigned NumCmps = 0;
+ for (int64_t I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Dests.set(Clusters[I].MBB->getNumber());
+ NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ }
+ unsigned NumDests = Dests.count();
+
+ APInt Low = Clusters[First].Low->getValue();
+ APInt High = Clusters[Last].High->getValue();
+ assert(Low.slt(High));
+
+ if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
+ return false;
+
+ APInt LowBound;
+ APInt CmpRange;
+
+ const int BitWidth =
+ DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
+ assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!");
+
+ if (Low.isNonNegative() && High.slt(BitWidth)) {
+    // If all the case values already fit in a word without subtracting
+    // minValue, the subtraction can be omitted.
+ LowBound = APInt::getNullValue(Low.getBitWidth());
+ CmpRange = High;
+ } else {
+ LowBound = Low;
+ CmpRange = High - Low;
+ }
+
+ CaseBitsVector CBV;
+ uint32_t TotalWeight = 0;
+ for (unsigned i = First; i <= Last; ++i) {
+ // Find the CaseBits for this destination.
+ unsigned j;
+ for (j = 0; j < CBV.size(); ++j)
+ if (CBV[j].BB == Clusters[i].MBB)
+ break;
+ if (j == CBV.size())
+ CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
+ CaseBits *CB = &CBV[j];
+
+ // Update Mask, Bits and ExtraWeight.
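+    // e.g. a cluster covering LowBound+1..LowBound+3 sets mask bits 1-3,
+    // i.e. Mask |= 0b1110.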
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+ uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+ for (uint64_t j = Lo; j <= Hi; ++j) {
+ CB->Mask |= 1ULL << j;
+ CB->Bits++;
+ }
+ CB->ExtraWeight += Clusters[i].Weight;
+ TotalWeight += Clusters[i].Weight;
+ assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+ }
+
+ BitTestInfo BTI;
+ std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ // Sort by weight first, number of bits second.
+ if (a.ExtraWeight != b.ExtraWeight)
+ return a.ExtraWeight > b.ExtraWeight;
+ return a.Bits > b.Bits;
+ });
+
+ for (auto &CB : CBV) {
+ MachineBasicBlock *BitTestBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+ }
+ BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(),
+ -1U, MVT::Other, false, nullptr,
+ nullptr, std::move(BTI)));
+
+ BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+ BitTestCases.size() - 1, TotalWeight);
+ return true;
+}
+
+void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
+ const SwitchInst *SI) {
+// Partition Clusters into as few subsets as possible, where each subset has a
+// range that fits in a machine word and has <= 3 unique destinations.
+
+#ifndef NDEBUG
+ // Clusters must be sorted and contain Range or JumpTable clusters.
+ assert(!Clusters.empty());
+ assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+ for (const CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+ for (unsigned i = 1; i < Clusters.size(); ++i)
+ assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+  // If the target does not have a legal shift-left, do not emit bit tests at all.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PTy = TLI.getPointerTy();
+ if (!TLI.isOperationLegal(ISD::SHL, PTy))
+ return;
+
+ int BitWidth = PTy.getSizeInBits();
+ const int64_t N = Clusters.size();
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+
+ // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; --i) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+
+ // Search for a solution that results in fewer partitions.
+ // Note: the search is limited by BitWidth, reducing time complexity.
+ for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+ // Try building a partition from Clusters[i..j].
+
+ // Check the range.
+ if (!rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue()))
+ continue;
+
+ // Check nbr of destinations and cluster types.
+ // FIXME: This works, but doesn't seem very efficient.
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI = W.MBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+    // If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
+ DAG.getConstant(BigValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
+ addSuccessorWithWeight(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
+ : 0);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Order cases by weight so the most likely case will be checked first.
+ std::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Weight > b.Weight;
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+    // without changing the order of weights.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Weight > W.LastCluster->Weight)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total weight.
+ uint32_t UnhandledWeights = 0;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
+ UnhandledWeights += I->Weight;
+ assert(UnhandledWeights >= I->Weight && "Weight overflow!");
+ }
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
+ JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+ addSuccessorWithWeight(CurMBB, Fallthrough);
+ addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ // The jump table header will be inserted in our current block; it will do
+ // the range check and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // The false weight is the sum of the weights of all unhandled cases.
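+ // (E.g. with case weights {5, 3, 2}: the first range check gets true weight
+ // 5 and false weight 3 + 2 = 5; the second gets 3 and 2.)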
+ UnhandledWeights -= I->Weight;
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
+ UnhandledWeights);
+
+ if (CurMBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SwitchCases.push_back(CB);
+
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
+}
+
+void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
+ const SwitchWorkListItem &W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB) {
+ assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+ "Clusters not sorted?");
+
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ // Balance the tree based on branch weights to create a near-optimal (in terms
+ // of search time given key frequency) binary search tree. See e.g. Kurt
+ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
+ CaseClusterIt LastLeft = W.FirstCluster;
+ CaseClusterIt FirstRight = W.LastCluster;
+ uint32_t LeftWeight = LastLeft->Weight;
+ uint32_t RightWeight = FirstRight->Weight;
+
+ // Move LastLeft and FirstRight towards each other from opposite directions to
+ // find a partitioning of the clusters which balances the weight on both
+ // sides. If LeftWeight and RightWeight are equal, alternate which side is
+ // taken to ensure 0-weight nodes are distributed evenly.
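+ // (E.g. with cluster weights {1, 1, 4}: LeftWeight starts at 1, RightWeight
+ // at 4, LastLeft advances once, and the split is {1, 1} vs {4}, leaving the
+ // hot cluster a single comparison away from the root.)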
+ unsigned I = 0;
+ while (LastLeft + 1 < FirstRight) {
+ if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
+ LeftWeight += (++LastLeft)->Weight;
+ else
+ RightWeight += (--FirstRight)->Weight;
+ I++;
+ }
+ assert(LastLeft + 1 == FirstRight);
+ assert(LastLeft >= W.FirstCluster);
+ assert(FirstRight <= W.LastCluster);
+
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
+
+ CaseClusterIt FirstLeft = W.FirstCluster;
+ CaseClusterIt LastRight = W.LastCluster;
+
+ const ConstantInt *Pivot = PivotCluster->Low;
+
+ // New blocks will be inserted immediately after the current one.
+ MachineFunction::iterator BBI = W.MBB;
+ ++BBI;
+
+ // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+ // we can branch to its destination directly if it's squeezed exactly in
+ // between the known lower bound and Pivot - 1.
+ MachineBasicBlock *LeftMBB;
+ if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+ FirstLeft->Low == W.GE &&
+ (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+ LeftMBB = FirstLeft->MBB;
+ } else {
+ LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, LeftMBB);
+ WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+ // single cluster, RHS.Low == Pivot, and we can branch to its destination
+ // directly if RHS.High equals the current upper bound.
+ MachineBasicBlock *RightMBB;
+ if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+ W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+ RightMBB = FirstRight->MBB;
+ } else {
+ RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, RightMBB);
+ WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Create the CaseBlock record that will be used to lower the branch.
+ CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+ LeftWeight, RightWeight);
+
+ if (W.MBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ // Extract cases from the switch.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto I : SI.cases()) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
+ const ConstantInt *CaseVal = I.getCaseValue();
+ uint32_t Weight =
+ BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
+ }
+
+ MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Replace an unreachable default with the most popular destination.
+ // FIXME: Exploit unreachable default more aggressively.
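+ // (E.g. for cases {0 -> A, 1 -> B, 2 -> B} with an unreachable default, B
+ // becomes the new default, its two cases are removed, and only the range
+ // check for 0 -> A remains.)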
+ bool UnreachableDefault =
+ isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
+ if (UnreachableDefault && !Clusters.empty()) {
+ DenseMap<const BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ const BasicBlock *MaxBB = nullptr;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ if (++Popularity[BB] > MaxPop) {
+ MaxPop = Popularity[BB];
+ MaxBB = BB;
+ }
+ }
+ // Set new default.
+ assert(MaxPop > 0 && MaxBB);
+ DefaultMBB = FuncInfo.MBBMap[MaxBB];
+
+ // Remove cases that were pointing to the destination that is now the
+ // default.
+ CaseClusterVector New;
+ New.reserve(Clusters.size());
+ for (CaseCluster &CC : Clusters) {
+ if (CC.MBB != DefaultMBB)
+ New.push_back(CC);
+ }
+ Clusters = std::move(New);
+ }
+ }
+
+ // If there is only the default destination, jump there directly.
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ if (Clusters.empty()) {
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != NextBlock(SwitchMBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
+ }
+ return;
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ findJumpTables(Clusters, &SI, DefaultMBB);
+ findBitTestClusters(Clusters, &SI);
+ }
+
+ DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable) dbgs() << "JT:";
+ if (C.Kind == CC_BitTests) dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.back();
+ WorkList.pop_back();
+ unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
+ // For optimized builds, lower large range as a balanced binary tree.
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+ continue;
+ }
+
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9070091..f0c03af 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -17,9 +17,11 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
@@ -133,26 +135,65 @@ private:
/// SDNodes we create.
unsigned SDNodeOrder;
- /// Case - A struct to record the Value for a switch case, and the
- /// case's target basic block.
- struct Case {
- const Constant *Low;
- const Constant *High;
- MachineBasicBlock* BB;
- uint32_t ExtraWeight;
+ enum CaseClusterKind {
+ /// A cluster of adjacent case labels with the same destination, or just one
+ /// case.
+ CC_Range,
+ /// A cluster of cases suitable for jump table lowering.
+ CC_JumpTable,
+ /// A cluster of cases suitable for bit test lowering.
+ CC_BitTests
+ };
- Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
- Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
- uint32_t extraweight) : Low(low), High(high), BB(bb),
- ExtraWeight(extraweight) { }
+ /// A cluster of case labels.
+ struct CaseCluster {
+ CaseClusterKind Kind;
+ const ConstantInt *Low, *High;
+ union {
+ MachineBasicBlock *MBB;
+ unsigned JTCasesIndex;
+ unsigned BTCasesIndex;
+ };
+ uint32_t Weight;
+
+ static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
+ MachineBasicBlock *MBB, uint32_t Weight) {
+ CaseCluster C;
+ C.Kind = CC_Range;
+ C.Low = Low;
+ C.High = High;
+ C.MBB = MBB;
+ C.Weight = Weight;
+ return C;
+ }
- APInt size() const {
- const APInt &rHigh = cast<ConstantInt>(High)->getValue();
- const APInt &rLow = cast<ConstantInt>(Low)->getValue();
- return (rHigh - rLow + 1ULL);
+ static CaseCluster jumpTable(const ConstantInt *Low,
+ const ConstantInt *High, unsigned JTCasesIndex,
+ uint32_t Weight) {
+ CaseCluster C;
+ C.Kind = CC_JumpTable;
+ C.Low = Low;
+ C.High = High;
+ C.JTCasesIndex = JTCasesIndex;
+ C.Weight = Weight;
+ return C;
+ }
+
+ static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
+ unsigned BTCasesIndex, uint32_t Weight) {
+ CaseCluster C;
+ C.Kind = CC_BitTests;
+ C.Low = Low;
+ C.High = High;
+ C.BTCasesIndex = BTCasesIndex;
+ C.Weight = Weight;
+ return C;
}
};
+ typedef std::vector<CaseCluster> CaseClusterVector;
+ typedef CaseClusterVector::iterator CaseClusterIt;
+
struct CaseBits {
uint64_t Mask;
MachineBasicBlock* BB;
@@ -162,51 +203,14 @@ private:
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
uint32_t Weight):
Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
- };
-
- typedef std::vector<Case> CaseVector;
- typedef std::vector<CaseBits> CaseBitsVector;
- typedef CaseVector::iterator CaseItr;
- typedef std::pair<CaseItr, CaseItr> CaseRange;
-
- /// CaseRec - A struct with ctor used in lowering switches to a binary tree
- /// of conditional branches.
- struct CaseRec {
- CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
- CaseRange r) :
- CaseBB(bb), LT(lt), GE(ge), Range(r) {}
-
- /// CaseBB - The MBB in which to emit the compare and branch
- MachineBasicBlock *CaseBB;
- /// LT, GE - If nonzero, we know the current case value must be less-than or
- /// greater-than-or-equal-to these Constants.
- const Constant *LT;
- const Constant *GE;
- /// Range - A pair of iterators representing the range of case values to be
- /// processed at this point in the binary search tree.
- CaseRange Range;
- };
- typedef std::vector<CaseRec> CaseRecVector;
-
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const Case &C1, const Case &C2) {
- assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
+ CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
};
- struct CaseBitsCmp {
- bool operator()(const CaseBits &C1, const CaseBits &C2) {
- return C1.Bits > C2.Bits;
- }
- };
+ typedef std::vector<CaseBits> CaseBitsVector;
- void Clusterify(CaseVector &Cases, const SwitchInst &SI);
+ /// Sort Clusters and merge adjacent cases.
+ void sortAndRangeify(CaseClusterVector &Clusters);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -296,6 +300,58 @@ private:
BitTestInfo Cases;
};
+ /// Minimum jump table density, in percent.
+ enum { MinJumpTableDensity = 40 };
+
+ /// Check whether a range of clusters is dense enough for a jump table.
+ bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
+ unsigned First, unsigned Last);
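+ // (Illustrative: four cases covering the values {0, 1, 2, 9} span a range
+ // of ten, i.e. 40% density -- exactly MinJumpTableDensity -- whereas
+ // {0, 1, 2, 100} would be far too sparse for a table.)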
+
+ /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
+ /// decides it's not a good idea.
+ bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
+ unsigned Last, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
+
+ /// Find clusters of cases suitable for jump table lowering.
+ void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB);
+
+ /// Check whether the range [Low,High] fits in a machine word.
+ bool rangeFitsInWord(const APInt &Low, const APInt &High);
+
+ /// Check whether these clusters are suitable for lowering with bit tests based
+ /// on the number of destinations, comparison metric, and range.
+ bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
+ const APInt &Low, const APInt &High);
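+ // (Sketch: when [Low, High] fits in a machine word, each destination can be
+ // encoded as a bit mask over that range; e.g. on a 64-bit target, cases
+ // {2, 5, 8, 60} with two destinations need only one shift/AND test per
+ // destination.)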
+
+ /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
+ /// decides it's not a good idea.
+ bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
+ const SwitchInst *SI, CaseCluster &BTCluster);
+
+ /// Find clusters of cases suitable for bit test lowering.
+ void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
+
+ struct SwitchWorkListItem {
+ MachineBasicBlock *MBB;
+ CaseClusterIt FirstCluster;
+ CaseClusterIt LastCluster;
+ const ConstantInt *GE;
+ const ConstantInt *LT;
+ };
+ typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+
+ /// Emit comparison and split W into two subtrees.
+ void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
+ Value *Cond, MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
/// that a stack protector needs to be generated.
@@ -405,7 +461,6 @@ private:
StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
FailureMBB(nullptr), Guard(nullptr),
GuardReg(0) { }
- ~StackProtectorDescriptor() { }
/// Returns true if all fields of the stack protector descriptor are
/// initialized implying that we should/are ready to emit a stack protector.
@@ -605,10 +660,16 @@ public:
void visit(unsigned Opcode, const User &I);
+ /// getCopyFromRegs - If there was virtual register allocated for the value V
+ /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
SDValue getValue(const Value *V);
+ bool findValue(const Value *V) const;
+
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -618,13 +679,6 @@ public:
N = NewN;
}
- void removeValue(const Value *V) {
- // This is to support hack in lowerCallFromStatepoint
- // Should be removed when hack is resolved
- if (NodeMap.count(V))
- NodeMap.erase(V);
- }
-
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
assert(!N.getNode() && "Already set a value for this node!");
@@ -652,7 +706,7 @@ public:
unsigned ArgIdx,
unsigned NumArgs,
SDValue Callee,
- bool UseVoidTy = false,
+ Type *ReturnTy,
MachineBasicBlock *LandingPad = nullptr,
bool IsPatchPoint = false);
@@ -660,6 +714,10 @@ public:
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ MachineBasicBlock *LandingPad = nullptr);
private:
std::pair<SDValue, SDValue> lowerInvokable(
TargetLowering::CallLoweringInfo &CLI,
@@ -672,27 +730,6 @@ private:
void visitIndirectBr(const IndirectBrInst &I);
void visitUnreachable(const UnreachableInst &I);
- // Helpers for visitSwitch
- bool handleSmallSwitchRange(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleBTSplitSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock *SwitchBB);
- bool handleBitTestsSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
-
uint32_t getEdgeWeight(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
@@ -713,6 +750,8 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
+ unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadMBB);
private:
// These all get lowered before this pass.
@@ -772,6 +811,8 @@ private:
void visitStore(const StoreInst &I);
void visitMaskedLoad(const CallInst &I);
void visitMaskedStore(const CallInst &I);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
void visitAtomicRMW(const AtomicRMWInst &I);
void visitFence(const FenceInst &I);
@@ -820,9 +861,91 @@ private:
/// EmitFuncArgumentDbgValue - If V is a function argument then create
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
int64_t Offset, bool IsIndirect,
const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+};
+
+/// RegsForValue - This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned, and the type information about
+/// the value. The most common situation is to represent one value at a time,
+/// but struct or array values are handled element-wise as multiple values. The
+/// splitting of aggregates is performed recursively, so that we never have
+/// aggregate-typed registers. The values at this point do not necessarily have
+/// legal types, so each value may require one or more registers of some legal
+/// type.
+///
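+/// (Illustrative: an i128 value on a 64-bit target ends up with ValueVTs =
+/// {i128}, RegVTs = {i64}, and two entries in Regs, since the illegal i128
+/// is expanded into two legal i64 registers.)
+///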
+struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the types of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; with physical registers, however,
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue();
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg,
+ Type *Ty);
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is nullptr, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ SDLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = nullptr) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified
+ /// value into the registers specified by this object. This uses Chain/Flag
+ /// as the input and updates them for the output Chain/Flag. If the Flag
+ /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
+ /// in printing better diagnostic messages on error.
+ void
+ getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+ SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx, SDLoc dl,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index e8577d8..96ee899 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
@@ -187,10 +188,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv";
case ISD::FMA: return "fma";
+ case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FPOW: return "fpow";
+ case ISD::SMIN: return "smin";
+ case ISD::SMAX: return "smax";
+ case ISD::UMIN: return "umin";
+ case ISD::UMAX: return "umax";
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
@@ -271,6 +277,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STORE: return "store";
case ISD::MLOAD: return "masked_load";
case ISD::MSTORE: return "masked_store";
+ case ISD::MGATHER: return "masked_gather";
+ case ISD::MSCATTER: return "masked_scatter";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
@@ -284,6 +292,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DEBUGTRAP: return "debugtrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::GC_TRANSITION_START: return "gc_transition.start";
+ case ISD::GC_TRANSITION_END: return "gc_transition.end";
// Bit manipulation
case ISD::BSWAP: return "bswap";
@@ -518,22 +528,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
+ if (!G)
+ return;
+
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ef54525..22f592a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -19,10 +19,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,8 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -40,6 +43,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,7 +51,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -166,14 +169,13 @@ static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
"instruction selector"));
-static cl::opt<bool>
-EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower an instruction"));
-static cl::opt<bool>
-EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower a formal argument"));
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disable the abort, 1 will "
+ "abort but for args, calls and terminators, 2 will also "
+ "abort for argument lowering, and 3 will never fallback "
+ "to SelectionDAG."));
static cl::opt<bool>
UseMBPI("use-mbpi",
@@ -291,7 +293,8 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
- if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
@@ -349,7 +352,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
- initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -363,7 +367,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -376,7 +380,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) {
// Loop for blocks with phi nodes.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
PHINode *PN = dyn_cast<PHINode>(BB->begin());
@@ -400,8 +404,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
continue;
// Okay, we have to split this edge.
- SplitCriticalEdge(Pred->getTerminator(),
- GetSuccessorNumber(Pred, BB), SDISel, true);
+ SplitCriticalEdge(
+ Pred->getTerminator(), GetSuccessorNumber(Pred, BB),
+ CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -412,7 +417,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
- "-fast-isel-abort requires -fast-isel");
+ "-fast-isel-abort > 0 requires -fast-isel");
const Function &Fn = *mf.getFunction();
MF = &mf;
@@ -433,12 +438,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
- LibInfo = &getAnalysis<TargetLibraryInfo>();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA);
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
@@ -496,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock::iterator InsertPos = Def;
const MDNode *Variable = MI->getDebugVariable();
const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset,
- Variable, Expr);
+ BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, LDI->second, Offset, Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
@@ -519,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CopyUseMI = nullptr; break;
}
if (CopyUseMI) {
+ // Use MI's debug location, which describes where Variable was
+ // declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
- BuildMI(*MF, CopyUseMI->getDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
@@ -570,6 +578,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TargetRegisterInfo::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
+
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
MRI.replaceRegWith(From, To);
}
@@ -591,9 +606,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
- // Lower all of the non-terminator instructions. If a call is emitted
- // as a tail call, cease emitting nodes for this block. Terminators
- // are handled below.
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
SDB->visit(*I);
@@ -656,7 +670,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
#ifndef NDEBUG
- MatchFilterBB = (!FilterDAGBasicBlockName.empty() &&
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
FuncInfo->MBB->getBasicBlock()->getName().str());
#endif
@@ -667,8 +681,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getName().str() + ":" +
- FuncInfo->MBB->getBasicBlock()->getName().str();
+ BlockName =
+ (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -904,9 +918,11 @@ void SelectionDAGISel::DoInstructionSelection() {
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
-void SelectionDAGISel::PrepareEHLandingPad() {
+bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
+ const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
+
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@@ -918,14 +934,53 @@ void SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
+ // If this is an MSVC-style personality function, we need to split the landing
+ // pad into several BBs.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
+ MF->getMMI().addPersonality(
+ MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts()));
+ EHPersonality Personality = MF->getMMI().getPersonalityType();
+
+ if (isMSVCEHPersonality(Personality)) {
+ SmallVector<MachineBasicBlock *, 4> ClauseBBs;
+ const IntrinsicInst *ActionsCall =
+ dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt());
+ // Get all invoke BBs that unwind to this landingpad.
+ SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
+ MBB->pred_end());
+ if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) {
+ // If this is a call to llvm.eh.actions followed by indirectbr, then we've
+ // run WinEHPrepare, and we should remove this block from the machine CFG.
+ // Mark the targets of the indirectbr as landingpads instead.
+ for (const BasicBlock *LLVMSucc : successors(LLVMBB)) {
+ MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc];
+ // Add the edge from the invoke to the clause.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->addSuccessor(ClauseBB);
+
+ // Mark the clause as a landing pad or MI passes will delete it.
+ ClauseBB->setIsLandingPad();
+ }
+ }
+
+ // Remove the edge from the invoke to the lpad.
+ for (MachineBasicBlock *InvokeBB : InvokeBBs)
+ InvokeBB->removeSuccessor(MBB);
+
+ // Don't select instructions for the landingpad.
+ return false;
+ }
+
// Mark exception register as live in.
- const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
// Mark exception selector register as live in.
if (unsigned Reg = TLI->getExceptionSelectorRegister())
FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+
+ return true;
}
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
@@ -1095,8 +1150,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
- if (FuncInfo->MBB->isLandingPad())
- PrepareEHLandingPad();
+ if (LLVMBB->isLandingPad())
+ if (!PrepareEHLandingPad())
+ continue;
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
@@ -1111,8 +1167,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
- if (EnableFastISelAbortArgs)
- llvm_unreachable("FastISel didn't lower all arguments");
+ if (EnableFastISelAbort > 1)
+ report_fatal_error("FastISel didn't lower all arguments");
// Use SelectionDAG argument lowering
LowerArguments(Fn);
@@ -1181,6 +1237,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
dbgs() << "FastISel missed call: ";
Inst->dump();
}
+ if (EnableFastISelAbort > 2)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
@@ -1208,24 +1268,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
- if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
- // Don't abort, and use a different message for terminator misses.
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ bool ShouldAbort = EnableFastISelAbort;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
dbgs() << "FastISel missed terminator: ";
- Inst->dump();
- }
- } else {
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ // Don't abort for terminators unless the level is really high.
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
dbgs() << "FastISel miss: ";
- Inst->dump();
}
- if (EnableFastISelAbort)
- // The "fast" selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- llvm_unreachable("FastISel didn't select the entire block");
+ Inst->dump();
}
+ if (ShouldAbort)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ NumFastIselFailures += NumFastIselRemaining;
break;
}
@@ -1354,21 +1414,15 @@ SelectionDAGISel::FinishBasicBlock() {
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
- const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
- SDB->JTCases.empty() &&
- SDB->BitTestCases.empty();
-
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
- if (MustUpdatePHINodes) {
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
- assert(PHI->isPHI() &&
- "This is not a machine PHI node that we are updating!");
- if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
- continue;
- PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
- }
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// Handle stack protector.
@@ -1413,10 +1467,6 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->SPDescriptor.resetPerBBState();
}
- // If we updated PHI Nodes, return early.
- if (MustUpdatePHINodes)
- return;
-
for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
@@ -1530,16 +1580,6 @@ SelectionDAGISel::FinishBasicBlock() {
}
SDB->JTCases.clear();
- // If the switch block involved a branch to one of the actual successors, we
- // need to update PHI nodes in that block.
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
- assert(PHI->isPHI() &&
- "This is not a machine PHI node that we are updating!");
- if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
- PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
- }
-
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
@@ -1677,11 +1717,10 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
return false;
}
-
/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
/// by tblgen. Others should not call it.
void SelectionDAGISel::
-SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) {
std::vector<SDValue> InOps;
std::swap(InOps, Ops);
@@ -1704,16 +1743,30 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
} else {
assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
"Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ if (SelectInlineAsmMemoryOperand(InOps[i+1],
+ InlineAsm::getMemoryConstraintID(Flags),
+ SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
// Add this to the output node.
unsigned NewFlags =
InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
- Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
i += 2;
}
@@ -1859,11 +1912,13 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
}
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ SDLoc DL(N);
+
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
- SelectInlineAsmMemoryOperands(Ops);
+ SelectInlineAsmMemoryOperands(Ops, DL);
- EVT VTs[] = { MVT::Other, MVT::Glue };
- SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
New->setNodeId(-1);
return New.getNode();
}
@@ -1871,12 +1926,12 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SDNode
*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
- MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0));
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
unsigned Reg =
TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0));
SDValue New = CurDAG->getCopyFromReg(
- CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0));
+ Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
return New.getNode();
}
@@ -1889,7 +1944,7 @@ SDNode
unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
Op->getOperand(2).getValueType());
SDValue New = CurDAG->getCopyToReg(
- CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2));
+ Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
return New.getNode();
}
@@ -2464,7 +2519,7 @@ public:
SelectionDAG::DAGUpdateListener(DAG),
RecordedNodes(RN), MatchScopes(MS) { }
- void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
// if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
// do, so it's unnecessary to update matching state at that point).
@@ -2885,7 +2940,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getTargetConstant(Val, VT), nullptr));
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
+ VT), nullptr));
continue;
}
case OPC_EmitRegister: {
@@ -2917,10 +2973,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (Imm->getOpcode() == ISD::Constant) {
const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
- Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
+ Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(),
+ true);
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
- Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
+ Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType(), true);
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 33c20d3..2d4ab6c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -37,13 +38,19 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
-void
-StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
+ SelectionDAGBuilder &Builder, uint64_t Value) {
+ SDLoc L = Builder.getCurSDLoc();
+ Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L,
+ MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64));
+}
+
+void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
// Consistency check
assert(PendingGCRelocateCalls.empty() &&
"Trying to visit statepoint before finished processing previous one");
Locations.clear();
- RelocLocations.clear();
NextSlotToAllocate = 0;
// Need to resize this on each safepoint - we need the two to stay in
// sync and the clear patterns of a SelectionDAGBuilder have no relation
@@ -53,9 +60,9 @@ StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
AllocatedStackSlots[i] = false;
}
}
+
void StatepointLoweringState::clear() {
Locations.clear();
- RelocLocations.clear();
AllocatedStackSlots.clear();
assert(PendingGCRelocateCalls.empty() &&
"cleared before statepoint sequence completed");
@@ -222,75 +229,94 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// Extract call from statepoint, lower it and return pointer to the
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
-static SDNode *lowerCallFromStatepoint(const CallInst &CI,
- SelectionDAGBuilder &Builder) {
-
- assert(Intrinsic::experimental_gc_statepoint ==
- dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() &&
- "function called must be the statepoint function");
+static SDNode *
+lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
+ SelectionDAGBuilder &Builder,
+ SmallVectorImpl<SDValue> &PendingExports) {
+
+ ImmutableCallSite CS(ISP.getCallSite());
+
+ SDValue ActualCallee = Builder.getValue(ISP.getActualCallee());
+
+ // Handle immediate and symbolic callees.
+ if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode()))
+ ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(),
+ Builder.getCurSDLoc(),
+ /*isTarget=*/true);
+ else if (auto *SymbolicCallee =
+ dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode()))
+ ActualCallee = Builder.DAG.getTargetGlobalAddress(
+ SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee),
+ SymbolicCallee->getValueType(0));
+
+ assert(CS.getCallingConv() != CallingConv::AnyReg &&
+ "anyregcc is not supported on statepoints!");
+
+ Type *DefTy = ISP.getActualReturnType();
+ bool HasDef = !DefTy->isVoidTy();
+
+ SDValue ReturnValue, CallEndVal;
+ std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands(
+ ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos,
+ ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad,
+ false /* IsPatchPoint */);
+
+ SDNode *CallEnd = CallEndVal.getNode();
+
+ // Get a call instruction from the call sequence chain. Tail calls are not
+ // allowed. The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ //
+ // We are expecting DAG to have the following form:
+ //
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // get_return_value ch, glue
+ //
+ // get_return_value can either be a CopyFromReg to grab the return value from
+ // %RAX, or it can be a LOAD to load a value returned by reference via a stack
+ // slot.
+
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg ||
+ CallEnd->getOpcode() == ISD::LOAD))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
- ImmutableStatepoint StatepointOperands(&CI);
-
- // Lower the actual call itself - This is a bit of a hack, but we want to
- // avoid modifying the actual lowering code. This is similiar in intent to
- // the LowerCallOperands mechanism used by PATCHPOINT, but is structured
- // differently. Hopefully, this is slightly more robust w.r.t. calling
- // convention, return values, and other function attributes.
- Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee());
-
- std::vector<Value *> Args;
- CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin();
- CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end();
- Args.insert(Args.end(), arg_begin, arg_end);
- // TODO: remove the creation of a new instruction! We should not be
- // modifying the IR (even temporarily) at this point.
- CallInst *Tmp = CallInst::Create(ActualCallee, Args);
- Tmp->setTailCall(CI.isTailCall());
- Tmp->setCallingConv(CI.getCallingConv());
- Tmp->setAttributes(CI.getAttributes());
- Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
-
- // Handle the return value of the call iff any.
- const bool HasDef = !Tmp->getType()->isVoidTy();
if (HasDef) {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
- // this value.
- Builder.setValue(&CI, Builder.getValue(Tmp));
+ if (CS.isInvoke()) {
+ // The result value will be used in a different basic block for invokes,
+ // so we need to export it now. But the statepoint call has a different
+ // type than the actual call, which means the standard exporting mechanism
+ // would create a register of the wrong type. So instead we need to create
+ // a register with the correct type and save the value into it manually.
+ // TODO: To eliminate this problem we could remove the gc.result intrinsics
+ // completely and make the statepoint call return a tuple.
+ unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
+ RegsForValue RFV(*Builder.DAG.getContext(),
+ Builder.DAG.getTargetLoweringInfo(), Reg,
+ ISP.getActualReturnType());
+ SDValue Chain = Builder.DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain,
+ nullptr);
+ PendingExports.push_back(Chain);
+ Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
+ } else {
+ // The value of the statepoint itself will be the value of the call itself.
+ // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ Builder.setValue(CS.getInstruction(), ReturnValue);
+ }
} else {
// The token value is never used from here on, just generate a poison value
- Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1));
- }
- // Remove the fake entry we created so we don't have a hanging reference
- // after we delete this node.
- Builder.removeValue(Tmp);
- delete Tmp;
- Tmp = nullptr;
-
- // Search for the call node
- // The following code is essentially reverse engineering X86's
- // LowerCallTo.
- SDNode *CallNode = nullptr;
-
- // We just emitted a call, so it should be last thing generated
- SDValue Chain = Builder.DAG.getRoot();
-
- // Find closest CALLSEQ_END walking back through lowered nodes if needed
- SDNode *CallEnd = Chain.getNode();
- int Sanity = 0;
- while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
- CallEnd = CallEnd->getGluedNode();
- assert(CallEnd && "Can not find call node");
- assert(Sanity < 20 && "should have found call end already");
- Sanity++;
+ Builder.setValue(CS.getInstruction(),
+ Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc()));
}
- assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
- "Expected a callseq node.");
- assert(CallEnd->getGluedNode());
- // Step back inside the CALLSEQ
- CallNode = CallEnd->getGluedNode();
- return CallNode;
+ return CallEnd->getOperand(0).getNode();
}
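The walk above depends entirely on the fixed shape of the lowered call sequence sketched in the comment at the top of this function. A standalone model of the same step, with simplified stand-in types rather than the real SelectionDAG API (all names here are illustrative only):

#include <cassert>

// Minimal model of the chain: get_return_value -> CALLSEQ_END -> call node.
enum Opcode { CopyFromReg, Load, CallSeqEnd, X86Call, Other };
struct Node {
  Opcode Op;
  Node *Operand0; // first (chain) operand
};

Node *findCallNode(Node *CallEndVal, bool HasDef) {
  Node *CallEnd = CallEndVal;
  // The get_return_value node (CopyFromReg or LOAD) is only present when the
  // call defines a value; skip it to land on CALLSEQ_END.
  if (HasDef && (CallEnd->Op == CopyFromReg || CallEnd->Op == Load))
    CallEnd = CallEnd->Operand0;
  assert(CallEnd->Op == CallSeqEnd && "expected CALLSEQ_END");
  // Step back inside the CALLSEQ: operand 0 of CALLSEQ_END is the call.
  return CallEnd->Operand0;
}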
/// Collect all gc pointers coming into the statepoint intrinsic, clean them up,
@@ -300,24 +326,15 @@ static SDNode *lowerCallFromStatepoint(const CallInst &CI,
/// Relocs - the gc_relocate corresponding to each base/ptr pair
/// Elements of this arrays should be in one-to-one correspondence with each
/// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call
-static void
-getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases,
- SmallVectorImpl<const Value *> &Ptrs,
- SmallVectorImpl<const Value *> &Relocs,
- ImmutableCallSite Statepoint,
- SelectionDAGBuilder &Builder) {
- // Search for relocated pointers. Note that working backwards from the
- // gc_relocates ensures that we only get pairs which are actually relocated
- // and used after the statepoint.
- // TODO: This logic should probably become a utility function in Statepoint.h
- for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) {
- if (!isGCRelocate(U)) {
- continue;
- }
- GCRelocateOperands relocateOpers(U);
- Relocs.push_back(cast<Value>(U));
- Bases.push_back(relocateOpers.basePtr());
- Ptrs.push_back(relocateOpers.derivedPtr());
+static void getIncomingStatepointGCValues(
+ SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
+ SelectionDAGBuilder &Builder) {
+ for (GCRelocateOperands relocateOpers :
+ StatepointSite.getRelocates(StatepointSite)) {
+ Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
+ Bases.push_back(relocateOpers.getBasePtr());
+ Ptrs.push_back(relocateOpers.getDerivedPtr());
}
// Remove any redundant llvm::Values which map to the same SDValue as another
@@ -376,14 +393,13 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// such in the stackmap. This is required so that the consumer can
// parse any internal format to the deopt state. It also handles null
// pointers and other constant pointers in GC states
- Ops.push_back(
- Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
- Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
- // This handles allocas as arguments to the statepoint
- const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
- Ops.push_back(
- Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
// can be found by the runtime later. We currently do not support
@@ -408,15 +424,15 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
/// statepoint. The chain nodes will have already been created and the DAG root
/// will be set to the last value spilled (if any were).
static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- ImmutableStatepoint Statepoint,
+ ImmutableStatepoint StatepointSite,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will
// be: deopt argument length, deopt arguments.., gc arguments...
SmallVector<const Value *, 64> Bases, Ptrs, Relocations;
- getIncomingStatepointGCValues(Bases, Ptrs, Relocations,
- Statepoint.getCallSite(), Builder);
+ getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite,
+ Builder);
#ifndef NDEBUG
// Check that each of the gc pointer and bases we've gotten out of the
@@ -424,61 +440,54 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// heap. This is basically just here to help catch errors during statepoint
// insertion. TODO: This should actually be in the Verifier, but we can't get
// to the GCStrategy from there (yet).
- if (Builder.GFI) {
- GCStrategy &S = Builder.GFI->getStrategy();
- for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed base pointer found in statepoint");
- }
+ GCStrategy &S = Builder.GFI->getStrategy();
+ for (const Value *V : Bases) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
}
- for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed derived pointer found in statepoint");
- }
+ }
+ for (const Value *V : Ptrs) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
}
- for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V);
- if (Opt.hasValue()) {
- assert(Opt.getValue() && "non gc managed pointer relocated");
- }
+ }
+ for (const Value *V : Relocations) {
+ auto Opt = S.isGCManagedPointer(V);
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() && "non gc managed pointer relocated");
}
}
#endif
-
-
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
- for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end();
- I != E; ++I) {
- Value *V = *I;
+ for (const Value *V : StatepointSite.vm_state_args()) {
SDValue Incoming = Builder.getValue(V);
reservePreviousStackSlotForValue(Incoming, Builder);
}
- for (unsigned i = 0; i < Bases.size() * 2; ++i) {
- // Even elements will contain base, odd elements - derived ptr
- const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2];
- SDValue Incoming = Builder.getValue(V);
- reservePreviousStackSlotForValue(Incoming, Builder);
+ for (unsigned i = 0; i < Bases.size(); ++i) {
+ const Value *Base = Bases[i];
+ reservePreviousStackSlotForValue(Builder.getValue(Base), Builder);
+
+ const Value *Ptr = Ptrs[i];
+ reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder);
}
// First, prefix the list with the number of unique values to be
// lowered. Note that this is the number of *Values*, not the
// number of SDValues required to lower them.
- const int NumVMSArgs = Statepoint.numTotalVMSArgs();
- Ops.push_back(
- Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
- Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64));
+ const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs();
+ pushStackMapConstant(Ops, Builder, NumVMSArgs);
- assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(),
- Statepoint.vm_state_end()));
+ assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(),
+ StatepointSite.vm_state_end()));
// The vm state arguments are lowered in an opaque manner. We do
// not know what type of values are contained within. We skip the
@@ -486,9 +495,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// explicitly just above. We could have left it in the loop and
// not done it explicitly, but it's far easier to understand this
// way.
- for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end();
- I != E; ++I) {
- const Value *V = *I;
+ for (const Value *V : StatepointSite.vm_state_args()) {
SDValue Incoming = Builder.getValue(V);
lowerIncomingStatepointValue(Incoming, Ops, Builder);
}
@@ -498,35 +505,96 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// arrays interwoven with each (lowered) base pointer immediately followed by
// its (lowered) derived pointer, i.e.
// (base[0], ptr[0], base[1], ptr[1], ...)
- for (unsigned i = 0; i < Bases.size() * 2; ++i) {
- // Even elements will contain base, odd elements - derived ptr
- const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2];
+ for (unsigned i = 0; i < Bases.size(); ++i) {
+ const Value *Base = Bases[i];
+ lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+
+ const Value *Ptr = Ptrs[i];
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+ }
+
+ // If there are any explicit spill slots passed to the statepoint, record
+ // them, but otherwise do not do anything special. These are user-provided
+ // allocas and give control over placement to the consumer. In this case,
+ // it is the contents of the slot which may get updated, not the pointer to
+ // the alloca.
+ for (Value *V : StatepointSite.gc_args()) {
SDValue Incoming = Builder.getValue(V);
- lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
+ }
+ }
+
+ // Record computed locations for all lowered values.
+ // This cannot be embedded in the lowering loops above, as we need to
+ // record *all* values, while the previous loops account only for values
+ // with unique SDValues.
+ const Instruction *StatepointInstr =
+ StatepointSite.getCallSite().getInstruction();
+ FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+ Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
+
+ for (GCRelocateOperands RelocateOpers :
+ StatepointSite.getRelocates(StatepointSite)) {
+ const Value *V = RelocateOpers.getDerivedPtr();
+ SDValue SDV = Builder.getValue(V);
+ SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
+
+ if (Loc.getNode()) {
+ SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+ // Record the value as visited, but not spilled. This is the case for
+ // allocas and constants. For these values we can avoid emitting a spill
+ // load while visiting the corresponding gc_relocate.
+ // Strictly speaking we do not need to record them in this map at all;
+ // we do it only to check that we are not relocating any unvisited value.
+ SpillMap[V] = None;
+
+ // The default llvm mechanism for exporting values which are used in
+ // different basic blocks does not work for gc relocates.
+ // Note that it would be incorrect to teach llvm that all relocates are
+ // uses of the corresponding values so that it would automatically
+ // export them. Relocates of spilled values do not use the original
+ // value.
+ if (StatepointSite.getCallSite().isInvoke())
+ Builder.ExportFromCurrentBlock(V);
+ }
}
}
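Both this function and lowerIncomingStatepointValue call a pushStackMapConstant helper whose definition does not appear in the hunks shown here. Judging from the call sites, a plausible sketch (reconstructed from usage, not quoted from the patch) is that stack map constants are encoded as a StackMaps::ConstantOp marker followed by the value, both as i64 target constants:

static void pushStackMapConstant(SmallVectorImpl<SDValue> &Ops,
                                 SelectionDAGBuilder &Builder,
                                 uint64_t Value) {
  SDLoc L = Builder.getCurSDLoc();
  // Marker telling the stack map parser that a literal constant follows.
  Ops.push_back(
      Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, MVT::i64));
  Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64));
}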
+
void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
+ // Check some preconditions for sanity
+ assert(isStatepoint(&CI) &&
+ "function called must be the statepoint function");
+
+ LowerStatepoint(ImmutableStatepoint(&CI));
+}
+
+void SelectionDAGBuilder::LowerStatepoint(
+ ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
- // Check some preconditions for sanity
- assert(isStatepoint(&CI) &&
- "function called must be the statepoint function");
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
+ ImmutableCallSite CS(ISP.getCallSite());
+
#ifndef NDEBUG
- // Consistency check
- for (const User *U : CI.users()) {
- const CallInst *Call = cast<CallInst>(U);
- if (isGCRelocate(Call))
- StatepointLowering.scheduleRelocCall(*Call);
+ // Consistency check. Don't do this for invokes; it would be too
+ // expensive to preserve this information across different basic blocks.
+ if (!CS.isInvoke()) {
+ for (const User *U : CS->users()) {
+ const CallInst *Call = cast<CallInst>(U);
+ if (isGCRelocate(Call))
+ StatepointLowering.scheduleRelocCall(*Call);
+ }
}
#endif
- ImmutableStatepoint ISP(&CI);
#ifndef NDEBUG
// If this is a malformed statepoint, report it early to simplify debugging.
// This should catch any IR level mistake that's made when constructing or
@@ -534,42 +602,82 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
ISP.verify();
// Check that the associated GCStrategy expects to encounter statepoints.
- // TODO: This if should become an assert. For now, we allow the GCStrategy
- // to be optional for backwards compatibility. This will only last a short
- // period (i.e. a couple of weeks).
- if (GFI) {
- assert(GFI->getStrategy().useStatepoints() &&
- "GCStrategy does not expect to encounter statepoints");
- }
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
#endif
-
// Lower statepoint vmstate and gcstate arguments
- SmallVector<SDValue, 10> LoweredArgs;
- lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
+ SmallVector<SDValue, 10> LoweredMetaArgs;
+ lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);
// Get call node, we will replace it later with statepoint
- SDNode *CallNode = lowerCallFromStatepoint(CI, *this);
+ SDNode *CallNode =
+ lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports);
- // Construct the actual STATEPOINT node with all the appropriate arguments
- // and return values.
+ // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
+ // nodes with all the appropriate arguments and return values.
+
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ SDValue Chain = CallNode->getOperand(0);
+
+ SDValue Glue;
+ bool CallHasIncomingGlue = CallNode->getGluedNode();
+ if (CallHasIncomingGlue) {
+ // Glue is always last operand
+ Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+ }
+
+ // Build the GC_TRANSITION_START node if necessary.
+ //
+ // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
+ // order in which they appear in the call to the statepoint intrinsic. If
+ // any of the operands is pointer-typed, that operand is immediately
+ // followed by a SRCVALUE for the pointer that may be used during lowering
+ // (e.g. to form MachinePointerInfo values for loads/stores).
+ const bool IsGCTransition =
+ (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) ==
+ (uint64_t)StatepointFlags::GCTransition;
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TSOps;
+
+ // Add chain
+ TSOps.push_back(Chain);
+
+ // Add GC transition arguments
+ for (const Value *V : ISP.gc_transition_args()) {
+ TSOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TSOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue if necessary
+ if (CallHasIncomingGlue)
+ TSOps.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionStart =
+ DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);
+
+ Chain = GCTransitionStart.getValue(0);
+ Glue = GCTransitionStart.getValue(1);
+ }
// TODO: Currently, all of these operands are being marked as read/write in
// PrologEpilogInserter.cpp; we should special-case the VMState arguments
// and flags to be read-only.
SmallVector<SDValue, 40> Ops;
+ // Add the <id> and <numBytes> constants.
+ Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));
+
// Calculate and push starting position of vmstate arguments
- // Call Node: Chain, Target, {Args}, RegMask, [Glue]
- SDValue Glue;
- if (CallNode->getGluedNode()) {
- // Glue is always last operand
- Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
- }
// Get number of arguments incoming directly into call node
unsigned NumCallRegArgs =
- CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3);
- Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+ CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));
// Add call target
SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
@@ -578,47 +686,74 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
// Add call arguments
// Get position of register mask in the call
SDNode::op_iterator RegMaskIt;
- if (Glue.getNode())
+ if (CallHasIncomingGlue)
RegMaskIt = CallNode->op_end() - 2;
else
RegMaskIt = CallNode->op_end() - 1;
Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
- // Add a leading constant argument with the Flags and the calling convention
- // masked together
- CallingConv::ID CallConv = CI.getCallingConv();
- int Flags = dyn_cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
- assert(Flags == 0 && "not expected to be used");
- Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
- Ops.push_back(
- DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64));
+ // Add a constant argument for the calling convention
+ pushStackMapConstant(Ops, *this, CS.getCallingConv());
+
+ // Add a constant argument for the flags
+ uint64_t Flags = ISP.getFlags();
+ assert(
+ ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0)
+ && "unknown flag used");
+ pushStackMapConstant(Ops, *this, Flags);
// Insert all vmstate and gcstate arguments
- Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end());
+ Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
// Add register mask from call node
Ops.push_back(*RegMaskIt);
// Add chain
- Ops.push_back(CallNode->getOperand(0));
+ Ops.push_back(Chain);
// Same for the glue, but we add it only if original call had it
if (Glue.getNode())
Ops.push_back(Glue);
- // Compute return values
- SmallVector<EVT, 21> ValueVTs;
- ValueVTs.push_back(MVT::Other);
- ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one
- // as input. This allows someone else to chain
- // off us as needed.
- SDVTList NodeTys = DAG.getVTList(ValueVTs);
+ // Compute return values. Provide a glue output since we consume one as
+ // input. This allows someone else to chain off us as needed.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+
+ SDNode *SinkNode = StatepointMCNode;
- SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT,
- getCurSDLoc(), NodeTys, Ops);
+ // Build the GC_TRANSITION_END node if necessary.
+ //
+ // See the comment above regarding GC_TRANSITION_START for the layout of
+ // the operands to the GC_TRANSITION_END node.
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TEOps;
+
+ // Add chain
+ TEOps.push_back(SDValue(StatepointMCNode, 0));
+
+ // Add GC transition arguments
+ for (const Value *V : ISP.gc_transition_args()) {
+ TEOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TEOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue
+ TEOps.push_back(SDValue(StatepointMCNode, 1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionEnd =
+ DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);
+
+ SinkNode = GCTransitionEnd.getNode();
+ }
// Replace original call
- DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root
+ DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
// Remove the original call node
DAG.DeleteNode(CallNode);
@@ -636,49 +771,72 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
// The result value of the gc_result is simply the result of the actual
// call. We've already emitted this, so just grab the value.
Instruction *I = cast<Instruction>(CI.getArgOperand(0));
- assert(isStatepoint(I) &&
- "first argument must be a statepoint token");
-
- setValue(&CI, getValue(I));
+ assert(isStatepoint(I) && "first argument must be a statepoint token");
+
+ if (isa<InvokeInst>(I)) {
+ // For invokes we should have stored the call result in a virtual register.
+ // We cannot use the default getValue() functionality to copy the value
+ // from this register because the statepoint and the actual call return
+ // types can be different, and getValue() would use a CopyFromReg of the
+ // wrong type, which is always i32 in our case.
+ PointerType *CalleeType =
+ cast<PointerType>(ImmutableStatepoint(I).getActualCallee()->getType());
+ Type *RetTy =
+ cast<FunctionType>(CalleeType->getElementType())->getReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+ } else {
+ setValue(&CI, getValue(I));
+ }
}
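The getCopyFromRegs(I, RetTy) helper used above is not shown in this diff either. Assuming it mirrors the usual RegsForValue export path but with an explicitly supplied type, it plausibly looks something like this (a reconstruction from the call site, not the patch's actual body):

SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  auto It = FuncInfo.ValueMap.find(V);
  if (It == FuncInfo.ValueMap.end())
    return SDValue(); // value was never exported from its defining block
  // Build the copy with the caller-provided type Ty rather than V's own
  // type; that is exactly what the invoke-statepoint case above requires.
  RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                   It->second, Ty);
  SDValue Chain = DAG.getEntryNode();
  return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}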
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
+ GCRelocateOperands RelocateOpers(&CI);
+
#ifndef NDEBUG
// Consistency check
- StatepointLowering.relocCallVisited(CI);
+ // We skip this check for invoke statepoints. It would be too expensive to
+ // preserve validation info through different basic blocks.
+ if (!RelocateOpers.isTiedToInvoke()) {
+ StatepointLowering.relocCallVisited(CI);
+ }
#endif
- GCRelocateOperands relocateOpers(&CI);
- SDValue SD = getValue(relocateOpers.derivedPtr());
+ const Value *DerivedPtr = RelocateOpers.getDerivedPtr();
+ SDValue SD = getValue(DerivedPtr);
+
+ FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+ FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()];
- if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) {
- // We didn't need to spill these special cases (constants and allocas).
- // See the handling in spillIncomingValueForStatepoint for detail.
+ // We should have recorded a location for this pointer
+ assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
+ Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr];
+
+ // We didn't need to spill these special cases (constants and allocas).
+ // See the handling in spillIncomingValueForStatepoint for detail.
+ if (!DerivedPtrLocation) {
setValue(&CI, SD);
return;
}
- SDValue Loc = StatepointLowering.getRelocLocation(SD);
- // Emit new load if we did not emit it before
- if (!Loc.getNode()) {
- SDValue SpillSlot = StatepointLowering.getLocation(SD);
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation,
+ SD.getValueType());
- // Be conservative: flush all pending loads
- // TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opprtunities
- SDValue Chain = getRoot();
+ // Be conservative: flush all pending loads
+ // TODO: Probably we can be less restrictive on this,
+ // it may allow more scheduling opportunities
+ SDValue Chain = getRoot();
- Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain,
- SpillSlot, MachinePointerInfo::getFixedStack(FI), false,
- false, false, 0);
+ SDValue SpillLoad =
+ DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ MachinePointerInfo::getFixedStack(*DerivedPtrLocation),
+ false, false, false, 0);
- StatepointLowering.setRelocLocation(SD, Loc);
+ // Again, be conservative, don't emit pending loads
+ DAG.setRoot(SpillLoad.getValue(1));
- // Again, be conservative, don't emit pending loads
- DAG.setRoot(Loc.getValue(1));
- }
-
- assert(Loc.getNode());
- setValue(&CI, Loc);
+ assert(SpillLoad.getNode());
+ setValue(&CI, SpillLoad);
}
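The control flow of visitGCRelocate reduces to a single lookup in the per-statepoint spill map: every relocated pointer must have been recorded, either with a frame index (spilled) or with None (constants and allocas, which need no reload). A self-contained model of that contract, using standard-library stand-ins for the LLVM types:

#include <cassert>
#include <map>
#include <optional>

// Stand-in for FunctionLoweringInfo::StatepointSpilledValueMapTy.
using SpillMap = std::map<const void *, std::optional<int>>;

// Returns the frame index to reload from, or std::nullopt when the original
// value can be reused directly (the "visited, but not spilled" case).
std::optional<int> relocationSource(const SpillMap &M, const void *Derived) {
  auto It = M.find(Derived);
  assert(It != M.end() && "relocating a value that was never lowered");
  return It->second;
}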
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 673112c..82d0c62 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -30,8 +30,7 @@ class SelectionDAGBuilder;
/// works in concert with information in FunctionLoweringInfo.
class StatepointLoweringState {
public:
- StatepointLoweringState() : NextSlotToAllocate(0) {
- }
+ StatepointLoweringState() : NextSlotToAllocate(0) {}
/// Reset all state tracking for a newly encountered safepoint. Also
/// performs some consistency checking.
@@ -57,25 +56,6 @@ public:
Locations[val] = Location;
}
- /// Returns the relocated value for a given input pointer. Will
- /// return SDValue() if this value hasn't yet been reloaded from
- /// it's stack slot after the statepoint. Otherwise, the value
- /// has already been reloaded and the SDValue of that reload will
- /// be returned. Note that VMState values are spilled but not
- /// reloaded (since they don't change at the safepoint unless
- /// also listed in the GC pointer section) and will thus never
- /// be in this map
- SDValue getRelocLocation(SDValue val) {
- if (!RelocLocations.count(val))
- return SDValue();
- return RelocLocations[val];
- }
- void setRelocLocation(SDValue val, SDValue Location) {
- assert(!RelocLocations.count(val) &&
- "Trying to allocate already allocated location");
- RelocLocations[val] = Location;
- }
-
/// Record the fact that we expect to encounter a given gc_relocate
/// before the next statepoint. If we don't see it, we'll report
/// an assertion.
@@ -118,8 +98,6 @@ private:
/// Maps pre-relocation value (gc pointer directly incoming into statepoint)
/// into its location (currently only stack slots)
DenseMap<SDValue, SDValue> Locations;
- /// Map pre-relocated value into it's new relocated location
- DenseMap<SDValue, SDValue> RelocLocations;
/// A boolean indicator for each slot listed in the FunctionInfo as to
/// whether it has been used in the current statepoint. Since we try to
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f12c035..833da4b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -100,6 +100,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
@@ -201,7 +203,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue Ops[2] = { NewLHS, NewRHS };
NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
dl).first;
- NewRHS = DAG.getConstant(0, RetVT);
+ NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
@@ -303,7 +305,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
C->getAPIntValue(),
- VT));
+ dl, VT));
return CombineTo(Op, New);
}
@@ -447,7 +449,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, Op.getOperand(1));
// If all of the demanded bits in the inputs are known zeros, return zero.
if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
// If the RHS is a constant, see if we can simplify it.
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
@@ -535,7 +537,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
if (KnownOne == KnownOne2) { // set bits are the same on both sides
EVT VT = Op.getValueType();
- SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
@@ -551,7 +553,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Expanded != C->getAPIntValue()) {
EVT VT = Op.getValueType();
SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
- TLO.DAG.getConstant(Expanded, VT));
+ TLO.DAG.getConstant(Expanded, dl, VT));
return TLO.CombineTo(Op, New);
}
// if it already has all the bits set, nothing to change
@@ -624,7 +626,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
SDValue NewSA =
- TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
@@ -648,7 +650,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
ShTy = InnerVT;
SDValue NarrowShl =
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, ShTy));
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
return
TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
@@ -670,7 +672,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
NewMask.trunc(ShAmt) == 0) {
SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt,
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
@@ -713,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
SDValue NewSA =
- TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
@@ -778,7 +780,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2,
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0), NewSA));
@@ -794,19 +796,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == DemandedMask) {
+ if (MsbMask == NewMask) {
unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
- // Compute the correct shift amount type, which must be getShiftAmountTy
- // for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
- if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
- ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp, ShiftAmt));
+ unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ bool AlreadySignExtended =
+ TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ // However, if the input is already sign extended, we expect the sign
+ // extension to be dropped altogether later, so do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
+ ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp,
+ ShiftAmt));
+ }
}
// Sign extension. Compute the demanded bits in the result that are not
@@ -985,7 +995,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
Shift =
- TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType()));
}
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
@@ -1043,7 +1053,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
- SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
Op.getValueType(),
Sign, ShAmt));
@@ -1076,8 +1086,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If we know the value of all of the demanded bits, return this as a
// constant.
- if ((NewMask & (KnownZero|KnownOne)) == NewMask)
- return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
+ // Avoid folding to a constant if any OpaqueConstant is involved.
+ const SDNode *N = Op.getNode();
+ for (SDNodeIterator I = SDNodeIterator::begin(N),
+ E = SDNodeIterator::end(N); I != E; ++I) {
+ SDNode *Op = *I;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->isOpaque())
+ return false;
+ }
+ return TLO.CombineTo(Op,
+ TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
+ }
return false;
}
@@ -1213,13 +1234,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
TargetLowering::BooleanContent Cnt =
getBooleanContents(N0->getValueType(0));
return DAG.getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
+ VT);
}
}
@@ -1253,7 +1275,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (srl (ctlz x), 5) == 1 -> X == 0
Cond = ISD::SETEQ;
}
- SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
Zero, Cond);
}
@@ -1274,10 +1296,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) u> 1 -> (x & x-1) != 0
if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
- DAG.getConstant(1, CTVT));
+ DAG.getConstant(1, dl, CTVT));
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
- return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
@@ -1331,7 +1353,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
- SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
}
@@ -1382,7 +1404,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
- DAG.getConstant(bestOffset, PtrType));
+ DAG.getConstant(bestOffset, dl, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
@@ -1390,8 +1412,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
- newVT)),
- DAG.getConstant(0LL, newVT), Cond);
+ dl, newVT)),
+ DAG.getConstant(0LL, dl, newVT), Cond);
}
}
}
@@ -1407,18 +1429,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
- case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
- case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETNE: return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
- return DAG.getConstant(C1.isNegative(), VT);
+ return DAG.getConstant(C1.isNegative(), dl, VT);
case ISD::SETLT:
case ISD::SETLE:
// True if the sign bit of C1 isn't set.
- return DAG.getConstant(C1.isNonNegative(), VT);
+ return DAG.getConstant(C1.isNonNegative(), dl, VT);
default:
break;
}
@@ -1437,7 +1459,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT);
- SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT);
+ SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
NewConst, Cond);
@@ -1458,7 +1480,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
- return DAG.getConstant(Cond == ISD::SETNE, VT);
+ return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
SDValue ZextOp;
EVT Op0Ty = N0.getOperand(0).getValueType();
@@ -1467,7 +1489,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else {
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, Op0Ty));
+ DAG.getConstant(Imm, dl, Op0Ty));
}
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
@@ -1476,7 +1498,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(C1 & APInt::getLowBitsSet(
ExtDstTyBits,
ExtSrcTyBits),
- ExtDstTy),
+ dl, ExtDstTy),
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
@@ -1546,20 +1568,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
else if (Op0.getValueType().bitsLT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
return DAG.getSetCC(dl, VT, Op0,
- DAG.getConstant(0, Op0.getValueType()),
+ DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
if (Op0.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
return DAG.getSetCC(dl, VT, Op0,
- DAG.getConstant(0, Op0.getValueType()),
+ DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
@@ -1576,7 +1598,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
- if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
@@ -1585,13 +1607,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, N1.getValueType()),
+ DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
- if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
// X <= C0 --> X < (C0 + 1)
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
@@ -1600,19 +1622,19 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, N1.getValueType()),
+ DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
- return DAG.getConstant(0, VT); // X < MIN --> false
+ return DAG.getConstant(0, dl, VT); // X < MIN --> false
if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
- return DAG.getConstant(1, VT); // X >= MIN --> true
+ return DAG.getConstant(1, dl, VT); // X >= MIN --> true
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
- return DAG.getConstant(0, VT); // X > MAX --> false
+ return DAG.getConstant(0, dl, VT); // X > MAX --> false
if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
- return DAG.getConstant(1, VT); // X <= MAX --> true
+ return DAG.getConstant(1, dl, VT); // X <= MAX --> true
// Canonicalize setgt X, Min --> setne X, Min
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
@@ -1624,12 +1646,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MaxVal, N0.getValueType()),
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
ISD::SETEQ);
// If we have "setcc X, C0", check to see if we can shrink the immediate
@@ -1639,14 +1661,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETUGT &&
C1 == APInt::getSignedMaxValue(OperandBitSize))
return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(0, N1.getValueType()),
+ DAG.getConstant(0, dl, N1.getValueType()),
ISD::SETLT);
// SETULT X, SINTMIN -> SETGT X, -1
if (Cond == ISD::SETULT &&
C1 == APInt::getSignedMinValue(OperandBitSize)) {
SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
N1.getValueType());
return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
}
@@ -1665,7 +1687,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (AndRHS->getAPIntValue().isPowerOf2()) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
+ ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
@@ -1673,7 +1696,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (C1.isPowerOf2()) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(C1.logBase2(), ShiftTy)));
+ DAG.getConstant(C1.logBase2(), dl,
+ ShiftTy)));
}
}
}
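Both folds above trade a comparison for a shift when the constant involved is a power of two. A quick standalone check of the (X & 8) == 8 --> (X & 8) >> 3 direction:

#include <cstdint>

bool viaSetCC(uint32_t X) { return (X & 8) == 8; }
uint32_t viaShift(uint32_t X) { return (X & 8) >> 3; } // always 0 or 1

// For every X, viaShift(X) == (viaSetCC(X) ? 1u : 0u): the single masked bit
// is simply moved down to bit position 0, so no compare is needed.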
@@ -1692,8 +1716,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
- DAG.getConstant(ShiftBits, ShiftTy));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ DAG.getConstant(ShiftBits, dl,
+ ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
}
@@ -1715,13 +1740,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ShiftBits = C1.countTrailingZeros();
}
NewC = NewC.lshr(ShiftBits);
- if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
+ isLegalICmpImmediate(NewC.getSExtValue())) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
getPointerTy() : getShiftAmountTy(N0.getValueType());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
- DAG.getConstant(ShiftBits, ShiftTy));
- SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
@@ -1740,9 +1766,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (ISD::getUnorderedFlavor(Cond)) {
default: llvm_unreachable("Unknown flavor!");
case 0: // Known false.
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, dl, VT);
case 1: // Known true.
- return DAG.getConstant(1, VT);
+ return DAG.getConstant(1, dl, VT);
case 2: // Undefined.
return DAG.getUNDEF(VT);
}
@@ -1809,13 +1835,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// We can always fold X == X for integer setcc's.
if (N0.getValueType().isInteger()) {
- return DAG.getConstant(EqVal, VT);
+ return DAG.getConstant(EqVal, dl, VT);
}
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(EqVal, VT);
+ return DAG.getConstant(EqVal, dl, VT);
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(EqVal, VT);
+ return DAG.getConstant(EqVal, dl, VT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
@@ -1856,7 +1882,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(RHSC->getAPIntValue()-
LHSR->getAPIntValue(),
- N0.getValueType()), Cond);
+ dl, N0.getValueType()), Cond);
}
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
@@ -1868,7 +1894,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(LHSR->getAPIntValue() ^
RHSC->getAPIntValue(),
- N0.getValueType()),
+ dl, N0.getValueType()),
Cond);
}
@@ -1879,7 +1905,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getSetCC(dl, VT, N0.getOperand(1),
DAG.getConstant(SUBC->getAPIntValue() -
RHSC->getAPIntValue(),
- N0.getValueType()),
+ dl, N0.getValueType()),
Cond);
}
}
@@ -1896,16 +1922,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
if (N0.getOperand(0) == N1)
return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
+ DAG.getConstant(0, dl, N0.getValueType()), Cond);
if (N0.getOperand(1) == N1) {
if (DAG.isCommutativeBinOp(N0.getOpcode()))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
+ DAG.getConstant(0, dl, N0.getValueType()),
+ Cond);
if (N0.getNode()->hasOneUse()) {
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
- DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(1, dl,
+ getShiftAmountTy(N1.getValueType())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -1919,16 +1947,17 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Simplify X == (X+Z) --> Z == 0
if (N1.getOperand(0) == N0)
return DAG.getSetCC(dl, VT, N1.getOperand(1),
- DAG.getConstant(0, N1.getValueType()), Cond);
+ DAG.getConstant(0, dl, N1.getValueType()), Cond);
if (N1.getOperand(1) == N0) {
if (DAG.isCommutativeBinOp(N1.getOpcode()))
return DAG.getSetCC(dl, VT, N1.getOperand(0),
- DAG.getConstant(0, N1.getValueType()), Cond);
+ DAG.getConstant(0, dl, N1.getValueType()), Cond);
if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
+ DAG.getConstant(1, dl,
+ getShiftAmountTy(N0.getValueType())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -1946,7 +1975,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType())) {
- SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, N1.getValueType());
return DAG.getSetCC(dl, VT, N0, Zero, Cond);
}
}
@@ -1957,7 +1986,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N1.getSimpleValueType())) {
- SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
return DAG.getSetCC(dl, VT, N1, Zero, Cond);
}
}
@@ -2172,7 +2201,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
- MVT::i64));
+ SDLoc(C), MVT::i64));
return;
}
}
@@ -2181,9 +2210,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ const std::string &Constraint,
+ MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2195,8 +2225,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2249,8 +2277,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
/// and also tie in the associated operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
-TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
- ImmutableCallSite CS) const {
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI,
+ ImmutableCallSite CS) const {
/// ConstraintOperands - Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
@@ -2341,7 +2370,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
// If we have multiple alternative constraints, select the best alternative.
- if (ConstraintOperands.size()) {
+ if (!ConstraintOperands.empty()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
@@ -2412,12 +2441,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode,
- Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
@@ -2640,7 +2669,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
unsigned ShAmt = d.countTrailingZeros();
if (ShAmt) {
// TODO: For UDIV use SRL instead of SRA.
- SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ SDValue Amt =
+ DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType()));
Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
true);
d = d.ashr(ShAmt);
@@ -2651,7 +2681,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
while ((t = d*xn) != 1)
xn *= APInt(d.getBitWidth(), 2) - t;
- Op2 = DAG.getConstant(xn, Op1.getValueType());
+ Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
}
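BuildExactSDIV works because every odd divisor d is invertible modulo 2^N, and for exact multiples n of d, n / d == n * d^-1 (mod 2^N). A standalone 32-bit illustration of the Newton iteration in the loop above (xn starts at d, which is already correct to 3 bits since odd squares are 1 mod 8; each step doubles the number of correct low bits):

#include <cassert>
#include <cstdint>

uint32_t inverseMod2_32(uint32_t d) {
  assert(d % 2 == 1 && "only odd divisors are invertible mod 2^32");
  uint32_t xn = d;
  for (uint32_t t; (t = d * xn) != 1;)
    xn *= 2 - t; // Newton step: doubles the number of correct low bits
  return xn;
}

// Example: inverseMod2_32(5) == 0xCCCCCCCD, and 35u * 0xCCCCCCCDu == 7u,
// so an exact division by 5 becomes a single multiply.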
@@ -2680,12 +2710,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
isOperationLegalOrCustom(ISD::MULHS, VT))
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, VT));
+ DAG.getConstant(magics.m, dl, VT));
else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
N->getOperand(0),
- DAG.getConstant(magics.m, VT)).getNode(), 1);
+ DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
return SDValue(); // No mulhs or equivalent
// If d > 0 and m < 0, add the numerator
@@ -2701,12 +2731,13 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
Q = DAG.getNode(ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ DAG.getConstant(magics.s, dl,
+ getShiftAmountTy(Q.getValueType())));
Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(Q.getValueType())));
Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
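A concrete instance of the signed sequence built above: 32-bit division by 7, where the magic constant from the standard tables is m = 0x92492493 (negative, so the numerator is added back) and the shift is s = 2. The sketch assumes arithmetic right shifts for signed values, which holds on mainstream targets:

#include <cstdint>

int32_t sdiv7(int32_t n) {
  int32_t m = int32_t(0x92492493u);
  int32_t q = int32_t((int64_t(n) * int64_t(m)) >> 32); // MULHS
  q += n;                                // d > 0 and m < 0: add the numerator
  q >>= 2;                               // SRA by magics.s
  return q + int32_t(uint32_t(q) >> 31); // add the sign bit of the quotient
}

// e.g. sdiv7(7) == 1, sdiv7(-7) == -1, sdiv7(13) == 1, sdiv7(-13) == -1.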
@@ -2740,7 +2771,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
if (magics.a != 0 && !Divisor[0]) {
unsigned Shift = Divisor.countTrailingZeros();
Q = DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ DAG.getConstant(Shift, dl,
+ getShiftAmountTy(Q.getValueType())));
Created->push_back(Q.getNode());
// Get magic number for the shifted divisor.
@@ -2752,11 +2784,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
// FIXME: We should support doing a MUL in a wider type
if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
- DAG.getConstant(magics.m, VT)).getNode(), 1);
+ DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
return SDValue(); // No mulhu or equivalent
@@ -2766,17 +2798,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
assert(magics.s < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
return DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ DAG.getConstant(magics.s, dl,
+ getShiftAmountTy(Q.getValueType())));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ DAG.getConstant(1, dl,
+ getShiftAmountTy(NPQ.getValueType())));
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
Created->push_back(NPQ.getNode());
return DAG.getNode(ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ DAG.getConstant(magics.s - 1, dl,
+ getShiftAmountTy(NPQ.getValueType())));
}
}
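
The unsigned counterpart for divisor 3 takes the simple path (magics.a == 0, so no NPQ fixup is needed); again an illustrative sketch with constants from Hacker's Delight rather than the DAG code:

#include <cstdint>

// Unsigned divide by 3: magics.m = 0xAAAAAAAB, magics.s = 1.
static uint32_t UDiv3(uint32_t n) {
  uint32_t q = (uint32_t)(((uint64_t)n * 0xAAAAAAABu) >> 32); // MULHU
  return q >> 1;                                              // SRL by magics.s
}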
@@ -2863,7 +2898,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
isOperationLegalOrCustom(ISD::SRL, VT) &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
- SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT));
+ SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT));
LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
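
The SRL + TRUNCATE pair above extracts the high halves of the operands; the rest of expandMUL combines half-width partial products in the usual schoolbook fashion. A back-of-the-envelope model for the 64-bit-from-32-bit case (editor's sketch; the aH*bH term wraps away when only the low half of the result is wanted):

#include <cstdint>

static uint64_t Mul64FromHalves(uint64_t a, uint64_t b) {
  uint32_t aL = (uint32_t)a, aH = (uint32_t)(a >> 32);  // SRL + TRUNCATE
  uint32_t bL = (uint32_t)b, bH = (uint32_t)(b >> 32);
  uint64_t Lo  = (uint64_t)aL * bL;                     // full low product
  uint64_t Mid = (uint64_t)aL * bH + (uint64_t)aH * bL; // only low 32 bits matter
  return Lo + (Mid << 32);                              // aH*bH lands at bit 64+
}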
@@ -2913,13 +2948,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
// https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits());
- SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
- SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
- SDValue Bias = DAG.getConstant(127, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
+ SDValue Bias = DAG.getConstant(127, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl,
IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
- SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
@@ -2935,7 +2970,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, IntVT));
+ DAG.getConstant(0x00800000, dl, IntVT));
R = DAG.getZExtOrTrunc(R, dl, NVT);
@@ -2955,7 +2990,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
Sign);
- Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
- DAG.getConstant(0, NVT), Ret, ISD::SETLT);
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
+ DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
return true;
}
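
A scalar model of the node sequence above, mirroring the fixsfdi approach cited in the comment (assumes IEEE-754 binary32 and an in-range value; NaN and overflow are left undefined, as in the DAG expansion):

#include <cstdint>
#include <cstring>

static int64_t F32ToI64(float f) {
  uint32_t Bits;
  std::memcpy(&Bits, &f, sizeof Bits);                  // ISD::BITCAST
  int32_t Exponent = (int32_t)((Bits & 0x7F800000u) >> 23) - 127; // unbias
  if (Exponent < 0)                                     // |f| < 1: select 0
    return 0;
  uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;      // restore hidden bit
  R = Exponent < 23 ? R >> (23 - Exponent) : R << (Exponent - 23);
  int64_t Sign = (int32_t)Bits >> 31;                   // all-zeros or all-ones
  return ((int64_t)R ^ Sign) - Sign;                    // conditional negate
}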
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
index 0be00f0..b12e943 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -38,416 +38,18 @@ using namespace llvm;
#define DEBUG_TYPE "shadowstackgc"
namespace {
-
- class ShadowStackGC : public GCStrategy {
- /// RootChain - This is the global linked-list that contains the chain of GC
- /// roots.
- GlobalVariable *Head;
-
- /// StackEntryTy - Abstract type of a link in the shadow stack.
- ///
- StructType *StackEntryTy;
- StructType *FrameMapTy;
-
- /// Roots - GC roots in the current function. Each is a pair of the
- /// intrinsic call and its corresponding alloca.
- std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
-
- public:
- ShadowStackGC();
-
- bool initializeCustomLowering(Module &M) override;
- bool performCustomLowering(Function &F) override;
-
- private:
- bool IsNullValue(Value *V);
- Constant *GetFrameMap(Function &F);
- Type* GetConcreteStackEntryType(Function &F);
- void CollectRoots(Function &F);
- static GetElementPtrInst *CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx1, const char *Name);
- static GetElementPtrInst *CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx1, int Idx2, const char *Name);
- };
-
+class ShadowStackGC : public GCStrategy {
+public:
+ ShadowStackGC();
+};
}
static GCRegistry::Add<ShadowStackGC>
-X("shadow-stack", "Very portable GC for uncooperative code generators");
-
-namespace {
- /// EscapeEnumerator - This is a little algorithm to find all escape points
- /// from a function so that "finally"-style code can be inserted. In addition
- /// to finding the existing return and unwind instructions, it also (if
- /// necessary) transforms any call instructions into invokes and sends them to
- /// a landing pad.
- ///
- /// It's wrapped up in a state machine using the same transform C# uses for
- /// 'yield return' enumerators, This transform allows it to be non-allocating.
- class EscapeEnumerator {
- Function &F;
- const char *CleanupBBName;
-
- // State.
- int State;
- Function::iterator StateBB, StateE;
- IRBuilder<> Builder;
-
- public:
- EscapeEnumerator(Function &F, const char *N = "cleanup")
- : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
-
- IRBuilder<> *Next() {
- switch (State) {
- default:
- return nullptr;
-
- case 0:
- StateBB = F.begin();
- StateE = F.end();
- State = 1;
-
- case 1:
- // Find all 'return', 'resume', and 'unwind' instructions.
- while (StateBB != StateE) {
- BasicBlock *CurBB = StateBB++;
-
- // Branches and invokes do not escape, only unwind, resume, and return
- // do.
- TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
- continue;
-
- Builder.SetInsertPoint(TI->getParent(), TI);
- return &Builder;
- }
-
- State = 2;
-
- // Find all 'call' instructions.
- SmallVector<Instruction*,16> Calls;
- for (Function::iterator BB = F.begin(),
- E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(),
- EE = BB->end(); II != EE; ++II)
- if (CallInst *CI = dyn_cast<CallInst>(II))
- if (!CI->getCalledFunction() ||
- !CI->getCalledFunction()->getIntrinsicID())
- Calls.push_back(CI);
-
- if (Calls.empty())
- return nullptr;
+ X("shadow-stack", "Very portable GC for uncooperative code generators");
- // Create a cleanup block.
- LLVMContext &C = F.getContext();
- BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
- Type::getInt32Ty(C), nullptr);
- Constant *PersFn =
- F.getParent()->
- getOrInsertFunction("__gcc_personality_v0",
- FunctionType::get(Type::getInt32Ty(C), true));
- LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1,
- "cleanup.lpad",
- CleanupBB);
- LPad->setCleanup(true);
- ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+void llvm::linkShadowStackGC() {}
- // Transform the 'call' instructions into 'invoke's branching to the
- // cleanup block. Go in reverse order to make prettier BB names.
- SmallVector<Value*,16> Args;
- for (unsigned I = Calls.size(); I != 0; ) {
- CallInst *CI = cast<CallInst>(Calls[--I]);
-
- // Split the basic block containing the function call.
- BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB =
- CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
-
- // Remove the unconditional branch inserted at the end of CallBB.
- CallBB->getInstList().pop_back();
- NewBB->getInstList().remove(CI);
-
- // Create a new invoke instruction.
- Args.clear();
- CallSite CS(CI);
- Args.append(CS.arg_begin(), CS.arg_end());
-
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
- NewBB, CleanupBB,
- Args, CI->getName(), CallBB);
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
- CI->replaceAllUsesWith(II);
- delete CI;
- }
-
- Builder.SetInsertPoint(RI->getParent(), RI);
- return &Builder;
- }
- }
- };
-}
-
-// -----------------------------------------------------------------------------
-
-void llvm::linkShadowStackGC() { }
-
-ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) {
+ShadowStackGC::ShadowStackGC() {
InitRoots = true;
CustomRoots = true;
}
-
-Constant *ShadowStackGC::GetFrameMap(Function &F) {
- // doInitialization creates the abstract type of this value.
- Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
-
- // Truncate the ShadowStackDescriptor if some metadata is null.
- unsigned NumMeta = 0;
- SmallVector<Constant*, 16> Metadata;
- for (unsigned I = 0; I != Roots.size(); ++I) {
- Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
- if (!C->isNullValue())
- NumMeta = I + 1;
- Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
- }
- Metadata.resize(NumMeta);
-
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
-
- Constant *BaseElts[] = {
- ConstantInt::get(Int32Ty, Roots.size(), false),
- ConstantInt::get(Int32Ty, NumMeta, false),
- };
-
- Constant *DescriptorElts[] = {
- ConstantStruct::get(FrameMapTy, BaseElts),
- ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
- };
-
- Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
- StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta));
-
- Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
-
- // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
- // that, short of multithreaded LLVM, it should be safe; all that is
- // necessary is that a simple Module::iterator loop not be invalidated.
- // Appending to the GlobalVariable list is safe in that sense.
- //
- // All of the output passes emit globals last. The ExecutionEngine
- // explicitly supports adding globals to the module after
- // initialization.
- //
- // Still, if it isn't deemed acceptable, then this transformation needs
- // to be a ModulePass (which means it cannot be in the 'llc' pipeline
- // (which uses a FunctionPassManager (which segfaults (not asserts) if
- // provided a ModulePass))).
- Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
- GlobalVariable::InternalLinkage,
- FrameMap, "__gc_" + F.getName());
-
- Constant *GEPIndices[2] = {
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)
- };
- return ConstantExpr::getGetElementPtr(GV, GEPIndices);
-}
-
-Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
- // doInitialization creates the generic version of this type.
- std::vector<Type*> EltTys;
- EltTys.push_back(StackEntryTy);
- for (size_t I = 0; I != Roots.size(); I++)
- EltTys.push_back(Roots[I].second->getAllocatedType());
-
- return StructType::create(EltTys, "gc_stackentry."+F.getName().str());
-}
-
-/// doInitialization - If this module uses the GC intrinsics, find them now. If
-/// not, exit fast.
-bool ShadowStackGC::initializeCustomLowering(Module &M) {
- // struct FrameMap {
- // int32_t NumRoots; // Number of roots in stack frame.
- // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
- // void *Meta[]; // May be absent for roots without metadata.
- // };
- std::vector<Type*> EltTys;
- // 32 bits is ok up to a 32GB stack frame. :)
- EltTys.push_back(Type::getInt32Ty(M.getContext()));
- // Specifies length of variable length array.
- EltTys.push_back(Type::getInt32Ty(M.getContext()));
- FrameMapTy = StructType::create(EltTys, "gc_map");
- PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
-
- // struct StackEntry {
- // ShadowStackEntry *Next; // Caller's stack entry.
- // FrameMap *Map; // Pointer to constant FrameMap.
- // void *Roots[]; // Stack roots (in-place array, so we pretend).
- // };
-
- StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
-
- EltTys.clear();
- EltTys.push_back(PointerType::getUnqual(StackEntryTy));
- EltTys.push_back(FrameMapPtrTy);
- StackEntryTy->setBody(EltTys);
- PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
-
- // Get the root chain if it already exists.
- Head = M.getGlobalVariable("llvm_gc_root_chain");
- if (!Head) {
- // If the root chain does not exist, insert a new one with linkonce
- // linkage!
- Head = new GlobalVariable(M, StackEntryPtrTy, false,
- GlobalValue::LinkOnceAnyLinkage,
- Constant::getNullValue(StackEntryPtrTy),
- "llvm_gc_root_chain");
- } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
- Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
- Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
- }
-
- return true;
-}
-
-bool ShadowStackGC::IsNullValue(Value *V) {
- if (Constant *C = dyn_cast<Constant>(V))
- return C->isNullValue();
- return false;
-}
-
-void ShadowStackGC::CollectRoots(Function &F) {
- // FIXME: Account for original alignment. Could fragment the root array.
- // Approach 1: Null initialize empty slots at runtime. Yuck.
- // Approach 2: Emit a map of the array instead of just a count.
-
- assert(Roots.empty() && "Not cleaned up?");
-
- SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
- if (Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::gcroot) {
- std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
- CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
- if (IsNullValue(CI->getArgOperand(1)))
- Roots.push_back(Pair);
- else
- MetaRoots.push_back(Pair);
- }
-
- // Number roots with metadata (usually empty) at the beginning, so that the
- // FrameMap::Meta array can be elided.
- Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
-}
-
-GetElementPtrInst *
-ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
- int Idx, int Idx2, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx),
- ConstantInt::get(Type::getInt32Ty(Context), Idx2) };
- Value* Val = B.CreateGEP(BasePtr, Indices, Name);
-
- assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
-
- return dyn_cast<GetElementPtrInst>(Val);
-}
-
-GetElementPtrInst *
-ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr,
- int Idx, const char *Name) {
- Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx) };
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
-
- assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
-
- return dyn_cast<GetElementPtrInst>(Val);
-}
-
-/// runOnFunction - Insert code to maintain the shadow stack.
-bool ShadowStackGC::performCustomLowering(Function &F) {
- LLVMContext &Context = F.getContext();
-
- // Find calls to llvm.gcroot.
- CollectRoots(F);
-
- // If there are no roots in this function, then there is no need to add a
- // stack map entry for it.
- if (Roots.empty())
- return false;
-
- // Build the constant map and figure the type of the shadow stack entry.
- Value *FrameMap = GetFrameMap(F);
- Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
-
- // Build the shadow stack entry at the very start of the function.
- BasicBlock::iterator IP = F.getEntryBlock().begin();
- IRBuilder<> AtEntry(IP->getParent(), IP);
-
- Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr,
- "gc_frame");
-
- while (isa<AllocaInst>(IP)) ++IP;
- AtEntry.SetInsertPoint(IP->getParent(), IP);
-
- // Initialize the map pointer and load the current head of the shadow stack.
- Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
- Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
- 0,1,"gc_frame.map");
- AtEntry.CreateStore(FrameMap, EntryMapPtr);
-
- // After all the allocas...
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- // For each root, find the corresponding slot in the aggregate...
- Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
-
- // And use it in lieu of the alloca.
- AllocaInst *OriginalAlloca = Roots[I].second;
- SlotPtr->takeName(OriginalAlloca);
- OriginalAlloca->replaceAllUsesWith(SlotPtr);
- }
-
- // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
- // really necessary (the collector would never see the intermediate state at
- // runtime), but it's nicer not to push the half-initialized entry onto the
- // shadow stack.
- while (isa<StoreInst>(IP)) ++IP;
- AtEntry.SetInsertPoint(IP->getParent(), IP);
-
- // Push the entry onto the shadow stack.
- Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
- StackEntry,0,0,"gc_frame.next");
- Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
- StackEntry, 0, "gc_newhead");
- AtEntry.CreateStore(CurrentHead, EntryNextPtr);
- AtEntry.CreateStore(NewHeadVal, Head);
-
- // For each instruction that escapes...
- EscapeEnumerator EE(F, "gc_cleanup");
- while (IRBuilder<> *AtExit = EE.Next()) {
- // Pop the entry from the shadow stack. Don't reuse CurrentHead from
- // AtEntry, since that would make the value live for the entire function.
- Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
- "gc_frame.next");
- Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
- AtExit->CreateStore(SavedHead, Head);
- }
-
- // Delete the original allocas (which are no longer used) and the intrinsic
- // calls (which are no longer valid). Doing this last avoids invalidating
- // iterators.
- for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
- Roots[I].first->eraseFromParent();
- Roots[I].second->eraseFromParent();
- }
-
- Roots.clear();
- return true;
-}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
new file mode 100644
index 0000000..7c0b2bb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -0,0 +1,460 @@
+//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom lowering code required by the shadow-stack GC
+// strategy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "shadowstackgclowering"
+
+namespace {
+
+class ShadowStackGCLowering : public FunctionPass {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
+
+public:
+ static char ID;
+ ShadowStackGCLowering();
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type *GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1,
+ const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1, int Idx2,
+ const char *Name);
+};
+}
+
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+
+FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
+
+char ShadowStackGCLowering::ID = 0;
+
+ShadowStackGCLowering::ShadowStackGCLowering()
+ : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
+ FrameMapTy(nullptr) {
+ initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
+}
+
+namespace {
+/// EscapeEnumerator - This is a little algorithm to find all escape points
+/// from a function so that "finally"-style code can be inserted. In addition
+/// to finding the existing return and unwind instructions, it also (if
+/// necessary) transforms any call instructions into invokes and sends them to
+/// a landing pad.
+///
+/// It's wrapped up in a state machine using the same transform C# uses for
+/// 'yield return' enumerators; this transform allows it to be non-allocating.
+class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return nullptr;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape; only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction *, 16> Calls;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
+ ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy =
+ StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+ Constant *PersFn = F.getParent()->getOrInsertFunction(
+ "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true));
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value *, 16> Args;
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB =
+ CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI->getParent(), RI);
+ return &Builder;
+ }
+ }
+};
+}
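
The comment's 'yield return' analogy, reduced to a skeleton (editor's illustration only): the enumerator stores its resume point in a plain integer plus two iterators, so repeated calls to Next() walk the sequence without allocating a coroutine frame.

#include <vector>

class IntEnumerator {
  int State = 0;
  std::vector<int>::iterator It, End;
  std::vector<int> &Values;

public:
  explicit IntEnumerator(std::vector<int> &V) : Values(V) {}

  int *Next() { // next element, or nullptr when exhausted
    switch (State) {
    default:
      return nullptr;
    case 0:
      It = Values.begin();
      End = Values.end();
      State = 1;
      // FALL THROUGH, exactly as EscapeEnumerator::Next() does.
    case 1:
      if (It != End)
        return &*It++;
      State = 2;
      return nullptr;
    }
  }
};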
+
+
+Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant *, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
+
+ Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage, FrameMap,
+ "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
+ return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
+}
+
+Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type *> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGCLowering::doInitialization(Module &M) {
+ bool Active = false;
+ for (Function &F : M) {
+ if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
+ Active = true;
+ break;
+ }
+ }
+ if (!Active)
+ return false;
+
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type *> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(
+ M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
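
For context, the runtime side that consumes these structures walks llvm_gc_root_chain with exactly this layout. The sketch below is adapted from the shadow-stack example in the LLVM garbage-collection documentation (flexible array members are a GCC/Clang extension; field order matches the gc_map and gc_stackentry types created above):

#include <cstdint>

struct FrameMap {
  int32_t NumRoots;   // Number of roots in stack frame.
  int32_t NumMeta;    // Number of metadata descriptors, may be < NumRoots.
  const void *Meta[]; // Metadata for the first NumMeta roots.
};

struct StackEntry {
  StackEntry *Next;    // Caller's stack entry.
  const FrameMap *Map; // Pointer to constant FrameMap.
  void *Roots[];       // Stack roots (in-place array).
};

extern "C" StackEntry *llvm_gc_root_chain;

static void VisitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
  for (StackEntry *E = llvm_gc_root_chain; E; E = E->Next) {
    int32_t I = 0;
    for (; I < E->Map->NumMeta; ++I)  // roots with metadata come first
      Visitor(&E->Roots[I], E->Map->Meta[I]);
    for (; I < E->Map->NumRoots; ++I) // the rest carry no metadata
      Visitor(&E->Roots[I], nullptr);
  }
}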
+
+bool ShadowStackGCLowering::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGCLowering::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
+ CI,
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty,
+ Value *BasePtr, int Idx,
+ int Idx2,
+ const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
+bool ShadowStackGCLowering::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use the shadow stack GC.
+ if (!F.hasGC() ||
+ F.getGC() != std::string("shadow-stack"))
+ return false;
+
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry =
+ AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
+
+ while (isa<AllocaInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 1, "gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 =
+ CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
new file mode 100644
index 0000000..4463cc7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -0,0 +1,388 @@
+//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for safe points where the prologue and epilogue can be
+// inserted.
+// The safe point for the prologue (resp. epilogue) is called Save
+// (resp. Restore).
+// A point is safe for the prologue (resp. epilogue) if and only if
+// 1) it dominates (resp. post-dominates) all the frame-related operations and
+// 2) between two executions of the Save (resp. Restore) point there is an
+// execution of the Restore (resp. Save) point.
+//
+// For instance, the following points are safe:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
+// And the following points are not:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// }
+// for (int i = 0; i < 10; ++i) {
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
+//
+// This pass also ensures that the safe points 3) are cheaper than the regular
+// entry and exit blocks.
+//
+// Property #1 is ensured via the use of MachineDominatorTree and
+// MachinePostDominatorTree.
+// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
+// points must be in the same loop.
+// Property #3 is ensured via the MachineBlockFrequencyInfo.
+//
+// If this pass finds points matching all these properties, then
+// MachineFrameInfo is updated with that information.
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Statistic.h"
+// To check for profitability.
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+// For property #1 for Save.
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+// To record the result of the analysis.
+#include "llvm/CodeGen/MachineFrameInfo.h"
+// For property #2.
+#include "llvm/CodeGen/MachineLoopInfo.h"
+// For property #1 for Restore.
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+// To know about callee-saved.
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/Debug.h"
+// To query the target about frame lowering.
+#include "llvm/Target/TargetFrameLowering.h"
+// To know about frame setup operation.
+#include "llvm/Target/TargetInstrInfo.h"
+// To access TargetInstrInfo.
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "shrink-wrap"
+
+using namespace llvm;
+
+STATISTIC(NumFunc, "Number of functions");
+STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
+STATISTIC(NumCandidatesDropped,
+ "Number of shrink-wrapping candidates dropped because of frequency");
+
+namespace {
+/// \brief Class to determine where the safe points to insert the
+/// prologue and epilogue are.
+/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
+/// shrink-wrapping term for prologue/epilogue placement, this pass
+/// does not rely on expensive data-flow analysis. Instead we use the
+/// dominance properties and loop information to decide which points
+/// are safe for such insertion.
+class ShrinkWrap : public MachineFunctionPass {
+ /// Hold callee-saved information.
+ RegisterClassInfo RCI;
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ /// Current safe point found for the prologue.
+ /// The prologue will be inserted before the first instruction
+ /// in this basic block.
+ MachineBasicBlock *Save;
+ /// Current safe point found for the epilogue.
+ /// The epilogue will be inserted before the first terminator instruction
+ /// in this basic block.
+ MachineBasicBlock *Restore;
+ /// Hold the information of the basic block frequency.
+ /// Use to check the profitability of the new points.
+ MachineBlockFrequencyInfo *MBFI;
+ /// Hold the loop information. Used to determine if Save and Restore
+ /// are in the same loop.
+ MachineLoopInfo *MLI;
+ /// Frequency of the Entry block.
+ uint64_t EntryFreq;
+ /// Current opcode for frame setup.
+ unsigned FrameSetupOpcode;
+ /// Current opcode for frame destroy.
+ unsigned FrameDestroyOpcode;
+ /// Entry block.
+ const MachineBasicBlock *Entry;
+
+ /// \brief Check if \p MI uses or defines a callee-saved register or
+ /// a frame index. If this is the case, this means \p MI must happen
+ /// after Save and before Restore.
+ bool useOrDefCSROrFI(const MachineInstr &MI) const;
+
+ /// \brief Update the Save and Restore points such that \p MBB is in
+ /// the region that is dominated by Save and post-dominated by Restore
+ /// and Save and Restore still match the safe point definition.
+ /// Such point may not exist and Save and/or Restore may be null after
+ /// this call.
+ void updateSaveRestorePoints(MachineBasicBlock &MBB);
+
+ /// \brief Initialize the pass for \p MF.
+ void init(MachineFunction &MF) {
+ RCI.runOnMachineFunction(MF);
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ Save = nullptr;
+ Restore = nullptr;
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ EntryFreq = MBFI->getEntryFreq();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ Entry = &MF.front();
+
+ ++NumFunc;
+ }
+
+ /// Check whether or not Save and Restore points are still interesting for
+ /// shrink-wrapping.
+ bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+
+public:
+ static char ID;
+
+ ShrinkWrap() : MachineFunctionPass(ID) {
+ initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "Shrink Wrapping analysis";
+ }
+
+ /// \brief Perform the shrink-wrapping analysis and update
+ /// the MachineFrameInfo attached to \p MF with the results.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char ShrinkWrap::ID = 0;
+char &llvm::ShrinkWrapID = ShrinkWrap::ID;
+
+INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
+
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const {
+ if (MI.getOpcode() == FrameSetupOpcode ||
+ MI.getOpcode() == FrameDestroyOpcode) {
+ DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
+ return true;
+ }
+ for (const MachineOperand &MO : MI.operands()) {
+ bool UseCSR = false;
+ if (MO.isReg()) {
+ unsigned PhysReg = MO.getReg();
+ if (!PhysReg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Unallocated register?!");
+ UseCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ }
+ // TODO: Handle regmask more accurately.
+ // For now, be conservative about them.
+ if (UseCSR || MO.isFI() || MO.isRegMask()) {
+ DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI()
+ << "): " << MI << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+/// \brief Helper function to find the immediate (post) dominator.
+template <typename ListOfBBs, typename DominanceAnalysis>
+MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
+ DominanceAnalysis &Dom) {
+ MachineBasicBlock *IDom = &Block;
+ for (MachineBasicBlock *BB : BBs) {
+ IDom = Dom.findNearestCommonDominator(IDom, BB);
+ if (!IDom)
+ break;
+ }
+ return IDom;
+}
+
+void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) {
+ // Get rid of the easy cases first.
+ if (!Save)
+ Save = &MBB;
+ else
+ Save = MDT->findNearestCommonDominator(Save, &MBB);
+
+ if (!Save) {
+ DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
+ return;
+ }
+
+ if (!Restore)
+ Restore = &MBB;
+ else
+ Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+
+ // Make sure we would be able to insert the restore code before the
+ // terminator.
+ if (Restore == &MBB) {
+ for (const MachineInstr &Terminator : MBB.terminators()) {
+ if (!useOrDefCSROrFI(Terminator))
+ continue;
+ // One of the terminators needs to happen before the restore point.
+ if (MBB.succ_empty()) {
+ Restore = nullptr;
+ break;
+ }
+ // Look for a restore point that post-dominates all the successors.
+ // The immediate post-dominator is what we are looking for.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ break;
+ }
+ }
+
+ if (!Restore) {
+ DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n");
+ return;
+ }
+
+ // Make sure Save and Restore are suitable for shrink-wrapping:
+ // 1. all paths from Save need to lead to Restore before exiting.
+ // 2. all paths to Restore need to go through Save from Entry.
+ // We achieve that by making sure that:
+ // A. Save dominates Restore.
+ // B. Restore post-dominates Save.
+ // C. Save and Restore are in the same loop.
+ bool SaveDominatesRestore = false;
+ bool RestorePostDominatesSave = false;
+ while (Save && Restore &&
+ (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
+ !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
+ MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) {
+ // Fix (A).
+ if (!SaveDominatesRestore) {
+ Save = MDT->findNearestCommonDominator(Save, Restore);
+ continue;
+ }
+ // Fix (B).
+ if (!RestorePostDominatesSave)
+ Restore = MPDT->findNearestCommonDominator(Restore, Save);
+
+ // Fix (C).
+ if (Save && Restore && Save != Restore &&
+ MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) {
+ if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore))
+ // Push Save outside of this loop.
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ else
+ // Push Restore outside of this loop.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ }
+ }
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.empty())
+ return false;
+ DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ for (MachineBasicBlock &MBB : MF) {
+ DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
+ << '\n');
+
+ for (const MachineInstr &MI : MBB) {
+ if (!useOrDefCSROrFI(MI))
+ continue;
+ // Save (resp. restore) point must dominate (resp. post-dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(MBB);
+ // If we are at a point where we cannot improve the placement of
+ // save/restore instructions, just give up.
+ if (!ArePointsInteresting()) {
+ DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ return false;
+ }
+ // No need to look for other instructions; this basic block
+ // will already be part of the handled region.
+ break;
+ }
+ }
+ if (!ArePointsInteresting()) {
+ // If the points are not interesting at this point, then they must be null
+ // because it means we did not encounter any frame/CSR related code.
+ // Otherwise, we would have returned from the previous loop.
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
+ DEBUG(dbgs() << "Nothing to shrink-wrap\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
+ << '\n');
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ do {
+ DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+ << Save->getNumber() << ' ' << Save->getName() << ' '
+ << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
+ << Restore->getNumber() << ' ' << Restore->getName() << ' '
+ << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+
+ bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
+ if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
+ TFI->canUseAsEpilogue(*Restore)))
+ break;
+ DEBUG(dbgs() << "New points are too expensive or invalid for the target\n");
+ MachineBasicBlock *NewBB;
+ if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ NewBB = Save;
+ } else {
+ // Restore is expensive.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ if (!Restore)
+ break;
+ NewBB = Restore;
+ }
+ updateSaveRestorePoints(*NewBB);
+ } while (Save && Restore);
+
+ if (!ArePointsInteresting()) {
+ ++NumCandidatesDropped;
+ return false;
+ }
+
+ DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
+ << ' ' << Save->getName() << "\nRestore: "
+ << Restore->getNumber() << ' ' << Restore->getName() << '\n');
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setSavePoint(Save);
+ MFI->setRestorePoint(Restore);
+ ++NumCandidates;
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 7fd8107..42d277e 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
const TargetMachine *TM;
+ Type *doubleUnderDataTy;
+ Type *doubleUnderJBufTy;
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
@@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
Type *Int32Ty = Type::getInt32Ty(M.getContext());
- FunctionContextTy = StructType::get(VoidPtrTy, // __prev
- Int32Ty, // call_site
- ArrayType::get(Int32Ty, 4), // __data
- VoidPtrTy, // __personality
- VoidPtrTy, // __lsda
- ArrayType::get(VoidPtrTy, 5), // __jbuf
+ doubleUnderDataTy = ArrayType::get(Int32Ty, 4);
+ doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5);
+ FunctionContextTy = StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ doubleUnderDataTy, // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ doubleUnderJBufTy, // __jbuf
nullptr);
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
@@ -128,7 +132,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
Value *Idxs[2] = { Zero, One };
- Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+ Value *CallSite =
+ Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
// Insert a store of the call-site number
ConstantInt *CallSiteNoC =
@@ -191,7 +196,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
unsigned Align =
TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
@@ -203,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
// Reference the __data field.
- Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
+ Value *FCData =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
- Value *ExceptionAddr =
- Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep");
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 0, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
- Value *SelectorAddr =
- Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep");
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 1, "exn_selector_gep");
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
@@ -222,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
IRBuilder<> Builder(EntryBB->getTerminator());
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
- Value *PersonalityFieldPtr =
- Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep");
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
+ FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
Builder.CreateStore(
Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
PersonalityFieldPtr, /*isVolatile=*/true);
// LSDA address
- Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
- Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr");
+ Value *LSDAFieldPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep");
Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
return FuncCtx;
@@ -399,18 +406,21 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
IRBuilder<> Builder(EntryBB->getTerminator());
// Get a reference to the jump buffer.
- Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
+ Value *JBufPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");
// Save the frame pointer.
- Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
+ Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
+ "jbuf_fp_gep");
Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
// Save the stack pointer.
- Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
+ Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
+ "jbuf_sp_gep");
- Val = Builder.CreateCall(StackAddrFn, "sp");
+ Val = Builder.CreateCall(StackAddrFn, {}, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
// Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index d46621d..025ae70 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() {
void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
// Number indexes with half the default spacing so we can catch up quickly.
const unsigned Space = SlotIndex::InstrDist/2;
- assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
IndexList::iterator startItr = std::prev(curItr);
unsigned index = startItr->getIndex();
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
index 622361e..03dd58d 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.h
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -70,7 +70,7 @@ public:
static char ID; // Pass identification, replacement for typeid.
SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
- ~SpillPlacement() { releaseMemory(); }
+ ~SpillPlacement() override { releaseMemory(); }
/// BorderConstraint - A basic block has separate constraints for entry and
/// exit.
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 4c8801a..dab1dfe 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -623,8 +623,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
AssignI.setMap(RegAssign);
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- VNInfo *VNI = Copies[i];
- SlotIndex Def = VNI->def;
+ SlotIndex Def = Copies[i]->def;
MachineInstr *MI = LIS.getInstructionFromIndex(Def);
assert(MI && "No instruction for back-copy");
@@ -635,13 +634,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
while (!AtBegin && (--MBBI)->isDebugValue());
DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
- LI->removeValNo(VNI);
+ LIS.removeVRegDefAt(*LI, Def);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
- // Adjust RegAssign if a register assignment is killed at VNI->def. We
- // want to avoid calculating the live range of the source register if
- // possible.
+ // Adjust RegAssign if a register assignment is killed at Def. We want to
+ // avoid calculating the live range of the source register if possible.
AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 2e60c14..a062763 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -419,7 +419,7 @@ public:
/// There may be extra indices created by dead code elimination.
void finish(SmallVectorImpl<unsigned> *LRMap = nullptr);
- /// dump - print the current interval maping to dbgs().
+ /// dump - print the current interval mapping to dbgs().
void dump() const;
// ===--- High level methods ---===
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index faf94b6..3541b33 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -48,7 +48,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -364,7 +363,7 @@ void StackColoring::calculateLocalLiveness() {
}
}
- BBSet = NextBBSet;
+ BBSet = std::move(NextBBSet);
}// while changed.
}
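Replacing the copy assignment with std::move above lets the fixed-point loop hand NextBBSet's storage to BBSet instead of copying it on every iteration. A hedged sketch of the idiom, with std::vector standing in for the bit-set type used by StackColoring:

    #include <utility>
    #include <vector>

    int main() {
      std::vector<int> BBSet{1, 2, 3};
      bool changed = true;
      while (changed) {
        std::vector<int> NextBBSet = BBSet; // next iteration's liveness set
        changed = false;                    // dataflow updates would go here
        BBSet = std::move(NextBBSet);       // steal the buffer, no element copy
      }
      return 0;
    }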
@@ -464,7 +463,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
continue;
if (SlotRemap.count(VI.Slot)) {
DEBUG(dbgs() << "Remapping debug info for ["
- << DIVariable(VI.Var).getName() << "].\n");
+ << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
VI.Slot = SlotRemap[VI.Slot];
FixedDbg++;
}
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 767f43a..d88be57 100644
--- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "stackmaps"
-namespace llvm {
-cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::init(true),
- cl::desc("Enable PatchPoint Liveness Analysis Pass"));
-}
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
@@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited");
STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
STATISTIC(NumStackMaps, "Number of StackMaps visited");
+namespace {
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic to function.
+class StackMapLiveness : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// \brief Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// \brief Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// \brief Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Performs the actual liveness calculation for the function.
+ bool calculateLiveness();
+
+ /// \brief Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineInstr &MI);
+
+ /// \brief Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask() const;
+};
+} // namespace
+
char StackMapLiveness::ID = 0;
char &llvm::StackMapLivenessID = StackMapLiveness::ID;
INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
@@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Calculate the liveness information for the given machine function.
-bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
if (!EnablePatchPointLiveness)
return false;
- DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
- << _MF.getName() << " **********\n");
- MF = &_MF;
- TRI = MF->getSubtarget().getRegisterInfo();
+ DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
+ << " **********\n");
+ this->MF = &MF;
+ TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
- if (!MF->getFrameInfo()->hasPatchPoint()) {
+ if (!MF.getFrameInfo()->hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index f1d1160..ffe59c1 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -19,8 +19,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -76,21 +74,32 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
llvm_unreachable("Unsupported stackmap version!");
}
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+ int RegNo = TRI->getDwarfRegNum(Reg, false);
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
+ RegNo = TRI->getDwarfRegNum(*SR, false);
+
+ assert(RegNo >= 0 && "Invalid Dwarf register number.");
+ return (unsigned) RegNo;
+}
+
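The getDwarfRegNum helper moved above climbs the super-register chain until some enclosing register has a valid DWARF number. A standalone sketch of that "walk up a containment chain until a lookup succeeds" pattern; the tables and names below are illustrative stand-ins, not LLVM API:

    #include <cassert>
    #include <map>

    // DwarfNum holds -1 for registers with no DWARF mapping; Parent maps a
    // register to its single super-register in this toy model.
    static const std::map<unsigned, int> DwarfNum{{1, -1}, {2, 7}};
    static const std::map<unsigned, unsigned> Parent{{1, 2}};

    static unsigned getDwarfRegNum(unsigned Reg) {
      int RegNo = DwarfNum.at(Reg);
      for (auto P = Parent.find(Reg); RegNo < 0 && P != Parent.end();
           P = Parent.find(P->second))
        RegNo = DwarfNum.at(P->second);
      assert(RegNo >= 0 && "Invalid Dwarf register number.");
      return (unsigned)RegNo;
    }

    int main() { return getDwarfRegNum(1) == 7 ? 0 : 1; }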
MachineInstr::const_mop_iterator
StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
LocationVec &Locs, LiveOutVec &LiveOuts) const {
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
if (MOI->isImm()) {
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size =
- AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits();
+ unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::IndirectMemRefOp: {
@@ -98,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(Size > 0 && "Need a valid size for indirect memory locations.");
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::ConstantOp: {
@@ -123,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
- const TargetRegisterClass *RC =
- AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass(
- MOI->getReg());
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
+
+ unsigned Offset = 0;
+ unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg());
+ if (SubRegIdx)
+ Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+
Locs.push_back(
- Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
+ Location(Location::Register, RC->getSize(), RegNo, Offset));
return ++MOI;
}
@@ -138,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
return ++MOI;
}
-/// Go up the super-register chain until we hit a valid dwarf register number.
-static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNo = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
- RegNo = TRI->getDwarfRegNum(*SR, false);
+void StackMaps::print(raw_ostream &OS) {
+ const TargetRegisterInfo *TRI =
+ AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr;
+ OS << WSMP << "callsites:\n";
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
- assert(RegNo >= 0 && "Invalid Dwarf register number.");
- return (unsigned) RegNo;
+ OS << WSMP << "callsite " << CSI.ID << "\n";
+ OS << WSMP << " has " << CSLocs.size() << " locations\n";
+
+ unsigned OperIdx = 0;
+ for (const auto &Loc : CSLocs) {
+ OS << WSMP << " Loc " << OperIdx << ": ";
+ switch (Loc.LocType) {
+ case Location::Unprocessed:
+ OS << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ OS << "Register ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ break;
+ case Location::Direct:
+ OS << "Direct ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ if (Loc.Offset)
+ OS << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ OS << "Indirect ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ OS << "+" << Loc.Offset;
+ break;
+ case Location::Constant:
+ OS << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ OS << "Constant Index " << Loc.Offset;
+ break;
+ }
+ OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size
+ << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+ OperIdx++;
+ }
+
+ OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n";
+
+ OperIdx = 0;
+ for (const auto &LO : LiveOuts) {
+ OS << WSMP << " LO " << OperIdx << ": ";
+ if (TRI)
+ OS << TRI->getName(LO.Reg);
+ else
+ OS << LO.Reg;
+ OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte "
+ << LO.Size << "]\n";
+ OperIdx++;
+ }
+ }
}
/// Create a live-out register record for the given register Reg.
@@ -161,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
StackMaps::LiveOutVec
StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
assert(Mask && "No register mask specified");
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
LiveOutVec LiveOuts;
// Create a LiveOutReg for each bit that is set in the register mask.
@@ -197,9 +273,9 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
- MCContext &OutContext = AP.OutStreamer.getContext();
- MCSymbol *MILabel = OutContext.CreateTempSymbol();
- AP.OutStreamer.EmitLabel(MILabel);
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCSymbol *MILabel = OutContext.createTempSymbol();
+ AP.OutStreamer->EmitLabel(MILabel);
LocationVec Locations;
LiveOutVec LiveOuts;
@@ -294,9 +370,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) {
// Record all the deopt and gc operands (they're contiguous and run from the
// initial index to the end of the operand list)
const unsigned StartIdx = opers.getVarIdx();
- recordStackMapOpers(MI, 0xABCDEF00,
- MI.operands_begin() + StartIdx, MI.operands_end(),
- false);
+ recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx,
+ MI.operands_end(), false);
}
/// Emit the stackmap header.
@@ -384,16 +459,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
/// 0x3, Indirect, [Reg + Offset] (spilled value)
/// 0x4, Constant, Offset (small constant)
/// 0x5, ConstIndex, Constants[Offset] (large constant)
-void StackMaps::emitCallsiteEntries(MCStreamer &OS,
- const TargetRegisterInfo *TRI) {
+void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
+ DEBUG(print(dbgs()));
// Callsite entries.
- DEBUG(dbgs() << WSMP << "callsites:\n");
for (const auto &CSI : CSInfos) {
const LocationVec &CSLocs = CSI.Locations;
const LiveOutVec &LiveOuts = CSI.LiveOuts;
- DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n");
-
// Verify stack map entry. It's better to communicate a problem to the
// runtime than crash in case of in-process compilation. Currently, we do
// simple overflow checks, but we may eventually communicate other
@@ -414,83 +486,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS,
// Reserved for flags.
OS.EmitIntValue(0, 2);
-
- DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n");
-
OS.EmitIntValue(CSLocs.size(), 2);
- unsigned OperIdx = 0;
for (const auto &Loc : CSLocs) {
- unsigned RegNo = 0;
- int Offset = Loc.Offset;
- if(Loc.Reg) {
- RegNo = getDwarfRegNum(Loc.Reg, TRI);
-
- // If this is a register location, put the subregister byte offset in
- // the location offset.
- if (Loc.LocType == Location::Register) {
- assert(!Loc.Offset && "Register location should have zero offset");
- unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
- unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg);
- if (SubRegIdx)
- Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- }
- }
- else {
- assert(Loc.LocType != Location::Register &&
- "Missing location register");
- }
-
- DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": ";
- switch (Loc.LocType) {
- case Location::Unprocessed:
- dbgs() << "<Unprocessed operand>";
- break;
- case Location::Register:
- dbgs() << "Register " << TRI->getName(Loc.Reg);
- break;
- case Location::Direct:
- dbgs() << "Direct " << TRI->getName(Loc.Reg);
- if (Loc.Offset)
- dbgs() << " + " << Loc.Offset;
- break;
- case Location::Indirect:
- dbgs() << "Indirect " << TRI->getName(Loc.Reg)
- << " + " << Loc.Offset;
- break;
- case Location::Constant:
- dbgs() << "Constant " << Loc.Offset;
- break;
- case Location::ConstantIndex:
- dbgs() << "Constant Index " << Loc.Offset;
- break;
- }
- dbgs() << " [encoding: .byte " << Loc.LocType
- << ", .byte " << Loc.Size
- << ", .short " << RegNo
- << ", .int " << Offset << "]\n";
- );
-
OS.EmitIntValue(Loc.LocType, 1);
OS.EmitIntValue(Loc.Size, 1);
- OS.EmitIntValue(RegNo, 2);
- OS.EmitIntValue(Offset, 4);
- OperIdx++;
+ OS.EmitIntValue(Loc.Reg, 2);
+ OS.EmitIntValue(Loc.Offset, 4);
}
- DEBUG(dbgs() << WSMP << " has " << LiveOuts.size()
- << " live-out registers\n");
-
// Num live-out registers and padding to align to 4 bytes.
OS.EmitIntValue(0, 2);
OS.EmitIntValue(LiveOuts.size(), 2);
- OperIdx = 0;
for (const auto &LO : LiveOuts) {
- DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": "
- << TRI->getName(LO.Reg)
- << " [encoding: .short " << LO.RegNo
- << ", .byte 0, .byte " << LO.Size << "]\n");
OS.EmitIntValue(LO.RegNo, 2);
OS.EmitIntValue(0, 1);
OS.EmitIntValue(LO.Size, 1);
@@ -511,24 +520,23 @@ void StackMaps::serializeToStackMapSection() {
if (CSInfos.empty())
return;
- MCContext &OutContext = AP.OutStreamer.getContext();
- MCStreamer &OS = AP.OutStreamer;
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
// Create the section.
- const MCSection *StackMapSection =
- OutContext.getObjectFileInfo()->getStackMapSection();
+ MCSection *StackMapSection =
+ OutContext.getObjectFileInfo()->getStackMapSection();
OS.SwitchSection(StackMapSection);
// Emit a dummy symbol to force section inclusion.
- OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
+ OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
// Serialize data.
DEBUG(dbgs() << "********** Stack Map Output **********\n");
emitStackmapHeader(OS);
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
- emitCallsiteEntries(OS, TRI);
+ emitCallsiteEntries(OS);
OS.AddBlankLine();
// Clean up.
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index a132805..0824d6f 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -88,10 +88,9 @@ bool StackProtector::runOnFunction(Function &Fn) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- TLI = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
- Attribute Attr = Fn.getAttributes().getAttribute(
- AttributeSet::FunctionIndex, "stack-protector-buffer-size");
+ Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
if (Attr.isStringAttribute() &&
Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
return false; // Invalid integer string
@@ -201,15 +200,12 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (F->hasFnAttribute(Attribute::StackProtectReq)) {
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
- } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (F->hasFnAttribute(Attribute::StackProtectStrong))
Strong = true;
- else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect))
+ else if (!F->hasFnAttribute(Attribute::StackProtect))
return false;
for (const BasicBlock &BB : *F) {
@@ -357,8 +353,8 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
IRBuilder<> B(&F->getEntryBlock().front());
AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard");
- B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI,
- AI);
+ B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ {LI, AI});
return SupportsSelectionDAGSP;
}
@@ -492,7 +488,7 @@ BasicBlock *StackProtector::CreateFailBB() {
Constant *StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
nullptr);
- B.CreateCall(StackChkFail);
+ B.CreateCall(StackChkFail, {});
}
B.CreateUnreachable();
return FailBB;
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index cc72e5e..a5a175f 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() {
UsedColors.resize(LastFI);
Assignments.resize(LastFI);
+ typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+ SmallVector<Pair *, 16> Intervals;
+ Intervals.reserve(LS->getNumIntervals());
+ for (auto &I : *LS)
+ Intervals.push_back(&I);
+ std::sort(Intervals.begin(), Intervals.end(),
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+
// Gather all spill slots into a list.
DEBUG(dbgs() << "Spill slot intervals:\n");
- for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
- LiveInterval &li = i->second;
+ for (auto *I : Intervals) {
+ LiveInterval &li = I->second;
DEBUG(li.dump());
int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
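The hunk above collects the LiveStacks entries into a vector and sorts them by frame index so that stack slot coloring visits intervals in a deterministic order, independent of hash iteration order. A minimal sketch of that stabilization idiom over a std::unordered_map:

    #include <algorithm>
    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    int main() {
      std::unordered_map<int, const char *> LS{{3, "c"}, {1, "a"}, {2, "b"}};
      typedef std::unordered_map<int, const char *>::value_type Pair;
      std::vector<Pair *> Intervals;
      Intervals.reserve(LS.size());
      for (auto &I : LS)
        Intervals.push_back(&I);
      // Sort by key so the visit order no longer depends on hashing.
      std::sort(Intervals.begin(), Intervals.end(),
                [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
      for (auto *I : Intervals)
        std::printf("%d -> %s\n", I->first, I->second);
      return 0;
    }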
diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
index 802cf13..95dfd75 100644
--- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
+++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
@@ -12,7 +12,7 @@
// suitable as a default implementation usable with any collector which can
// consume the standard stackmap format generated by statepoints, uses the
// default addrspace to distinguish between gc managed and non-gc managed
-// pointers, and has reasonable relocation semantics.
+// pointers, and has reasonable relocation semantics.
//
//===----------------------------------------------------------------------===//
@@ -33,21 +33,22 @@ public:
NeededSafePoints = 0;
UsesMetadata = false;
CustomRoots = false;
- CustomSafePoints = false;
}
Optional<bool> isGCManagedPointer(const Value *V) const override {
// Method is only valid on pointer typed values.
PointerType *PT = cast<PointerType>(V->getType());
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
- // updated and that no other pointer does.
+ // updated and that no other pointer does. Note that addrspace(1) is used
+ // only as an example; it has no special meaning and is not reserved for
+ // GC usage.
return (1 == PT->getAddressSpace());
}
};
}
-static GCRegistry::Add<StatepointGC>
-X("statepoint-example", "an example strategy for statepoint");
+static GCRegistry::Add<StatepointGC> X("statepoint-example",
+ "an example strategy for statepoint");
namespace llvm {
void linkStatepointExampleGC() {}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 4377236..23f41c8 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -449,6 +449,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
if (VI != LocalVRMap.end()) {
MO.setReg(VI->second);
+ // Clear any kill flags from this operand. The new register could have
+ // uses after this one, so kills are not valid here.
+ MO.setIsKill(false);
MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
}
}
@@ -560,8 +563,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index e3f0191..5638324 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -14,6 +14,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
@@ -22,6 +23,12 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() {
}
+/// The default implementation just looks at attribute "no-frame-pointer-elim".
+bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+ auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim");
+ return Attr.getValueAsString() == "true";
+}
+
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index. This is the default implementation
/// which is overridden for some targets.
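The new default noFramePointerElim simply reads the string-valued "no-frame-pointer-elim" function attribute and compares it to "true". A hedged sketch of that lookup, with a plain map standing in for LLVM's attribute set:

    #include <map>
    #include <string>

    // Illustrative stand-in for a function's string attributes.
    typedef std::map<std::string, std::string> AttrMap;

    static bool noFramePointerElim(const AttrMap &FnAttrs) {
      auto It = FnAttrs.find("no-frame-pointer-elim");
      return It != FnAttrs.end() && It->second == "true";
    }

    int main() {
      AttrMap Attrs{{"no-frame-pointer-elim", "true"}};
      return noFramePointerElim(Attrs) ? 0 : 1;
    }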
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 608b806..92488de 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -142,6 +142,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ bool Reg1IsUndef = MI->getOperand(Idx1).isUndef();
+ bool Reg2IsUndef = MI->getOperand(Idx2).isUndef();
+ bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead();
+ bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead();
// If destination is tied to either of the commuted source register, then
// it must be updated.
if (HasDef && Reg0 == Reg1 &&
@@ -172,6 +176,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
MI->getOperand(Idx1).setSubReg(SubReg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ MI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
+ MI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
+ MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
+ MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
return MI;
}
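The commuteInstruction fix above snapshots the undef and internal-read flags of both operands before swapping, then cross-assigns them together with the kill flags so each flag follows its register. A toy sketch of that "swap values and their attached flags together" pattern:

    #include <cassert>
    #include <utility>

    struct Operand {
      unsigned Reg;
      bool IsKill, IsUndef;
    };

    static void commute(Operand &A, Operand &B) {
      // Snapshot the flags before mutating either operand.
      bool AKill = A.IsKill, BKill = B.IsKill;
      bool AUndef = A.IsUndef, BUndef = B.IsUndef;
      std::swap(A.Reg, B.Reg);
      A.IsKill = BKill;  A.IsUndef = BUndef; // flags follow their register
      B.IsKill = AKill;  B.IsUndef = AUndef;
    }

    int main() {
      Operand A{1, true, false}, B{2, false, true};
      commute(A, B);
      assert(A.Reg == 2 && A.IsUndef && !A.IsKill);
      assert(B.Reg == 1 && B.IsKill && !B.IsUndef);
      return 0;
    }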
@@ -285,21 +293,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
unsigned SubIdx, unsigned &Size,
unsigned &Offset,
- const TargetMachine *TM) const {
+ const MachineFunction &MF) const {
if (!SubIdx) {
Size = RC->getSize();
Offset = 0;
return true;
}
- unsigned BitSize =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
if (BitSize % 8)
return false;
- int BitOffset =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
if (BitOffset < 0 || BitOffset % 8)
return false;
@@ -308,7 +315,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) {
+ if (!MF.getTarget().getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
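getStackSlotRange converts a subregister's bit size and bit offset into bytes and, on big-endian targets, mirrors the offset within the enclosing register. A standalone sketch of that arithmetic; the register-class size and bit values are hypothetical:

    #include <cassert>

    // Compute the byte range of a subregister inside a spill slot.
    // Returns false if the subregister is not byte-aligned.
    static bool getStackSlotRange(unsigned RCSize, unsigned BitSize,
                                  int BitOffset, bool LittleEndian,
                                  unsigned &Size, unsigned &Offset) {
      if (BitSize % 8)
        return false;
      if (BitOffset < 0 || BitOffset % 8)
        return false;
      Size = BitSize / 8;
      Offset = (unsigned)BitOffset / 8;
      assert(RCSize >= (Offset + Size) && "bad subregister range");
      if (!LittleEndian)
        Offset = RCSize - (Offset + Size); // mirror within the register
      return true;
    }

    int main() {
      unsigned Size, Offset;
      // 16-byte register, 8-byte subregister at bit 64, big-endian target.
      bool Ok = getStackSlotRange(16, 64, 64, /*LittleEndian=*/false,
                                  Size, Offset);
      return (Ok && Size == 8 && Offset == 0) ? 0 : 1;
    }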
@@ -377,16 +384,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-bool TargetInstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
}
-static MachineInstr* foldPatchpoint(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex,
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
switch (MI->getOpcode()) {
@@ -405,9 +409,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
- for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
- I != E; ++I) {
- if (*I < StartIdx)
+ for (unsigned Op : Ops) {
+ if (Op < StartIdx)
return nullptr;
}
@@ -427,8 +430,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Compute the spill slot size and offset.
const TargetRegisterClass *RC =
MF.getRegInfo().getRegClass(MO.getReg());
- bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
- SpillOffset, &MF.getTarget());
+ bool Valid =
+ TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
if (!Valid)
report_fatal_error("cannot spill patchpoint subregister operand");
MIB.addImm(StackMaps::IndirectMemRefOp);
@@ -448,10 +451,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
/// operand folded, otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
unsigned Flags = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (MI->getOperand(Ops[i]).isDef())
@@ -517,10 +519,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
@@ -651,8 +652,8 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
- int FrameSetupOpcode = getCallFrameSetupOpcode();
- int FrameDestroyOpcode = getCallFrameDestroyOpcode();
+ unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
if (MI->getOpcode() != FrameSetupOpcode &&
MI->getOpcode() != FrameDestroyOpcode)
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 9b2fdff..b7f1db6 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -420,6 +420,14 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
// These are generally not available.
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
}
+
+ // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
+ // of the gnueabi-style __gnu_*_ieee.
+ // FIXME: What about other targets?
+ if (TT.isOSDarwin()) {
+ Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2";
+ Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2";
+ }
}
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -664,6 +672,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) {
+#define OP_TO_LIBCALL(Name, Enum) \
+ case Name: \
+ switch (VT.SimpleTy) { \
+ default: \
+ return UNKNOWN_LIBCALL; \
+ case MVT::i8: \
+ return Enum##_1; \
+ case MVT::i16: \
+ return Enum##_2; \
+ case MVT::i32: \
+ return Enum##_4; \
+ case MVT::i64: \
+ return Enum##_8; \
+ case MVT::i128: \
+ return Enum##_16; \
+ }
+
+ switch (Opc) {
+ OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
+ OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
+ }
+
+#undef OP_TO_LIBCALL
+
+ return UNKNOWN_LIBCALL;
+}
+
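OP_TO_LIBCALL above expands to one case per atomic opcode, each switching on the value width to select the _1/_2/_4/_8/_16 libcall variant. A compact standalone sketch of the same macro-driven dispatch over toy enums (names are illustrative, not the real RTLIB entries):

    #include <cstdio>

    enum Op { ADD, SUB };
    enum Libcall { FETCH_ADD_4, FETCH_ADD_8, FETCH_SUB_4, FETCH_SUB_8, UNKNOWN };

    static Libcall getLibcall(Op O, unsigned Bytes) {
    #define OP_TO_LIBCALL(Name, Enum)                                          \
      case Name:                                                               \
        switch (Bytes) {                                                       \
        default: return UNKNOWN;                                               \
        case 4: return Enum##_4; /* token pasting picks the width variant */   \
        case 8: return Enum##_8;                                               \
        }

      switch (O) {
        OP_TO_LIBCALL(ADD, FETCH_ADD)
        OP_TO_LIBCALL(SUB, FETCH_SUB)
      }
    #undef OP_TO_LIBCALL
      return UNKNOWN;
    }

    int main() {
      std::printf("%d\n", getLibcall(SUB, 8)); // prints 3 (FETCH_SUB_8)
      return 0;
    }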
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
@@ -695,12 +741,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
}
/// NOTE: The TargetMachine owns TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
- : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) {
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- IsLittleEndian = DL->isLittleEndian();
+ IsLittleEndian = getDataLayout()->isLittleEndian();
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
@@ -765,6 +810,19 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMAD, VT, Expand);
+ setOperationAction(ISD::SMIN, VT, Expand);
+ setOperationAction(ISD::SMAX, VT, Expand);
+ setOperationAction(ISD::UMIN, VT, Expand);
+ setOperationAction(ISD::UMAX, VT, Expand);
+
+ // Overflow operations default to expand
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SMULO, VT, Expand);
+ setOperationAction(ISD::UMULO, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
@@ -791,58 +849,21 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Expand);
- setOperationAction(ISD::FEXP , MVT::f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::f16, Expand);
- setOperationAction(ISD::FRINT, MVT::f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
- setOperationAction(ISD::FROUND, MVT::f16, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f32, Expand);
- setOperationAction(ISD::FRINT, MVT::f32, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
- setOperationAction(ISD::FROUND, MVT::f32, Expand);
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
- setOperationAction(ISD::FROUND, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f128, Expand);
- setOperationAction(ISD::FLOG2, MVT::f128, Expand);
- setOperationAction(ISD::FLOG10, MVT::f128, Expand);
- setOperationAction(ISD::FEXP , MVT::f128, Expand);
- setOperationAction(ISD::FEXP2, MVT::f128, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f128, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f128, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
- setOperationAction(ISD::FCEIL, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
- setOperationAction(ISD::FROUND, MVT::f128, Expand);
+ for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FLOG , VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP , VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FROUND, VT, Expand);
+ }
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
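The hunk above folds four near-identical runs of setOperationAction calls into one range-for over an initializer list of types. A minimal sketch of that de-duplication shape in plain C++ (the enums and action string are illustrative):

    #include <cstdio>

    enum VT { f32, f64, f128 };
    enum ISDOp { FLOG, FEXP };

    static void setOperationAction(ISDOp Op, VT T, const char *Action) {
      std::printf("op %d on type %d -> %s\n", Op, T, Action);
    }

    int main() {
      // One loop replaces a copy of these calls per floating-point type.
      for (VT T : {f32, f64, f128}) {
        setOperationAction(FLOG, T, "Expand");
        setOperationAction(FEXP, T, "Expand");
      }
      return 0;
    }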
@@ -858,7 +879,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
}
unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return DL->getPointerSizeInBits(AS);
+ return getDataLayout()->getPointerSizeInBits(AS);
}
unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -867,7 +888,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
}
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*DL->getPointerSize(0));
+ return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -894,6 +915,138 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
}
}
+TargetLoweringBase::LegalizeKind
+TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
+ // If this is a simple type, use the ComputeRegisterProp mechanism.
+ if (VT.isSimple()) {
+ MVT SVT = VT.getSimpleVT();
+ assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+ MVT NVT = TransformToType[SVT.SimpleTy];
+ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
+
+ assert((LA == TypeLegal || LA == TypeSoftenFloat ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
+ "Promote may not follow Expand or Promote");
+
+ if (LA == TypeSplitVector)
+ return LegalizeKind(LA,
+ EVT::getVectorVT(Context, SVT.getVectorElementType(),
+ SVT.getVectorNumElements() / 2));
+ if (LA == TypeScalarizeVector)
+ return LegalizeKind(LA, SVT.getVectorElementType());
+ return LegalizeKind(LA, NVT);
+ }
+
+ // Handle Extended Scalar Types.
+ if (!VT.isVector()) {
+ assert(VT.isInteger() && "Float types must be simple");
+ unsigned BitSize = VT.getSizeInBits();
+ // First promote to a power-of-two size, then expand if necessary.
+ if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
+ EVT NVT = VT.getRoundIntegerType(Context);
+ assert(NVT != VT && "Unable to round integer VT");
+ LegalizeKind NextStep = getTypeConversion(Context, NVT);
+ // Avoid multi-step promotion.
+ if (NextStep.first == TypePromoteInteger)
+ return NextStep;
+ // Return rounded integer type.
+ return LegalizeKind(TypePromoteInteger, NVT);
+ }
+
+ return LegalizeKind(TypeExpandInteger,
+ EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
+ }
+
+ // Handle vector types.
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ // Vectors with only one element are always scalarized.
+ if (NumElts == 1)
+ return LegalizeKind(TypeScalarizeVector, EltVT);
+
+ // Try to widen vector elements until the element type is a power of two and
+ // promote it to a legal type later on, for example:
+ // <3 x i8> -> <4 x i8> -> <4 x i32>
+ if (EltVT.isInteger()) {
+ // Vectors with a number of elements that is not a power of two are always
+ // widened, for example <3 x i8> -> <4 x i8>.
+ if (!VT.isPow2VectorType()) {
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+ EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Examine the element type.
+ LegalizeKind LK = getTypeConversion(Context, EltVT);
+
+ // If type is to be expanded, split the vector.
+ // <4 x i140> -> <2 x i140>
+ if (LK.first == TypeExpandInteger)
+ return LegalizeKind(TypeSplitVector,
+ EVT::getVectorVT(Context, EltVT, NumElts / 2));
+
+ // Promote the integer element types until a legal vector type is found
+ // or until the element integer type is too big. If a legal type was not
+ // found, fallback to the usual mechanism of widening/splitting the
+ // vector.
+ EVT OldEltVT = EltVT;
+ while (1) {
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
+ .getRoundIntegerType(Context);
+
+ // Stop trying when getting a non-simple element type.
+ // Note that vector elements may be greater than legal vector element
+ // types. Example: X86 XMM registers hold 64bit element on 32bit
+ // systems.
+ if (!EltVT.isSimple())
+ break;
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ // Found a legal promoted vector type.
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+ return LegalizeKind(TypePromoteInteger,
+ EVT::getVectorVT(Context, EltVT, NumElts));
+ }
+
+ // Reset the type to the unexpanded type if we did not find a legal vector
+ // type with a promoted vector element type.
+ EltVT = OldEltVT;
+ }
+
+ // Try to widen the vector until a legal type is found.
+ // If there is no wider legal type, split the vector.
+ while (1) {
+ // Round up to the next power of 2.
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+
+ // If there is no simple vector type with this many elements then there
+ // cannot be a larger legal vector type. Note that this assumes that
+ // there are no skipped intermediate vector types in the simple types.
+ if (!EltVT.isSimple())
+ break;
+ MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ if (LargerVector == MVT())
+ break;
+
+ // If this type is legal then widen the vector.
+ if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
+ return LegalizeKind(TypeWidenVector, LargerVector);
+ }
+
+ // Widen odd vectors to next power of two.
+ if (!VT.isPow2VectorType()) {
+ EVT NVT = VT.getPow2VectorType(Context);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Vectors with illegal element types are expanded.
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
+ return LegalizeKind(TypeSplitVector, NVT);
+}
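getTypeConversion repeatedly rounds element counts up to the next power of two before widening or splitting, e.g. <3 x i8> -> <4 x i8>. A standalone sketch of the rounding step, with a small bit-smearing NextPowerOf2 in place of LLVM's helper:

    #include <cassert>

    // Next power of two strictly greater than A (for A < 2^31).
    static unsigned NextPowerOf2(unsigned A) {
      A |= (A >> 1);
      A |= (A >> 2);
      A |= (A >> 4);
      A |= (A >> 8);
      A |= (A >> 16);
      return A + 1;
    }

    int main() {
      assert(NextPowerOf2(3) == 4);
      assert(NextPowerOf2(4) == 8); // strictly greater, as in LLVM's helper
      unsigned NumElts = 3;
      if (NumElts & (NumElts - 1))       // not already a power of two?
        NumElts = NextPowerOf2(NumElts); // widen: <3 x i8> -> <4 x i8>
      assert(NumElts == 4);
      return 0;
    }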
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned &NumIntermediates,
@@ -997,8 +1150,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
}
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(FI), Flags,
- TM.getSubtargetImpl()->getDataLayout()->getPointerSize(),
- MFI.getObjectAlignment(FI));
+ TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -1012,10 +1164,13 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
-std::pair<const TargetRegisterClass*, uint8_t>
-TargetLoweringBase::findRepresentativeClass(MVT VT) const {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+// This function is in TargetLowering because it uses RegClassForVT which would
+// need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering having a TargetRegisterInfo class member that it would use.
+std::pair<const TargetRegisterClass *, uint8_t>
+TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
@@ -1041,7 +1196,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const {
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
-void TargetLoweringBase::computeRegisterProperties() {
+void TargetLoweringBase::computeRegisterProperties(
+ const TargetRegisterInfo *TRI) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
@@ -1110,27 +1266,29 @@ void TargetLoweringBase::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
- // Decide how to handle f32. If the target does not have native support for
- // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ // Decide how to handle f32. If the target does not have native f32 support,
+ // expand it to i32 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f32)) {
- if (isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
- TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
- } else {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
- TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
- }
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
if (!isTypeLegal(MVT::f16)) {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
- TransformToType[MVT::f16] = MVT::i16;
- ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+ // If the target has native f32 support, promote f16 operations to f32. If
+ // f32 is not supported, generate soft float library calls.
+ if (isTypeLegal(MVT::f32)) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+ } else {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+ TransformToType[MVT::f16] = MVT::i16;
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+ }
}
// Loop over all of the vector value types to see which need transformations.
@@ -1223,7 +1381,7 @@ void TargetLoweringBase::computeRegisterProperties() {
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
const TargetRegisterClass* RRC;
uint8_t Cost;
- std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
RepRegClassForVT[i] = RRC;
RepRegClassCostForVT[i] = Cost;
}
@@ -1366,7 +1524,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
- return DL->getABITypeAlignment(Ty);
+ return getDataLayout()->getABITypeAlignment(Ty);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9f1e06b..a32bdf8 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,7 +51,7 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
- return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+ return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV, Mang)->getName());
if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
return TM.getSymbol(GV, Mang);
@@ -62,21 +63,17 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
const MCSymbol *Sym) const {
SmallString<64> NameData("DW.ref.");
NameData += Sym->getName();
- MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
+ MCSymbol *Label = getContext().getOrCreateSymbol(NameData);
Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
StringRef Prefix = ".data.";
NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
- const MCSection *Sec = getContext().getELFSection(NameData,
- ELF::SHT_PROGBITS,
- Flags,
- SectionKind::getDataRel(),
- 0, Label->getName());
- unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
+ MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS,
+ Flags, 0, Label->getName());
+ unsigned Size = TM.getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(
- TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -166,9 +163,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
return ELF::SHT_PROGBITS;
}
-
-static unsigned
-getELFSectionFlags(SectionKind K) {
+static unsigned getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
if (!K.isMetadata())
@@ -183,9 +178,7 @@ getELFSectionFlags(SectionKind K) {
if (K.isThreadLocal())
Flags |= ELF::SHF_TLS;
- // K.isMergeableConst() is left out to honour PR4650
- if (K.isMergeableCString() || K.isMergeableConst4() ||
- K.isMergeableConst8() || K.isMergeableConst16())
+ if (K.isMergeableCString() || K.isMergeableConst())
Flags |= ELF::SHF_MERGE;
if (K.isMergeableCString())
@@ -206,7 +199,7 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
return C;
}
-const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
StringRef SectionName = GV->getSection();
@@ -222,126 +215,141 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
}
return getContext().getELFSection(SectionName,
getELFSectionType(SectionName, Kind), Flags,
- Kind, /*EntrySize=*/0, Group);
+ /*EntrySize=*/0, Group);
}
-/// getSectionPrefixForGlobal - Return the section prefix name used by options
-/// FunctionsSections and DataSections.
+/// Return the section prefix name used by options FunctionSections and
+/// DataSections.
static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
- if (Kind.isText()) return ".text.";
- if (Kind.isReadOnly()) return ".rodata.";
- if (Kind.isBSS()) return ".bss.";
-
- if (Kind.isThreadData()) return ".tdata.";
- if (Kind.isThreadBSS()) return ".tbss.";
-
- if (Kind.isDataNoRel()) return ".data.";
- if (Kind.isDataRelLocal()) return ".data.rel.local.";
- if (Kind.isDataRel()) return ".data.rel.";
- if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local.";
-
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isReadOnly())
+ return ".rodata";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadData())
+ return ".tdata";
+ if (Kind.isThreadBSS())
+ return ".tbss";
+ if (Kind.isDataNoRel())
+ return ".data";
+ if (Kind.isDataRelLocal())
+ return ".data.rel.local";
+ if (Kind.isDataRel())
+ return ".data.rel";
+ if (Kind.isReadOnlyWithRelLocal())
+ return ".data.rel.ro.local";
assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".data.rel.ro.";
+ return ".data.rel.ro";
}
-const MCSection *TargetLoweringObjectFileELF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
- // If we have -ffunction-section or -fdata-section then we should emit the
- // global value to a uniqued section specifically for it.
- bool EmitUniquedSection;
- if (Kind.isText())
- EmitUniquedSection = TM.getFunctionSections();
- else
- EmitUniquedSection = TM.getDataSections();
-
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
- !Kind.isCommon()) {
- StringRef Prefix = getSectionPrefixForGlobal(Kind);
-
- SmallString<128> Name(Prefix);
- TM.getNameWithPrefix(Name, GV, Mang, true);
-
- StringRef Group = "";
- unsigned Flags = getELFSectionFlags(Kind);
- if (GV->isWeakForLinker() || GV->hasComdat()) {
- if (const Comdat *C = getELFComdat(GV))
- Group = C->getName();
- else
- Group = Name.substr(Prefix.size());
- Flags |= ELF::SHF_GROUP;
+static MCSectionELF *
+selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ unsigned EntrySize = 0;
+ if (Kind.isMergeableCString()) {
+ if (Kind.isMergeable2ByteCString()) {
+ EntrySize = 2;
+ } else if (Kind.isMergeable4ByteCString()) {
+ EntrySize = 4;
+ } else {
+ EntrySize = 1;
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+ }
+ } else if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4()) {
+ EntrySize = 4;
+ } else if (Kind.isMergeableConst8()) {
+ EntrySize = 8;
+ } else {
+ assert(Kind.isMergeableConst16() && "unknown data width");
+ EntrySize = 16;
}
-
- return getContext().getELFSection(Name.str(),
- getELFSectionType(Name.str(), Kind),
- Flags, Kind, 0, Group);
}
- if (Kind.isText()) return TextSection;
-
- if (Kind.isMergeable1ByteCString() ||
- Kind.isMergeable2ByteCString() ||
- Kind.isMergeable4ByteCString()) {
+ StringRef Group = "";
+ if (const Comdat *C = getELFComdat(GV)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ }
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name;
+ if (Kind.isMergeableCString()) {
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV));
-
- const char *SizeSpec = ".rodata.str1.";
- if (Kind.isMergeable2ByteCString())
- SizeSpec = ".rodata.str2.";
- else if (Kind.isMergeable4ByteCString())
- SizeSpec = ".rodata.str4.";
- else
- assert(Kind.isMergeable1ByteCString() && "unknown string width");
-
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
- std::string Name = SizeSpec + utostr(Align);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::SHF_MERGE |
- ELF::SHF_STRINGS,
- Kind);
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ Name = SizeSpec + utostr(Align);
+ } else if (Kind.isMergeableConst()) {
+ Name = ".rodata.cst";
+ Name += utostr(EntrySize);
+ } else {
+ Name = getSectionPrefixForGlobal(Kind);
}
- if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4() && MergeableConst4Section)
- return MergeableConst4Section;
- if (Kind.isMergeableConst8() && MergeableConst8Section)
- return MergeableConst8Section;
- if (Kind.isMergeableConst16() && MergeableConst16Section)
- return MergeableConst16Section;
- return ReadOnlySection; // .const
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GV, Mang, true);
+ }
+ unsigned UniqueID = ~0;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
}
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group, UniqueID);
+}
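selectELFSectionForGlobal above derives the entry size from the section kind and builds mergeable section names such as ".rodata.str1.4" or ".rodata.cst8" from it. A hedged sketch of the name construction; the kinds and sizes below are illustrative, not the real SectionKind predicates:

    #include <cstdio>
    #include <string>

    enum class Kind { Str1, Str2, Cst8, Text };

    static std::string sectionName(Kind K, unsigned Align) {
      switch (K) {
      case Kind::Str1: // 1-byte mergeable strings; alignment in the suffix
        return ".rodata.str1." + std::to_string(Align);
      case Kind::Str2: // 2-byte mergeable strings
        return ".rodata.str2." + std::to_string(Align);
      case Kind::Cst8: // 8-byte mergeable constants
        return ".rodata.cst8";
      case Kind::Text:
        return ".text";
      }
      return "";
    }

    int main() {
      std::printf("%s\n", sectionName(Kind::Str1, 4).c_str()); // .rodata.str1.4
      return 0;
    }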
- if (Kind.isReadOnly()) return ReadOnlySection;
+MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
- if (Kind.isThreadData()) return TLSDataSection;
- if (Kind.isThreadBSS()) return TLSBSSSection;
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
- // Note: we claim that common symbols are put in BSSSection, but they are
- // really emitted with the magic .comm directive, which creates a symbol table
- // entry but not a section.
- if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+ return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ EmitUniqueSection, Flags, &NextUniqueID);
+}
- if (Kind.isDataNoRel()) return DataSection;
- if (Kind.isDataRelLocal()) return DataRelLocalSection;
- if (Kind.isDataRel()) return DataRelSection;
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return DataRelROSection;
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC,
+ &NextUniqueID);
+}
+
+bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
}
-/// getSectionForConstant - Given a mergeable constant with the
-/// specified size and relocation information, return a section that it
-/// should be placed in.
-const MCSection *
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *
TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
@@ -358,15 +366,12 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
return DataRelROSection;
}
-static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
- bool UseInitArray,
- bool IsCtor,
- unsigned Priority,
- const MCSymbol *KeySym) {
+static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
+ bool IsCtor, unsigned Priority,
+ const MCSymbol *KeySym) {
std::string Name;
unsigned Type;
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
- SectionKind Kind = SectionKind::getDataRel();
StringRef COMDAT = KeySym ? KeySym->getName() : "";
if (KeySym)
@@ -398,16 +403,16 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
Type = ELF::SHT_PROGBITS;
}
- return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT);
+ return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT);
}
-const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
+MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getStaticStructorSection(getContext(), UseInitArray, true, Priority,
KeySym);
}
-const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getStaticStructorSection(getContext(), UseInitArray, false, Priority,
KeySym);
@@ -419,22 +424,21 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
if (!UseInitArray)
return;
- StaticCtorSection =
- getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ StaticCtorSection = getContext().getELFSection(
+ ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = getContext().getELFSection(
+ ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
//===----------------------------------------------------------------------===//
// MachO
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
/// option string. Returns StringRef() if the option does not specify a library.
StringRef TargetLoweringObjectFileMachO::
@@ -511,12 +515,11 @@ emitModuleFlags(MCStreamer &Streamer,
ErrorCode + ".");
// Get the section.
- const MCSectionMachO *S =
- getContext().getMachOSection(Segment, Section, TAA, StubSize,
- SectionKind::getDataNoRel());
+ MCSectionMachO *S = getContext().getMachOSection(
+ Segment, Section, TAA, StubSize, SectionKind::getDataNoRel());
Streamer.SwitchSection(S);
Streamer.EmitLabel(getContext().
- GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
Streamer.EmitIntValue(VersionVal, 4);
Streamer.EmitIntValue(ImageInfoFlags, 4);
Streamer.AddBlankLine();
@@ -531,7 +534,7 @@ static void checkMachOComdat(const GlobalValue *GV) {
"' cannot be lowered.");
}
-const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
+MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
// Parse the section specifier and create it if valid.
@@ -552,8 +555,8 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
}
// Get the section.
- const MCSectionMachO *S =
- getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+ MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
// If TAA wasn't set by ParseSectionSpecifier() above,
// use the value returned by getMachOSection() as a default.
@@ -573,9 +576,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
return S;
}
-const MCSection *TargetLoweringObjectFileMachO::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
+MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
checkMachOComdat(GV);
// Handle thread local data.
@@ -595,16 +598,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
  // Do not put 16-bit arrays in the UString section if they have an
  // externally visible label; this runs into issues with certain linker
  // versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
@@ -642,7 +643,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return DataSection;
}
-const MCSection *
+MCSection *
TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
// If this constant requires a relocation, we have to put it in the data
@@ -711,6 +712,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
return SSym;
}
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+  // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit do, we replace the GOT equivalent by accessing the final symbol
+ // through a non_lazy_ptr stub instead. One advantage is that it allows the
+ // computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+  // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += DL->getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
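+  // The final expression is "stub - base" when there is no extra
+  // displacement, and "stub - (base + offset)" otherwise.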
+ if (!Offset)
+ return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -794,14 +855,14 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
return 0;
}
-const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
+MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
int Selection = 0;
unsigned Characteristics = getCOFFSectionFlags(Kind);
StringRef Name = GV->getSection();
StringRef COMDATSymName = "";
- if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+ if (GV->hasComdat()) {
Selection = getSelectionForCOFF(GV);
const GlobalValue *ComdatGV;
if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
@@ -836,10 +897,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
return ".data";
}
-
-const MCSection *TargetLoweringObjectFileCOFF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
+MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
// If we have -ffunction-sections then we should emit the global value to a
// uniqued section specifically for it.
bool EmitUniquedSection;
@@ -848,12 +908,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
else
EmitUniquedSection = TM.getDataSections();
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- // Section names depend on the name of the symbol which is not feasible if the
- // symbol has private linkage.
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
- !Kind.isCommon()) {
+ if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind);
@@ -872,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection);
+ } else {
+ SmallString<256> TmpData;
+ getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection);
}
}
@@ -893,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return DataSection;
}
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
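+  // Section names are derived from the symbol name, which is not feasible
+  // for private-linkage symbols, so force a non-private label whenever the
+  // global is emitted into its own uniqued section.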
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
StringRef TargetLoweringObjectFileCOFF::
getDepLibFromLinkerOpt(StringRef LinkerOption) const {
const char *LibCmd = "/DEFAULTLIB:";
@@ -923,38 +1019,27 @@ emitModuleFlags(MCStreamer &Streamer,
// Emit the linker options to the linker .drectve section. According to the
  // spec, this section is a space-separated string containing flags for the linker.
- const MCSection *Sec = getDrectveSection();
+ MCSection *Sec = getDrectveSection();
Streamer.SwitchSection(Sec);
for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
- StringRef Op = MDOption->getString();
// Lead with a space for consistency with our dllexport implementation.
- std::string Escaped(" ");
- if (!Op.startswith("\"") && (Op.find(" ") != StringRef::npos)) {
- // The PE-COFF spec says args with spaces must be quoted. It doesn't say
- // how to escape quotes, but it probably uses this algorithm:
- // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx
- // FIXME: Reuse escaping code from Support/Windows/Program.inc
- Escaped.push_back('\"');
- Escaped.append(Op);
- Escaped.push_back('\"');
- } else {
- Escaped.append(Op);
- }
- Streamer.EmitBytes(Escaped);
+ std::string Directive(" ");
+ Directive.append(MDOption->getString());
+ Streamer.EmitBytes(Directive);
}
}
}
-const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
+MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
cast<MCSectionCOFF>(StaticCtorSection), KeySym);
}
-const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
+MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
cast<MCSectionCOFF>(StaticDtorSection), KeySym);
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 618d903..f4926cb 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -12,23 +12,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") &&
- !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
+ // Check to see if we should eliminate all frame pointers.
+ if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+ return true;
- return NoFramePointerElim;
+ // Check to see if we should eliminate non-leaf frame pointers.
+ if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ return MF.getFrameInfo()->hasCalls();
+
+ return false;
}
/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
@@ -51,10 +54,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
StringRef TargetOptions::getTrapFunctionName() const {
return TrapFuncName;
}
-
-/// getCFIFuncName - If this returns a non-empty string, then it is the name of
-/// the function that gets called on CFI violations in CFI non-enforcing mode
-/// (!TargetOptions::CFIEnforcing).
-StringRef TargetOptions::getCFIFuncName() const {
- return CFIFuncName;
-}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index ef2dab1..299380d 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -224,26 +224,29 @@ unsigned TargetSchedModel::computeOperandLatency(
return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI);
}
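+/// Compute the latency of a scheduling class as the maximum write latency
+/// across all of its defs.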
+unsigned
+TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(&SCDesc, DefIdx);
+ Latency = std::max(Latency, capLatency(WLEntry->Cycles));
+ }
+ return Latency;
+}
+
unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
unsigned SCIdx = TII->get(Opcode).getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
- unsigned Latency = 0;
- if (SCDesc->isValid() && !SCDesc->isVariant()) {
- for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
- DefIdx != DefEnd; ++DefIdx) {
- // Lookup the definition's write latency in SubtargetInfo.
- const MCWriteLatencyEntry *WLEntry =
- STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, capLatency(WLEntry->Cycles));
- }
- return Latency;
- }
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return computeInstrLatency(*SCDesc);
- assert(Latency && "No MI sched latency");
- return 0;
+ llvm_unreachable("No MI sched latency");
}
unsigned
@@ -257,17 +260,8 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
if (hasInstrSchedModel()) {
const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
- if (SCDesc->isValid()) {
- unsigned Latency = 0;
- for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
- DefIdx != DefEnd; ++DefIdx) {
- // Lookup the definition's write latency in SubtargetInfo.
- const MCWriteLatencyEntry *WLEntry =
- STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, capLatency(WLEntry->Cycles));
- }
- return Latency;
- }
+ if (SCDesc->isValid())
+ return computeInstrLatency(*SCDesc);
}
return TII->defaultDefLatency(SchedModel, MI);
}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index e218a83..6bceccc 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -186,7 +189,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
- if (!MI->isSafeToMove(TII, AA, SeenStore))
+ if (!MI->isSafeToMove(AA, SeenStore))
return false;
unsigned DefReg = 0;
@@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
+/// getSingleDef -- return the MachineInstr* if it is the single def of Reg
+/// in the current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// Maxlen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+ // FIXME:
+  // We can extend the logic further: if a pair of operands in an insn has
+  // been merged, the insn could be regarded as a virtual copy, and the
+  // virtual copy could also be used to construct a copy chain.
+  // To minimize register copies more generally, the logic of the two-address
+  // instruction pass should ideally be integrated with the register
+  // allocation pass, where the interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
@@ -798,7 +861,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
return false;
bool SeenStore = true;
- if (!MI->isSafeToMove(TII, AA, SeenStore))
+ if (!MI->isSafeToMove(AA, SeenStore))
return false;
if (TII->getInstrLatency(InstrItins, MI) > 1)
@@ -985,7 +1048,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
return false;
bool SeenStore = true;
- if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ if (!KillMI->isSafeToMove(AA, SeenStore))
return false;
SmallSet<unsigned, 2> Uses;
@@ -1515,9 +1578,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
MRI = &MF->getRegInfo();
- TII = TM.getSubtargetImpl()->getInstrInfo();
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- InstrItins = TM.getSubtargetImpl()->getInstrItineraryData();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 7824f92..d393e10 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -88,7 +88,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
DeadBlocks[i]->eraseFromParent();
}
- return DeadBlocks.size();
+ return !DeadBlocks.empty();
}
@@ -204,5 +204,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
F.RenumberBlocks();
- return (DeadBlocks.size() || ModifiedPHI);
+ return (!DeadBlocks.empty() || ModifiedPHI);
}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 7d3b0ce..9fb1b5b 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -264,8 +264,7 @@ void VirtRegRewriter::addMBBLiveIns() {
if ((SubRegLaneMask & S.LaneMask) == 0)
continue;
for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
- if (!LiveIn[i]->isLiveIn(SubReg))
- LiveIn[i]->addLiveIn(SubReg);
+ LiveIn[i]->addLiveIn(SubReg);
}
}
LiveIn.clear();
@@ -277,16 +276,20 @@ void VirtRegRewriter::addMBBLiveIns() {
if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
continue;
for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
- if (!LiveIn[i]->isLiveIn(PhysReg))
- LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn[i]->addLiveIn(PhysReg);
LiveIn.clear();
}
}
}
+
+  // Sort and unique the MBB LiveIns, since we did not check whether
+  // SubReg/PhysReg was already in each MBB's LiveIns set before calling
+  // addLiveIn on them.
+ for (MachineBasicBlock &MBB : *MF)
+ MBB.sortUniqueLiveIns();
}
void VirtRegRewriter::rewrite() {
- bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
new file mode 100644
index 0000000..7246e1c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -0,0 +1,2482 @@
+//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers LLVM IR exception handling into something closer to what the
+// backend wants for functions using a personality function from a runtime
+// provided by MSVC. Functions with other personality functions are left alone
+// and may be prepared by other passes. In particular, all supported MSVC
+// personality functions require cleanup code to be outlined, and the C++
+// personality requires catch handler code to be outlined.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "winehprepare"
+
+namespace {
+
+// This map is used to model frame variable usage during outlining, to
+// construct a structure type to hold the frame variables in a frame
+// allocation block, and to remap the frame variable allocas (including
+// spill locations as needed) to GEPs that get the variable from the
+// frame allocation structure.
+typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
+
+// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't
+// quite null.
+AllocaInst *getCatchObjectSentinel() {
+ return static_cast<AllocaInst *>(nullptr) + 1;
+}
+
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+
+class LandingPadActions;
+class LandingPadMap;
+
+typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
+typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+
+class WinEHPrepare : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ WinEHPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID) {
+ if (TM)
+ TheTriple = Triple(TM->getTargetTriple());
+ }
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doFinalization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override {
+ return "Windows exception handling preparation";
+ }
+
+private:
+ bool prepareExceptionHandlers(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads);
+ void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads);
+ void promoteLandingPadValues(LandingPadInst *LPad);
+ void demoteValuesLiveAcrossHandlers(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads);
+ void findSEHEHReturnPoints(Function &F,
+ SetVector<BasicBlock *> &EHReturnBlocks);
+ void findCXXEHReturnPoints(Function &F,
+ SetVector<BasicBlock *> &EHReturnBlocks);
+ void getPossibleReturnTargets(Function *ParentF, Function *HandlerF,
+ SetVector<BasicBlock*> &Targets);
+ void completeNestedLandingPad(Function *ParentFn,
+ LandingPadInst *OutlinedLPad,
+ const LandingPadInst *OriginalLPad,
+ FrameVarInfoMap &VarInfo);
+ Function *createHandlerFunc(Type *RetTy, const Twine &Name, Module *M,
+ Value *&ParentFP);
+ bool outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo);
+ void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn);
+
+ void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
+ CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks);
+ void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB,
+ BasicBlock *EndBB);
+
+ void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
+
+ Triple TheTriple;
+
+ // All fields are reset by runOnFunction.
+ DominatorTree *DT = nullptr;
+ EHPersonality Personality = EHPersonality::Unknown;
+ CatchHandlerMapTy CatchHandlerMap;
+ CleanupHandlerMapTy CleanupHandlerMap;
+ DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
+ SmallPtrSet<BasicBlock *, 4> NormalBlocks;
+ SmallPtrSet<BasicBlock *, 4> EHBlocks;
+ SetVector<BasicBlock *> EHReturnBlocks;
+
+ // This maps landing pad instructions found in outlined handlers to
+ // the landing pad instruction in the parent function from which they
+ // were cloned. The cloned/nested landing pad is used as the key
+ // because the landing pad may be cloned into multiple handlers.
+ // This map will be used to add the llvm.eh.actions call to the nested
+ // landing pads after all handlers have been outlined.
+ DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP;
+
+ // This maps blocks in the parent function which are destinations of
+ // catch handlers to cloned blocks in (other) outlined handlers. This
+  // handles the case where a nested landing pad has a catch handler that
+ // returns to a handler function rather than the parent function.
+ // The original block is used as the key here because there should only
+ // ever be one handler function from which the cloned block is not pruned.
+ // The original block will be pruned from the parent function after all
+ // handlers have been outlined. This map will be used to adjust the
+ // return instructions of handlers which return to the block that was
+ // outlined into a handler. This is done after all handlers have been
+ // outlined but before the outlined code is pruned from the parent function.
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks;
+
+ // Map from outlined handler to call to llvm.frameaddress(1). Only used for
+ // 32-bit EH.
+ DenseMap<Function *, Value *> HandlerToParentFP;
+
+ AllocaInst *SEHExceptionCodeSlot = nullptr;
+};
+
+class WinEHFrameVariableMaterializer : public ValueMaterializer {
+public:
+ WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP,
+ FrameVarInfoMap &FrameVarInfo);
+ ~WinEHFrameVariableMaterializer() override {}
+
+ Value *materializeValueFor(Value *V) override;
+
+ void escapeCatchObject(Value *V);
+
+private:
+ FrameVarInfoMap &FrameVarInfo;
+ IRBuilder<> Builder;
+};
+
+class LandingPadMap {
+public:
+ LandingPadMap() : OriginLPad(nullptr) {}
+ void mapLandingPad(const LandingPadInst *LPad);
+
+ bool isInitialized() { return OriginLPad != nullptr; }
+
+ bool isOriginLandingPadBlock(const BasicBlock *BB) const;
+ bool isLandingPadSpecificInst(const Instruction *Inst) const;
+
+ void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
+ Value *SelectorValue) const;
+
+private:
+ const LandingPadInst *OriginLPad;
+ // We will normally only see one of each of these instructions, but
+ // if more than one occurs for some reason we can handle that.
+ TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
+ TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
+};
+
+class WinEHCloningDirectorBase : public CloningDirector {
+public:
+ WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : Materializer(HandlerFn, ParentFP, VarInfo),
+ SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
+ Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())),
+ LPadMap(LPadMap), ParentFP(ParentFP) {}
+
+ CloningAction handleInstruction(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+
+ virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleCompare(ValueToValueMapTy &VMap,
+ const CmpInst *Compare,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) = 0;
+
+ ValueMaterializer *getValueMaterializer() override { return &Materializer; }
+
+protected:
+ WinEHFrameVariableMaterializer Materializer;
+ Type *SelectorIDType;
+ Type *Int8PtrType;
+ LandingPadMap &LPadMap;
+
+ /// The value representing the parent frame pointer.
+ Value *ParentFP;
+};
+
+class WinEHCatchDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCatchDirector(
+ Function *CatchFn, Value *ParentFP, Value *Selector,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap,
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads,
+ DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks)
+ : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap),
+ CurrentSelector(Selector->stripPointerCasts()),
+ ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads),
+ DT(DT), EHBlocks(EHBlocks) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+ CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
+ BasicBlock *NewBB) override;
+ CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) override;
+
+ Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+private:
+ Value *CurrentSelector;
+
+ Value *ExceptionObjectVar;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
+
+ // This will be a reference to the field of the same name in the WinEHPrepare
+ // object which instantiates this WinEHCatchDirector object.
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP;
+ DominatorTree *DT;
+ SmallPtrSetImpl<BasicBlock *> &EHBlocks;
+};
+
+class WinEHCleanupDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo,
+ LPadMap) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleIndirectBr(ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+ CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare,
+ BasicBlock *NewBB) override;
+ CloningAction handleLandingPad(ValueToValueMapTy &VMap,
+ const LandingPadInst *LPad,
+ BasicBlock *NewBB) override;
+};
+
+class LandingPadActions {
+public:
+ LandingPadActions() : HasCleanupHandlers(false) {}
+
+ void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); }
+ void insertCleanupHandler(CleanupHandler *Action) {
+ Actions.push_back(Action);
+ HasCleanupHandlers = true;
+ }
+
+ bool includesCleanup() const { return HasCleanupHandlers; }
+
+ SmallVectorImpl<ActionHandler *> &actions() { return Actions; }
+ SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
+ SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
+
+private:
+ // Note that this class does not own the ActionHandler objects in this vector.
+ // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap
+ // in the WinEHPrepare class.
+ SmallVector<ActionHandler *, 4> Actions;
+ bool HasCleanupHandlers;
+};
+
+} // end anonymous namespace
+
+char WinEHPrepare::ID = 0;
+INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
+ false, false)
+
+FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
+ return new WinEHPrepare(TM);
+}
+
+bool WinEHPrepare::runOnFunction(Function &Fn) {
+ // No need to prepare outlined handlers.
+ if (Fn.hasFnAttribute("wineh-parent"))
+ return false;
+
+ SmallVector<LandingPadInst *, 4> LPads;
+ SmallVector<ResumeInst *, 4> Resumes;
+ for (BasicBlock &BB : Fn) {
+ if (auto *LP = BB.getLandingPadInst())
+ LPads.push_back(LP);
+ if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator()))
+ Resumes.push_back(Resume);
+ }
+
+ // No need to prepare functions that lack landing pads.
+ if (LPads.empty())
+ return false;
+
+ // Classify the personality to see what kind of preparation we need.
+ Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
+
+ // Do nothing if this is not an MSVC personality.
+ if (!isMSVCEHPersonality(Personality))
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // If there were any landing pads, prepareExceptionHandlers will make changes.
+ prepareExceptionHandlers(Fn, LPads);
+ return true;
+}
+
+bool WinEHPrepare::doFinalization(Module &M) { return false; }
+
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB);
+
+// Finds blocks reachable from the starting set Worklist. Does not follow unwind
+// edges or blocks listed in StopPoints.
+static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs,
+ SetVector<BasicBlock *> &Worklist,
+ const SetVector<BasicBlock *> *StopPoints) {
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ // Don't cross blocks that we should stop at.
+ if (StopPoints && StopPoints->count(BB))
+ continue;
+
+ if (!ReachableBBs.insert(BB).second)
+ continue; // Already visited.
+
+ // Don't follow unwind edges of invokes.
+ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Worklist.insert(II->getNormalDest());
+ continue;
+ }
+
+ // Otherwise, follow all successors.
+ Worklist.insert(succ_begin(BB), succ_end(BB));
+ }
+}
+
+// Attempt to find an instruction where a block can be split before
+// a call to llvm.eh.begincatch and its operands. If the block
+// begins with the begincatch call or one of its adjacent operands,
+// the block will not be split.
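+// For instance, if both operands of the begincatch call are defined
+// immediately before it, the split point is the earliest of those adjacent
+// defs, keeping them together with the call in the split-off block.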
+static Instruction *findBeginCatchSplitPoint(BasicBlock *BB,
+ IntrinsicInst *II) {
+ // If the begincatch call is already the first instruction in the block,
+ // don't split.
+ Instruction *FirstNonPHI = BB->getFirstNonPHI();
+ if (II == FirstNonPHI)
+ return nullptr;
+
+ // If either operand is in the same basic block as the instruction and
+ // isn't used by another instruction before the begincatch call, include it
+ // in the split block.
+ auto *Op0 = dyn_cast<Instruction>(II->getOperand(0));
+ auto *Op1 = dyn_cast<Instruction>(II->getOperand(1));
+
+ Instruction *I = II->getPrevNode();
+ Instruction *LastI = II;
+
+ while (I == Op0 || I == Op1) {
+ // If the block begins with one of the operands and there are no other
+ // instructions between the operand and the begincatch call, don't split.
+ if (I == FirstNonPHI)
+ return nullptr;
+
+ LastI = I;
+ I = I->getPrevNode();
+ }
+
+ // If there is at least one instruction in the block before the begincatch
+ // call and its operands, split the block at either the begincatch or
+ // its operand.
+ return LastI;
+}
+
+/// Find all points where exceptional control rejoins normal control flow via
+/// llvm.eh.endcatch. Add them to the normal bb reachability worklist.
+void WinEHPrepare::findCXXEHReturnPoints(
+ Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
+ for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
+ BasicBlock *BB = BBI;
+ for (Instruction &I : *BB) {
+ if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) {
+ Instruction *SplitPt =
+ findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I));
+ if (SplitPt) {
+ // Split the block before the llvm.eh.begincatch call to allow
+ // cleanup and catch code to be distinguished later.
+ // Do not update BBI because we still need to process the
+ // portion of the block that we are splitting off.
+ SplitBlock(BB, SplitPt, DT);
+ break;
+ }
+ }
+ if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ // Split the block after the call to llvm.eh.endcatch if there is
+ // anything other than an unconditional branch, or if the successor
+ // starts with a phi.
+ auto *Br = dyn_cast<BranchInst>(I.getNextNode());
+ if (!Br || !Br->isUnconditional() ||
+ isa<PHINode>(Br->getSuccessor(0)->begin())) {
+ DEBUG(dbgs() << "splitting block " << BB->getName()
+ << " with llvm.eh.endcatch\n");
+ BBI = SplitBlock(BB, I.getNextNode(), DT);
+ }
+ // The next BB is normal control flow.
+ EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0));
+ break;
+ }
+ }
+ }
+}
+
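+/// Returns true if BB is a landingpad whose last clause is a catch-all,
+/// i.e. a catch clause with a null type-info pointer.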
+static bool isCatchAllLandingPad(const BasicBlock *BB) {
+ const LandingPadInst *LP = BB->getLandingPadInst();
+ if (!LP)
+ return false;
+ unsigned N = LP->getNumClauses();
+ return (N > 0 && LP->isCatch(N - 1) &&
+ isa<ConstantPointerNull>(LP->getClause(N - 1)));
+}
+
+/// Find all points where exceptional control rejoins normal control flow via
+/// selector dispatch.
+void WinEHPrepare::findSEHEHReturnPoints(
+ Function &F, SetVector<BasicBlock *> &EHReturnBlocks) {
+ for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
+ BasicBlock *BB = BBI;
+ // If the landingpad is a catch-all, treat the whole lpad as if it is
+ // reachable from normal control flow.
+ // FIXME: This is imprecise. We need a better way of identifying where a
+ // catch-all starts and cleanups stop. As far as LLVM is concerned, there
+ // is no difference.
+ if (isCatchAllLandingPad(BB)) {
+ EHReturnBlocks.insert(BB);
+ continue;
+ }
+
+ BasicBlock *CatchHandler;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) {
+ // Split the edge if there is a phi node. Returning from EH to a phi node
+ // is just as impossible as having a phi after an indirectbr.
+ if (isa<PHINode>(CatchHandler->begin())) {
+ DEBUG(dbgs() << "splitting EH return edge from " << BB->getName()
+ << " to " << CatchHandler->getName() << '\n');
+ BBI = CatchHandler = SplitCriticalEdge(
+ BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler));
+ }
+ EHReturnBlocks.insert(CatchHandler);
+ }
+ }
+}
+
+void WinEHPrepare::identifyEHBlocks(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads) {
+  DEBUG(dbgs() << "Identifying EH blocks in function "
+               << F.getName() << '\n');
+
+ // Build a set of all non-exceptional blocks and exceptional blocks.
+ // - Non-exceptional blocks are blocks reachable from the entry block while
+ // not following invoke unwind edges.
+ // - Exceptional blocks are blocks reachable from landingpads. Analysis does
+ // not follow llvm.eh.endcatch blocks, which mark a transition from
+ // exceptional to normal control.
+
+ if (Personality == EHPersonality::MSVC_CXX)
+ findCXXEHReturnPoints(F, EHReturnBlocks);
+ else
+ findSEHEHReturnPoints(F, EHReturnBlocks);
+
+ DEBUG({
+ dbgs() << "identified the following blocks as EH return points:\n";
+ for (BasicBlock *BB : EHReturnBlocks)
+ dbgs() << " " << BB->getName() << '\n';
+ });
+
+// Join points should not have phis at this point, unless they are a
+// landingpad, in which case we will demote their phis later.
+#ifndef NDEBUG
+ for (BasicBlock *BB : EHReturnBlocks)
+ assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) &&
+ "non-lpad EH return block has phi");
+#endif
+
+ // Normal blocks are the blocks reachable from the entry block and all EH
+ // return points.
+ SetVector<BasicBlock *> Worklist;
+ Worklist = EHReturnBlocks;
+ Worklist.insert(&F.getEntryBlock());
+ findReachableBlocks(NormalBlocks, Worklist, nullptr);
+ DEBUG({
+ dbgs() << "marked the following blocks as normal:\n";
+ for (BasicBlock *BB : NormalBlocks)
+ dbgs() << " " << BB->getName() << '\n';
+ });
+
+ // Exceptional blocks are the blocks reachable from landingpads that don't
+ // cross EH return points.
+ Worklist.clear();
+ for (auto *LPI : LPads)
+ Worklist.insert(LPI->getParent());
+ findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks);
+ DEBUG({
+ dbgs() << "marked the following blocks as exceptional:\n";
+ for (BasicBlock *BB : EHBlocks)
+ dbgs() << " " << BB->getName() << '\n';
+ });
+}
+
+/// Ensure that all values live into and out of exception handlers are stored
+/// in memory.
+/// FIXME: This falls down when values are defined in one handler and live into
+/// another handler. For example, a cleanup defines a value used only by a
+/// catch handler.
+void WinEHPrepare::demoteValuesLiveAcrossHandlers(
+ Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
+ DEBUG(dbgs() << "Demoting values live across exception handlers in function "
+ << F.getName() << '\n');
+
+ // identifyEHBlocks() should have been called before this function.
+ assert(!NormalBlocks.empty());
+
+ SetVector<Argument *> ArgsToDemote;
+ SetVector<Instruction *> InstrsToDemote;
+ for (BasicBlock &BB : F) {
+ bool IsNormalBB = NormalBlocks.count(&BB);
+ bool IsEHBB = EHBlocks.count(&BB);
+ if (!IsNormalBB && !IsEHBB)
+ continue; // Blocks that are neither normal nor EH are unreachable.
+ for (Instruction &I : BB) {
+ for (Value *Op : I.operands()) {
+ // Don't demote static allocas, constants, and labels.
+ if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op))
+ continue;
+ auto *AI = dyn_cast<AllocaInst>(Op);
+ if (AI && AI->isStaticAlloca())
+ continue;
+
+ if (auto *Arg = dyn_cast<Argument>(Op)) {
+ if (IsEHBB) {
+ DEBUG(dbgs() << "Demoting argument " << *Arg
+ << " used by EH instr: " << I << "\n");
+ ArgsToDemote.insert(Arg);
+ }
+ continue;
+ }
+
+ auto *OpI = cast<Instruction>(Op);
+ BasicBlock *OpBB = OpI->getParent();
+ // If a value is produced and consumed in the same BB, we don't need to
+ // demote it.
+ if (OpBB == &BB)
+ continue;
+ bool IsOpNormalBB = NormalBlocks.count(OpBB);
+ bool IsOpEHBB = EHBlocks.count(OpBB);
+ if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) {
+ DEBUG({
+ dbgs() << "Demoting instruction live in-out from EH:\n";
+ dbgs() << "Instr: " << *OpI << '\n';
+ dbgs() << "User: " << I << '\n';
+ });
+ InstrsToDemote.insert(OpI);
+ }
+ }
+ }
+ }
+
+ // Demote values live into and out of handlers.
+ // FIXME: This demotion is inefficient. We should insert spills at the point
+ // of definition, insert one reload in each handler that uses the value, and
+ // insert reloads in the BB used to rejoin normal control flow.
+ Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt();
+ for (Instruction *I : InstrsToDemote)
+ DemoteRegToStack(*I, false, AllocaInsertPt);
+
+ // Demote arguments separately, and only for uses in EH blocks.
+ for (Argument *Arg : ArgsToDemote) {
+ auto *Slot = new AllocaInst(Arg->getType(), nullptr,
+ Arg->getName() + ".reg2mem", AllocaInsertPt);
+ SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end());
+ for (User *U : Users) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (I && EHBlocks.count(I->getParent())) {
+ auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I);
+ U->replaceUsesOfWith(Arg, Reload);
+ }
+ }
+ new StoreInst(Arg, Slot, AllocaInsertPt);
+ }
+
+ // Demote landingpad phis, as the landingpad will be removed from the machine
+ // CFG.
+ for (LandingPadInst *LPI : LPads) {
+ BasicBlock *BB = LPI->getParent();
+ while (auto *Phi = dyn_cast<PHINode>(BB->begin()))
+ DemotePHIToStack(Phi, AllocaInsertPt);
+ }
+
+ DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and "
+ << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n");
+}
+
+bool WinEHPrepare::prepareExceptionHandlers(
+ Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
+ // Don't run on functions that are already prepared.
+ for (LandingPadInst *LPad : LPads) {
+ BasicBlock *LPadBB = LPad->getParent();
+ for (Instruction &Inst : *LPadBB)
+ if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>()))
+ return false;
+ }
+
+ identifyEHBlocks(F, LPads);
+ demoteValuesLiveAcrossHandlers(F, LPads);
+
+ // These containers are used to re-map frame variables that are used in
+ // outlined catch and cleanup handlers. They will be populated as the
+ // handlers are outlined.
+ FrameVarInfoMap FrameVarInfo;
+
+ bool HandlersOutlined = false;
+
+ Module *M = F.getParent();
+ LLVMContext &Context = M->getContext();
+
+ // Create a new function to receive the handler contents.
+ PointerType *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *Int32Type = Type::getInt32Ty(Context);
+ Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions);
+
+ if (isAsynchronousEHPersonality(Personality)) {
+ // FIXME: Switch the ehptr type to i32 and then switch this.
+ SEHExceptionCodeSlot =
+ new AllocaInst(Int8PtrType, nullptr, "seh_exception_code",
+ F.getEntryBlock().getFirstInsertionPt());
+ }
+
+ // In order to handle the case where one outlined catch handler returns
+ // to a block within another outlined catch handler that would otherwise
+ // be unreachable, we need to outline the nested landing pad before we
+ // outline the landing pad which encloses it.
+ if (!isAsynchronousEHPersonality(Personality))
+ std::sort(LPads.begin(), LPads.end(),
+ [this](LandingPadInst *const &L, LandingPadInst *const &R) {
+ return DT->properlyDominates(R->getParent(), L->getParent());
+ });
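+  // The comparator places dominated (more deeply nested) landing pads
+  // before the landing pads that dominate them.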
+
+  // This container stores the llvm.eh.actions calls and IndirectBr
+  // instructions that make up the body of each landing pad after outlining.
+ // We need to defer the population of the target list for the indirectbr
+ // until all landing pads have been outlined so that we can handle the
+ // case of blocks in the target that are reached only from nested
+ // landing pads.
+ SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls;
+
+ for (LandingPadInst *LPad : LPads) {
+ // Look for evidence that this landingpad has already been processed.
+ bool LPadHasActionList = false;
+ BasicBlock *LPadBB = LPad->getParent();
+ for (Instruction &Inst : *LPadBB) {
+ if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) {
+ LPadHasActionList = true;
+ break;
+ }
+ }
+
+ // If we've already outlined the handlers for this landingpad,
+ // there's nothing more to do here.
+ if (LPadHasActionList)
+ continue;
+
+ // If either of the values in the aggregate returned by the landing pad is
+ // extracted and stored to memory, promote the stored value to a register.
+ promoteLandingPadValues(LPad);
+
+ LandingPadActions Actions;
+ mapLandingPadBlocks(LPad, Actions);
+
+ HandlersOutlined |= !Actions.actions().empty();
+ for (ActionHandler *Action : Actions) {
+ if (Action->hasBeenProcessed())
+ continue;
+ BasicBlock *StartBB = Action->getStartBlock();
+
+ // SEH doesn't do any outlining for catches. Instead, pass the handler
+ // basic block addr to llvm.eh.actions and list the block as a return
+ // target.
+ if (isAsynchronousEHPersonality(Personality)) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ processSEHCatchHandler(CatchAction, StartBB);
+ continue;
+ }
+ }
+
+ outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo);
+ }
+
+ // Split the block after the landingpad instruction so that it is just a
+ // call to llvm.eh.actions followed by indirectbr.
+ assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed");
+ SplitBlock(LPadBB, LPad->getNextNode(), DT);
+ // Erase the branch inserted by the split so we can insert indirectbr.
+ LPadBB->getTerminator()->eraseFromParent();
+
+ // Replace all extracted values with undef and ultimately replace the
+ // landingpad with undef.
+ SmallVector<Instruction *, 4> SEHCodeUses;
+ SmallVector<Instruction *, 4> EHUndefs;
+ for (User *U : LPad->users()) {
+ auto *E = dyn_cast<ExtractValueInst>(U);
+ if (!E)
+ continue;
+ assert(E->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ unsigned Idx = *E->idx_begin();
+ assert((Idx == 0 || Idx == 1) && "unexpected index");
+ if (Idx == 0 && isAsynchronousEHPersonality(Personality))
+ SEHCodeUses.push_back(E);
+ else
+ EHUndefs.push_back(E);
+ }
+ for (Instruction *E : EHUndefs) {
+ E->replaceAllUsesWith(UndefValue::get(E->getType()));
+ E->eraseFromParent();
+ }
+ LPad->replaceAllUsesWith(UndefValue::get(LPad->getType()));
+
+ // Rewrite uses of the exception pointer to loads of an alloca.
+ for (Instruction *E : SEHCodeUses) {
+ SmallVector<Use *, 4> Uses;
+ for (Use &U : E->uses())
+ Uses.push_back(&U);
+ for (Use *U : Uses) {
+ auto *I = cast<Instruction>(U->getUser());
+ if (isa<ResumeInst>(I))
+ continue;
+ LoadInst *LI;
+ if (auto *Phi = dyn_cast<PHINode>(I))
+ LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false,
+ Phi->getIncomingBlock(*U));
+ else
+ LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I);
+ U->set(LI);
+ }
+ E->replaceAllUsesWith(UndefValue::get(E->getType()));
+ E->eraseFromParent();
+ }
+
+ // Add a call to describe the actions for this landing pad.
+ std::vector<Value *> ActionArgs;
+ for (ActionHandler *Action : Actions) {
+ // Action codes from docs are: 0 cleanup, 1 catch.
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
+ ActionArgs.push_back(CatchAction->getSelector());
+ // Find the frame escape index of the exception object alloca in the
+ // parent.
+ int FrameEscapeIdx = -1;
+ Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
+ if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
+ auto I = FrameVarInfo.find(EHObj);
+ assert(I != FrameVarInfo.end() &&
+ "failed to map llvm.eh.begincatch var");
+ FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
+ }
+ ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
+ } else {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
+ }
+ ActionArgs.push_back(Action->getHandlerBlockOrFunc());
+ }
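+ // As a rough sketch (operand names are illustrative), one catch action and
+ // one cleanup action yield a call such as:
+ //   %recover = call i8* (...)* @llvm.eh.actions(
+ //       i32 1, i8* <selector>, i32 <frame escape index>, <catch handler>,
+ //       i32 0, <cleanup handler>)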
+ CallInst *Recover =
+ CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB);
+
+ SetVector<BasicBlock *> ReturnTargets;
+ for (ActionHandler *Action : Actions) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ const auto &CatchTargets = CatchAction->getReturnTargets();
+ ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end());
+ }
+ }
+ IndirectBrInst *Branch =
+ IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB);
+ for (BasicBlock *Target : ReturnTargets)
+ Branch->addDestination(Target);
+
+ if (!isAsynchronousEHPersonality(Personality)) {
+ // C++ EH must repopulate the targets later to handle the case of
+ // targets that are reached indirectly through nested landing pads.
+ LPadImpls.push_back(std::make_pair(Recover, Branch));
+ }
+
+ } // End for each landingpad
+
+ // If nothing got outlined, there is no more processing to be done.
+ if (!HandlersOutlined)
+ return false;
+
+ // Replace any nested landing pad stubs with the correct action handler.
+ // This must be done before we remove unreachable blocks because it
+ // cleans up references to outlined blocks that will be deleted.
+ for (auto &LPadPair : NestedLPtoOriginalLP)
+ completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
+ NestedLPtoOriginalLP.clear();
+
+ // Update the indirectbr instructions' target lists if necessary.
+ SetVector<BasicBlock*> CheckedTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (auto &LPadImplPair : LPadImpls) {
+ IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first);
+ IndirectBrInst *Branch = LPadImplPair.second;
+
+ // Get a list of handlers called by this landing pad's llvm.eh.actions call.
+ parseEHActions(Recover, ActionList);
+
+ // Collect the possible return targets of the catch handlers; these will be
+ // added to the existing indirectbr below.
+ SetVector<BasicBlock *> ReturnTargets;
+ for (const auto &Action : ActionList) {
+ if (auto *CA = dyn_cast<CatchHandler>(Action.get())) {
+ Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc());
+ getPossibleReturnTargets(&F, Handler, ReturnTargets);
+ }
+ }
+ ActionList.clear();
+ // Clear any targets we already knew about.
+ for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) {
+ BasicBlock *KnownTarget = Branch->getDestination(I);
+ if (ReturnTargets.count(KnownTarget))
+ ReturnTargets.remove(KnownTarget);
+ }
+ for (BasicBlock *Target : ReturnTargets) {
+ Branch->addDestination(Target);
+ // The target may be a block that we expected to get pruned.
+ // If it is, it may contain a call to llvm.eh.endcatch.
+ if (CheckedTargets.insert(Target)) {
+ // Earlier preparations guarantee that all calls to llvm.eh.endcatch
+ // will be followed by an unconditional branch.
+ auto *Br = dyn_cast<BranchInst>(Target->getTerminator());
+ if (Br && Br->isUnconditional() &&
+ Br != Target->getFirstNonPHIOrDbgOrLifetime()) {
+ Instruction *Prev = Br->getPrevNode();
+ if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>()))
+ Prev->eraseFromParent();
+ }
+ }
+ }
+ }
+ LPadImpls.clear();
+
+ F.addFnAttr("wineh-parent", F.getName());
+
+ // Delete any blocks that were only used by handlers that were outlined above.
+ removeUnreachableBlocks(F);
+
+ BasicBlock *Entry = &F.getEntryBlock();
+ IRBuilder<> Builder(F.getParent()->getContext());
+ Builder.SetInsertPoint(Entry->getFirstInsertionPt());
+
+ Function *FrameEscapeFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameescape);
+ Function *RecoverFrameFn =
+ Intrinsic::getDeclaration(M, Intrinsic::framerecover);
+ SmallVector<Value *, 8> AllocasToEscape;
+
+ // Scan the entry block for an existing call to llvm.frameescape. We need to
+ // keep escaping those objects.
+ for (Instruction &I : F.front()) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ auto Args = II->arg_operands();
+ AllocasToEscape.append(Args.begin(), Args.end());
+ II->eraseFromParent();
+ break;
+ }
+ }
+
+ // Finally, replace all of the temporary allocas for frame variables used in
+ // the outlined handlers with calls to llvm.framerecover.
+ for (auto &VarInfoEntry : FrameVarInfo) {
+ Value *ParentVal = VarInfoEntry.first;
+ TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
+ AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal);
+
+ // FIXME: We should try to sink unescaped allocas from the parent frame into
+ // the child frame. If the alloca is escaped, we have to use the lifetime
+ // markers to ensure that the alloca is only live within the child frame.
+
+ // Add this alloca to the list of things to escape.
+ AllocasToEscape.push_back(ParentAlloca);
+
+ // Next replace all outlined allocas that are mapped to it.
+ for (AllocaInst *TempAlloca : Allocas) {
+ if (TempAlloca == getCatchObjectSentinel())
+ continue; // Skip catch parameter sentinels.
+ Function *HandlerFn = TempAlloca->getParent()->getParent();
+ llvm::Value *FP = HandlerToParentFP[HandlerFn];
+ assert(FP);
+
+ // FIXME: Sink this framerecover into the blocks where it is used.
+ Builder.SetInsertPoint(TempAlloca);
+ Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
+ Value *RecoverArgs[] = {
+ Builder.CreateBitCast(&F, Int8PtrType, ""), FP,
+ llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)};
+ Instruction *RecoveredAlloca =
+ Builder.CreateCall(RecoverFrameFn, RecoverArgs);
+
+ // Add a pointer bitcast if the alloca wasn't an i8.
+ if (RecoveredAlloca->getType() != TempAlloca->getType()) {
+ RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8");
+ RecoveredAlloca = cast<Instruction>(
+ Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType()));
+ }
+ TempAlloca->replaceAllUsesWith(RecoveredAlloca);
+ TempAlloca->removeFromParent();
+ RecoveredAlloca->takeName(TempAlloca);
+ delete TempAlloca;
+ }
+ } // End for each FrameVarInfo entry.
+
+ // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry
+ // block.
+ Builder.SetInsertPoint(&F.getEntryBlock().back());
+ Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
+
+ if (SEHExceptionCodeSlot) {
+ if (SEHExceptionCodeSlot->hasNUses(0))
+ SEHExceptionCodeSlot->eraseFromParent();
+ else if (isAllocaPromotable(SEHExceptionCodeSlot))
+ PromoteMemToReg(SEHExceptionCodeSlot, *DT);
+ }
+
+ // Clean up the handler action maps we created for this function
+ DeleteContainerSeconds(CatchHandlerMap);
+ CatchHandlerMap.clear();
+ DeleteContainerSeconds(CleanupHandlerMap);
+ CleanupHandlerMap.clear();
+ HandlerToParentFP.clear();
+ DT = nullptr;
+ SEHExceptionCodeSlot = nullptr;
+ EHBlocks.clear();
+ NormalBlocks.clear();
+ EHReturnBlocks.clear();
+
+ return HandlersOutlined;
+}
+
+void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) {
+ // If the return values of the landing pad instruction are extracted and
+ // stored to memory, we want to promote the store locations to reg values.
+ SmallVector<AllocaInst *, 2> EHAllocas;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts are often passed to store instructions.
+ // In unoptimized code the stored value will often be loaded and then stored
+ // again.
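+ // In IR, that pattern looks roughly like this (a sketch; names are
+ // illustrative):
+ //   %lp = landingpad { i8*, i32 } personality ...
+ //   %exn = extractvalue { i8*, i32 } %lp, 0
+ //   store i8* %exn, i8** %exn.slot
+ //   %sel = extractvalue { i8*, i32 } %lp, 1
+ //   store i32 %sel, i32* %sel.slot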
+ for (auto *U : LPad->users()) {
+ ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+ if (!Extract)
+ continue;
+
+ for (auto *EU : Extract->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(EU)) {
+ auto *AV = cast<AllocaInst>(Store->getPointerOperand());
+ EHAllocas.push_back(AV);
+ }
+ }
+ }
+
+ // We can't do this without a dominator tree.
+ assert(DT);
+
+ if (!EHAllocas.empty()) {
+ PromoteMemToReg(EHAllocas, *DT);
+ EHAllocas.clear();
+ }
+
+ // After promotion, some extracts may be trivially dead. Remove them.
+ SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
+ for (auto *U : Users)
+ RecursivelyDeleteTriviallyDeadInstructions(U);
+}
+
+void WinEHPrepare::getPossibleReturnTargets(Function *ParentF,
+ Function *HandlerF,
+ SetVector<BasicBlock*> &Targets) {
+ for (BasicBlock &BB : *HandlerF) {
+ // If the handler contains landing pads, check for any
+ // handlers that may return directly to a block in the
+ // parent function.
+ if (auto *LPI = BB.getLandingPadInst()) {
+ IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode());
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(Recover, ActionList);
+ for (const auto &Action : ActionList) {
+ if (auto *CH = dyn_cast<CatchHandler>(Action.get())) {
+ Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc());
+ getPossibleReturnTargets(ParentF, NestedF, Targets);
+ }
+ }
+ }
+
+ auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+ if (!Ret)
+ continue;
+
+ // Handler functions must always return a block address.
+ BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+
+ // If this is the handler for a nested landing pad, the
+ // return address may have been remapped to a block in the
+ // parent handler. We're not interested in those.
+ if (BA->getFunction() != ParentF)
+ continue;
+
+ Targets.insert(BA->getBasicBlock());
+ }
+}
+
+void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
+ LandingPadInst *OutlinedLPad,
+ const LandingPadInst *OriginalLPad,
+ FrameVarInfoMap &FrameVarInfo) {
+ // Get the nested block and erase the unreachable instruction that was
+ // temporarily inserted as its terminator.
+ LLVMContext &Context = ParentFn->getContext();
+ BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+ // If the nested landing pad was outlined before the landing pad that enclosed
+ // it, it will already be in outlined form. In that case, we just need to see
+ // if the returns and the enclosing branch instruction need to be updated.
+ IndirectBrInst *Branch =
+ dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator());
+ if (!Branch) {
+ // If the landing pad wasn't in outlined form, it should be a stub with
+ // an unreachable terminator.
+ assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+ OutlinedBB->getTerminator()->eraseFromParent();
+ // That should leave OutlinedLPad as the last instruction in its block.
+ assert(&OutlinedBB->back() == OutlinedLPad);
+ }
+
+ // The original landing pad will have already had its action intrinsic
+ // built by the outlining loop. We need to clone that into the outlined
+ // location. It may also be necessary to add references to the exception
+ // variables to the outlined handler in which this landing pad is nested
+ // and remap return instructions in the nested handlers that should return
+ // to an address in the outlined handler.
+ Function *OutlinedHandlerFn = OutlinedBB->getParent();
+ BasicBlock::const_iterator II = OriginalLPad;
+ ++II;
+ // The instruction after the landing pad should now be a call to eh.actions.
+ const Instruction *Recover = II;
+ assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+ const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
+
+ // Remap the return target in the nested handler.
+ SmallVector<BlockAddress *, 4> ActionTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ for (const auto &Action : ActionList) {
+ auto *Catch = dyn_cast<CatchHandler>(Action.get());
+ if (!Catch)
+ continue;
+ // The dyn_cast to function here selects C++ catch handlers and skips
+ // SEH catch handlers.
+ auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+ if (!Handler)
+ continue;
+ // Visit all the return instructions, looking for places that return
+ // to a location within OutlinedHandlerFn.
+ for (BasicBlock &NestedHandlerBB : *Handler) {
+ auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+ if (!Ret)
+ continue;
+
+ // Handler functions must always return a block address.
+ BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+ // The original target will have been in the main parent function,
+ // but if it is the address of a block that has been outlined, it
+ // should be a block that was outlined into OutlinedHandlerFn.
+ assert(BA->getFunction() == ParentFn);
+
+ // Ignore targets that aren't part of an outlined handler function.
+ if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+ continue;
+
+ // If the return value is the address of a block that we
+ // previously outlined into the parent handler function, replace
+ // the return instruction and add the mapped target to the list
+ // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
+ assert(MappedBB->getParent() == OutlinedHandlerFn);
+ BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
+ Ret->eraseFromParent();
+ ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
+ ActionTargets.push_back(NewBA);
+ }
+ }
+ ActionList.clear();
+
+ if (Branch) {
+ // If the landing pad was already in outlined form, just update its targets.
+ for (unsigned int I = Branch->getNumDestinations(); I > 0; --I)
+ Branch->removeDestination(I);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ Branch->addDestination(Target->getBasicBlock());
+ } else {
+ // If the landing pad was previously stubbed out, fill in its outlined form.
+ IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone());
+ OutlinedBB->getInstList().push_back(NewEHActions);
+
+ // Insert an indirect branch into the outlined landing pad BB.
+ IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ IBr->addDestination(Target->getBasicBlock());
+ }
+}
+
+// This function examines a block to determine whether the block ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// This function is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
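+// The dispatch being matched looks roughly like this (a sketch; names are
+// illustrative):
+//   %typeid = call i32 @llvm.eh.typeid.for(i8* bitcast (... @typeinfo to i8*))
+//   %matches = icmp eq i32 %sel, %typeid
+//   br i1 %matches, label %catch, label %next
+// An icmp ne with the successors swapped is also recognized.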
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB) {
+ ICmpInst::Predicate Pred;
+ BasicBlock *TBB, *FBB;
+ Value *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+ return false;
+
+ if (!match(LHS,
+ m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+ !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+ return false;
+
+ if (Pred == CmpInst::ICMP_EQ) {
+ CatchHandler = TBB;
+ NextBB = FBB;
+ return true;
+ }
+
+ if (Pred == CmpInst::ICMP_NE) {
+ CatchHandler = FBB;
+ NextBB = TBB;
+ return true;
+ }
+
+ return false;
+}
+
+static bool isCatchBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return true;
+ }
+ return false;
+}
+
+static BasicBlock *createStubLandingPad(Function *Handler,
+ Value *PersonalityFn) {
+ // FIXME: Finish this!
+ LLVMContext &Context = Handler->getContext();
+ BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
+ Handler->getBasicBlockList().push_back(StubBB);
+ IRBuilder<> Builder(StubBB);
+ LandingPadInst *LPad = Builder.CreateLandingPad(
+ llvm::StructType::get(Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(Context), nullptr),
+ PersonalityFn, 0);
+ // Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
+ Function *ActionIntrin =
+ Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
+ Builder.CreateCall(ActionIntrin, {}, "recover");
+ LPad->setCleanup(true);
+ Builder.CreateUnreachable();
+ return StubBB;
+}
+
+// Cycles through the blocks in an outlined handler function looking for an
+// invoke instruction and inserts an invoke of llvm.donothing with an empty
+// landing pad if none is found. The code that generates the .xdata tables for
+// the handler needs at least one landing pad to identify the parent function's
+// personality.
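+// A sketch of the inserted stub (assuming a return terminator was found;
+// names are illustrative):
+//   invoke void @llvm.donothing()
+//       to label %ret unwind label %stub
+// stub:
+//   %lp = landingpad { i8*, i32 } personality <parent personality> cleanup
+//   %recover = call i8* (...)* @llvm.eh.actions()
+//   unreachable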
+void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
+ Value *PersonalityFn) {
+ ReturnInst *Ret = nullptr;
+ UnreachableInst *Unreached = nullptr;
+ for (BasicBlock &BB : *Handler) {
+ TerminatorInst *Terminator = BB.getTerminator();
+ // If we find an invoke, there is nothing to be done.
+ auto *II = dyn_cast<InvokeInst>(Terminator);
+ if (II)
+ return;
+ // If we've already recorded a return instruction, keep looking for invokes.
+ if (!Ret)
+ Ret = dyn_cast<ReturnInst>(Terminator);
+ // If we haven't recorded an unreachable instruction, try this terminator.
+ if (!Unreached)
+ Unreached = dyn_cast<UnreachableInst>(Terminator);
+ }
+
+ // If we got this far, the handler contains no invokes. We should have seen
+ // at least one return or unreachable instruction. We'll insert an invoke of
+ // llvm.donothing ahead of that instruction.
+ assert(Ret || Unreached);
+ TerminatorInst *Term;
+ if (Ret)
+ Term = Ret;
+ else
+ Term = Unreached;
+ BasicBlock *OldRetBB = Term->getParent();
+ BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT);
+ // SplitBlock adds an unconditional branch instruction at the end of the
+ // parent block. We want to replace that with an invoke call, so we can
+ // erase it now.
+ OldRetBB->getTerminator()->eraseFromParent();
+ BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn);
+ Function *F =
+ Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing);
+ InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB);
+}
+
+// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering
+// usually doesn't build LLVM IR, so that's probably the wrong place.
+Function *WinEHPrepare::createHandlerFunc(Type *RetTy, const Twine &Name,
+ Module *M, Value *&ParentFP) {
+ // x64 uses a two-argument prototype where the parent FP is the second
+ // argument. x86 uses no arguments, just the incoming EBP value.
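+ // As a sketch (the name is illustrative), an x64 catch handler looks like
+ //   define internal i8* @"parent.catch"(i8* %exn, i8* %parent.fp)
+ // while an x86 handler takes no arguments and recovers the parent frame
+ // with a call to @llvm.frameaddress(i32 1), as generated below.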
+ LLVMContext &Context = M->getContext();
+ FunctionType *FnType;
+ if (TheTriple.getArch() == Triple::x86_64) {
+ Type *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *ArgTys[2] = {Int8PtrType, Int8PtrType};
+ FnType = FunctionType::get(RetTy, ArgTys, false);
+ } else {
+ FnType = FunctionType::get(RetTy, None, false);
+ }
+
+ Function *Handler =
+ Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M);
+ BasicBlock *Entry = BasicBlock::Create(Context, "entry");
+ Handler->getBasicBlockList().push_front(Entry);
+ if (TheTriple.getArch() == Triple::x86_64) {
+ ParentFP = &(Handler->getArgumentList().back());
+ } else {
+ assert(M);
+ Function *FrameAddressFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
+ Value *Args[1] = {ConstantInt::get(Type::getInt32Ty(Context), 1)};
+ ParentFP = CallInst::Create(FrameAddressFn, Args, "parent_fp",
+ &Handler->getEntryBlock());
+ }
+ return Handler;
+}
+
+bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo) {
+ Module *M = SrcFn->getParent();
+ LLVMContext &Context = M->getContext();
+ Type *Int8PtrType = Type::getInt8PtrTy(Context);
+
+ // Create a new function to receive the handler contents.
+ Value *ParentFP;
+ Function *Handler;
+ if (Action->getType() == Catch) {
+ Handler = createHandlerFunc(Int8PtrType, SrcFn->getName() + ".catch", M,
+ ParentFP);
+ } else {
+ Handler = createHandlerFunc(Type::getVoidTy(Context),
+ SrcFn->getName() + ".cleanup", M, ParentFP);
+ }
+ HandlerToParentFP[Handler] = ParentFP;
+ Handler->addFnAttr("wineh-parent", SrcFn->getName());
+ BasicBlock *Entry = &Handler->getEntryBlock();
+
+ // Generate a standard prolog to setup the frame recovery structure.
+ IRBuilder<> Builder(Context);
+ Builder.SetInsertPoint(Entry);
+ Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
+
+ std::unique_ptr<WinEHCloningDirectorBase> Director;
+
+ ValueToValueMapTy VMap;
+
+ LandingPadMap &LPadMap = LPadMaps[LPad];
+ if (!LPadMap.isInitialized())
+ LPadMap.mapLandingPad(LPad);
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ Constant *Sel = CatchAction->getSelector();
+ Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo,
+ LPadMap, NestedLPtoOriginalLP, DT,
+ EHBlocks));
+ LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
+ ConstantInt::get(Type::getInt32Ty(Context), 1));
+ } else {
+ Director.reset(
+ new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap));
+ LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType),
+ UndefValue::get(Type::getInt32Ty(Context)));
+ }
+
+ SmallVector<ReturnInst *, 8> Returns;
+ ClonedCodeInfo OutlinedFunctionInfo;
+
+ // If the start block contains PHI nodes, we need to map them.
+ BasicBlock::iterator II = StartBB->begin();
+ while (auto *PN = dyn_cast<PHINode>(II)) {
+ bool Mapped = false;
+ // Look for PHI values that we have already mapped (such as the selector).
+ for (Value *Val : PN->incoming_values()) {
+ if (VMap.count(Val)) {
+ VMap[PN] = VMap[Val];
+ Mapped = true;
+ }
+ }
+ // If we didn't find a match for this value, map it as an undef.
+ if (!Mapped) {
+ VMap[PN] = UndefValue::get(PN->getType());
+ }
+ ++II;
+ }
+
+ // The landing pad value may be used by PHI nodes. It will ultimately be
+ // eliminated, but we need it in the map for intermediate handling.
+ VMap[LPad] = UndefValue::get(LPad->getType());
+
+ // Skip over PHIs and, if applicable, landingpad instructions.
+ II = StartBB->getFirstInsertionPt();
+
+ CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
+ /*ModuleLevelChanges=*/false, Returns, "",
+ &OutlinedFunctionInfo, Director.get());
+
+ // Move all the instructions in the cloned "entry" block into our entry block.
+ // Depending on how the parent function was laid out, the block that will
+ // correspond to the outlined entry block may not be the first block in the
+ // list. We can recognize it, however, as the cloned block which has no
+ // predecessors. Any other block wouldn't have been cloned if it didn't
+ // have a predecessor which was also cloned.
+ Function::iterator ClonedIt = std::next(Function::iterator(Entry));
+ while (!pred_empty(ClonedIt))
+ ++ClonedIt;
+ BasicBlock *ClonedEntryBB = ClonedIt;
+ assert(ClonedEntryBB);
+ Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList());
+ ClonedEntryBB->eraseFromParent();
+
+ // Make sure we can identify the handler's personality later.
+ addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn());
+
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ WinEHCatchDirector *CatchDirector =
+ reinterpret_cast<WinEHCatchDirector *>(Director.get());
+ CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
+ CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
+
+ // Look for blocks that are not part of the landing pad that we just
+ // outlined but terminate with a call to llvm.eh.endcatch and a
+ // branch to a block that is in the handler we just outlined.
+ // These blocks will be part of a nested landing pad that intends to
+ // return to an address in this handler. This case is best handled
+ // after both landing pads have been outlined, so for now we'll just
+ // save the association of the blocks in LPadTargetBlocks. The
+ // return instructions which are created from these branches will be
+ // replaced after all landing pads have been outlined.
+ for (const auto &MapEntry : VMap) {
+ // VMap maps all values and blocks that were just cloned, but dead
+ // blocks which were pruned will map to nullptr.
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ continue;
+ const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
+ for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
+ auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
+ continue;
+ BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
+ --II;
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ // This would indicate that a nested landing pad wants to return
+ // to a block that is outlined into two different handlers.
+ assert(!LPadTargetBlocks.count(MappedBB));
+ LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ }
+ }
+ }
+ } // End if (CatchAction)
+
+ Action->setHandlerBlockOrFunc(Handler);
+
+ return true;
+}
+
+/// This BB must end in a selector dispatch. All we need to do is pass the
+/// handler block to llvm.eh.actions and list it as a possible indirectbr
+/// target.
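+/// As a rough sketch, the resulting entry in the llvm.eh.actions call refers
+/// to a block address in the parent function rather than an outlined handler:
+///   i32 1, i8* <filter>, i32 -1, i8* blockaddress(@parent, %handler)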
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+ BasicBlock *StartBB) {
+ BasicBlock *HandlerBB;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+ if (Res) {
+ // If this was EH dispatch, this must be a conditional branch to the handler
+ // block.
+ // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+ // leading to crashes if some optimization hoists stuff here.
+ assert(CatchAction->getSelector() && HandlerBB &&
+ "expected catch EH dispatch");
+ } else {
+ // This must be a catch-all. Split the block after the landingpad.
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+ HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT);
+ }
+ IRBuilder<> Builder(HandlerBB->getFirstInsertionPt());
+ Function *EHCodeFn = Intrinsic::getDeclaration(
+ StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode);
+ Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode");
+ Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType());
+ Builder.CreateStore(Code, SEHExceptionCodeSlot);
+ CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+ TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+ CatchAction->setReturnTargets(Targets);
+}
+
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+ // Each instance of this class should only ever be used to map a single
+ // landing pad.
+ assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+ // If the landing pad has already been mapped, there's nothing more to do.
+ if (OriginLPad == LPad)
+ return;
+
+ OriginLPad = LPad;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts will have been promoted to reg values before
+ // this routine is called.
+ for (auto *U : LPad->users()) {
+ const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+ if (!Extract)
+ continue;
+ assert(Extract->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ unsigned int Idx = *(Extract->idx_begin());
+ assert((Idx == 0 || Idx == 1) &&
+ "Unexpected operation: extracting an unknown landing pad element");
+ if (Idx == 0) {
+ ExtractedEHPtrs.push_back(Extract);
+ } else if (Idx == 1) {
+ ExtractedSelectors.push_back(Extract);
+ }
+ }
+}
+
+bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const {
+ return BB->getLandingPadInst() == OriginLPad;
+}
+
+bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
+ if (Inst == OriginLPad)
+ return true;
+ for (auto *Extract : ExtractedEHPtrs) {
+ if (Inst == Extract)
+ return true;
+ }
+ for (auto *Extract : ExtractedSelectors) {
+ if (Inst == Extract)
+ return true;
+ }
+ return false;
+}
+
+void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue,
+ Value *SelectorValue) const {
+ // Remap all landing pad extract instructions to the specified values.
+ for (auto *Extract : ExtractedEHPtrs)
+ VMap[Extract] = EHPtrValue;
+ for (auto *Extract : ExtractedSelectors)
+ VMap[Extract] = SelectorValue;
+}
+
+static bool isFrameAddressCall(const Value *V) {
+ return match(const_cast<Value *>(V),
+ m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0)));
+}
+
+CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If this is one of the boilerplate landing pad instructions, skip it.
+ // The instruction will have already been remapped in VMap.
+ if (LPadMap.isLandingPadSpecificInst(Inst))
+ return CloningDirector::SkipInstruction;
+
+ // Nested landing pads that have not already been outlined will be cloned as
+ // stubs, with just the landingpad instruction and an unreachable instruction.
+ // When all landingpads have been outlined, we'll replace this with the
+ // llvm.eh.actions call and indirect branch created when the landing pad was
+ // outlined.
+ if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) {
+ return handleLandingPad(VMap, LPad, NewBB);
+ }
+
+ // Nested landing pads that have already been outlined will be cloned in their
+ // outlined form, but we need to intercept the ibr instruction to filter out
+ // targets that do not return to the handler we are outlining.
+ if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) {
+ return handleIndirectBr(VMap, IBr, NewBB);
+ }
+
+ if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
+ return handleInvoke(VMap, Invoke, NewBB);
+
+ if (auto *Resume = dyn_cast<ResumeInst>(Inst))
+ return handleResume(VMap, Resume, NewBB);
+
+ if (auto *Cmp = dyn_cast<CmpInst>(Inst))
+ return handleCompare(VMap, Cmp, NewBB);
+
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return handleBeginCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ return handleEndCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ return handleTypeIdFor(VMap, Inst, NewBB);
+
+ // When outlining llvm.frameaddress(i32 0), remap that to the second argument,
+ // which is the FP of the parent.
+ if (isFrameAddressCall(Inst)) {
+ VMap[Inst] = ParentFP;
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Continue with the default cloning behavior.
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad(
+ ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
+ // If the instruction after the landing pad is a call to llvm.eh.actions
+ // the landing pad has already been outlined. In this case, we should
+ // clone it because it may return to a block in the handler we are
+ // outlining now that would otherwise be unreachable. The landing pads
+ // are sorted before outlining begins to enable this case to work
+ // properly.
+ const Instruction *NextI = LPad->getNextNode();
+ if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>()))
+ return CloningDirector::CloneInstruction;
+
+ // If the landing pad hasn't been outlined yet, the landing pad we are
+ // outlining now does not dominate it and so it cannot return to a block
+ // in this handler. In that case, we can just insert a stub landing
+ // pad now and patch it up later.
+ Instruction *NewInst = LPad->clone();
+ if (LPad->hasName())
+ NewInst->setName(LPad->getName());
+ // Save this correlation for later processing.
+ NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad;
+ VMap[LPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // The argument to the call is some form of the first element of the
+ // landingpad aggregate value, but that doesn't matter. It isn't used
+ // here.
+ // The second argument is an outparameter where the exception object will be
+ // stored. Typically the exception object is a scalar, but it can be an
+ // aggregate when catching by value.
+ // FIXME: Leave something behind to indicate where the exception object lives
+ // for this handler. Should it be part of llvm.eh.actions?
+ assert(ExceptionObjectVar == nullptr && "Multiple calls to "
+ "llvm.eh.begincatch found while "
+ "outlining catch handler.");
+ ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(ExceptionObjectVar))
+ return CloningDirector::SkipInstruction;
+ assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() &&
+ "catch parameter is not static alloca");
+ Materializer.escapeCatchObject(ExceptionObjectVar);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = cast<IntrinsicInst>(Inst);
+ // It might be interesting to track whether or not we are inside a catch
+ // function, but that might make the algorithm more brittle than it needs
+ // to be.
+
+ // The end catch call can occur in one of two places: either in a
+ // landingpad block that is part of the catch handler's exception mechanism,
+ // or at the end of the catch block. However, a catch-all handler may call
+ // end catch from the original landing pad. If the call occurs in a nested
+ // landing pad block, we must skip it and continue so that the landing pad
+ // gets cloned.
+ auto *ParentBB = IntrinCall->getParent();
+ if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB))
+ return CloningDirector::SkipInstruction;
+
+ // If an end catch occurs anywhere else we want to terminate the handler
+ // with a return to the code that follows the endcatch call. If the
+ // next instruction is not an unconditional branch, we need to split the
+ // block to provide a clear target for the return instruction.
+ BasicBlock *ContinueBB;
+ auto Next = std::next(BasicBlock::const_iterator(IntrinCall));
+ const BranchInst *Branch = dyn_cast<BranchInst>(Next);
+ if (!Branch || !Branch->isUnconditional()) {
+ // We're interrupting the cloning process at this location, so the
+ // const_cast we're doing here will not cause a problem.
+ ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB),
+ const_cast<Instruction *>(cast<Instruction>(Next)));
+ } else {
+ ContinueBB = Branch->getSuccessor(0);
+ }
+
+ ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB);
+ ReturnTargets.push_back(ContinueBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = cast<IntrinsicInst>(Inst);
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector)
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ else
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr(
+ ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) {
+ // If this indirect branch is not part of a landing pad block, just clone it.
+ const BasicBlock *ParentBB = IBr->getParent();
+ if (!ParentBB->isLandingPad())
+ return CloningDirector::CloneInstruction;
+
+ // If it is part of a landing pad, we want to filter out target blocks
+ // that are not part of the handler we are outlining.
+ const LandingPadInst *LPad = ParentBB->getLandingPadInst();
+
+ // Save this correlation for later processing.
+ NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad;
+
+ // We should only get here for landing pads that have already been outlined.
+ assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>()));
+
+ // Copy the indirectbr, but only include targets that were previously
+ // identified as EH blocks and are dominated by the nested landing pad.
+ SetVector<const BasicBlock *> ReturnTargets;
+ for (unsigned I = 0, E = IBr->getNumDestinations(); I < E; ++I) {
+ auto *TargetBB = IBr->getDestination(I);
+ if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) &&
+ DT->dominates(ParentBB, TargetBB)) {
+ DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n");
+ ReturnTargets.insert(TargetBB);
+ }
+ }
+ IndirectBrInst *NewBranch =
+ IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()),
+ ReturnTargets.size(), NewBB);
+ for (auto *Target : ReturnTargets)
+ NewBranch->addDestination(const_cast<BasicBlock*>(Target));
+
+ // The operands and targets of the branch instruction are remapped later
+ // because it is a terminator. Tell the cloning code to clone the
+ // blocks we just added to the target list.
+ return CloningDirector::CloneSuccessors;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke, BasicBlock *NewBB) {
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume, BasicBlock *NewBB) {
+ // Resume instructions shouldn't be reachable from catch handlers.
+ // We still need to handle it, but it will be pruned.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap,
+ const CmpInst *Compare, BasicBlock *NewBB) {
+ const IntrinsicInst *IntrinCall = nullptr;
+ if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0));
+ } else if (match(Compare->getOperand(1),
+ m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1));
+ }
+ if (IntrinCall) {
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector->stripPointerCasts()) {
+ VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
+ } else {
+ VMap[Compare] = ConstantInt::get(SelectorIDType, 0);
+ }
+ return CloningDirector::SkipInstruction;
+ }
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad(
+ ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) {
+ // The MS runtime will terminate the process if an exception occurs in a
+ // cleanup handler, so we shouldn't encounter landing pads in the actual
+ // cleanup code, but they may appear in catch blocks. Depending on where
+ // we started cloning we may see one, but it will get dropped during dead
+ // block pruning.
+ Instruction *NewInst = new UnreachableInst(NewBB->getContext());
+ VMap[LPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Cleanup code may flow into catch blocks or the catch block may be part
+ // of a branch that will be optimized away. We'll insert a return
+ // instruction now, but it may be pruned before the cloning process is
+ // complete.
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Cleanup handlers nested within catch handlers may begin with a call to
+ // eh.endcatch. We can just ignore that instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If we encounter a selector comparison while cloning a cleanup handler,
+ // we want to stop cloning immediately. Anything after the dispatch
+ // will be outlined into a different handler.
+ BasicBlock *CatchHandler;
+ Constant *Selector;
+ BasicBlock *NextBB;
+ if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+ CatchHandler, Selector, NextBB)) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+ }
+ // If eh.typeid.for is called for any other reason, it can be ignored.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr(
+ ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) {
+ // No special handling is required for cleanup cloning.
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+ ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+ // All invokes in cleanup handlers can be replaced with calls.
+ SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+ Invoke->getName(), NewBB);
+ NewCall->setCallingConv(Invoke->getCallingConv());
+ NewCall->setAttributes(Invoke->getAttributes());
+ NewCall->setDebugLoc(Invoke->getDebugLoc());
+ VMap[Invoke] = NewCall;
+
+ // Remap the operands.
+ llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+ // The unwind destination won't be cloned into the new function, so
+ // we don't need to clean up its phi nodes.
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block.
+ return CloningDirector::CloneSuccessors;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+ ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction
+WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap,
+ const CmpInst *Compare, BasicBlock *NewBB) {
+ if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) ||
+ match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
+ return CloningDirector::SkipInstruction;
+ }
+ return CloningDirector::CloneInstruction;
+}
+
+WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
+ Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo)
+ : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
+ BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
+
+ // New allocas should be inserted in the entry block, but after the parent FP
+ // is established if it is an instruction.
+ Instruction *InsertPoint = EntryBB->getFirstInsertionPt();
+ if (auto *FPInst = dyn_cast<Instruction>(ParentFP))
+ InsertPoint = FPInst->getNextNode();
+ Builder.SetInsertPoint(EntryBB, InsertPoint);
+}
+
+Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
+ // If we're asked to materialize a static alloca, we temporarily create an
+ // alloca in the outlined function and add this to the FrameVarInfo map. When
+ // all the outlining is complete, we'll replace these temporary allocas with
+ // calls to llvm.framerecover.
+ if (auto *AV = dyn_cast<AllocaInst>(V)) {
+ assert(AV->isStaticAlloca() &&
+ "cannot materialize un-demoted dynamic alloca");
+ AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
+ Builder.Insert(NewAlloca, AV->getName());
+ FrameVarInfo[AV].push_back(NewAlloca);
+ return NewAlloca;
+ }
+
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ Function *Parent = isa<Instruction>(V)
+ ? cast<Instruction>(V)->getParent()->getParent()
+ : cast<Argument>(V)->getParent();
+ errs()
+ << "Failed to demote instruction used in exception handler of function "
+ << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n";
+ errs() << " " << *V << '\n';
+ report_fatal_error("WinEHPrepare failed to demote instruction");
+ }
+
+ // Don't materialize other values.
+ return nullptr;
+}
+
+void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
+ // Catch parameter objects have to live in the parent frame. When we see a use
+ // of a catch parameter, add a sentinel to the multimap to indicate that it's
+ // used from another handler. This will prevent us from trying to sink the
+ // alloca into the handler and ensure that the catch parameter is present in
+ // the call to llvm.frameescape.
+ FrameVarInfo[V].push_back(getCatchObjectSentinel());
+}
+
+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// any arbitrary control flow, but all paths through the cleanup code must
+ // eventually reach the next selector comparison, and no path can skip to a
+ // different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+//
+// If the landingpad instruction does not have a catch clause, we will assume
+// that any instructions other than selector comparisons and catch handlers can
+// be ignored. In practice, these will only be the boilerplate instructions.
+//
+// The catch handlers may also have any control structure, but we are only
+// interested in the start of the catch handlers, so we don't need to actually
+// follow the flow of the catch handlers. The start of the catch handlers can
+// be located from the compare instructions, but they can be skipped in the
+// flow by following the contrary branch.
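+// For example (hypothetical source), a landing pad for
+//   try { f(); } catch (int) { ... } catch (...) { ... }
+// in a scope with a local object needing destruction would have the shape:
+//   <destructor call>                  ; cleanup
+//   <eh.typeid.for compare and branch> ; selector comparison for int
+//   <catch handler for int>
+//   <catch-all handler>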
+void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
+ LandingPadActions &Actions) {
+ unsigned int NumClauses = LPad->getNumClauses();
+ unsigned int HandlersFound = 0;
+ BasicBlock *BB = LPad->getParent();
+
+ DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
+
+ if (NumClauses == 0) {
+ findCleanupHandlers(Actions, BB, nullptr);
+ return;
+ }
+
+ VisitedBlockSet VisitedBlocks;
+
+ while (HandlersFound != NumClauses) {
+ BasicBlock *NextBB = nullptr;
+
+ // Skip over filter clauses.
+ if (LPad->isFilter(HandlersFound)) {
+ ++HandlersFound;
+ continue;
+ }
+
+ // See if the clause we're looking for is a catch-all.
+ // If so, the catch begins immediately.
+ Constant *ExpectedSelector =
+ LPad->getClause(HandlersFound)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(ExpectedSelector)) {
+ // The catch all must occur last.
+ assert(HandlersFound == NumClauses - 1);
+
+ // There can be additional selector dispatches in the call chain that we
+ // need to ignore.
+ BasicBlock *CatchBlock = nullptr;
+ Constant *Selector;
+ while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+ DEBUG(dbgs() << " Found extra catch dispatch in block "
+ << CatchBlock->getName() << "\n");
+ BB = NextBB;
+ }
+
+ // Add the catch handler to the action list.
+ CatchHandler *Action = nullptr;
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ // If the CatchHandlerMap already has an entry for this BB, re-use it.
+ Action = CatchHandlerMap[BB];
+ assert(Action->getSelector() == ExpectedSelector);
+ } else {
+ // We don't expect a selector dispatch, but there may be a call to
+ // llvm.eh.begincatch, which separates catch handling code from cleanup
+ // code in the same control flow. The findCatchHandler call below looks
+ // for the begincatch intrinsic.
+ Action = findCatchHandler(BB, NextBB, VisitedBlocks);
+ if (Action) {
+ // For C++ EH, check if there is any interesting cleanup code before
+ // we begin the catch. This is important because cleanups cannot
+ // rethrow exceptions but code called from catches can. For SEH, it
+ // isn't important if some finally code before a catch-all is executed
+ // out of line or after recovering from the exception.
+ if (Personality == EHPersonality::MSVC_CXX)
+ findCleanupHandlers(Actions, BB, BB);
+ } else {
+ // If an action was not found, it means that the control flows
+ // directly into the catch-all handler and there is no cleanup code.
+ // That's an expected situation and we must create a catch action.
+ // Since this is a catch-all handler, the selector won't actually
+ // appear in the code anywhere. ExpectedSelector here is the constant
+ // null ptr that we got from the landing pad instruction.
+ Action = new CatchHandler(BB, ExpectedSelector, nullptr);
+ CatchHandlerMap[BB] = Action;
+ }
+ }
+ Actions.insertCatchHandler(Action);
+ DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
+ ++HandlersFound;
+
+ // Once we reach a catch-all, don't expect to hit a resume instruction.
+ BB = nullptr;
+ break;
+ }
+
+ CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+ assert(CatchAction);
+
+ // See if there is any interesting code executed before the dispatch.
+ findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
+
+ // When the source program contains multiple nested try blocks the catch
+ // handlers can get strung together in such a way that we can encounter
+ // a dispatch for a selector that we've already had a handler for.
+ if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) {
+ ++HandlersFound;
+
+ // Add the catch handler to the action list.
+ DEBUG(dbgs() << " Found catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ Actions.insertCatchHandler(CatchAction);
+ } else {
+ // Under some circumstances optimized IR will flow unconditionally into a
+ // handler block without checking the selector. This can only happen if
+ // the landing pad has a catch-all handler and the handler for the
+ // preceding catch clause is identical to the catch-all handler
+ // (typically an empty catch). In this case, the handler must be shared
+ // by all remaining clauses.
+ if (isa<ConstantPointerNull>(
+ CatchAction->getSelector()->stripPointerCasts())) {
+ DEBUG(dbgs() << " Applying early catch-all handler in block "
+ << CatchAction->getStartBlock()->getName()
+ << " to all remaining clauses.\n");
+ Actions.insertCatchHandler(CatchAction);
+ return;
+ }
+
+ DEBUG(dbgs() << " Found extra catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ }
+
+ // Move on to the block after the catch handler.
+ BB = NextBB;
+ }
+
+ // If we didn't wind up in a catch-all, see if there is any interesting code
+ // executed before the resume.
+ findCleanupHandlers(Actions, BB, BB);
+
+ // It's possible that some optimization moved code into a landingpad that
+ // wasn't previously being used for cleanup. If that happens, we need to
+ // execute that extra code from a cleanup handler.
+ if (Actions.includesCleanup() && !LPad->isCleanup())
+ LPad->setCleanup(true);
+}
+
+// This function searches starting with the input block for the next
+// block that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+ BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks) {
+ // If we've already found a catch handler for this block, use it.
+ // Call count() first to avoid creating a null entry for blocks
+ // we haven't seen before.
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+ NextBB = Action->getNextBB();
+ return Action;
+ }
+
+ // VisitedBlocks applies only to the current search. We still
+ // need to consider blocks that we've visited while mapping other
+ // landing pads.
+ VisitedBlocks.insert(BB);
+
+ BasicBlock *CatchBlock = nullptr;
+ Constant *Selector = nullptr;
+
+ // If this is the first time we've visited this block from any landing pad,
+ // look to see if it is a selector dispatch block.
+ if (!CatchHandlerMap.count(BB)) {
+ if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+ CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+ CatchHandlerMap[BB] = Action;
+ return Action;
+ }
+ // If we encounter a block containing an llvm.eh.begincatch before we
+ // find a selector dispatch block, the handler is assumed to be
+ // reached unconditionally. This happens for catch-all blocks, but
+ // it can also happen for other catch handlers that have been combined
+ // with the catch-all handler during optimization.
+ if (isCatchBlock(BB)) {
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext());
+ Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy);
+ CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr);
+ CatchHandlerMap[BB] = Action;
+ return Action;
+ }
+ }
+
+ // Visit each successor, looking for the dispatch.
+ // FIXME: We expect to find the dispatch quickly, so this will probably
+ // work better as a breadth first search.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+
+ CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+ if (Action)
+ return Action;
+ }
+ return nullptr;
+}
+
+// These are helper functions to combine repeated code from findCleanupHandlers.
+static void createCleanupHandler(LandingPadActions &Actions,
+ CleanupHandlerMapTy &CleanupHandlerMap,
+ BasicBlock *BB) {
+ CleanupHandler *Action = new CleanupHandler(BB);
+ CleanupHandlerMap[BB] = Action;
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << Action->getStartBlock()->getName() << "\n");
+}
+
+static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
+ Instruction *MaybeCall) {
+ // Look for finally blocks that Clang has already outlined for us.
+ // %fp = call i8* @llvm.frameaddress(i32 0)
+ // call void @"fin$parent"(iN 1, i8* %fp)
+ if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
+ MaybeCall = MaybeCall->getNextNode();
+ CallSite FinallyCall(MaybeCall);
+ if (!FinallyCall || FinallyCall.arg_size() != 2)
+ return CallSite();
+ if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
+ return CallSite();
+ if (!isFrameAddressCall(FinallyCall.getArgument(1)))
+ return CallSite();
+ return FinallyCall;
+}
+
+static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
+  // Skip blocks that contain only a single unconditional branch.
+ while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
+ auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
+ if (Br && Br->isUnconditional())
+ BB = Br->getSuccessor(0);
+ else
+ return BB;
+ }
+ return BB;
+}
+
+// This function searches, starting with the input block, for the next block
+// that contains code that is not part of a catch handler and would not be
+// eliminated during handler outlining.
+//
+void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
+ BasicBlock *StartBB, BasicBlock *EndBB) {
+  // Here we will skip over the following:
+  //
+  //   Landing pad prolog
+  //   Unconditional branches
+  //   Selector dispatch
+  //   Resume pattern
+  //
+  // Anything else marks the start of an interesting block.
+
+ BasicBlock *BB = StartBB;
+ // Anything other than an unconditional branch will kick us out of this loop
+ // one way or another.
+ while (BB) {
+ BB = followSingleUnconditionalBranches(BB);
+ // If we've already scanned this block, don't scan it again. If it is
+ // a cleanup block, there will be an action in the CleanupHandlerMap.
+ // If we've scanned it and it is not a cleanup block, there will be a
+ // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+ // be no entry in the CleanupHandlerMap. We must call count() first to
+ // avoid creating a null entry for blocks we haven't scanned.
+ if (CleanupHandlerMap.count(BB)) {
+ if (auto *Action = CleanupHandlerMap[BB]) {
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << Action->getStartBlock()->getName() << "\n");
+ // FIXME: This cleanup might chain into another, and we need to discover
+ // that.
+ return;
+ } else {
+ // Here we handle the case where the cleanup handler map contains a
+ // value for this block but the value is a nullptr. This means that
+ // we have previously analyzed the block and determined that it did
+ // not contain any cleanup code. Based on the earlier analysis, we
+        // know that the block must end in either an unconditional branch, a
+ // resume or a conditional branch that is predicated on a comparison
+ // with a selector. Either the resume or the selector dispatch
+ // would terminate the search for cleanup code, so the unconditional
+ // branch is the only case for which we might need to continue
+ // searching.
+ BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
+ if (SuccBB == BB || SuccBB == EndBB)
+ return;
+ BB = SuccBB;
+ continue;
+ }
+ }
+
+ // Create an entry in the cleanup handler map for this block. Initially
+ // we create an entry that says this isn't a cleanup block. If we find
+    // cleanup code below, this entry will be replaced.
+ CleanupHandlerMap[BB] = nullptr;
+
+ TerminatorInst *Terminator = BB->getTerminator();
+
+ // Landing pad blocks have extra instructions we need to accept.
+ LandingPadMap *LPadMap = nullptr;
+ if (BB->isLandingPad()) {
+ LandingPadInst *LPad = BB->getLandingPadInst();
+ LPadMap = &LPadMaps[LPad];
+ if (!LPadMap->isInitialized())
+ LPadMap->mapLandingPad(LPad);
+ }
+
+ // Look for the bare resume pattern:
+ // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
+ // resume { i8*, i32 } %lpad.val2
+ if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+ InsertValueInst *Insert1 = nullptr;
+ InsertValueInst *Insert2 = nullptr;
+ Value *ResumeVal = Resume->getOperand(0);
+ // If the resume value isn't a phi or landingpad value, it should be a
+ // series of insertions. Identify them so we can avoid them when scanning
+ // for cleanups.
+ if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
+ Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+ if (!Insert2)
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+ if (!Insert1)
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+ continue;
+ if (!Inst->hasOneUse() ||
+ (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
+ }
+ return;
+ }
+
+ BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ if (Branch && Branch->isConditional()) {
+ // Look for the selector dispatch.
+ // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+ // %matches = icmp eq i32 %sel, %2
+ // br i1 %matches, label %catch14, label %eh.resume
+ CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+ if (!Compare || !Compare->isEquality())
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Compare || Inst == Branch)
+ continue;
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ continue;
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
+ // The selector dispatch block should always terminate our search.
+ assert(BB == EndBB);
+ return;
+ }
+
+ if (isAsynchronousEHPersonality(Personality)) {
+      // If this is a landingpad block, skip past the landing pad prologue to
+      // find the first instruction that might begin an outlined finally call.
+ Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
+ if (LPadMap) {
+ while (MaybeCall != BB->getTerminator() &&
+ LPadMap->isLandingPadSpecificInst(MaybeCall))
+ MaybeCall = MaybeCall->getNextNode();
+ }
+
+ // Look for outlined finally calls.
+ if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
+ Function *Fin = FinallyCall.getCalledFunction();
+ assert(Fin && "outlined finally call should be direct");
+ auto *Action = new CleanupHandler(BB);
+ Action->setHandlerBlockOrFunc(Fin);
+ Actions.insertCleanupHandler(Action);
+ CleanupHandlerMap[BB] = Action;
+ DEBUG(dbgs() << " Found frontend-outlined finally call to "
+ << Fin->getName() << " in block "
+ << Action->getStartBlock()->getName() << "\n");
+
+ // Split the block if there were more interesting instructions and look
+ // for finally calls in the normal successor block.
+ BasicBlock *SuccBB = BB;
+ if (FinallyCall.getInstruction() != BB->getTerminator() &&
+ FinallyCall.getInstruction()->getNextNode() !=
+ BB->getTerminator()) {
+ SuccBB =
+ SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
+ } else {
+ if (FinallyCall.isInvoke()) {
+ SuccBB =
+ cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest();
+ } else {
+ SuccBB = BB->getUniqueSuccessor();
+ assert(SuccBB &&
+ "splitOutlinedFinallyCalls didn't insert a branch");
+ }
+ }
+ BB = SuccBB;
+ if (BB == EndBB)
+ return;
+ continue;
+ }
+ }
+
+ // Anything else is either a catch block or interesting cleanup code.
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ // Unconditional branches fall through to this loop.
+ if (Inst == Branch)
+ continue;
+ // If this is a catch block, there is no cleanup code to be found.
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return;
+      // If this is a nested landing pad, it may contain an endcatch call.
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ return;
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+ }
+
+ // Only unconditional branches in empty blocks should get this far.
+ assert(Branch && Branch->isUnconditional());
+ if (BB == EndBB)
+ return;
+ BB = Branch->getSuccessor(0);
+ }
+}
+
+// This is a public function, declared in WinEHFuncInfo.h; it is also
+// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
+void llvm::parseEHActions(
+ const IntrinsicInst *II,
+ SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
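+  // The intrinsic encodes a flat argument list; for example (illustrative
+  // only, not taken from actual IR):
+  //   call void (...) @llvm.eh.actions(
+  //       i32 1, i8* %type.info, i32 %ehobj.index, i8* @catch.handler,
+  //       i32 0, i8* @cleanup.handler)
+  // A leading 1 introduces a catch entry (selector, exception object frame
+  // index, handler); a leading 0 introduces a cleanup entry (handler only).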
+ for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
+ uint64_t ActionKind =
+ cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
+ if (ActionKind == /*catch=*/1) {
+ auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
+ ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
+ int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
+ Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
+ I += 4;
+ auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
+ /*NextBB=*/nullptr);
+ CH->setHandlerBlockOrFunc(Handler);
+ CH->setExceptionVarIndex(EHObjIndexVal);
+ Actions.push_back(std::move(CH));
+ } else if (ActionKind == 0) {
+ Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
+ I += 2;
+ auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
+ CH->setHandlerBlockOrFunc(Handler);
+ Actions.push_back(std::move(CH));
+ } else {
+ llvm_unreachable("Expected either a catch or cleanup handler!");
+ }
+ }
+ std::reverse(Actions.begin(), Actions.end());
+}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 2c9eff6..9314c9e 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index cbdde6a..8ae0543 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 3ec2cf5..39a7c77 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,9 +22,10 @@ void DWARFCompileUnit::dump(raw_ostream &OS) {
<< " (next unit at " << format("0x%08x", getNextUnitOffset())
<< ")\n";
- const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
- assert(CU && "Null Compile Unit?");
- CU->dump(OS, this, -1U);
+ if (const DWARFDebugInfoEntryMinimal *CU = getUnitDIE(false))
+ CU->dump(OS, this, -1U);
+ else
+ OS << "<compile unit can't be parsed!>\n\n";
}
// VTable anchor.
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 3844cde..cc2fbbd 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/DebugInfo/DWARFAcceleratorTable.h"
-#include "llvm/DebugInfo/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
@@ -140,9 +140,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
OS << "\n.debug_line contents:\n";
for (const auto &CU : compile_units()) {
savedAddressByteSize = CU->getAddressByteSize();
- unsigned stmtOffset =
- CU->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
- CU.get(), DW_AT_stmt_list, -1U);
+ const auto *CUDIE = CU->getUnitDIE();
+ if (CUDIE == nullptr)
+ continue;
+ unsigned stmtOffset = CUDIE->getAttributeValueAsSectionOffset(
+ CU.get(), DW_AT_stmt_list, -1U);
if (stmtOffset != -1U) {
DataExtractor lineData(getLineSection().Data, isLittleEndian(),
savedAddressByteSize);
@@ -321,13 +323,14 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
}
const DWARFLineTable *
-DWARFContext::getLineTableForUnit(DWARFUnit *cu) {
+DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!Line)
Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
-
+ const auto *UnitDIE = U->getUnitDIE();
+ if (UnitDIE == nullptr)
+ return nullptr;
unsigned stmtOffset =
- cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
- cu, DW_AT_stmt_list, -1U);
+ UnitDIE->getAttributeValueAsSectionOffset(U, DW_AT_stmt_list, -1U);
if (stmtOffset == -1U)
return nullptr; // No line table for this compile unit.
@@ -337,7 +340,7 @@ DWARFContext::getLineTableForUnit(DWARFUnit *cu) {
// We have to parse it first.
DataExtractor lineData(getLineSection().Data, isLittleEndian(),
- cu->getAddressByteSize());
+ U->getAddressByteSize());
return Line->getOrParseLineTable(lineData, stmtOffset);
}
@@ -537,7 +540,8 @@ static bool consumeCompressedDebugSectionHeader(StringRef &data,
return true;
}
-DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj)
+DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
+ const LoadedObjectInfo *L)
: IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()) {
for (const SectionRef &Section : Obj.sections()) {
@@ -551,7 +555,12 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj)
if (IsVirtual)
continue;
StringRef data;
- Section.getContents(data);
+
+      // Try to obtain an already relocated version of this section.
+      // Otherwise use the unrelocated section from the object file; we'll
+      // have to apply relocations ourselves later.
+      if (!L || !L->getLoadedSectionContents(name, data))
+ Section.getContents(data);
name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
@@ -619,7 +628,15 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj)
continue;
StringRef RelSecName;
+ StringRef RelSecData;
RelocatedSection->getName(RelSecName);
+
+ // If the section we're relocating was relocated already by the JIT,
+ // then we used the relocated version above, so we do not need to process
+ // relocations for it now.
+      if (L && L->getLoadedSectionContents(RelSecName, RelSecData))
+ continue;
+
RelSecName = RelSecName.substr(
RelSecName.find_first_not_of("._")); // Skip . and _ prefixes.
@@ -655,9 +672,33 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj)
uint64_t Type;
Reloc.getType(Type);
uint64_t SymAddr = 0;
+ uint64_t SectionLoadAddress = 0;
object::symbol_iterator Sym = Reloc.getSymbol();
- if (Sym != Obj.symbol_end())
+ object::section_iterator RSec = Reloc.getSection();
+
+ // First calculate the address of the symbol or section as it appears
+      // in the object file.
+ if (Sym != Obj.symbol_end()) {
Sym->getAddress(SymAddr);
+        // Also remember what section this symbol is in for later.
+ Sym->getSection(RSec);
+ } else if (RSec != Obj.section_end())
+ SymAddr = RSec->getAddress();
+
+ // If we are given load addresses for the sections, we need to adjust:
+ // SymAddr = (Address of Symbol Or Section in File) -
+ // (Address of Section in File) +
+ // (Load Address of Section)
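+      // For example (hypothetical numbers): a symbol at file address 0x1010
+      // in a section whose file address is 0x1000 and whose load address is
+      // 0x40000 relocates to SymAddr = 0x1010 - 0x1000 + 0x40000 = 0x40010.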
+ if (L != nullptr && RSec != Obj.section_end()) {
+      // RSec is now either the section being targeted or the section
+      // containing the symbol being targeted. In either case,
+ // we need to perform the same computation.
+ StringRef SecName;
+ RSec->getName(SecName);
+ SectionLoadAddress = L->getSectionLoadAddress(SecName);
+ if (SectionLoadAddress != 0)
+ SymAddr += SectionLoadAddress - RSec->getAddress();
+ }
object::RelocVisitor V(Obj);
object::RelocToApply R(V.visit(Type, Reloc, SymAddr));
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 6c9c9eb..e63e289 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index d4bff90..67589cd 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index b825d83..27a02c4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugAranges.h"
-#include "llvm/DebugInfo/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARFContext.h"
-#include "llvm/DebugInfo/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 968a0ce..1aa31be 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugFrame.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -7,8 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugFrame.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -179,19 +182,6 @@ void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset,
}
}
-
-void FrameEntry::dumpInstructions(raw_ostream &OS) const {
- // TODO: at the moment only instruction names are dumped. Expand this to
- // dump operands as well.
- for (const auto &Instr : Instructions) {
- uint8_t Opcode = Instr.Opcode;
- if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
- Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
- OS << " " << CallFrameString(Opcode) << ":\n";
- }
-}
-
-
namespace {
/// \brief DWARF Common Information Entry (CIE)
class CIE : public FrameEntry {
@@ -199,16 +189,21 @@ public:
// CIEs (and FDEs) are simply container classes, so the only sensible way to
// create them is by providing the full parsed contents in the constructor.
CIE(uint64_t Offset, uint64_t Length, uint8_t Version,
- SmallString<8> Augmentation, uint64_t CodeAlignmentFactor,
+ SmallString<8> Augmentation, uint8_t AddressSize,
+ uint8_t SegmentDescriptorSize, uint64_t CodeAlignmentFactor,
int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister)
: FrameEntry(FK_CIE, Offset, Length), Version(Version),
Augmentation(std::move(Augmentation)),
+ AddressSize(AddressSize),
+ SegmentDescriptorSize(SegmentDescriptorSize),
CodeAlignmentFactor(CodeAlignmentFactor),
DataAlignmentFactor(DataAlignmentFactor),
ReturnAddressRegister(ReturnAddressRegister) {}
- ~CIE() {
- }
+ ~CIE() override {}
+
+ uint64_t getCodeAlignmentFactor() const { return CodeAlignmentFactor; }
+ int64_t getDataAlignmentFactor() const { return DataAlignmentFactor; }
void dumpHeader(raw_ostream &OS) const override {
OS << format("%08x %08x %08x CIE",
@@ -216,6 +211,12 @@ public:
<< "\n";
OS << format(" Version: %d\n", Version);
OS << " Augmentation: \"" << Augmentation << "\"\n";
+ if (Version >= 4) {
+ OS << format(" Address size: %u\n",
+ (uint32_t)AddressSize);
+ OS << format(" Segment desc size: %u\n",
+ (uint32_t)SegmentDescriptorSize);
+ }
OS << format(" Code alignment factor: %u\n",
(uint32_t)CodeAlignmentFactor);
OS << format(" Data alignment factor: %d\n",
@@ -230,9 +231,11 @@ public:
}
private:
- /// The following fields are defined in section 6.4.1 of the DWARF standard v3
+ /// The following fields are defined in section 6.4.1 of the DWARF standard v4
uint8_t Version;
SmallString<8> Augmentation;
+ uint8_t AddressSize;
+ uint8_t SegmentDescriptorSize;
uint64_t CodeAlignmentFactor;
int64_t DataAlignmentFactor;
uint64_t ReturnAddressRegister;
@@ -246,13 +249,15 @@ public:
// an offset to the CIE (provided by parsing the FDE header). The CIE itself
// is obtained lazily once it's actually required.
FDE(uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset,
- uint64_t InitialLocation, uint64_t AddressRange)
+ uint64_t InitialLocation, uint64_t AddressRange,
+ CIE *Cie)
: FrameEntry(FK_FDE, Offset, Length), LinkedCIEOffset(LinkedCIEOffset),
InitialLocation(InitialLocation), AddressRange(AddressRange),
- LinkedCIE(nullptr) {}
+ LinkedCIE(Cie) {}
- ~FDE() {
- }
+ ~FDE() override {}
+
+ CIE *getLinkedCIE() const { return LinkedCIE; }
void dumpHeader(raw_ostream &OS) const override {
OS << format("%08x %08x %08x FDE ",
@@ -261,9 +266,6 @@ public:
(int32_t)LinkedCIEOffset,
(uint32_t)InitialLocation,
(uint32_t)InitialLocation + (uint32_t)AddressRange);
- if (LinkedCIE) {
- OS << format("%p\n", LinkedCIE);
- }
}
static bool classof(const FrameEntry *FE) {
@@ -277,8 +279,149 @@ private:
uint64_t AddressRange;
CIE *LinkedCIE;
};
+
+/// \brief Types of operands to CF instructions.
+enum OperandType {
+ OT_Unset,
+ OT_None,
+ OT_Address,
+ OT_Offset,
+ OT_FactoredCodeOffset,
+ OT_SignedFactDataOffset,
+ OT_UnsignedFactDataOffset,
+ OT_Register,
+ OT_Expression
+};
+
} // end anonymous namespace
+/// \brief Initialize the array describing the types of operands.
+static ArrayRef<OperandType[2]> getOperandTypes() {
+ static OperandType OpTypes[DW_CFA_restore+1][2];
+
+#define DECLARE_OP2(OP, OPTYPE0, OPTYPE1) \
+ do { \
+ OpTypes[OP][0] = OPTYPE0; \
+ OpTypes[OP][1] = OPTYPE1; \
+ } while (0)
+#define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None)
+#define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None)
+
+ DECLARE_OP1(DW_CFA_set_loc, OT_Address);
+ DECLARE_OP1(DW_CFA_advance_loc, OT_FactoredCodeOffset);
+ DECLARE_OP1(DW_CFA_advance_loc1, OT_FactoredCodeOffset);
+ DECLARE_OP1(DW_CFA_advance_loc2, OT_FactoredCodeOffset);
+ DECLARE_OP1(DW_CFA_advance_loc4, OT_FactoredCodeOffset);
+ DECLARE_OP1(DW_CFA_MIPS_advance_loc8, OT_FactoredCodeOffset);
+ DECLARE_OP2(DW_CFA_def_cfa, OT_Register, OT_Offset);
+ DECLARE_OP2(DW_CFA_def_cfa_sf, OT_Register, OT_SignedFactDataOffset);
+ DECLARE_OP1(DW_CFA_def_cfa_register, OT_Register);
+ DECLARE_OP1(DW_CFA_def_cfa_offset, OT_Offset);
+ DECLARE_OP1(DW_CFA_def_cfa_offset_sf, OT_SignedFactDataOffset);
+ DECLARE_OP1(DW_CFA_def_cfa_expression, OT_Expression);
+ DECLARE_OP1(DW_CFA_undefined, OT_Register);
+ DECLARE_OP1(DW_CFA_same_value, OT_Register);
+ DECLARE_OP2(DW_CFA_offset, OT_Register, OT_UnsignedFactDataOffset);
+ DECLARE_OP2(DW_CFA_offset_extended, OT_Register, OT_UnsignedFactDataOffset);
+ DECLARE_OP2(DW_CFA_offset_extended_sf, OT_Register, OT_SignedFactDataOffset);
+ DECLARE_OP2(DW_CFA_val_offset, OT_Register, OT_UnsignedFactDataOffset);
+ DECLARE_OP2(DW_CFA_val_offset_sf, OT_Register, OT_SignedFactDataOffset);
+ DECLARE_OP2(DW_CFA_register, OT_Register, OT_Register);
+ DECLARE_OP2(DW_CFA_expression, OT_Register, OT_Expression);
+ DECLARE_OP2(DW_CFA_val_expression, OT_Register, OT_Expression);
+ DECLARE_OP1(DW_CFA_restore, OT_Register);
+ DECLARE_OP1(DW_CFA_restore_extended, OT_Register);
+ DECLARE_OP0(DW_CFA_remember_state);
+ DECLARE_OP0(DW_CFA_restore_state);
+ DECLARE_OP0(DW_CFA_GNU_window_save);
+ DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset);
+ DECLARE_OP0(DW_CFA_nop);
+
+#undef DECLARE_OP0
+#undef DECLARE_OP1
+#undef DECLARE_OP2
+ return ArrayRef<OperandType[2]>(&OpTypes[0], DW_CFA_restore+1);
+}
+
+static ArrayRef<OperandType[2]> OpTypes = getOperandTypes();
+
+/// \brief Print \p Opcode's operand number \p OperandIdx which has
+/// value \p Operand.
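+/// Factored operand types must be scaled by the linked CIE's alignment
+/// factors; e.g. DW_CFA_advance_loc with operand 2 and a code alignment
+/// factor of 4 advances the code location by 8 bytes.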
+static void printOperand(raw_ostream &OS, uint8_t Opcode, unsigned OperandIdx,
+ uint64_t Operand, uint64_t CodeAlignmentFactor,
+ int64_t DataAlignmentFactor) {
+ assert(OperandIdx < 2);
+ OperandType Type = OpTypes[Opcode][OperandIdx];
+
+ switch (Type) {
+ case OT_Unset:
+ OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to";
+ if (const char *OpcodeName = CallFrameString(Opcode))
+ OS << " " << OpcodeName;
+ else
+ OS << format(" Opcode %x", Opcode);
+ break;
+ case OT_None:
+ break;
+ case OT_Address:
+ OS << format(" %" PRIx64, Operand);
+ break;
+ case OT_Offset:
+    // The offsets are all encoded in an unsigned form, but in practice
+    // consumers use them signed. This is almost certainly a legacy of the
+    // lack of signed variants in the early DWARF standards.
+ OS << format(" %+" PRId64, int64_t(Operand));
+ break;
+ case OT_FactoredCodeOffset: // Always Unsigned
+ if (CodeAlignmentFactor)
+ OS << format(" %" PRId64, Operand * CodeAlignmentFactor);
+ else
+ OS << format(" %" PRId64 "*code_alignment_factor" , Operand);
+ break;
+ case OT_SignedFactDataOffset:
+ if (DataAlignmentFactor)
+ OS << format(" %" PRId64, int64_t(Operand) * DataAlignmentFactor);
+ else
+ OS << format(" %" PRId64 "*data_alignment_factor" , int64_t(Operand));
+ break;
+ case OT_UnsignedFactDataOffset:
+ if (DataAlignmentFactor)
+ OS << format(" %" PRId64, Operand * DataAlignmentFactor);
+ else
+ OS << format(" %" PRId64 "*data_alignment_factor" , Operand);
+ break;
+ case OT_Register:
+ OS << format(" reg%" PRId64, Operand);
+ break;
+ case OT_Expression:
+ OS << " expression";
+ break;
+ }
+}
+
+void FrameEntry::dumpInstructions(raw_ostream &OS) const {
+ uint64_t CodeAlignmentFactor = 0;
+ int64_t DataAlignmentFactor = 0;
+ const CIE *Cie = dyn_cast<CIE>(this);
+
+ if (!Cie)
+ Cie = cast<FDE>(this)->getLinkedCIE();
+ if (Cie) {
+ CodeAlignmentFactor = Cie->getCodeAlignmentFactor();
+ DataAlignmentFactor = Cie->getDataAlignmentFactor();
+ }
+
+ for (const auto &Instr : Instructions) {
+ uint8_t Opcode = Instr.Opcode;
+ if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
+ Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
+ OS << " " << CallFrameString(Opcode) << ":";
+ for (unsigned i = 0; i < Instr.Ops.size(); ++i)
+ printOperand(OS, Opcode, i, Instr.Ops[i], CodeAlignmentFactor,
+ DataAlignmentFactor);
+ OS << '\n';
+ }
+}
DWARFDebugFrame::DWARFDebugFrame() {
}
@@ -299,6 +442,7 @@ static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
void DWARFDebugFrame::parse(DataExtractor Data) {
uint32_t Offset = 0;
+ DenseMap<uint32_t, CIE *> CIEs;
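+  // Map each CIE to its start offset so that FDEs parsed later can be linked
+  // to their CIE; dumpInstructions needs that link to scale factored operands
+  // by the CIE's alignment factors.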
while (Data.isValidOffset(Offset)) {
uint32_t StartOffset = Offset;
@@ -328,19 +472,21 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID);
if (IsCIE) {
- // Note: this is specifically DWARFv3 CIE header structure. It was
- // changed in DWARFv4. We currently don't support reading DWARFv4
- // here because LLVM itself does not emit it (and LLDB doesn't
- // support it either).
uint8_t Version = Data.getU8(&Offset);
const char *Augmentation = Data.getCStr(&Offset);
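+      // DWARFv4 added explicit address size and segment descriptor size
+      // fields to the CIE header; for earlier versions fall back to the
+      // extractor's default address size and assume no segment descriptor.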
+      uint8_t AddressSize =
+          Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
+ Data.setAddressSize(AddressSize);
+ uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset);
uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
- Entries.emplace_back(new CIE(StartOffset, Length, Version,
- StringRef(Augmentation), CodeAlignmentFactor,
- DataAlignmentFactor, ReturnAddressRegister));
+ auto Cie = make_unique<CIE>(StartOffset, Length, Version,
+ StringRef(Augmentation), AddressSize,
+ SegmentDescriptorSize, CodeAlignmentFactor,
+ DataAlignmentFactor, ReturnAddressRegister);
+ CIEs[StartOffset] = Cie.get();
+ Entries.emplace_back(std::move(Cie));
} else {
// FDE
uint64_t CIEPointer = Id;
@@ -348,7 +494,8 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
uint64_t AddressRange = Data.getAddress(&Offset);
Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
- InitialLocation, AddressRange));
+ InitialLocation, AddressRange,
+ CIEs[CIEPointer]));
}
Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset);
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 01ff52f..5abbde4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -8,11 +8,11 @@
//===----------------------------------------------------------------------===//
#include "SyntaxHighlighting.h"
-#include "llvm/DebugInfo/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARFContext.h"
-#include "llvm/DebugInfo/DWARFDebugAbbrev.h"
-#include "llvm/DebugInfo/DWARFDebugInfoEntry.h"
-#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
@@ -161,14 +161,15 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
// We have dumped the attribute raw value. For some attributes
// having both the raw value and the pretty-printed value is
// interesting. These attributes are handled below.
- if ((attr == DW_AT_specification || attr == DW_AT_abstract_origin) &&
- // The signature references aren't handled.
- formValue.getForm() != DW_FORM_ref_sig8) {
- uint32_t Ref = formValue.getAsReference(u).getValue();
- DWARFDebugInfoEntryMinimal DIE;
- if (const DWARFUnit *RefU = findUnitAndExtractFast(DIE, u, &Ref))
- if (const char *Ref = DIE.getName(RefU, DINameKind::LinkageName))
- OS << " \"" << Ref << '\"';
+ if (attr == DW_AT_specification || attr == DW_AT_abstract_origin) {
+ Optional<uint64_t> Ref = formValue.getAsReference(u);
+ if (Ref.hasValue()) {
+ uint32_t RefOffset = Ref.getValue();
+ DWARFDebugInfoEntryMinimal DIE;
+ if (const DWARFUnit *RefU = findUnitAndExtractFast(DIE, u, &RefOffset))
+ if (const char *Name = DIE.getName(RefU, DINameKind::LinkageName))
+ OS << " \"" << Name << '\"';
+ }
} else if (attr == DW_AT_APPLE_property_attribute) {
if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
dumpApplePropertyAttribute(OS, *OptVal);
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 365a522..b63af6a 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index bc0133d..cd6fbef 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
@@ -67,8 +67,7 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
// A single location description describing the location of the object...
StringRef str = data.getData().substr(Offset, Bytes);
Offset += Bytes;
- E.Loc.reserve(str.size());
- std::copy(str.begin(), str.end(), std::back_inserter(E.Loc));
+ E.Loc.append(str.begin(), str.end());
Loc.Entries.push_back(std::move(E));
}
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index ce60deb..d5df688 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugRangeList.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index d784bfc..53a676e 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -10,14 +10,15 @@
#include "SyntaxHighlighting.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARFContext.h"
-#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <climits>
using namespace llvm;
using namespace dwarf;
using namespace syntax;
@@ -112,21 +113,22 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() &&
DWARF4FormClasses[Form] == FC)
return true;
- // Check DW_FORM_ref_sig8 from DWARF4.
- if (Form == DW_FORM_ref_sig8)
+ // Check more forms from DWARF4 and DWARF5 proposals.
+ switch (Form) {
+ case DW_FORM_ref_sig8:
+ case DW_FORM_GNU_ref_alt:
return (FC == FC_Reference);
- // Check for some DWARF5 forms.
- if (Form == DW_FORM_GNU_addr_index)
+ case DW_FORM_GNU_addr_index:
return (FC == FC_Address);
- if (Form == DW_FORM_GNU_str_index)
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_strp_alt:
return (FC == FC_String);
+ }
// In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
// Don't check for DWARF version here, as some producers may still do this
// by mistake.
- if ((Form == DW_FORM_data4 || Form == DW_FORM_data8) &&
- FC == FC_SectionOffset)
- return true;
- return false;
+ return (Form == DW_FORM_data4 || Form == DW_FORM_data8) &&
+ FC == FC_SectionOffset;
}
bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
@@ -198,15 +200,6 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
case DW_FORM_sdata:
Value.sval = data.getSLEB128(offset_ptr);
break;
- case DW_FORM_strp: {
- Value.uval = data.getU32(offset_ptr);
- if (!cu)
- break;
- RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
- if (AI != cu->getRelocMap()->end())
- Value.uval += AI->second.second;
- break;
- }
case DW_FORM_udata:
case DW_FORM_ref_udata:
Value.uval = data.getULEB128(offset_ptr);
@@ -218,14 +211,18 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
Form = data.getULEB128(offset_ptr);
indirect = true;
break;
- case DW_FORM_sec_offset: {
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt: {
// FIXME: This is 64-bit for DWARF64.
Value.uval = data.getU32(offset_ptr);
if (!cu)
break;
- RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
+ RelocAddrMap::const_iterator AI =
+ cu->getRelocMap()->find(*offset_ptr - 4);
if (AI != cu->getRelocMap()->end())
- Value.uval += AI->second.second;
+ Value.uval += AI->second.second;
break;
}
case DW_FORM_flag_present:
@@ -322,7 +319,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
return true;
// 4 byte values
- case DW_FORM_strp:
case DW_FORM_data4:
case DW_FORM_ref4:
*offset_ptr += 4;
@@ -352,6 +348,9 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
// FIXME: 4 for DWARF32, 8 for DWARF64.
case DW_FORM_sec_offset:
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
*offset_ptr += 4;
return true;
@@ -423,24 +422,17 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
case DW_FORM_udata: OS << Value.uval; break;
case DW_FORM_strp: {
OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
- Optional<const char *> DbgStr = getAsCString(cu);
- if (DbgStr.hasValue()) {
- raw_ostream &COS = WithColor(OS, syntax::String);
- COS << '"';
- COS.write_escaped(DbgStr.getValue());
- COS << '"';
- }
+ dumpString(OS, cu);
break;
}
case DW_FORM_GNU_str_index: {
OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
- Optional<const char *> DbgStr = getAsCString(cu);
- if (DbgStr.hasValue()) {
- raw_ostream &COS = WithColor(OS, syntax::String);
- COS << '"';
- COS.write_escaped(DbgStr.getValue());
- COS << '"';
- }
+ dumpString(OS, cu);
+ break;
+ }
+ case DW_FORM_GNU_strp_alt: {
+ OS << format("alt indirect string, offset: 0x%" PRIx64 "", uvalue);
+ dumpString(OS, cu);
break;
}
case DW_FORM_ref_addr:
@@ -466,6 +458,9 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
cu_relative_offset = true;
OS << format("cu + 0x%" PRIx64, uvalue);
break;
+ case DW_FORM_GNU_ref_alt:
+ OS << format("<alt 0x%" PRIx64 ">", uvalue);
+ break;
// All DW_FORM_indirect attributes should be resolved prior to calling
// this function
@@ -491,12 +486,23 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
}
}
+void DWARFFormValue::dumpString(raw_ostream &OS, const DWARFUnit *U) const {
+ Optional<const char *> DbgStr = getAsCString(U);
+ if (DbgStr.hasValue()) {
+ raw_ostream &COS = WithColor(OS, syntax::String);
+ COS << '"';
+ COS.write_escaped(DbgStr.getValue());
+ COS << '"';
+ }
+}
+
Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const {
if (!isFormClass(FC_String))
return None;
if (Form == DW_FORM_string)
return Value.cstr;
- if (!U)
+ // FIXME: Add support for DW_FORM_GNU_strp_alt
+ if (Form == DW_FORM_GNU_strp_alt || U == nullptr)
return None;
uint32_t Offset = Value.uval;
if (Form == DW_FORM_GNU_str_index) {
@@ -538,9 +544,9 @@ Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const {
return Value.uval + U->getOffset();
case DW_FORM_ref_addr:
return Value.uval;
- // FIXME: Add proper support for DW_FORM_ref_sig8
+ // FIXME: Add proper support for DW_FORM_ref_sig8 and DW_FORM_GNU_ref_alt.
default:
- return Value.uval;
+ return None;
}
}
@@ -557,6 +563,24 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
return Value.uval;
}
+Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
+ if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
+ (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval))
+ return None;
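+  // Fixed-size data forms are sign-extended from their natural width below;
+  // e.g. a DW_FORM_data1 value of 0xFF is returned as -1.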
+ switch (Form) {
+ case DW_FORM_data4:
+ return int32_t(Value.uval);
+ case DW_FORM_data2:
+ return int16_t(Value.uval);
+ case DW_FORM_data1:
+ return int8_t(Value.uval);
+ case DW_FORM_sdata:
+ case DW_FORM_data8:
+ default:
+ return Value.sval;
+ }
+}
+
Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
return None;
diff --git a/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 6e93e28..766e8ac 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFTypeUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +33,8 @@ void DWARFTypeUnit::dump(raw_ostream &OS) {
<< " (next unit at " << format("0x%08x", getNextUnitOffset())
<< ")\n";
- const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
- assert(CU && "Null Compile Unit?");
- CU->dump(OS, this, -1U);
+ if (const DWARFDebugInfoEntryMinimal *TU = getUnitDIE(false))
+ TU->dump(OS, this, -1U);
+ else
+ OS << "<type unit can't be parsed!>\n\n";
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 395a179..348476d 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARFUnit.h"
-#include "llvm/DebugInfo/DWARFContext.h"
-#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Path.h"
#include <cstdio>
@@ -79,10 +79,7 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
return false;
Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
- if (Abbrevs == nullptr)
- return false;
-
- return true;
+ return Abbrevs != nullptr;
}
bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
@@ -256,7 +253,7 @@ DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath)
return;
DWOFile = std::move(Obj.get());
DWOContext.reset(
- cast<DWARFContext>(DIContext::getDWARFContext(*DWOFile.getBinary())));
+ cast<DWARFContext>(new DWARFContextInMemory(*DWOFile.getBinary())));
if (DWOContext->getNumDWOCompileUnits() > 0)
DWOU = DWOContext->getDWOCompileUnitAtIndex(0);
}
@@ -310,8 +307,11 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
}
void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
- // First, check if CU DIE describes address ranges for the unit.
- const auto &CUDIERanges = getCompileUnitDIE()->getAddressRanges(this);
+ const auto *U = getUnitDIE();
+ if (U == nullptr)
+ return;
+ // First, check if unit DIE describes address ranges for the whole unit.
+ const auto &CUDIERanges = U->getAddressRanges(this);
if (!CUDIERanges.empty()) {
CURanges.insert(CURanges.end(), CUDIERanges.begin(), CUDIERanges.end());
return;
diff --git a/contrib/llvm/lib/DebugInfo/SyntaxHighlighting.cpp b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
index a6b4c65..a6b4c65 100644
--- a/contrib/llvm/lib/DebugInfo/SyntaxHighlighting.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
diff --git a/contrib/llvm/lib/DebugInfo/SyntaxHighlighting.h b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 946a313..946a313 100644
--- a/contrib/llvm/lib/DebugInfo/SyntaxHighlighting.h
+++ b/contrib/llvm/lib/DebugInfo/DWARF/SyntaxHighlighting.h
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/module.modulemap b/contrib/llvm/lib/DebugInfo/DWARF/module.modulemap
new file mode 100644
index 0000000..c2f624f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/module.modulemap
@@ -0,0 +1 @@
+module DebugInfoDWARF { requires cplusplus umbrella "." module * { export * } }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
new file mode 100644
index 0000000..e0e1b27
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
@@ -0,0 +1,73 @@
+//===- DIADataStream.cpp - DIA implementation of IPDBDataStream -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+
+DIADataStream::DIADataStream(CComPtr<IDiaEnumDebugStreamData> DiaStreamData)
+ : StreamData(DiaStreamData) {}
+
+uint32_t DIADataStream::getRecordCount() const {
+ LONG Count = 0;
+ return (S_OK == StreamData->get_Count(&Count)) ? Count : 0;
+}
+
+std::string DIADataStream::getName() const {
+ CComBSTR Name16;
+ if (S_OK != StreamData->get_name(&Name16))
+ return std::string();
+
+ std::string Name8;
+ llvm::ArrayRef<char> Name16Bytes(reinterpret_cast<char *>(Name16.m_str),
+ Name16.ByteLength());
+ if (!llvm::convertUTF16ToUTF8String(Name16Bytes, Name8))
+ return std::string();
+ return Name8;
+}
+
+llvm::Optional<DIADataStream::RecordType>
+DIADataStream::getItemAtIndex(uint32_t Index) const {
+ RecordType Record;
+ DWORD RecordSize = 0;
+ StreamData->Item(Index, 0, &RecordSize, nullptr);
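+  // The first call, with a zero-length buffer, only queries the record size;
+  // the second call below fetches the bytes into a properly sized buffer.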
+ if (RecordSize == 0)
+ return llvm::Optional<RecordType>();
+
+ Record.resize(RecordSize);
+ if (S_OK != StreamData->Item(Index, RecordSize, &RecordSize, &Record[0]))
+ return llvm::Optional<RecordType>();
+ return Record;
+}
+
+bool DIADataStream::getNext(RecordType &Record) {
+ Record.clear();
+ DWORD RecordSize = 0;
+ ULONG CountFetched = 0;
+ StreamData->Next(1, 0, &RecordSize, nullptr, &CountFetched);
+ if (RecordSize == 0)
+ return false;
+
+ Record.resize(RecordSize);
+  if (S_OK !=
+      StreamData->Next(1, RecordSize, &RecordSize, &Record[0], &CountFetched))
+    return false;
+ return true;
+}
+
+void DIADataStream::reset() { StreamData->Reset(); }
+
+DIADataStream *DIADataStream::clone() const {
+ CComPtr<IDiaEnumDebugStreamData> EnumeratorClone;
+ if (S_OK != StreamData->Clone(&EnumeratorClone))
+ return nullptr;
+
+ return new DIADataStream(EnumeratorClone);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
new file mode 100644
index 0000000..23c6489
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -0,0 +1,53 @@
+//==- DIAEnumDebugStreams.cpp - DIA Debug Stream Enumerator impl -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+
+using namespace llvm;
+
+DIAEnumDebugStreams::DIAEnumDebugStreams(
+ CComPtr<IDiaEnumDebugStreams> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumDebugStreams::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<IPDBDataStream>
+DIAEnumDebugStreams::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaEnumDebugStreamData> Item;
+ VARIANT VarIndex;
+ VarIndex.vt = VT_I4;
+ VarIndex.lVal = Index;
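+  // IDiaEnumDebugStreams::Item accepts a VARIANT holding either a numeric
+  // index or a stream name; here we always index numerically.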
+ if (S_OK != Enumerator->Item(VarIndex, &Item))
+ return nullptr;
+
+ return std::unique_ptr<IPDBDataStream>(new DIADataStream(Item));
+}
+
+std::unique_ptr<IPDBDataStream> DIAEnumDebugStreams::getNext() {
+ CComPtr<IDiaEnumDebugStreamData> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ return std::unique_ptr<IPDBDataStream>(new DIADataStream(Item));
+}
+
+void DIAEnumDebugStreams::reset() { Enumerator->Reset(); }
+
+DIAEnumDebugStreams *DIAEnumDebugStreams::clone() const {
+ CComPtr<IDiaEnumDebugStreams> EnumeratorClone;
+ if (S_OK != Enumerator->Clone(&EnumeratorClone))
+ return nullptr;
+ return new DIAEnumDebugStreams(EnumeratorClone);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
new file mode 100644
index 0000000..32a9af2
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -0,0 +1,50 @@
+//==- DIAEnumLineNumbers.cpp - DIA Line Number Enumerator impl ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
+
+using namespace llvm;
+
+DIAEnumLineNumbers::DIAEnumLineNumbers(
+ CComPtr<IDiaEnumLineNumbers> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumLineNumbers::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<IPDBLineNumber>
+DIAEnumLineNumbers::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaLineNumber> Item;
+ if (S_OK != Enumerator->Item(Index, &Item))
+ return nullptr;
+
+ return std::unique_ptr<IPDBLineNumber>(new DIALineNumber(Item));
+}
+
+std::unique_ptr<IPDBLineNumber> DIAEnumLineNumbers::getNext() {
+ CComPtr<IDiaLineNumber> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ return std::unique_ptr<IPDBLineNumber>(new DIALineNumber(Item));
+}
+
+void DIAEnumLineNumbers::reset() { Enumerator->Reset(); }
+
+DIAEnumLineNumbers *DIAEnumLineNumbers::clone() const {
+ CComPtr<IDiaEnumLineNumbers> EnumeratorClone;
+ if (S_OK != Enumerator->Clone(&EnumeratorClone))
+ return nullptr;
+ return new DIAEnumLineNumbers(EnumeratorClone);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
new file mode 100644
index 0000000..1a94610
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -0,0 +1,50 @@
+//==- DIAEnumSourceFiles.cpp - DIA Source File Enumerator impl ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+
+using namespace llvm;
+
+DIAEnumSourceFiles::DIAEnumSourceFiles(
+ const DIASession &PDBSession, CComPtr<IDiaEnumSourceFiles> DiaEnumerator)
+ : Session(PDBSession), Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumSourceFiles::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<IPDBSourceFile>
+DIAEnumSourceFiles::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaSourceFile> Item;
+ if (S_OK != Enumerator->Item(Index, &Item))
+ return nullptr;
+
+ return std::unique_ptr<IPDBSourceFile>(new DIASourceFile(Session, Item));
+}
+
+std::unique_ptr<IPDBSourceFile> DIAEnumSourceFiles::getNext() {
+ CComPtr<IDiaSourceFile> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ return std::unique_ptr<IPDBSourceFile>(new DIASourceFile(Session, Item));
+}
+
+void DIAEnumSourceFiles::reset() { Enumerator->Reset(); }
+
+DIAEnumSourceFiles *DIAEnumSourceFiles::clone() const {
+ CComPtr<IDiaEnumSourceFiles> EnumeratorClone;
+ if (S_OK != Enumerator->Clone(&EnumeratorClone))
+ return nullptr;
+ return new DIAEnumSourceFiles(Session, EnumeratorClone);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
new file mode 100644
index 0000000..6754d9a
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -0,0 +1,54 @@
+//==- DIAEnumSymbols.cpp - DIA Symbol Enumerator impl ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
+#include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+
+using namespace llvm;
+
+DIAEnumSymbols::DIAEnumSymbols(const DIASession &PDBSession,
+ CComPtr<IDiaEnumSymbols> DiaEnumerator)
+ : Session(PDBSession), Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumSymbols::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<PDBSymbol>
+DIAEnumSymbols::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaSymbol> Item;
+ if (S_OK != Enumerator->Item(Index, &Item))
+ return nullptr;
+
+ std::unique_ptr<DIARawSymbol> RawSymbol(new DIARawSymbol(Session, Item));
+ return std::unique_ptr<PDBSymbol>(PDBSymbol::create(Session, std::move(RawSymbol)));
+}
+
+std::unique_ptr<PDBSymbol> DIAEnumSymbols::getNext() {
+ CComPtr<IDiaSymbol> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ std::unique_ptr<DIARawSymbol> RawSymbol(new DIARawSymbol(Session, Item));
+ return std::unique_ptr<PDBSymbol>(
+ PDBSymbol::create(Session, std::move(RawSymbol)));
+}
+
+void DIAEnumSymbols::reset() { Enumerator->Reset(); }
+
+DIAEnumSymbols *DIAEnumSymbols::clone() const {
+ CComPtr<IDiaEnumSymbols> EnumeratorClone;
+ if (S_OK != Enumerator->Clone(&EnumeratorClone))
+ return nullptr;
+ return new DIAEnumSymbols(Session, EnumeratorClone);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
new file mode 100644
index 0000000..c5577f1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
@@ -0,0 +1,75 @@
+//===- DIALineNumber.cpp - DIA implementation of IPDBLineNumber -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
+
+using namespace llvm;
+
+DIALineNumber::DIALineNumber(CComPtr<IDiaLineNumber> DiaLineNumber)
+ : LineNumber(DiaLineNumber) {}
+
+uint32_t DIALineNumber::getLineNumber() const {
+ DWORD Line = 0;
+ return (S_OK == LineNumber->get_lineNumber(&Line)) ? Line : 0;
+}
+
+uint32_t DIALineNumber::getLineNumberEnd() const {
+ DWORD LineEnd = 0;
+ return (S_OK == LineNumber->get_lineNumberEnd(&LineEnd)) ? LineEnd : 0;
+}
+
+uint32_t DIALineNumber::getColumnNumber() const {
+ DWORD Column = 0;
+ return (S_OK == LineNumber->get_columnNumber(&Column)) ? Column : 0;
+}
+
+uint32_t DIALineNumber::getColumnNumberEnd() const {
+ DWORD ColumnEnd = 0;
+ return (S_OK == LineNumber->get_columnNumberEnd(&ColumnEnd)) ? ColumnEnd : 0;
+}
+
+uint32_t DIALineNumber::getAddressSection() const {
+ DWORD Section = 0;
+ return (S_OK == LineNumber->get_addressSection(&Section)) ? Section : 0;
+}
+
+uint32_t DIALineNumber::getAddressOffset() const {
+ DWORD Offset = 0;
+ return (S_OK == LineNumber->get_addressOffset(&Offset)) ? Offset : 0;
+}
+
+uint32_t DIALineNumber::getRelativeVirtualAddress() const {
+ DWORD RVA = 0;
+ return (S_OK == LineNumber->get_relativeVirtualAddress(&RVA)) ? RVA : 0;
+}
+
+uint64_t DIALineNumber::getVirtualAddress() const {
+ ULONGLONG Addr = 0;
+ return (S_OK == LineNumber->get_virtualAddress(&Addr)) ? Addr : 0;
+}
+
+uint32_t DIALineNumber::getLength() const {
+ DWORD Length = 0;
+ return (S_OK == LineNumber->get_length(&Length)) ? Length : 0;
+}
+
+uint32_t DIALineNumber::getSourceFileId() const {
+ DWORD Id = 0;
+ return (S_OK == LineNumber->get_sourceFileId(&Id)) ? Id : 0;
+}
+
+uint32_t DIALineNumber::getCompilandId() const {
+ DWORD Id = 0;
+ return (S_OK == LineNumber->get_compilandId(&Id)) ? Id : 0;
+}
+
+bool DIALineNumber::isStatement() const {
+ BOOL Statement = 0;
+ return (S_OK == LineNumber->get_statement(&Statement)) ? Statement : false;
+}
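
Every getter above is the same guarded COM call: zero-initialize an
out-parameter, forward to the IDiaLineNumber accessor, and keep the result
only on S_OK. A short consumer sketch, with printLine as a hypothetical
helper:

    #include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
    #include "llvm/Support/raw_ostream.h"

    // Render one record as "file-id:line:column".
    static void printLine(const llvm::IPDBLineNumber &LN,
                          llvm::raw_ostream &OS) {
      OS << LN.getSourceFileId() << ":" << LN.getLineNumber() << ":"
         << LN.getColumnNumber() << "\n";
    }
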
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
new file mode 100644
index 0000000..abe0ab5
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -0,0 +1,1095 @@
+//===- DIARawSymbol.cpp - DIA implementation of IPDBRawSymbol ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
+#include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+Variant VariantFromVARIANT(const VARIANT &V) {
+ Variant Result;
+ switch (V.vt) {
+ case VT_I1:
+ Result.Int8 = V.cVal;
+ Result.Type = PDB_VariantType::Int8;
+ break;
+ case VT_I2:
+ Result.Int16 = V.iVal;
+ Result.Type = PDB_VariantType::Int16;
+ break;
+ case VT_I4:
+ Result.Int32 = V.intVal;
+ Result.Type = PDB_VariantType::Int32;
+ break;
+ case VT_I8:
+ Result.Int64 = V.llVal;
+ Result.Type = PDB_VariantType::Int64;
+ break;
+ case VT_UI1:
+ Result.UInt8 = V.bVal;
+ Result.Type = PDB_VariantType::UInt8;
+ break;
+ case VT_UI2:
+ Result.UInt16 = V.uiVal;
+ Result.Type = PDB_VariantType::UInt16;
+ break;
+ case VT_UI4:
+ Result.UInt32 = V.uintVal;
+ Result.Type = PDB_VariantType::UInt32;
+ break;
+ case VT_UI8:
+ Result.UInt64 = V.ullVal;
+ Result.Type = PDB_VariantType::UInt64;
+ break;
+ case VT_BOOL:
+ Result.Bool = (V.boolVal == VARIANT_TRUE);
+ Result.Type = PDB_VariantType::Bool;
+ break;
+ case VT_R4:
+ Result.Single = V.fltVal;
+ Result.Type = PDB_VariantType::Single;
+ break;
+ case VT_R8:
+ Result.Double = V.dblVal;
+ Result.Type = PDB_VariantType::Double;
+ break;
+ default:
+ Result.Type = PDB_VariantType::Unknown;
+ break;
+ }
+ return Result;
+}
+
+template <typename ArgType>
+ArgType PrivateGetDIAValue(IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ return static_cast<ArgType>(Value);
+
+ return ArgType();
+}
+
+template <typename ArgType, typename RetType>
+RetType PrivateGetDIAValue(IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ return static_cast<RetType>(Value);
+
+ return RetType();
+}
+
+std::string
+PrivateGetDIAValue(IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(BSTR *)) {
+ CComBSTR Result16;
+ if (S_OK != (Symbol->*Method)(&Result16))
+ return std::string();
+
+ const char *SrcBytes = reinterpret_cast<const char *>(Result16.m_str);
+ llvm::ArrayRef<char> SrcByteArray(SrcBytes, Result16.ByteLength());
+ std::string Result8;
+ if (!llvm::convertUTF16ToUTF8String(SrcByteArray, Result8))
+ return std::string();
+ return Result8;
+}
+
+PDB_UniqueId
+PrivateGetDIAValue(IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(GUID *)) {
+ GUID Result;
+ if (S_OK != (Symbol->*Method)(&Result))
+ return PDB_UniqueId();
+
+ static_assert(sizeof(PDB_UniqueId) == sizeof(GUID),
+ "PDB_UniqueId is the wrong size!");
+ PDB_UniqueId IdResult;
+ ::memcpy(&IdResult, &Result, sizeof(GUID));
+ return IdResult;
+}
+
+template <typename ArgType>
+void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (Symbol->*Method)(&Value)) {
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Value;
+ }
+}
+
+void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(BSTR *)) {
+ BSTR Value = nullptr;
+ if (S_OK != (Symbol->*Method)(&Value))
+ return;
+ const char *Bytes = reinterpret_cast<const char *>(Value);
+ ArrayRef<char> ByteArray(Bytes, ::SysStringByteLen(Value));
+ std::string Result;
+ if (llvm::convertUTF16ToUTF8String(ByteArray, Result)) {
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Result;
+ }
+ ::SysFreeString(Value);
+}
+
+void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(VARIANT *)) {
+ VARIANT Value;
+ Value.vt = VT_EMPTY;
+ if (S_OK != (Symbol->*Method)(&Value))
+ return;
+ OS << "\n";
+ OS.indent(Indent);
+ Variant V = VariantFromVARIANT(Value);
+ OS << V;
+}
+}
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const GUID &Guid) {
+ const PDB_UniqueId *Id = reinterpret_cast<const PDB_UniqueId *>(&Guid);
+ OS << *Id;
+ return OS;
+}
+}
+
+DIARawSymbol::DIARawSymbol(const DIASession &PDBSession,
+ CComPtr<IDiaSymbol> DiaSymbol)
+ : Session(PDBSession), Symbol(DiaSymbol) {}
+
+#define RAW_METHOD_DUMP(Stream, Method) \
+ DumpDIAValue(Stream, Indent, StringRef(#Method), Symbol, &IDiaSymbol::Method);
+
+void DIARawSymbol::dump(raw_ostream &OS, int Indent) const {
+ RAW_METHOD_DUMP(OS, get_access)
+ RAW_METHOD_DUMP(OS, get_addressOffset)
+ RAW_METHOD_DUMP(OS, get_addressSection)
+ RAW_METHOD_DUMP(OS, get_age)
+ RAW_METHOD_DUMP(OS, get_arrayIndexTypeId)
+ RAW_METHOD_DUMP(OS, get_backEndMajor)
+ RAW_METHOD_DUMP(OS, get_backEndMinor)
+ RAW_METHOD_DUMP(OS, get_backEndBuild)
+ RAW_METHOD_DUMP(OS, get_backEndQFE)
+ RAW_METHOD_DUMP(OS, get_baseDataOffset)
+ RAW_METHOD_DUMP(OS, get_baseDataSlot)
+ RAW_METHOD_DUMP(OS, get_baseSymbolId)
+ RAW_METHOD_DUMP(OS, get_baseType)
+ RAW_METHOD_DUMP(OS, get_bitPosition)
+ RAW_METHOD_DUMP(OS, get_callingConvention)
+ RAW_METHOD_DUMP(OS, get_classParentId)
+ RAW_METHOD_DUMP(OS, get_compilerName)
+ RAW_METHOD_DUMP(OS, get_count)
+ RAW_METHOD_DUMP(OS, get_countLiveRanges)
+ RAW_METHOD_DUMP(OS, get_frontEndMajor)
+ RAW_METHOD_DUMP(OS, get_frontEndMinor)
+ RAW_METHOD_DUMP(OS, get_frontEndBuild)
+ RAW_METHOD_DUMP(OS, get_frontEndQFE)
+ RAW_METHOD_DUMP(OS, get_lexicalParentId)
+ RAW_METHOD_DUMP(OS, get_libraryName)
+ RAW_METHOD_DUMP(OS, get_liveRangeStartAddressOffset)
+ RAW_METHOD_DUMP(OS, get_liveRangeStartAddressSection)
+ RAW_METHOD_DUMP(OS, get_liveRangeStartRelativeVirtualAddress)
+ RAW_METHOD_DUMP(OS, get_localBasePointerRegisterId)
+ RAW_METHOD_DUMP(OS, get_lowerBoundId)
+ RAW_METHOD_DUMP(OS, get_memorySpaceKind)
+ RAW_METHOD_DUMP(OS, get_name)
+ RAW_METHOD_DUMP(OS, get_numberOfAcceleratorPointerTags)
+ RAW_METHOD_DUMP(OS, get_numberOfColumns)
+ RAW_METHOD_DUMP(OS, get_numberOfModifiers)
+ RAW_METHOD_DUMP(OS, get_numberOfRegisterIndices)
+ RAW_METHOD_DUMP(OS, get_numberOfRows)
+ RAW_METHOD_DUMP(OS, get_objectFileName)
+ RAW_METHOD_DUMP(OS, get_oemId)
+ RAW_METHOD_DUMP(OS, get_oemSymbolId)
+ RAW_METHOD_DUMP(OS, get_offsetInUdt)
+ RAW_METHOD_DUMP(OS, get_platform)
+ RAW_METHOD_DUMP(OS, get_rank)
+ RAW_METHOD_DUMP(OS, get_registerId)
+ RAW_METHOD_DUMP(OS, get_registerType)
+ RAW_METHOD_DUMP(OS, get_relativeVirtualAddress)
+ RAW_METHOD_DUMP(OS, get_samplerSlot)
+ RAW_METHOD_DUMP(OS, get_signature)
+ RAW_METHOD_DUMP(OS, get_sizeInUdt)
+ RAW_METHOD_DUMP(OS, get_slot)
+ RAW_METHOD_DUMP(OS, get_sourceFileName)
+ RAW_METHOD_DUMP(OS, get_stride)
+ RAW_METHOD_DUMP(OS, get_subTypeId)
+ RAW_METHOD_DUMP(OS, get_symbolsFileName)
+ RAW_METHOD_DUMP(OS, get_symIndexId)
+ RAW_METHOD_DUMP(OS, get_targetOffset)
+ RAW_METHOD_DUMP(OS, get_targetRelativeVirtualAddress)
+ RAW_METHOD_DUMP(OS, get_targetVirtualAddress)
+ RAW_METHOD_DUMP(OS, get_targetSection)
+ RAW_METHOD_DUMP(OS, get_textureSlot)
+ RAW_METHOD_DUMP(OS, get_timeStamp)
+ RAW_METHOD_DUMP(OS, get_token)
+ RAW_METHOD_DUMP(OS, get_typeId)
+ RAW_METHOD_DUMP(OS, get_uavSlot)
+ RAW_METHOD_DUMP(OS, get_undecoratedName)
+ RAW_METHOD_DUMP(OS, get_unmodifiedTypeId)
+ RAW_METHOD_DUMP(OS, get_upperBoundId)
+ RAW_METHOD_DUMP(OS, get_virtualBaseDispIndex)
+ RAW_METHOD_DUMP(OS, get_virtualBaseOffset)
+ RAW_METHOD_DUMP(OS, get_virtualTableShapeId)
+ RAW_METHOD_DUMP(OS, get_dataKind)
+ RAW_METHOD_DUMP(OS, get_symTag)
+ RAW_METHOD_DUMP(OS, get_guid)
+ RAW_METHOD_DUMP(OS, get_offset)
+ RAW_METHOD_DUMP(OS, get_thisAdjust)
+ RAW_METHOD_DUMP(OS, get_virtualBasePointerOffset)
+ RAW_METHOD_DUMP(OS, get_locationType)
+ RAW_METHOD_DUMP(OS, get_machineType)
+ RAW_METHOD_DUMP(OS, get_thunkOrdinal)
+ RAW_METHOD_DUMP(OS, get_length)
+ RAW_METHOD_DUMP(OS, get_liveRangeLength)
+ RAW_METHOD_DUMP(OS, get_virtualAddress)
+ RAW_METHOD_DUMP(OS, get_udtKind)
+ RAW_METHOD_DUMP(OS, get_constructor)
+ RAW_METHOD_DUMP(OS, get_customCallingConvention)
+ RAW_METHOD_DUMP(OS, get_farReturn)
+ RAW_METHOD_DUMP(OS, get_code)
+ RAW_METHOD_DUMP(OS, get_compilerGenerated)
+ RAW_METHOD_DUMP(OS, get_constType)
+ RAW_METHOD_DUMP(OS, get_editAndContinueEnabled)
+ RAW_METHOD_DUMP(OS, get_function)
+ RAW_METHOD_DUMP(OS, get_noStackOrdering)
+ RAW_METHOD_DUMP(OS, get_hasAlloca)
+ RAW_METHOD_DUMP(OS, get_hasAssignmentOperator)
+ RAW_METHOD_DUMP(OS, get_isCTypes)
+ RAW_METHOD_DUMP(OS, get_hasCastOperator)
+ RAW_METHOD_DUMP(OS, get_hasDebugInfo)
+ RAW_METHOD_DUMP(OS, get_hasEH)
+ RAW_METHOD_DUMP(OS, get_hasEHa)
+ RAW_METHOD_DUMP(OS, get_hasInlAsm)
+ RAW_METHOD_DUMP(OS, get_framePointerPresent)
+ RAW_METHOD_DUMP(OS, get_inlSpec)
+ RAW_METHOD_DUMP(OS, get_interruptReturn)
+ RAW_METHOD_DUMP(OS, get_hasLongJump)
+ RAW_METHOD_DUMP(OS, get_hasManagedCode)
+ RAW_METHOD_DUMP(OS, get_hasNestedTypes)
+ RAW_METHOD_DUMP(OS, get_noInline)
+ RAW_METHOD_DUMP(OS, get_noReturn)
+ RAW_METHOD_DUMP(OS, get_optimizedCodeDebugInfo)
+ RAW_METHOD_DUMP(OS, get_overloadedOperator)
+ RAW_METHOD_DUMP(OS, get_hasSEH)
+ RAW_METHOD_DUMP(OS, get_hasSecurityChecks)
+ RAW_METHOD_DUMP(OS, get_hasSetJump)
+ RAW_METHOD_DUMP(OS, get_strictGSCheck)
+ RAW_METHOD_DUMP(OS, get_isAcceleratorGroupSharedLocal)
+ RAW_METHOD_DUMP(OS, get_isAcceleratorPointerTagLiveRange)
+ RAW_METHOD_DUMP(OS, get_isAcceleratorStubFunction)
+ RAW_METHOD_DUMP(OS, get_isAggregated)
+ RAW_METHOD_DUMP(OS, get_intro)
+ RAW_METHOD_DUMP(OS, get_isCVTCIL)
+ RAW_METHOD_DUMP(OS, get_isConstructorVirtualBase)
+ RAW_METHOD_DUMP(OS, get_isCxxReturnUdt)
+ RAW_METHOD_DUMP(OS, get_isDataAligned)
+ RAW_METHOD_DUMP(OS, get_isHLSLData)
+ RAW_METHOD_DUMP(OS, get_isHotpatchable)
+ RAW_METHOD_DUMP(OS, get_indirectVirtualBaseClass)
+ RAW_METHOD_DUMP(OS, get_isInterfaceUdt)
+ RAW_METHOD_DUMP(OS, get_intrinsic)
+ RAW_METHOD_DUMP(OS, get_isLTCG)
+ RAW_METHOD_DUMP(OS, get_isLocationControlFlowDependent)
+ RAW_METHOD_DUMP(OS, get_isMSILNetmodule)
+ RAW_METHOD_DUMP(OS, get_isMatrixRowMajor)
+ RAW_METHOD_DUMP(OS, get_managed)
+ RAW_METHOD_DUMP(OS, get_msil)
+ RAW_METHOD_DUMP(OS, get_isMultipleInheritance)
+ RAW_METHOD_DUMP(OS, get_isNaked)
+ RAW_METHOD_DUMP(OS, get_nested)
+ RAW_METHOD_DUMP(OS, get_isOptimizedAway)
+ RAW_METHOD_DUMP(OS, get_packed)
+ RAW_METHOD_DUMP(OS, get_isPointerBasedOnSymbolValue)
+ RAW_METHOD_DUMP(OS, get_isPointerToDataMember)
+ RAW_METHOD_DUMP(OS, get_isPointerToMemberFunction)
+ RAW_METHOD_DUMP(OS, get_pure)
+ RAW_METHOD_DUMP(OS, get_RValueReference)
+ RAW_METHOD_DUMP(OS, get_isRefUdt)
+ RAW_METHOD_DUMP(OS, get_reference)
+ RAW_METHOD_DUMP(OS, get_restrictedType)
+ RAW_METHOD_DUMP(OS, get_isReturnValue)
+ RAW_METHOD_DUMP(OS, get_isSafeBuffers)
+ RAW_METHOD_DUMP(OS, get_scoped)
+ RAW_METHOD_DUMP(OS, get_isSdl)
+ RAW_METHOD_DUMP(OS, get_isSingleInheritance)
+ RAW_METHOD_DUMP(OS, get_isSplitted)
+ RAW_METHOD_DUMP(OS, get_isStatic)
+ RAW_METHOD_DUMP(OS, get_isStripped)
+ RAW_METHOD_DUMP(OS, get_unalignedType)
+ RAW_METHOD_DUMP(OS, get_notReached)
+ RAW_METHOD_DUMP(OS, get_isValueUdt)
+ RAW_METHOD_DUMP(OS, get_virtual)
+ RAW_METHOD_DUMP(OS, get_virtualBaseClass)
+ RAW_METHOD_DUMP(OS, get_isVirtualInheritance)
+ RAW_METHOD_DUMP(OS, get_volatileType)
+ RAW_METHOD_DUMP(OS, get_wasInlined)
+ RAW_METHOD_DUMP(OS, get_unused)
+ RAW_METHOD_DUMP(OS, get_value)
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+DIARawSymbol::findChildren(PDB_SymType Type) const {
+ enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
+
+ CComPtr<IDiaEnumSymbols> DiaEnumerator;
+ if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+DIARawSymbol::findChildren(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags) const {
+ llvm::SmallVector<UTF16, 32> Name16;
+ llvm::convertUTF8ToUTF16String(Name, Name16);
+
+ enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
+ DWORD CompareFlags = static_cast<DWORD>(Flags);
+ wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
+
+ CComPtr<IDiaEnumSymbols> DiaEnumerator;
+ if (S_OK !=
+ Symbol->findChildrenEx(EnumVal, Name16Str, CompareFlags, &DiaEnumerator))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags, uint32_t RVA) const {
+ llvm::SmallVector<UTF16, 32> Name16;
+ llvm::convertUTF8ToUTF16String(Name, Name16);
+
+ enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
+ DWORD CompareFlags = static_cast<DWORD>(Flags);
+ wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
+
+ CComPtr<IDiaEnumSymbols> DiaEnumerator;
+ if (S_OK !=
+ Symbol->findChildrenExByRVA(EnumVal, Name16Str, CompareFlags, RVA,
+ &DiaEnumerator))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+DIARawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
+ CComPtr<IDiaEnumSymbols> DiaEnumerator;
+ if (S_OK != Symbol->findInlineFramesByRVA(RVA, &DiaEnumerator))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
+}
+
+void DIARawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
+ bytes.clear();
+
+ DWORD DataSize = 0;
+ Symbol->get_dataBytes(0, &DataSize, nullptr);
+ if (DataSize == 0)
+ return;
+
+ bytes.resize(DataSize);
+ Symbol->get_dataBytes(DataSize, &DataSize, bytes.data());
+}
+
+PDB_MemberAccess DIARawSymbol::getAccess() const {
+ return PrivateGetDIAValue<DWORD, PDB_MemberAccess>(Symbol,
+ &IDiaSymbol::get_access);
+}
+
+uint32_t DIARawSymbol::getAddressOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_addressOffset);
+}
+
+uint32_t DIARawSymbol::getAddressSection() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_addressSection);
+}
+
+uint32_t DIARawSymbol::getAge() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_age);
+}
+
+uint32_t DIARawSymbol::getArrayIndexTypeId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_arrayIndexTypeId);
+}
+
+void DIARawSymbol::getBackEndVersion(VersionInfo &Version) const {
+ Version.Major = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_backEndMajor);
+ Version.Minor = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_backEndMinor);
+ Version.Build = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_backEndBuild);
+ Version.QFE = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_backEndQFE);
+}
+
+uint32_t DIARawSymbol::getBaseDataOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseDataOffset);
+}
+
+uint32_t DIARawSymbol::getBaseDataSlot() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseDataSlot);
+}
+
+uint32_t DIARawSymbol::getBaseSymbolId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseSymbolId);
+}
+
+PDB_BuiltinType DIARawSymbol::getBuiltinType() const {
+ return PrivateGetDIAValue<DWORD, PDB_BuiltinType>(Symbol,
+ &IDiaSymbol::get_baseType);
+}
+
+uint32_t DIARawSymbol::getBitPosition() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_bitPosition);
+}
+
+PDB_CallingConv DIARawSymbol::getCallingConvention() const {
+ return PrivateGetDIAValue<DWORD, PDB_CallingConv>(
+ Symbol, &IDiaSymbol::get_callingConvention);
+}
+
+uint32_t DIARawSymbol::getClassParentId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_classParentId);
+}
+
+std::string DIARawSymbol::getCompilerName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_compilerName);
+}
+
+uint32_t DIARawSymbol::getCount() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_count);
+}
+
+uint32_t DIARawSymbol::getCountLiveRanges() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_countLiveRanges);
+}
+
+void DIARawSymbol::getFrontEndVersion(VersionInfo &Version) const {
+ Version.Major = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_frontEndMajor);
+ Version.Minor = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_frontEndMinor);
+ Version.Build = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_frontEndBuild);
+ Version.QFE = PrivateGetDIAValue(Symbol, &IDiaSymbol::get_frontEndQFE);
+}
+
+PDB_Lang DIARawSymbol::getLanguage() const {
+ return PrivateGetDIAValue<DWORD, PDB_Lang>(Symbol, &IDiaSymbol::get_language);
+}
+
+uint32_t DIARawSymbol::getLexicalParentId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lexicalParentId);
+}
+
+std::string DIARawSymbol::getLibraryName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_libraryName);
+}
+
+uint32_t DIARawSymbol::getLiveRangeStartAddressOffset() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_liveRangeStartAddressOffset);
+}
+
+uint32_t DIARawSymbol::getLiveRangeStartAddressSection() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_liveRangeStartAddressSection);
+}
+
+uint32_t DIARawSymbol::getLiveRangeStartRelativeVirtualAddress() const {
+ return PrivateGetDIAValue(
+ Symbol, &IDiaSymbol::get_liveRangeStartRelativeVirtualAddress);
+}
+
+PDB_RegisterId DIARawSymbol::getLocalBasePointerRegisterId() const {
+ return PrivateGetDIAValue<DWORD, PDB_RegisterId>(
+ Symbol, &IDiaSymbol::get_localBasePointerRegisterId);
+}
+
+uint32_t DIARawSymbol::getLowerBoundId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lowerBoundId);
+}
+
+uint32_t DIARawSymbol::getMemorySpaceKind() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_memorySpaceKind);
+}
+
+std::string DIARawSymbol::getName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_name);
+}
+
+uint32_t DIARawSymbol::getNumberOfAcceleratorPointerTags() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_numberOfAcceleratorPointerTags);
+}
+
+uint32_t DIARawSymbol::getNumberOfColumns() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_numberOfColumns);
+}
+
+uint32_t DIARawSymbol::getNumberOfModifiers() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_numberOfModifiers);
+}
+
+uint32_t DIARawSymbol::getNumberOfRegisterIndices() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_numberOfRegisterIndices);
+}
+
+uint32_t DIARawSymbol::getNumberOfRows() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_numberOfRows);
+}
+
+std::string DIARawSymbol::getObjectFileName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_objectFileName);
+}
+
+uint32_t DIARawSymbol::getOemId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemId);
+}
+
+uint32_t DIARawSymbol::getOemSymbolId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemSymbolId);
+}
+
+uint32_t DIARawSymbol::getOffsetInUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_offsetInUdt);
+}
+
+PDB_Cpu DIARawSymbol::getPlatform() const {
+ return PrivateGetDIAValue<DWORD, PDB_Cpu>(Symbol, &IDiaSymbol::get_platform);
+}
+
+uint32_t DIARawSymbol::getRank() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_rank);
+}
+
+PDB_RegisterId DIARawSymbol::getRegisterId() const {
+ return PrivateGetDIAValue<DWORD, PDB_RegisterId>(Symbol,
+ &IDiaSymbol::get_registerId);
+}
+
+uint32_t DIARawSymbol::getRegisterType() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_registerType);
+}
+
+uint32_t DIARawSymbol::getRelativeVirtualAddress() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_relativeVirtualAddress);
+}
+
+uint32_t DIARawSymbol::getSamplerSlot() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_samplerSlot);
+}
+
+uint32_t DIARawSymbol::getSignature() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_signature);
+}
+
+uint32_t DIARawSymbol::getSizeInUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_sizeInUdt);
+}
+
+uint32_t DIARawSymbol::getSlot() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_slot);
+}
+
+std::string DIARawSymbol::getSourceFileName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_sourceFileName);
+}
+
+uint32_t DIARawSymbol::getStride() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_stride);
+}
+
+uint32_t DIARawSymbol::getSubTypeId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_subTypeId);
+}
+
+std::string DIARawSymbol::getSymbolsFileName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symbolsFileName);
+}
+
+uint32_t DIARawSymbol::getSymIndexId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symIndexId);
+}
+
+uint32_t DIARawSymbol::getTargetOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_targetOffset);
+}
+
+uint32_t DIARawSymbol::getTargetRelativeVirtualAddress() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_targetRelativeVirtualAddress);
+}
+
+uint64_t DIARawSymbol::getTargetVirtualAddress() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_targetVirtualAddress);
+}
+
+uint32_t DIARawSymbol::getTargetSection() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_targetSection);
+}
+
+uint32_t DIARawSymbol::getTextureSlot() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_textureSlot);
+}
+
+uint32_t DIARawSymbol::getTimeStamp() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_timeStamp);
+}
+
+uint32_t DIARawSymbol::getToken() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_token);
+}
+
+uint32_t DIARawSymbol::getTypeId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_typeId);
+}
+
+uint32_t DIARawSymbol::getUavSlot() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_uavSlot);
+}
+
+std::string DIARawSymbol::getUndecoratedName() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_undecoratedName);
+}
+
+uint32_t DIARawSymbol::getUnmodifiedTypeId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_unmodifiedTypeId);
+}
+
+uint32_t DIARawSymbol::getUpperBoundId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_upperBoundId);
+}
+
+Variant DIARawSymbol::getValue() const {
+ VARIANT Value;
+ Value.vt = VT_EMPTY;
+ if (S_OK != Symbol->get_value(&Value))
+ return Variant();
+
+ return VariantFromVARIANT(Value);
+}
+
+uint32_t DIARawSymbol::getVirtualBaseDispIndex() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBaseDispIndex);
+}
+
+uint32_t DIARawSymbol::getVirtualBaseOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBaseOffset);
+}
+
+uint32_t DIARawSymbol::getVirtualTableShapeId() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId);
+}
+
+PDB_DataKind DIARawSymbol::getDataKind() const {
+ return PrivateGetDIAValue<DWORD, PDB_DataKind>(Symbol,
+ &IDiaSymbol::get_dataKind);
+}
+
+PDB_SymType DIARawSymbol::getSymTag() const {
+ return PrivateGetDIAValue<DWORD, PDB_SymType>(Symbol,
+ &IDiaSymbol::get_symTag);
+}
+
+PDB_UniqueId DIARawSymbol::getGuid() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_guid);
+}
+
+int32_t DIARawSymbol::getOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_offset);
+}
+
+int32_t DIARawSymbol::getThisAdjust() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_thisAdjust);
+}
+
+int32_t DIARawSymbol::getVirtualBasePointerOffset() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBasePointerOffset);
+}
+
+PDB_LocType DIARawSymbol::getLocationType() const {
+ return PrivateGetDIAValue<DWORD, PDB_LocType>(Symbol,
+ &IDiaSymbol::get_locationType);
+}
+
+PDB_Machine DIARawSymbol::getMachineType() const {
+ return PrivateGetDIAValue<DWORD, PDB_Machine>(Symbol,
+ &IDiaSymbol::get_machineType);
+}
+
+PDB_ThunkOrdinal DIARawSymbol::getThunkOrdinal() const {
+ return PrivateGetDIAValue<DWORD, PDB_ThunkOrdinal>(
+ Symbol, &IDiaSymbol::get_thunkOrdinal);
+}
+
+uint64_t DIARawSymbol::getLength() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_length);
+}
+
+uint64_t DIARawSymbol::getLiveRangeLength() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_liveRangeLength);
+}
+
+uint64_t DIARawSymbol::getVirtualAddress() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualAddress);
+}
+
+PDB_UdtType DIARawSymbol::getUdtKind() const {
+ return PrivateGetDIAValue<DWORD, PDB_UdtType>(Symbol,
+ &IDiaSymbol::get_udtKind);
+}
+
+bool DIARawSymbol::hasConstructor() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_constructor);
+}
+
+bool DIARawSymbol::hasCustomCallingConvention() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_customCallingConvention);
+}
+
+bool DIARawSymbol::hasFarReturn() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_farReturn);
+}
+
+bool DIARawSymbol::isCode() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_code);
+}
+
+bool DIARawSymbol::isCompilerGenerated() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_compilerGenerated);
+}
+
+bool DIARawSymbol::isConstType() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_constType);
+}
+
+bool DIARawSymbol::isEditAndContinueEnabled() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_editAndContinueEnabled);
+}
+
+bool DIARawSymbol::isFunction() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_function);
+}
+
+bool DIARawSymbol::getAddressTaken() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_addressTaken);
+}
+
+bool DIARawSymbol::getNoStackOrdering() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_noStackOrdering);
+}
+
+bool DIARawSymbol::hasAlloca() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasAlloca);
+}
+
+bool DIARawSymbol::hasAssignmentOperator() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasAssignmentOperator);
+}
+
+bool DIARawSymbol::hasCTypes() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isCTypes);
+}
+
+bool DIARawSymbol::hasCastOperator() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasCastOperator);
+}
+
+bool DIARawSymbol::hasDebugInfo() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasDebugInfo);
+}
+
+bool DIARawSymbol::hasEH() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasEH);
+}
+
+bool DIARawSymbol::hasEHa() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasEHa);
+}
+
+bool DIARawSymbol::hasInlAsm() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasInlAsm);
+}
+
+bool DIARawSymbol::hasInlineAttribute() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_inlSpec);
+}
+
+bool DIARawSymbol::hasInterruptReturn() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_interruptReturn);
+}
+
+bool DIARawSymbol::hasFramePointer() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_framePointerPresent);
+}
+
+bool DIARawSymbol::hasLongJump() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasLongJump);
+}
+
+bool DIARawSymbol::hasManagedCode() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasManagedCode);
+}
+
+bool DIARawSymbol::hasNestedTypes() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasNestedTypes);
+}
+
+bool DIARawSymbol::hasNoInlineAttribute() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_noInline);
+}
+
+bool DIARawSymbol::hasNoReturnAttribute() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_noReturn);
+}
+
+bool DIARawSymbol::hasOptimizedCodeDebugInfo() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_optimizedCodeDebugInfo);
+}
+
+bool DIARawSymbol::hasOverloadedOperator() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_overloadedOperator);
+}
+
+bool DIARawSymbol::hasSEH() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasSEH);
+}
+
+bool DIARawSymbol::hasSecurityChecks() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasSecurityChecks);
+}
+
+bool DIARawSymbol::hasSetJump() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_hasSetJump);
+}
+
+bool DIARawSymbol::hasStrictGSCheck() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_strictGSCheck);
+}
+
+bool DIARawSymbol::isAcceleratorGroupSharedLocal() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_isAcceleratorGroupSharedLocal);
+}
+
+bool DIARawSymbol::isAcceleratorPointerTagLiveRange() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_isAcceleratorPointerTagLiveRange);
+}
+
+bool DIARawSymbol::isAcceleratorStubFunction() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isAcceleratorStubFunction);
+}
+
+bool DIARawSymbol::isAggregated() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isAggregated);
+}
+
+bool DIARawSymbol::isIntroVirtualFunction() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_intro);
+}
+
+bool DIARawSymbol::isCVTCIL() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isCVTCIL);
+}
+
+bool DIARawSymbol::isConstructorVirtualBase() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isConstructorVirtualBase);
+}
+
+bool DIARawSymbol::isCxxReturnUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isCxxReturnUdt);
+}
+
+bool DIARawSymbol::isDataAligned() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isDataAligned);
+}
+
+bool DIARawSymbol::isHLSLData() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isHLSLData);
+}
+
+bool DIARawSymbol::isHotpatchable() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isHotpatchable);
+}
+
+bool DIARawSymbol::isIndirectVirtualBaseClass() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_indirectVirtualBaseClass);
+}
+
+bool DIARawSymbol::isInterfaceUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isInterfaceUdt);
+}
+
+bool DIARawSymbol::isIntrinsic() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_intrinsic);
+}
+
+bool DIARawSymbol::isLTCG() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isLTCG);
+}
+
+bool DIARawSymbol::isLocationControlFlowDependent() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_isLocationControlFlowDependent);
+}
+
+bool DIARawSymbol::isMSILNetmodule() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isMSILNetmodule);
+}
+
+bool DIARawSymbol::isMatrixRowMajor() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isMatrixRowMajor);
+}
+
+bool DIARawSymbol::isManagedCode() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_managed);
+}
+
+bool DIARawSymbol::isMSILCode() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_msil);
+}
+
+bool DIARawSymbol::isMultipleInheritance() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isMultipleInheritance);
+}
+
+bool DIARawSymbol::isNaked() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isNaked);
+}
+
+bool DIARawSymbol::isNested() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_nested);
+}
+
+bool DIARawSymbol::isOptimizedAway() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isOptimizedAway);
+}
+
+bool DIARawSymbol::isPacked() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_packed);
+}
+
+bool DIARawSymbol::isPointerBasedOnSymbolValue() const {
+ return PrivateGetDIAValue(Symbol,
+ &IDiaSymbol::get_isPointerBasedOnSymbolValue);
+}
+
+bool DIARawSymbol::isPointerToDataMember() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isPointerToDataMember);
+}
+
+bool DIARawSymbol::isPointerToMemberFunction() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isPointerToMemberFunction);
+}
+
+bool DIARawSymbol::isPureVirtual() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_pure);
+}
+
+bool DIARawSymbol::isRValueReference() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_RValueReference);
+}
+
+bool DIARawSymbol::isRefUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isRefUdt);
+}
+
+bool DIARawSymbol::isReference() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_reference);
+}
+
+bool DIARawSymbol::isRestrictedType() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_restrictedType);
+}
+
+bool DIARawSymbol::isReturnValue() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isReturnValue);
+}
+
+bool DIARawSymbol::isSafeBuffers() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isSafeBuffers);
+}
+
+bool DIARawSymbol::isScoped() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_scoped);
+}
+
+bool DIARawSymbol::isSdl() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isSdl);
+}
+
+bool DIARawSymbol::isSingleInheritance() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isSingleInheritance);
+}
+
+bool DIARawSymbol::isSplitted() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isSplitted);
+}
+
+bool DIARawSymbol::isStatic() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isStatic);
+}
+
+bool DIARawSymbol::hasPrivateSymbols() const {
+ // hasPrivateSymbols is the opposite of isStripped, but we expose
+ // hasPrivateSymbols as a more intuitive interface.
+ return !PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isStripped);
+}
+
+bool DIARawSymbol::isUnalignedType() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_unalignedType);
+}
+
+bool DIARawSymbol::isUnreached() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_notReached);
+}
+
+bool DIARawSymbol::isValueUdt() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isValueUdt);
+}
+
+bool DIARawSymbol::isVirtual() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtual);
+}
+
+bool DIARawSymbol::isVirtualBaseClass() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBaseClass);
+}
+
+bool DIARawSymbol::isVirtualInheritance() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_isVirtualInheritance);
+}
+
+bool DIARawSymbol::isVolatileType() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_volatileType);
+}
+
+bool DIARawSymbol::wasInlined() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_wasInlined);
+}
+
+std::string DIARawSymbol::getUnused() const {
+ return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_unused);
+}
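
Nearly all of the accessors above funnel through the PrivateGetDIAValue
overload set, which calls an IDiaSymbol getter through a pointer-to-member
and falls back to a value-initialized default when the COM call fails. The
pattern reduced to a standalone sketch; MockSymbol and the type aliases are
stand-ins for the Windows SDK definitions (__stdcall omitted), illustrative
only:

    #include <cstdint>

    using HRESULT = long;          // stand-in for the Windows SDK typedef
    using DWORD = std::uint32_t;   // likewise
    constexpr HRESULT S_OK = 0;

    struct MockSymbol {
      HRESULT get_count(DWORD *Out) { *Out = 42; return S_OK; }
    };

    // Invoke through a pointer-to-member; default the result unless S_OK.
    template <typename ArgType>
    ArgType getDIAValue(MockSymbol *S,
                        HRESULT (MockSymbol::*Method)(ArgType *)) {
      ArgType Value;
      return (S_OK == (S->*Method)(&Value)) ? Value : ArgType();
    }

    // Usage: DWORD N = getDIAValue(&Sym, &MockSymbol::get_count); // N == 42
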
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
new file mode 100644
index 0000000..99fe750
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -0,0 +1,205 @@
+//===- DIASession.cpp - DIA implementation of IPDBSession -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
+#include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+
+DIASession::DIASession(CComPtr<IDiaSession> DiaSession) : Session(DiaSession) {}
+
+PDB_ErrorCode DIASession::createFromPdb(StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ CComPtr<IDiaDataSource> DiaDataSource;
+ CComPtr<IDiaSession> DiaSession;
+
+ // We assume that CoInitializeEx has already been called by the executable.
+ HRESULT Result = ::CoCreateInstance(
+ CLSID_DiaSource, nullptr, CLSCTX_INPROC_SERVER, IID_IDiaDataSource,
+ reinterpret_cast<LPVOID *>(&DiaDataSource));
+ if (FAILED(Result))
+ return PDB_ErrorCode::NoPdbImpl;
+
+ llvm::SmallVector<UTF16, 128> Path16;
+ if (!llvm::convertUTF8ToUTF16String(Path, Path16))
+ return PDB_ErrorCode::InvalidPath;
+
+ const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
+ if (FAILED(Result = DiaDataSource->loadDataFromPdb(Path16Str))) {
+ if (Result == E_PDB_NOT_FOUND)
+ return PDB_ErrorCode::InvalidPath;
+ else if (Result == E_PDB_FORMAT)
+ return PDB_ErrorCode::InvalidFileFormat;
+ else if (Result == E_INVALIDARG)
+ return PDB_ErrorCode::InvalidParameter;
+ else if (Result == E_UNEXPECTED)
+ return PDB_ErrorCode::AlreadyLoaded;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ if (FAILED(Result = DiaDataSource->openSession(&DiaSession))) {
+ if (Result == E_OUTOFMEMORY)
+ return PDB_ErrorCode::NoMemory;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ Session.reset(new DIASession(DiaSession));
+ return PDB_ErrorCode::Success;
+}
+
+PDB_ErrorCode DIASession::createFromExe(StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ CComPtr<IDiaDataSource> DiaDataSource;
+ CComPtr<IDiaSession> DiaSession;
+
+ // We assume that CoInitializeEx has already been called by the executable.
+ HRESULT Result = ::CoCreateInstance(
+ CLSID_DiaSource, nullptr, CLSCTX_INPROC_SERVER, IID_IDiaDataSource,
+ reinterpret_cast<LPVOID *>(&DiaDataSource));
+ if (FAILED(Result))
+ return PDB_ErrorCode::NoPdbImpl;
+
+ llvm::SmallVector<UTF16, 128> Path16;
+ if (!llvm::convertUTF8ToUTF16String(Path, Path16))
+ return PDB_ErrorCode::InvalidPath;
+
+ const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
+ if (FAILED(Result =
+ DiaDataSource->loadDataForExe(Path16Str, nullptr, nullptr))) {
+ if (Result == E_PDB_NOT_FOUND)
+ return PDB_ErrorCode::InvalidPath;
+ else if (Result == E_PDB_FORMAT)
+ return PDB_ErrorCode::InvalidFileFormat;
+ else if (Result == E_PDB_INVALID_SIG || Result == E_PDB_INVALID_AGE)
+ return PDB_ErrorCode::DebugInfoMismatch;
+ else if (Result == E_INVALIDARG)
+ return PDB_ErrorCode::InvalidParameter;
+ else if (Result == E_UNEXPECTED)
+ return PDB_ErrorCode::AlreadyLoaded;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ if (FAILED(Result = DiaDataSource->openSession(&DiaSession))) {
+ if (Result == E_OUTOFMEMORY)
+ return PDB_ErrorCode::NoMemory;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ Session.reset(new DIASession(DiaSession));
+ return PDB_ErrorCode::Success;
+}
+
+uint64_t DIASession::getLoadAddress() const {
+ uint64_t LoadAddress;
+ bool Success = (S_OK == Session->get_loadAddress(&LoadAddress));
+ return Success ? LoadAddress : 0;
+}
+
+void DIASession::setLoadAddress(uint64_t Address) {
+ Session->put_loadAddress(Address);
+}
+
+std::unique_ptr<PDBSymbolExe> DIASession::getGlobalScope() const {
+ CComPtr<IDiaSymbol> GlobalScope;
+ if (S_OK != Session->get_globalScope(&GlobalScope))
+ return nullptr;
+
+ auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, GlobalScope);
+ auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
+ std::unique_ptr<PDBSymbolExe> ExeSymbol(
+ static_cast<PDBSymbolExe *>(PdbSymbol.release()));
+ return ExeSymbol;
+}
+
+std::unique_ptr<PDBSymbol> DIASession::getSymbolById(uint32_t SymbolId) const {
+ CComPtr<IDiaSymbol> LocatedSymbol;
+ if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol))
+ return nullptr;
+
+ auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, LocatedSymbol);
+ return PDBSymbol::create(*this, std::move(RawSymbol));
+}
+
+std::unique_ptr<PDBSymbol>
+DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const {
+ enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
+
+ CComPtr<IDiaSymbol> Symbol;
+ if (S_OK != Session->findSymbolByVA(Address, EnumVal, &Symbol)) {
+ ULONGLONG LoadAddr = 0;
+ if (S_OK != Session->get_loadAddress(&LoadAddr))
+ return nullptr;
+ DWORD RVA = static_cast<DWORD>(Address - LoadAddr);
+ if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol))
+ return nullptr;
+ }
+ auto RawSymbol = llvm::make_unique<DIARawSymbol>(*this, Symbol);
+ return PDBSymbol::create(*this, std::move(RawSymbol));
+}
+
+std::unique_ptr<IPDBEnumLineNumbers>
+DIASession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const {
+ CComPtr<IDiaEnumLineNumbers> LineNumbers;
+ if (S_OK != Session->findLinesByVA(Address, Length, &LineNumbers))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumLineNumbers>(LineNumbers);
+}
+
+std::unique_ptr<IPDBEnumSourceFiles> DIASession::getAllSourceFiles() const {
+ CComPtr<IDiaEnumSourceFiles> Files;
+ if (S_OK != Session->findFile(nullptr, nullptr, nsNone, &Files))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSourceFiles>(*this, Files);
+}
+
+std::unique_ptr<IPDBEnumSourceFiles> DIASession::getSourceFilesForCompiland(
+ const PDBSymbolCompiland &Compiland) const {
+ CComPtr<IDiaEnumSourceFiles> Files;
+
+ const DIARawSymbol &RawSymbol =
+ static_cast<const DIARawSymbol &>(Compiland.getRawSymbol());
+ if (S_OK !=
+ Session->findFile(RawSymbol.getDiaSymbol(), nullptr, nsNone, &Files))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumSourceFiles>(*this, Files);
+}
+
+std::unique_ptr<IPDBSourceFile>
+DIASession::getSourceFileById(uint32_t FileId) const {
+ CComPtr<IDiaSourceFile> LocatedFile;
+ if (S_OK != Session->findFileById(FileId, &LocatedFile))
+ return nullptr;
+
+ return llvm::make_unique<DIASourceFile>(*this, LocatedFile);
+}
+
+std::unique_ptr<IPDBEnumDataStreams> DIASession::getDebugStreams() const {
+ CComPtr<IDiaEnumDebugStreams> DiaEnumerator;
+ if (S_OK != Session->getEnumDebugStreams(&DiaEnumerator))
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumDebugStreams>(DiaEnumerator);
+}
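
Putting createFromPdb and the accessors together, a hedged end-to-end
sketch; the path is illustrative and, as the comments above note,
CoInitializeEx must already have been called by the host executable:

    #include "llvm/DebugInfo/PDB/DIA/DIASession.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    static void openAndReport() {
      std::unique_ptr<llvm::IPDBSession> Session;
      if (llvm::DIASession::createFromPdb("example.pdb", Session) !=
          llvm::PDB_ErrorCode::Success)
        return; // NoPdbImpl, InvalidPath, ... as mapped above
      if (auto Exe = Session->getGlobalScope())
        llvm::outs() << "load address: " << Session->getLoadAddress() << "\n";
    }
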
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
new file mode 100644
index 0000000..0a9c444
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
@@ -0,0 +1,67 @@
+//===- DIASourceFile.cpp - DIA implementation of IPDBSourceFile -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+
+DIASourceFile::DIASourceFile(const DIASession &PDBSession,
+ CComPtr<IDiaSourceFile> DiaSourceFile)
+ : Session(PDBSession), SourceFile(DiaSourceFile) {}
+
+std::string DIASourceFile::getFileName() const {
+ CComBSTR FileName16;
+ HRESULT Result = SourceFile->get_fileName(&FileName16);
+ if (S_OK != Result)
+ return std::string();
+
+ std::string FileName8;
+ llvm::ArrayRef<char> FileNameBytes(reinterpret_cast<char *>(FileName16.m_str),
+ FileName16.ByteLength());
+ llvm::convertUTF16ToUTF8String(FileNameBytes, FileName8);
+ return FileName8;
+}
+
+uint32_t DIASourceFile::getUniqueId() const {
+ DWORD Id;
+ return (S_OK == SourceFile->get_uniqueId(&Id)) ? Id : 0;
+}
+
+std::string DIASourceFile::getChecksum() const {
+ DWORD ByteSize = 0;
+ HRESULT Result = SourceFile->get_checksum(0, &ByteSize, nullptr);
+ if (ByteSize == 0)
+ return std::string();
+ std::vector<BYTE> ChecksumBytes(ByteSize);
+ Result = SourceFile->get_checksum(ByteSize, &ByteSize, &ChecksumBytes[0]);
+ if (S_OK != Result)
+ return std::string();
+ return std::string(ChecksumBytes.begin(), ChecksumBytes.end());
+}
+
+PDB_Checksum DIASourceFile::getChecksumType() const {
+ DWORD Type;
+ HRESULT Result = SourceFile->get_checksumType(&Type);
+ if (S_OK != Result)
+ return PDB_Checksum::None;
+ return static_cast<PDB_Checksum>(Type);
+}
+
+std::unique_ptr<IPDBEnumSymbols> DIASourceFile::getCompilands() const {
+ CComPtr<IDiaEnumSymbols> DiaEnumerator;
+ HRESULT Result = SourceFile->get_compilands(&DiaEnumerator);
+ if (S_OK != Result)
+ return nullptr;
+
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new DIAEnumSymbols(Session, DiaEnumerator));
+}
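
getCompilands closes the loop between files and symbols: from a source file
you can walk back to every compiland that references it. An illustrative
sketch (File is any IPDBSourceFile obtained from the session):

    #include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
    #include "llvm/DebugInfo/PDB/PDBSymbol.h"
    #include "llvm/Support/raw_ostream.h"

    static void listCompilands(const llvm::IPDBSourceFile &File) {
      if (auto Compilands = File.getCompilands())
        while (auto C = Compilands->getNext())
          llvm::outs() << C->getRawSymbol().getName() << "\n";
    }
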
diff --git a/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp
new file mode 100644
index 0000000..3abe59d
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp
@@ -0,0 +1,32 @@
+//===- IPDBSourceFile.cpp - base interface for a PDB source file -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+IPDBSourceFile::~IPDBSourceFile() {}
+
+void IPDBSourceFile::dump(raw_ostream &OS, int Indent) const {
+ OS.indent(Indent);
+ PDB_Checksum ChecksumType = getChecksumType();
+ OS << "[";
+ if (ChecksumType != PDB_Checksum::None) {
+ OS << ChecksumType << ": ";
+ std::string Checksum = getChecksum();
+ for (uint8_t c : Checksum)
+ OS << format_hex_no_prefix(c, 2, true);
+ } else
+ OS << "No checksum";
+ OS << "] " << getFileName() << "\n";
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
new file mode 100644
index 0000000..13201bb
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -0,0 +1,39 @@
+//===- PDB.cpp - base header file for creating a PDB reader -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDB.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+
+#if HAVE_DIA_SDK
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#endif
+
+using namespace llvm;
+
+PDB_ErrorCode llvm::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ // Create the correct concrete instance type based on the value of Type.
+#if HAVE_DIA_SDK
+ return DIASession::createFromPdb(Path, Session);
+#endif
+ return PDB_ErrorCode::NoPdbImpl;
+}
+
+PDB_ErrorCode llvm::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ // Create the correct concrete instance type based on the value of Type.
+#if HAVE_DIA_SDK
+ return DIASession::createFromExe(Path, Session);
+#endif
+ return PDB_ErrorCode::NoPdbImpl;
+}
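
Note that the Type parameter is not yet dispatched on: DIA is the only
reader, so each entry point either forwards to it (when HAVE_DIA_SDK is
defined) or reports NoPdbImpl. A minimal caller sketch with an illustrative
path:

    #include "llvm/DebugInfo/PDB/IPDBSession.h"
    #include "llvm/DebugInfo/PDB/PDB.h"
    #include <memory>

    static bool openPdb() {
      std::unique_ptr<llvm::IPDBSession> Session;
      llvm::PDB_ErrorCode EC = llvm::loadDataForPDB(
          llvm::PDB_ReaderType::DIA, "example.pdb", Session);
      // Without the DIA SDK this is always PDB_ErrorCode::NoPdbImpl.
      return EC == llvm::PDB_ErrorCode::Success;
    }
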
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
new file mode 100644
index 0000000..83f27c7
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
@@ -0,0 +1,133 @@
+//===-- PDBContext.cpp ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBContext.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+#include "llvm/Object/COFF.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+PDBContext::PDBContext(const COFFObjectFile &Object,
+ std::unique_ptr<IPDBSession> PDBSession,
+ bool RelativeAddress)
+ : DIContext(CK_PDB), Session(std::move(PDBSession)) {
+ if (!RelativeAddress) {
+ uint64_t ImageBase = 0;
+ if (Object.is64()) {
+ const pe32plus_header *Header = nullptr;
+ Object.getPE32PlusHeader(Header);
+ if (Header)
+ ImageBase = Header->ImageBase;
+ } else {
+ const pe32_header *Header = nullptr;
+ Object.getPE32Header(Header);
+ if (Header)
+ ImageBase = static_cast<uint64_t>(Header->ImageBase);
+ }
+ Session->setLoadAddress(ImageBase);
+ }
+}
+
+void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType) {}
+
+DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DILineInfo Result;
+ Result.FunctionName = getFunctionName(Address, Specifier.FNKind);
+
+ uint32_t Length = 1;
+ std::unique_ptr<PDBSymbol> Symbol =
+ Session->findSymbolByAddress(Address, PDB_SymType::None);
+ if (auto Func = dyn_cast_or_null<PDBSymbolFunc>(Symbol.get())) {
+ Length = Func->getLength();
+ } else if (auto Data = dyn_cast_or_null<PDBSymbolData>(Symbol.get())) {
+ Length = Data->getLength();
+ }
+
+ // If we couldn't find a symbol, then just assume 1 byte, so that we get
+ // only the line number of the first instruction.
+ auto LineNumbers = Session->findLineNumbersByAddress(Address, Length);
+ if (!LineNumbers || LineNumbers->getChildCount() == 0)
+ return Result;
+
+ auto LineInfo = LineNumbers->getNext();
+ assert(LineInfo);
+ auto SourceFile = Session->getSourceFileById(LineInfo->getSourceFileId());
+
+ if (SourceFile &&
+ Specifier.FLIKind != DILineInfoSpecifier::FileLineInfoKind::None)
+ Result.FileName = SourceFile->getFileName();
+ Result.Column = LineInfo->getColumnNumber();
+ Result.Line = LineInfo->getLineNumber();
+ return Result;
+}
+
+DILineInfoTable
+PDBContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+ DILineInfoSpecifier Specifier) {
+ if (Size == 0)
+ return DILineInfoTable();
+
+ DILineInfoTable Table;
+ auto LineNumbers = Session->findLineNumbersByAddress(Address, Size);
+ if (!LineNumbers || LineNumbers->getChildCount() == 0)
+ return Table;
+
+ while (auto LineInfo = LineNumbers->getNext()) {
+ DILineInfo LineEntry =
+ getLineInfoForAddress(LineInfo->getVirtualAddress(), Specifier);
+ Table.push_back(std::make_pair(LineInfo->getVirtualAddress(), LineEntry));
+ }
+ return Table;
+}
+
+DIInliningInfo
+PDBContext::getInliningInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DIInliningInfo InlineInfo;
+ DILineInfo Frame = getLineInfoForAddress(Address, Specifier);
+ InlineInfo.addFrame(Frame);
+ return InlineInfo;
+}
+
+std::string PDBContext::getFunctionName(uint64_t Address,
+ DINameKind NameKind) const {
+ if (NameKind == DINameKind::None)
+ return std::string();
+
+ if (NameKind == DINameKind::LinkageName) {
+ // It is not possible to get the mangled linkage name through a
+ // PDBSymbolFunc. For that we have to specifically request a
+ // PDBSymbolPublicSymbol.
+ auto PublicSym =
+ Session->findSymbolByAddress(Address, PDB_SymType::PublicSymbol);
+ if (auto PS = dyn_cast_or_null<PDBSymbolPublicSymbol>(PublicSym.get()))
+ return PS->getName();
+ }
+
+ auto FuncSymbol =
+ Session->findSymbolByAddress(Address, PDB_SymType::Function);
+
+ // This could happen either if there was no public symbol (e.g. not
+ // external) or the user requested the short name. In the former case,
+ // although they technically requested the linkage name, if the linkage
+ // name is not available, we fall back to at least returning a non-empty
+ // string.
+ if (auto Func = dyn_cast_or_null<PDBSymbolFunc>(FuncSymbol.get()))
+ return Func->getName();
+
+ return std::string();
+}
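
PDBContext is the DIContext glue that lets existing symbolizer code consume
PDBs. A hedged usage sketch; Obj and Session are assumed to come from the
caller (e.g. a COFFObjectFile plus loadDataForEXE):

    #include "llvm/DebugInfo/PDB/PDBContext.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    #include <memory>

    static void symbolize(const llvm::object::COFFObjectFile &Obj,
                          std::unique_ptr<llvm::IPDBSession> Session,
                          uint64_t Address) {
      llvm::PDBContext Ctx(Obj, std::move(Session),
                           /*RelativeAddress=*/false);
      llvm::DILineInfo Info =
          Ctx.getLineInfoForAddress(Address, llvm::DILineInfoSpecifier());
      // FunctionName, FileName, Line are populated when the PDB knows them.
      llvm::outs() << Info.FunctionName << ":" << Info.Line << "\n";
    }
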
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
new file mode 100644
index 0000000..4b9437c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -0,0 +1,339 @@
+//===- PDBExtras.cpp - helper functions and classes for PDBs -----*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+
+#include "llvm/ADT/ArrayRef.h"
+
+using namespace llvm;
+
+#define CASE_OUTPUT_ENUM_CLASS_STR(Class, Value, Str, Stream) \
+ case Class::Value: \
+ Stream << Str; \
+ break;
+
+#define CASE_OUTPUT_ENUM_CLASS_NAME(Class, Value, Stream) \
+ CASE_OUTPUT_ENUM_CLASS_STR(Class, Value, #Value, Stream)
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_VariantType &Type) {
+ switch (Type) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Bool, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Single, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Double, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Int8, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Int16, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Int32, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, Int64, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, UInt8, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, UInt16, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, UInt32, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_VariantType, UInt64, OS)
+ default:
+ OS << "Unknown";
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_CallingConv &Conv) {
+ OS << "__";
+ switch (Conv) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearCdecl, "cdecl", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, FarCdecl, "cdecl", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearPascal, "pascal", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, FarPascal, "pascal", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearFastcall, "fastcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, FarFastcall, "fastcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Skipped, "skippedcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearStdcall, "stdcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, FarStdcall, "stdcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearSyscall, "syscall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, FarSyscall, "syscall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Thiscall, "thiscall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, MipsCall, "mipscall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Generic, "genericcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Alphacall, "alphacall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Ppccall, "ppccall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, SuperHCall, "superhcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Armcall, "armcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, AM33call, "am33call", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Tricall, "tricall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Sh5call, "sh5call", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, M32R, "m32rcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Clrcall, "clrcall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, Inline, "inlinecall", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_CallingConv, NearVectorcall, "vectorcall",
+ OS)
+ default:
+ OS << "unknowncall";
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_DataKind &Data) {
+ switch (Data) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Unknown, "unknown", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Local, "local", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, StaticLocal, "static local", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Param, "param", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, ObjectPtr, "this ptr", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, FileStatic, "static global", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Global, "global", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Member, "member", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, StaticMember, "static member", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_DataKind, Constant, "const", OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_RegisterId &Reg) {
+ switch (Reg) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, AL, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, CL, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, DL, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, BL, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, AH, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, CH, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, DH, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, BH, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, AX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, CX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, DX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, BX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, SP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, BP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, SI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, DI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, EAX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, ECX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, EDX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, EBX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, ESP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, EBP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, ESI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, EDI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, ES, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, CS, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, SS, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, DS, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, FS, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, GS, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, IP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RAX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RBX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RCX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RDX, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RSI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RDI, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RBP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, RSP, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R8, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R9, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R10, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R11, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R12, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R13, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R14, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_RegisterId, R15, OS)
+ default:
+ OS << static_cast<int>(Reg);
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_LocType &Loc) {
+ switch (Loc) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, Static, "static", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, TLS, "tls", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, RegRel, "regrel", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, ThisRel, "thisrel", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, Enregistered, "register", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, BitField, "bitfield", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, Slot, "slot", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, IlRel, "IL rel", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, MetaData, "metadata", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_LocType, Constant, "constant", OS)
+ default:
+ OS << "Unknown";
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_ThunkOrdinal &Thunk) {
+ switch (Thunk) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, BranchIsland, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, Pcode, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, Standard, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, ThisAdjustor, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, TrampIncremental, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, UnknownLoad, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_ThunkOrdinal, Vcall, OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_Checksum &Checksum) {
+ switch (Checksum) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Checksum, None, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Checksum, MD5, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Checksum, SHA1, OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_Lang &Lang) {
+ switch (Lang) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, C, OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_Lang, Cpp, "C++", OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Fortran, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Masm, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Pascal, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Basic, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Cobol, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Link, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Cvtres, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Cvtpgd, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, CSharp, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, VB, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, ILAsm, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Java, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, JScript, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, MSIL, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, HLSL, OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_SymType &Tag) {
+ switch (Tag) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Exe, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Compiland, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CompilandDetails, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CompilandEnv, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Function, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Block, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Data, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Annotation, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Label, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, PublicSymbol, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, UDT, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Enum, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, FunctionSig, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, PointerType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, ArrayType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, BuiltinType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Typedef, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, BaseClass, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Friend, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, FunctionArg, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, FuncDebugStart, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, FuncDebugEnd, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, UsingNamespace, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, VTableShape, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, VTable, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Custom, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Thunk, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CustomType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, ManagedType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Dimension, OS)
+ default:
+ OS << "Unknown";
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_MemberAccess &Access) {
+ switch (Access) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Public, "public", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Protected, "protected", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Private, "private", OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_UdtType &Type) {
+ switch (Type) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Struct, "struct", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Interface, "interface", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Union, "union", OS)
+ }
+ return OS;
+}
+
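+// Render the 16 GUID bytes in memory order as uppercase hex, grouped
+// 8-4-4-4-12 and wrapped in braces, e.g. {3F2504E0-4F89-11D3-9A0C-0305E82C3301}
+// (example value for illustration only).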
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_UniqueId &Id) {
+ static const char *Lookup = "0123456789ABCDEF";
+
+ static_assert(sizeof(PDB_UniqueId) == 16, "Expected 16-byte GUID");
+ ArrayRef<uint8_t> GuidBytes(reinterpret_cast<const uint8_t*>(&Id), 16);
+ OS << "{";
+  for (int i = 0; i < 16;) {
+ uint8_t Byte = GuidBytes[i];
+ uint8_t HighNibble = (Byte >> 4) & 0xF;
+ uint8_t LowNibble = Byte & 0xF;
+ OS << Lookup[HighNibble] << Lookup[LowNibble];
+ ++i;
+    if (i >= 4 && i <= 10 && i % 2 == 0)
+ OS << "-";
+ }
+ OS << "}";
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const Variant &Value) {
+ switch (Value.Type) {
+ case PDB_VariantType::Bool:
+ OS << (Value.Bool ? "true" : "false");
+ break;
+ case PDB_VariantType::Double:
+ OS << Value.Double;
+ break;
+ case PDB_VariantType::Int16:
+ OS << Value.Int16;
+ break;
+ case PDB_VariantType::Int32:
+ OS << Value.Int32;
+ break;
+ case PDB_VariantType::Int64:
+ OS << Value.Int64;
+ break;
+ case PDB_VariantType::Int8:
+ OS << static_cast<int>(Value.Int8);
+ break;
+ case PDB_VariantType::Single:
+ OS << Value.Single;
+ break;
+ case PDB_VariantType::UInt16:
+    OS << Value.UInt16;
+ break;
+ case PDB_VariantType::UInt32:
+ OS << Value.UInt32;
+ break;
+ case PDB_VariantType::UInt64:
+ OS << Value.UInt64;
+ break;
+ case PDB_VariantType::UInt8:
+ OS << static_cast<unsigned>(Value.UInt8);
+ break;
+ default:
+ OS << Value.Type;
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const VersionInfo &Version) {
+ OS << Version.Major << "." << Version.Minor << "." << Version.Build;
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const TagStats &Stats) {
+ for (auto Tag : Stats) {
+ OS << Tag.first << ":" << Tag.second << " ";
+ }
+ return OS;
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
new file mode 100644
index 0000000..7b6268d
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -0,0 +1,27 @@
+//===- PDBInterfaceAnchors.cpp - defines class anchor functions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Class anchors are necessary per the LLVM Coding Standards, to ensure that
+// the vtable is generated only in this object file, and not in every object
+// file that includes the corresponding header.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/IPDBDataStream.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
+using namespace llvm;
+
+IPDBSession::~IPDBSession() {}
+
+IPDBDataStream::~IPDBDataStream() {}
+
+IPDBRawSymbol::~IPDBRawSymbol() {}
+
+IPDBLineNumber::~IPDBLineNumber() {}
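
The destructor definitions above are each interface's anchor: on common C++
ABIs the vtable is emitted in the translation unit that defines the class's
first non-inline virtual member function. A minimal sketch of the header side
of the pattern (hypothetical interface name, for illustration only):

    // IExample.h
    class IExample {
    public:
      virtual ~IExample();      // declared here, defined once in IExample.cpp
      virtual void run() = 0;   // other members may stay pure or inline
    };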
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp
new file mode 100644
index 0000000..121e2d1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp
@@ -0,0 +1,146 @@
+//===- PDBSymDumper.cpp - ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define PDB_SYMDUMP_UNREACHABLE(Type) \
+ if (RequireImpl) \
+ llvm_unreachable("Attempt to dump " #Type " with no dump implementation");
+
+PDBSymDumper::PDBSymDumper(bool ShouldRequireImpl)
+ : RequireImpl(ShouldRequireImpl) {}
+
+PDBSymDumper::~PDBSymDumper() {}
+
+void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolAnnotation)
+}
+
+void PDBSymDumper::dump(const PDBSymbolBlock &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolBlock)
+}
+
+void PDBSymDumper::dump(const PDBSymbolCompiland &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompiland)
+}
+
+void PDBSymDumper::dump(const PDBSymbolCompilandDetails &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandDetails)
+}
+
+void PDBSymDumper::dump(const PDBSymbolCompilandEnv &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandEnv)
+}
+
+void PDBSymDumper::dump(const PDBSymbolCustom &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolCustom)
+}
+
+void PDBSymDumper::dump(const PDBSymbolData &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolData)
+}
+
+void PDBSymDumper::dump(const PDBSymbolExe &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolExe)
+}
+
+void PDBSymDumper::dump(const PDBSymbolFunc &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolFunc)
+}
+
+void PDBSymDumper::dump(const PDBSymbolFuncDebugEnd &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugEnd)
+}
+
+void PDBSymDumper::dump(const PDBSymbolFuncDebugStart &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugStart)
+}
+
+void PDBSymDumper::dump(const PDBSymbolLabel &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolLabel)
+}
+
+void PDBSymDumper::dump(const PDBSymbolPublicSymbol &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolPublicSymbol)
+}
+
+void PDBSymDumper::dump(const PDBSymbolThunk &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolThunk)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeArray &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeArray)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeBaseClass &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBaseClass)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBuiltin)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeCustom &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeCustom)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeDimension &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeDimension)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeEnum &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeEnum)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeFriend &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFriend)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionArg)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeFunctionSig &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionSig)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeManaged &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeManaged)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypePointer &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypePointer)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeTypedef)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeUDT &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeUDT)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeVTable &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTable)
+}
+
+void PDBSymDumper::dump(const PDBSymbolTypeVTableShape &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTableShape)
+}
+
+void PDBSymDumper::dump(const PDBSymbolUnknown &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolUnknown)
+}
+
+void PDBSymDumper::dump(const PDBSymbolUsingNamespace &Symbol) {
+ PDB_SYMDUMP_UNREACHABLE(PDBSymbolUsingNamespace)
+}
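
PDBSymDumper is a visitor base: each PDBSymbol subclass dispatches back into
the matching dump() overload, and a dumper constructed with ShouldRequireImpl
set hits llvm_unreachable for any symbol kind it fails to override. A minimal
client sketch, assuming the overloads are declared virtual in PDBSymDumper.h;
the class and its behavior below are hypothetical, for illustration only:

    class FuncNameDumper : public PDBSymDumper {
    public:
      FuncNameDumper() : PDBSymDumper(/*ShouldRequireImpl=*/false) {}
      // Only functions are interesting; every other symbol kind falls
      // through to the no-op base implementations.
      void dump(const PDBSymbolFunc &Symbol) override { /* print the name */ }
    };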
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
new file mode 100644
index 0000000..f9aaf3a
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -0,0 +1,148 @@
+//===- PDBSymbol.cpp - base class for user-facing symbol types --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolAnnotation.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCustom.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolLabel.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbol::PDBSymbol(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : Session(PDBSession), RawSymbol(std::move(Symbol)) {}
+
+PDBSymbol::~PDBSymbol() {}
+
+#define FACTORY_SYMTAG_CASE(Tag, Type) \
+ case PDB_SymType::Tag: \
+ return std::unique_ptr<PDBSymbol>(new Type(PDBSession, std::move(Symbol)));
+
+std::unique_ptr<PDBSymbol>
+PDBSymbol::create(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol) {
+ switch (Symbol->getSymTag()) {
+ FACTORY_SYMTAG_CASE(Exe, PDBSymbolExe)
+ FACTORY_SYMTAG_CASE(Compiland, PDBSymbolCompiland)
+ FACTORY_SYMTAG_CASE(CompilandDetails, PDBSymbolCompilandDetails)
+ FACTORY_SYMTAG_CASE(CompilandEnv, PDBSymbolCompilandEnv)
+ FACTORY_SYMTAG_CASE(Function, PDBSymbolFunc)
+ FACTORY_SYMTAG_CASE(Block, PDBSymbolBlock)
+ FACTORY_SYMTAG_CASE(Data, PDBSymbolData)
+ FACTORY_SYMTAG_CASE(Annotation, PDBSymbolAnnotation)
+ FACTORY_SYMTAG_CASE(Label, PDBSymbolLabel)
+ FACTORY_SYMTAG_CASE(PublicSymbol, PDBSymbolPublicSymbol)
+ FACTORY_SYMTAG_CASE(UDT, PDBSymbolTypeUDT)
+ FACTORY_SYMTAG_CASE(Enum, PDBSymbolTypeEnum)
+ FACTORY_SYMTAG_CASE(FunctionSig, PDBSymbolTypeFunctionSig)
+ FACTORY_SYMTAG_CASE(PointerType, PDBSymbolTypePointer)
+ FACTORY_SYMTAG_CASE(ArrayType, PDBSymbolTypeArray)
+ FACTORY_SYMTAG_CASE(BuiltinType, PDBSymbolTypeBuiltin)
+ FACTORY_SYMTAG_CASE(Typedef, PDBSymbolTypeTypedef)
+ FACTORY_SYMTAG_CASE(BaseClass, PDBSymbolTypeBaseClass)
+ FACTORY_SYMTAG_CASE(Friend, PDBSymbolTypeFriend)
+ FACTORY_SYMTAG_CASE(FunctionArg, PDBSymbolTypeFunctionArg)
+ FACTORY_SYMTAG_CASE(FuncDebugStart, PDBSymbolFuncDebugStart)
+ FACTORY_SYMTAG_CASE(FuncDebugEnd, PDBSymbolFuncDebugEnd)
+ FACTORY_SYMTAG_CASE(UsingNamespace, PDBSymbolUsingNamespace)
+ FACTORY_SYMTAG_CASE(VTableShape, PDBSymbolTypeVTableShape)
+ FACTORY_SYMTAG_CASE(VTable, PDBSymbolTypeVTable)
+ FACTORY_SYMTAG_CASE(Custom, PDBSymbolCustom)
+ FACTORY_SYMTAG_CASE(Thunk, PDBSymbolThunk)
+ FACTORY_SYMTAG_CASE(CustomType, PDBSymbolTypeCustom)
+ FACTORY_SYMTAG_CASE(ManagedType, PDBSymbolTypeManaged)
+ FACTORY_SYMTAG_CASE(Dimension, PDBSymbolTypeDimension)
+ default:
+ return std::unique_ptr<PDBSymbol>(
+ new PDBSymbolUnknown(PDBSession, std::move(Symbol)));
+ }
+}
+
+#define TRY_DUMP_TYPE(Type) \
+ if (const Type *DerivedThis = dyn_cast<Type>(this)) \
+ Dumper.dump(OS, Indent, *DerivedThis);
+
+#define ELSE_TRY_DUMP_TYPE(Type) else TRY_DUMP_TYPE(Type)
+
+void PDBSymbol::defaultDump(raw_ostream &OS, int Indent) const {
+ RawSymbol->dump(OS, Indent);
+}
+
+PDB_SymType PDBSymbol::getSymTag() const { return RawSymbol->getSymTag(); }
+
+std::unique_ptr<IPDBEnumSymbols> PDBSymbol::findAllChildren() const {
+ return findAllChildren(PDB_SymType::None);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::findAllChildren(PDB_SymType Type) const {
+ return RawSymbol->findChildren(Type);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::findChildren(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags) const {
+ return RawSymbol->findChildren(Type, Name, Flags);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags, uint32_t RVA) const {
+ return RawSymbol->findChildrenByRVA(Type, Name, Flags, RVA);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::findInlineFramesByRVA(uint32_t RVA) const {
+ return RawSymbol->findInlineFramesByRVA(RVA);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbol::getChildStats(TagStats &Stats) const {
+ std::unique_ptr<IPDBEnumSymbols> Result(findAllChildren());
+ Stats.clear();
+ while (auto Child = Result->getNext()) {
+ ++Stats[Child->getSymTag()];
+ }
+ Result->reset();
+ return Result;
+}
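
PDBSymbol::create is the single factory that maps a raw symbol's SymTag onto
the matching concrete wrapper, falling back to PDBSymbolUnknown, so callers
never spell concrete types themselves. A short usage sketch (Session is an
assumed IPDBSession, and getGlobalScope() returning the PDBSymbolExe is
likewise an assumption here):

    TagStats Stats;
    auto GlobalScope = Session.getGlobalScope();
    GlobalScope->getChildStats(Stats);   // histogram keyed by PDB_SymType
    outs() << Stats << "\n";             // printable via the TagStats operator<<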
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
new file mode 100644
index 0000000..a782cad
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolAnnotation.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolAnnotation.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
new file mode 100644
index 0000000..46b0ea5
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -0,0 +1,23 @@
+//===- PDBSymbolBlock.cpp - -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
new file mode 100644
index 0000000..7436914
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolCompiland.cpp - compiland details --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
new file mode 100644
index 0000000..7b351a0
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolCompilandDetails.cpp - compiland details --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolCompilandDetails::PDBSymbolCompilandDetails(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
new file mode 100644
index 0000000..e863ccf
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -0,0 +1,31 @@
+//===- PDBSymbolCompilandEnv.cpp - compiland env variables ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h"
+
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolCompilandEnv::PDBSymbolCompilandEnv(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::string PDBSymbolCompilandEnv::getValue() const {
+  // TODO: Call RawSymbol->getValue() and convert the result to a std::string;
+  // for now this is an unimplemented stub that returns an empty string.
+ return std::string();
+}
+
+void PDBSymbolCompilandEnv::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
new file mode 100644
index 0000000..bd7d9cd
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -0,0 +1,28 @@
+//===- PDBSymbolCustom.cpp - compiler-specific types ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolCustom.h"
+
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolCustom::PDBSymbolCustom(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> CustomSymbol)
+ : PDBSymbol(PDBSession, std::move(CustomSymbol)) {}
+
+void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) {
+ RawSymbol->getDataBytes(bytes);
+}
+
+void PDBSymbolCustom::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
new file mode 100644
index 0000000..a948c2d
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -0,0 +1,27 @@
+//===- PDBSymbolData.cpp - PDB data (e.g. variable) accessors ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolData::PDBSymbolData(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> DataSymbol)
+ : PDBSymbol(PDBSession, std::move(DataSymbol)) {}
+
+std::unique_ptr<PDBSymbol> PDBSymbolData::getType() const {
+ return Session.getSymbolById(getTypeId());
+}
+
+void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
new file mode 100644
index 0000000..c9e34ea
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -0,0 +1,22 @@
+//===- PDBSymbolExe.cpp - ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
new file mode 100644
index 0000000..0aff327
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -0,0 +1,102 @@
+//===- PDBSymbolFunc.cpp - --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+class FunctionArgEnumerator : public IPDBEnumChildren<PDBSymbolData> {
+public:
+ typedef ConcreteSymbolEnumerator<PDBSymbolData> ArgEnumeratorType;
+
+ FunctionArgEnumerator(const IPDBSession &PDBSession,
+ const PDBSymbolFunc &PDBFunc)
+ : Session(PDBSession), Func(PDBFunc) {
+ // Arguments can appear multiple times if they have live range
+ // information, so we only take the first occurrence.
+ std::unordered_set<std::string> SeenNames;
+ auto DataChildren = Func.findAllChildren<PDBSymbolData>();
+ while (auto Child = DataChildren->getNext()) {
+ if (Child->getDataKind() == PDB_DataKind::Param) {
+ std::string Name = Child->getName();
+ if (SeenNames.find(Name) != SeenNames.end())
+ continue;
+ Args.push_back(std::move(Child));
+ SeenNames.insert(Name);
+ }
+ }
+ reset();
+ }
+
+ uint32_t getChildCount() const override { return Args.size(); }
+
+ std::unique_ptr<PDBSymbolData>
+ getChildAtIndex(uint32_t Index) const override {
+ if (Index >= Args.size())
+ return nullptr;
+
+ return Session.getConcreteSymbolById<PDBSymbolData>(
+ Args[Index]->getSymIndexId());
+ }
+
+ std::unique_ptr<PDBSymbolData> getNext() override {
+ if (CurIter == Args.end())
+ return nullptr;
+ const auto &Result = **CurIter;
+ ++CurIter;
+ return Session.getConcreteSymbolById<PDBSymbolData>(Result.getSymIndexId());
+ }
+
+ void reset() override { CurIter = Args.empty() ? Args.end() : Args.begin(); }
+
+ FunctionArgEnumerator *clone() const override {
+ return new FunctionArgEnumerator(Session, Func);
+ }
+
+private:
+ typedef std::vector<std::unique_ptr<PDBSymbolData>> ArgListType;
+ const IPDBSession &Session;
+ const PDBSymbolFunc &Func;
+ ArgListType Args;
+ ArgListType::const_iterator CurIter;
+};
+}
+
+PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::unique_ptr<PDBSymbolTypeFunctionSig> PDBSymbolFunc::getSignature() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeFunctionSig>(getTypeId());
+}
+
+std::unique_ptr<IPDBEnumChildren<PDBSymbolData>>
+PDBSymbolFunc::getArguments() const {
+ return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
+}
+
+std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolFunc::getClassParent() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
+}
+
+void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
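
Since a parameter can appear several times in the raw symbol stream when it
carries live-range records, FunctionArgEnumerator deduplicates by name and
preserves first-seen order. A usage sketch (Func is an assumed PDBSymbolFunc,
and getName() is assumed to be one of the usual forwarded accessors):

    auto Args = Func.getArguments();
    while (auto Arg = Args->getNext())
      outs() << Arg->getName() << "\n";   // each parameter reported once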
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
new file mode 100644
index 0000000..8e559b3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolFuncDebugEnd.cpp - ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
new file mode 100644
index 0000000..ff4254f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolFuncDebugStart.cpp - ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
new file mode 100644
index 0000000..f39dee8
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -0,0 +1,22 @@
+//===- PDBSymbolLabel.cpp - -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolLabel.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
new file mode 100644
index 0000000..bd6fe89
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolPublicSymbol.cpp - ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolPublicSymbol::PDBSymbolPublicSymbol(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
new file mode 100644
index 0000000..733eb5f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -0,0 +1,22 @@
+//===- PDBSymbolThunk.cpp - -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
new file mode 100644
index 0000000..1980965
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -0,0 +1,29 @@
+//===- PDBSymbolTypeArray.cpp - ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
+
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeArray::PDBSymbolTypeArray(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::unique_ptr<PDBSymbol> PDBSymbolTypeArray::getElementType() const {
+ return Session.getSymbolById(getTypeId());
+}
+
+void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
new file mode 100644
index 0000000..c1f0d2f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeBaseClass.cpp - -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
new file mode 100644
index 0000000..b302b66
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolTypeBuiltin.cpp - ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
new file mode 100644
index 0000000..cc391f1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeCustom.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
new file mode 100644
index 0000000..1e19d0b
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeDimension.cpp - -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeDimension::PDBSymbolTypeDimension(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
new file mode 100644
index 0000000..8dd26a3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -0,0 +1,34 @@
+//===- PDBSymbolTypeEnum.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolTypeEnum::getClassParent() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
+}
+
+std::unique_ptr<PDBSymbolTypeBuiltin>
+PDBSymbolTypeEnum::getUnderlyingType() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(getTypeId());
+}
+
+void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
new file mode 100644
index 0000000..d332660
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeFriend.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
new file mode 100644
index 0000000..f8f71ea
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolTypeFunctionArg.cpp - --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
new file mode 100644
index 0000000..af3563f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -0,0 +1,90 @@
+//===- PDBSymbolTypeFunctionSig.cpp - --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+class FunctionArgEnumerator : public IPDBEnumSymbols {
+public:
+ typedef ConcreteSymbolEnumerator<PDBSymbolTypeFunctionArg> ArgEnumeratorType;
+
+ FunctionArgEnumerator(const IPDBSession &PDBSession,
+ const PDBSymbolTypeFunctionSig &Sig)
+ : Session(PDBSession),
+ Enumerator(Sig.findAllChildren<PDBSymbolTypeFunctionArg>()) {}
+
+ FunctionArgEnumerator(const IPDBSession &PDBSession,
+ std::unique_ptr<ArgEnumeratorType> ArgEnumerator)
+ : Session(PDBSession), Enumerator(std::move(ArgEnumerator)) {}
+
+ uint32_t getChildCount() const override {
+ return Enumerator->getChildCount();
+ }
+
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override {
+ auto FunctionArgSymbol = Enumerator->getChildAtIndex(Index);
+ if (!FunctionArgSymbol)
+ return nullptr;
+ return Session.getSymbolById(FunctionArgSymbol->getTypeId());
+ }
+
+ std::unique_ptr<PDBSymbol> getNext() override {
+ auto FunctionArgSymbol = Enumerator->getNext();
+ if (!FunctionArgSymbol)
+ return nullptr;
+ return Session.getSymbolById(FunctionArgSymbol->getTypeId());
+ }
+
+ void reset() override { Enumerator->reset(); }
+
+ MyType *clone() const override {
+ std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone());
+ return new FunctionArgEnumerator(Session, std::move(Clone));
+ }
+
+private:
+ const IPDBSession &Session;
+ std::unique_ptr<ArgEnumeratorType> Enumerator;
+};
+}
+
+PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getReturnType() const {
+ return Session.getSymbolById(getTypeId());
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+PDBSymbolTypeFunctionSig::getArguments() const {
+ return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
+}
+
+std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getClassParent() const {
+ uint32_t ClassId = getClassParentId();
+ if (ClassId == 0)
+ return nullptr;
+ return Session.getSymbolById(ClassId);
+}
+
+void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
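
Note that this enumerator resolves each PDBSymbolTypeFunctionArg to its
underlying type via getTypeId(), so getArguments() yields the parameter types
directly rather than the argument symbols. A sketch (Sig is an assumed
PDBSymbolTypeFunctionSig):

    auto ReturnType = Sig.getReturnType();
    auto ArgTypes = Sig.getArguments();
    outs() << ArgTypes->getChildCount() << " parameter type(s)\n";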
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
new file mode 100644
index 0000000..a7fac30
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeManaged.cpp - -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeManaged::PDBSymbolTypeManaged(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
new file mode 100644
index 0000000..082ed83
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -0,0 +1,29 @@
+//===- PDBSymbolTypePointer.cpp - -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
+
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypePointer::PDBSymbolTypePointer(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+std::unique_ptr<PDBSymbol> PDBSymbolTypePointer::getPointeeType() const {
+ return Session.getSymbolById(getTypeId());
+}
+
+void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
new file mode 100644
index 0000000..5a42699
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolTypeTypedef.cpp ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeTypedef::PDBSymbolTypeTypedef(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
new file mode 100644
index 0000000..2b5da29
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -0,0 +1,22 @@
+//===- PDBSymbolTypeUDT.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
new file mode 100644
index 0000000..b465d023c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -0,0 +1,24 @@
+//===- PDBSymbolTypeVTable.cpp - --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
new file mode 100644
index 0000000..16052f1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolTypeVTableShape.cpp - ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
new file mode 100644
index 0000000..48dc115
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -0,0 +1,23 @@
+//===- PDBSymbolUnknown.cpp -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolUnknown::PDBSymbolUnknown(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolUnknown::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
new file mode 100644
index 0000000..6cf13de
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -0,0 +1,25 @@
+//===- PDBSymbolUsingNamespace.cpp ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
+
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+
+#include <utility>
+
+using namespace llvm;
+
+PDBSymbolUsingNamespace::PDBSymbolUsingNamespace(
+ const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
+ : PDBSymbol(PDBSession, std::move(Symbol)) {}
+
+void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
+}
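The PDBSymbol*.cpp files added above all implement the same double-dispatch hook: the concrete symbol hands itself back to a PDBSymDumper, so a client dumper overrides only the overloads it cares about. A minimal sketch against the interface from this revision (the ShouldRequireImpl flag name is an assumption):

  #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
  #include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
  #include "llvm/Support/raw_ostream.h"

  // Reacts to typedef symbols only; every other dump() overload keeps the
  // base-class default behavior.
  class TypedefPrinter : public llvm::PDBSymDumper {
  public:
    TypedefPrinter() : PDBSymDumper(/*ShouldRequireImpl=*/false) {}
    void dump(const llvm::PDBSymbolTypeTypedef &Sym) override {
      llvm::outs() << "typedef symbol, id = " << Sym.getSymIndexId() << "\n";
    }
  };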
diff --git a/contrib/llvm/lib/DebugInfo/module.modulemap b/contrib/llvm/lib/DebugInfo/module.modulemap
deleted file mode 100644
index 1fe5ab1..0000000
--- a/contrib/llvm/lib/DebugInfo/module.modulemap
+++ /dev/null
@@ -1 +0,0 @@
-module DebugInfo { requires cplusplus umbrella "." module * { export * } }
diff --git a/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h b/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h
deleted file mode 100644
index 6f17427..0000000
--- a/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Common functionality for JITEventListener implementations
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef EVENT_LISTENER_COMMON_H
-#define EVENT_LISTENER_COMMON_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Path.h"
-
-namespace llvm {
-
-namespace jitprofiling {
-
-class FilenameCache {
- // Holds the filename of each Scope, so that we can pass a null-terminated
- // string into oprofile.
- DenseMap<const MDNode *, std::string> Filenames;
- DenseMap<const MDNode *, std::string> Paths;
-
- public:
- const char *getFilename(MDNode *Scope) {
- assert(Scope->isResolved() && "Expected Scope to be resolved");
- std::string &Filename = Filenames[Scope];
- if (Filename.empty()) {
- DIScope DIScope(Scope);
- Filename = DIScope.getFilename();
- }
- return Filename.c_str();
- }
-
- const char *getFullPath(MDNode *Scope) {
- assert(Scope->isResolved() && "Expected Scope to be resolved");
- std::string &P = Paths[Scope];
- if (P.empty()) {
- DIScope DIScope(Scope);
- StringRef DirName = DIScope.getDirectory();
- StringRef FileName = DIScope.getFilename();
- SmallString<256> FullPath;
- if (DirName != "." && DirName != "") {
- FullPath = DirName;
- }
- if (FileName != "") {
- sys::path::append(FullPath, FileName);
- }
- P = FullPath.str();
- }
- return P.c_str();
- }
-};
-
-} // namespace jitprofiling
-
-} // namespace llvm
-
-#endif //EVENT_LISTENER_COMMON_H
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 32d20ea..d7038fd 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -13,13 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
@@ -44,16 +47,22 @@ STATISTIC(NumGlobals , "Number of global vars initialized");
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
std::unique_ptr<Module> M, std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MCJMM,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
+
+ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)(
+ std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM) = nullptr;
+
ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
std::string *ErrorStr) =nullptr;
void JITEventListener::anchor() {}
ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
- : EEState(*this),
- LazyFunctionCreator(nullptr) {
+ : LazyFunctionCreator(nullptr) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -137,45 +146,60 @@ bool ExecutionEngine::removeModule(Module *M) {
Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
- if (Function *F = Modules[i]->getFunction(FnName))
+ Function *F = Modules[i]->getFunction(FnName);
+ if (F && !F->isDeclaration())
return F;
}
return nullptr;
}
-void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) {
- GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
- void *OldVal;
+uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
+ GlobalAddressMapTy::iterator I = GlobalAddressMap.find(Name);
+ uint64_t OldVal;
// FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
// GlobalAddressMap.
if (I == GlobalAddressMap.end())
- OldVal = nullptr;
+ OldVal = 0;
else {
+ GlobalAddressReverseMap.erase(I->second);
OldVal = I->second;
GlobalAddressMap.erase(I);
}
- GlobalAddressReverseMap.erase(OldVal);
return OldVal;
}
+std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+ Mangler Mang(DL);
+ SmallString<128> FullName;
+ Mang.getNameWithPrefix(FullName, GV->getName());
+ return FullName.str();
+}
+
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
+ addGlobalMapping(getMangledName(GV), (uint64_t) Addr);
+}
+
+void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) {
+ MutexGuard locked(lock);
+
+ assert(!Name.empty() && "Empty GlobalMapping symbol name!");
- DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
- << "\' to [" << Addr << "]\n";);
- void *&CurVal = EEState.getGlobalAddressMap()[GV];
+ DEBUG(dbgs() << "JIT: Map \'" << Name << "\' to [" << Addr << "]\n";);
+ uint64_t &CurVal = EEState.getGlobalAddressMap()[Name];
assert((!CurVal || !Addr) && "GlobalMapping already established!");
CurVal = Addr;
// If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap().empty()) {
- AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap()[Addr];
- assert((!V || !GV) && "GlobalMapping already established!");
- V = GV;
+ std::string &V = EEState.getGlobalAddressReverseMap()[CurVal];
+ assert((!V.empty() || !Name.empty()) &&
+ "GlobalMapping already established!");
+ V = Name;
}
}
@@ -190,13 +214,19 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
- EEState.RemoveMapping(FI);
+ EEState.RemoveMapping(getMangledName(FI));
for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
GI != GE; ++GI)
- EEState.RemoveMapping(GI);
+ EEState.RemoveMapping(getMangledName(GI));
}
-void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
+uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV,
+ void *Addr) {
+ MutexGuard locked(lock);
+ return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr);
+}
+
+uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) {
MutexGuard locked(lock);
ExecutionEngineState::GlobalAddressMapTy &Map =
@@ -204,10 +234,10 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
// Deleting from the mapping?
if (!Addr)
- return EEState.RemoveMapping(GV);
+ return EEState.RemoveMapping(Name);
- void *&CurVal = Map[GV];
- void *OldVal = CurVal;
+ uint64_t &CurVal = Map[Name];
+ uint64_t OldVal = CurVal;
if (CurVal && !EEState.getGlobalAddressReverseMap().empty())
EEState.getGlobalAddressReverseMap().erase(CurVal);
@@ -215,20 +245,35 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
// If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap().empty()) {
- AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap()[Addr];
- assert((!V || !GV) && "GlobalMapping already established!");
- V = GV;
+ std::string &V = EEState.getGlobalAddressReverseMap()[CurVal];
+ assert((!V.empty() || !Name.empty()) &&
+ "GlobalMapping already established!");
+ V = Name;
}
return OldVal;
}
-void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) {
MutexGuard locked(lock);
-
+ uint64_t Address = 0;
ExecutionEngineState::GlobalAddressMapTy::iterator I =
- EEState.getGlobalAddressMap().find(GV);
- return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr;
+ EEState.getGlobalAddressMap().find(S);
+ if (I != EEState.getGlobalAddressMap().end())
+ Address = I->second;
+ return Address;
+}
+
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) {
+ MutexGuard locked(lock);
+ if (void* Address = (void *) getAddressToGlobalIfAvailable(S))
+ return Address;
+ return nullptr;
+}
+
+void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
+ MutexGuard locked(lock);
+ return getPointerToGlobalIfAvailable(getMangledName(GV));
}
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
@@ -237,15 +282,25 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
// If we haven't computed the reverse mapping yet, do so first.
if (EEState.getGlobalAddressReverseMap().empty()) {
for (ExecutionEngineState::GlobalAddressMapTy::iterator
- I = EEState.getGlobalAddressMap().begin(),
- E = EEState.getGlobalAddressMap().end(); I != E; ++I)
+ I = EEState.getGlobalAddressMap().begin(),
+ E = EEState.getGlobalAddressMap().end(); I != E; ++I) {
+ StringRef Name = I->first();
+ uint64_t Addr = I->second;
EEState.getGlobalAddressReverseMap().insert(std::make_pair(
- I->second, I->first));
+ Addr, Name));
+ }
}
- std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
- EEState.getGlobalAddressReverseMap().find(Addr);
- return I != EEState.getGlobalAddressReverseMap().end() ? I->second : nullptr;
+ std::map<uint64_t, std::string>::iterator I =
+ EEState.getGlobalAddressReverseMap().find((uint64_t) Addr);
+
+ if (I != EEState.getGlobalAddressReverseMap().end()) {
+ StringRef Name = I->second;
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ if (GlobalValue *GV = Modules[i]->getNamedValue(Name))
+ return GV;
+ }
+ return nullptr;
}
namespace {
@@ -393,35 +448,42 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
return runFunction(Fn, GVArgs).IntVal.getZExtValue();
}
+EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {}
+
EngineBuilder::EngineBuilder(std::unique_ptr<Module> M)
- : M(std::move(M)), MCJMM(nullptr) {
- InitEngine();
+ : M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr),
+ OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr),
+ RelocModel(Reloc::Default), CMModel(CodeModel::JITDefault),
+ UseOrcMCJITReplacement(false) {
+// IR module verification is enabled by default in debug builds, and disabled
+// by default in release builds.
+#ifndef NDEBUG
+ VerifyModules = true;
+#else
+ VerifyModules = false;
+#endif
}
-EngineBuilder::~EngineBuilder() {}
+EngineBuilder::~EngineBuilder() = default;
EngineBuilder &EngineBuilder::setMCJITMemoryManager(
std::unique_ptr<RTDyldMemoryManager> mcjmm) {
- MCJMM = std::move(mcjmm);
+ auto SharedMM = std::shared_ptr<RTDyldMemoryManager>(std::move(mcjmm));
+ MemMgr = SharedMM;
+ Resolver = SharedMM;
return *this;
}
-void EngineBuilder::InitEngine() {
- WhichEngine = EngineKind::Either;
- ErrorStr = nullptr;
- OptLevel = CodeGenOpt::Default;
- MCJMM = nullptr;
- Options = TargetOptions();
- RelocModel = Reloc::Default;
- CMModel = CodeModel::JITDefault;
+EngineBuilder&
+EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) {
+ MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM));
+ return *this;
+}
-// IR module verification is enabled by default in debug builds, and disabled
-// by default in release builds.
-#ifndef NDEBUG
- VerifyModules = true;
-#else
- VerifyModules = false;
-#endif
+EngineBuilder&
+EngineBuilder::setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR) {
+ Resolver = std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(SR));
+ return *this;
}
ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
@@ -435,7 +497,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// If the user specified a memory manager but didn't specify which engine to
// create, we assume they only want the JIT, and we fail if they only want
// the interpreter.
- if (MCJMM) {
+ if (MemMgr) {
if (WhichEngine & EngineKind::JIT)
WhichEngine = EngineKind::JIT;
else {
@@ -456,9 +518,15 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
}
ExecutionEngine *EE = nullptr;
- if (ExecutionEngine::MCJITCtor)
- EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MCJMM),
- std::move(TheTM));
+ if (ExecutionEngine::OrcMCJITReplacementCtor && UseOrcMCJITReplacement) {
+ EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MemMgr),
+ std::move(Resolver),
+ std::move(TheTM));
+ EE->addModule(std::move(M));
+ } else if (ExecutionEngine::MCJITCtor)
+ EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MemMgr),
+ std::move(Resolver), std::move(TheTM));
+
if (EE) {
EE->setVerifyModules(VerifyModules);
return EE;
@@ -488,7 +556,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
return getPointerToFunction(F);
MutexGuard locked(lock);
- if (void *P = EEState.getGlobalAddressMap()[GV])
+ if (void* P = getPointerToGlobalIfAvailable(GV))
return P;
// Global variable might have been added since interpreter started.
@@ -498,7 +566,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
else
llvm_unreachable("Global hasn't had an address allocated yet!");
- return EEState.getGlobalAddressMap()[GV];
+ return getPointerToGlobalIfAvailable(GV);
}
/// \brief Converts a Constant* into a GenericValue, including handling of
@@ -1270,25 +1338,3 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
NumInitBytes += (unsigned)GVSize;
++NumGlobals;
}
-
-ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
- : EE(EE), GlobalAddressMap(this) {
-}
-
-sys::Mutex *
-ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
- return &EES->EE.lock;
-}
-
-void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
- const GlobalValue *Old) {
- void *OldVal = EES->GlobalAddressMap.lookup(Old);
- EES->GlobalAddressReverseMap.erase(OldVal);
-}
-
-void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
- const GlobalValue *,
- const GlobalValue *) {
- llvm_unreachable("The ExecutionEngine doesn't know how to handle a"
- " RAUW on a value it has a global mapping for.");
-}
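The ExecutionEngine hunks above re-key the global address maps by mangled symbol name (std::string to uint64_t) rather than GlobalValue* to void*, which is why the ValueMap callback plumbing at the end could be deleted outright. A hedged sketch of the new string-keyed overloads (EE stands for any live ExecutionEngine; the symbol name is illustrative):

  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include <cstdint>

  extern "C" int my_native_impl(int X) { return X + 1; }

  void mapNativeSymbol(llvm::ExecutionEngine &EE) {
    // Mappings are now recorded under the (mangled) name as a raw address.
    EE.addGlobalMapping("my_native_impl",
                        reinterpret_cast<std::uint64_t>(&my_native_impl));
    // Passing a null address removes the entry, per RemoveMapping above.
    EE.updateGlobalMapping("my_native_impl", 0);
  }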
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index aaa53f0..ce1ab59 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -177,11 +177,22 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
memcpy(&options, PassedOptions, SizeOfPassedOptions);
TargetOptions targetOptions;
- targetOptions.NoFramePointerElim = options.NoFramePointerElim;
targetOptions.EnableFastISel = options.EnableFastISel;
+ std::unique_ptr<Module> Mod(unwrap(M));
+
+ if (Mod)
+ // Set function attribute "no-frame-pointer-elim" based on
+ // NoFramePointerElim.
+ for (auto &F : *Mod) {
+ auto Attrs = F.getAttributes();
+ auto Value = options.NoFramePointerElim ? "true" : "false";
+ Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ "no-frame-pointer-elim", Value);
+ F.setAttributes(Attrs);
+ }
std::string Error;
- EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
+ EngineBuilder builder(std::move(Mod));
builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
.setOptLevel((CodeGenOpt::Level)options.OptLevel)
@@ -351,7 +362,7 @@ class SimpleBindingMemoryManager : public RTDyldMemoryManager {
public:
SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions,
void *Opaque);
- virtual ~SimpleBindingMemoryManager();
+ ~SimpleBindingMemoryManager() override;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
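The bindings change above stops feeding NoFramePointerElim through TargetOptions and instead stamps a per-function "no-frame-pointer-elim" string attribute before building the engine. From the C API the option is set exactly as before; a sketch (M is assumed to be an existing LLVMModuleRef):

  #include "llvm-c/ExecutionEngine.h"

  int makeJIT(LLVMModuleRef M, LLVMExecutionEngineRef *EE, char **Err) {
    LLVMMCJITCompilerOptions Opts;
    LLVMInitializeMCJITCompilerOptions(&Opts, sizeof(Opts));
    Opts.NoFramePointerElim = 1; /* now lowered to a function attribute */
    return LLVMCreateMCJITCompilerForModule(EE, M, &Opts, sizeof(Opts), Err);
  }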
diff --git a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 8ef878c..1ab6203 100644
--- a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -103,7 +103,7 @@ public:
/// Unregisters each object that was previously registered and releases all
/// internal resources.
- virtual ~GDBJITRegistrationListener();
+ ~GDBJITRegistrationListener() override;
/// Creates an entry in the JIT registry for the buffer @p Object,
/// which must contain an object file in executable memory with any
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index ee9096b..ec67019 100644
--- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "EventListenerCommon.h"
#include "IntelJITEventsWrapper.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -29,7 +29,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-using namespace llvm::jitprofiling;
using namespace llvm::object;
#define DEBUG_TYPE "amplifier-jit-event-listener"
@@ -41,7 +40,6 @@ class IntelJITEventListener : public JITEventListener {
std::unique_ptr<IntelJITEventsWrapper> Wrapper;
MethodIDMap MethodIDs;
- FilenameCache Filenames;
typedef SmallVector<const void *, 64> MethodAddressVector;
typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
@@ -105,7 +103,7 @@ void IntelJITEventListener::NotifyObjectEmitted(
// Get the address of the object image for use as a unique identifier
const void* ObjData = DebugObj.getData().data();
- DIContext* Context = DIContext::getDWARFContext(DebugObj);
+ DIContext* Context = new DWARFContextInMemory(DebugObj);
MethodAddressVector Functions;
// Use symbol info to iterate functions in the object.
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
index 7b507de..e966889 100644
--- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -24,6 +24,7 @@
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#include <pthread.h>
#include <dlfcn.h>
+#include <stdint.h>
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#include <malloc.h>
#include <stdlib.h>
@@ -371,7 +372,7 @@ static int loadiJIT_Funcs()
#if ITT_PLATFORM==ITT_PLATFORM_WIN
FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent");
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent");
+ FUNC_NotifyEvent = (TPNotify)(intptr_t)dlsym(m_libHandle, "NotifyEvent");
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
if (!FUNC_NotifyEvent)
{
@@ -382,7 +383,7 @@ static int loadiJIT_Funcs()
#if ITT_PLATFORM==ITT_PLATFORM_WIN
FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize");
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
- FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize");
+ FUNC_Initialize = (TPInitialize)(intptr_t)dlsym(m_libHandle, "Initialize");
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
if (!FUNC_Initialize)
{
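The (intptr_t) casts added above work around ISO C's rule that an object pointer (dlsym's void *) may not be converted directly to a function pointer; hopping through an integer type keeps -pedantic builds quiet. The same idiom in C++ terms, with an illustrative signature:

  #include <dlfcn.h>
  #include <cstdint>

  typedef int (*NotifyFn)(void *); // illustrative, not the real TPNotify
  NotifyFn lookupNotify(void *LibHandle) {
    return reinterpret_cast<NotifyFn>(
        reinterpret_cast<std::intptr_t>(dlsym(LibHandle, "NotifyEvent")));
  }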
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 93bb2d1..a26740b 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -315,7 +316,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
#define IMPLEMENT_VECTOR_FCMP(OP) \
case Type::VectorTyID: \
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
IMPLEMENT_VECTOR_FCMP_T(OP, Float); \
} else { \
IMPLEMENT_VECTOR_FCMP_T(OP, Double); \
@@ -362,7 +363,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \
if (TY->isVectorTy()) { \
- if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
+ if (cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \
} else { \
MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \
@@ -464,14 +465,14 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
return Dest; \
}
-#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \
- if (TY->isVectorTy()) { \
- GenericValue DestMask = Dest; \
- Dest = _FUNC(Src1, Src2, Ty); \
- for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \
- if (DestMask.AggregateVal[_i].IntVal == true) \
- Dest.AggregateVal[_i].IntVal = APInt(1,true); \
- return Dest; \
+#define IMPLEMENT_VECTOR_UNORDERED(TY, X, Y, FUNC) \
+ if (TY->isVectorTy()) { \
+ GenericValue DestMask = Dest; \
+ Dest = FUNC(Src1, Src2, Ty); \
+ for (size_t _i = 0; _i < Src1.AggregateVal.size(); _i++) \
+ if (DestMask.AggregateVal[_i].IntVal == true) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, true); \
+ return Dest; \
}
static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
@@ -535,7 +536,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
Dest.AggregateVal.resize( Src1.AggregateVal.size() );
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
Dest.AggregateVal[_i].IntVal = APInt(1,
( (Src1.AggregateVal[_i].FloatVal ==
@@ -566,7 +567,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
if(Ty->isVectorTy()) {
assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
Dest.AggregateVal.resize( Src1.AggregateVal.size() );
- if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
Dest.AggregateVal[_i].IntVal = APInt(1,
( (Src1.AggregateVal[_i].FloatVal !=
@@ -712,10 +713,10 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
// Macros to choose appropriate TY: float or double and run operation
// execution
#define FLOAT_VECTOR_OP(OP) { \
- if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
FLOAT_VECTOR_FUNCTION(OP, FloatVal) \
else { \
- if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \
else { \
dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \
@@ -744,12 +745,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::FMul: FLOAT_VECTOR_OP(*) break;
case Instruction::FDiv: FLOAT_VECTOR_OP(/) break;
case Instruction::FRem:
- if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
R.AggregateVal[i].FloatVal =
fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
else {
- if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
R.AggregateVal[i].DoubleVal =
fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
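The dyn_cast-to-cast swaps above encode a standard LLVM idiom: dyn_cast<> may return null and its result must be checked before use, while cast<> asserts on mismatch and is the right tool once isVectorTy() has already guaranteed the type. A self-contained sketch of the distinction:

  #include "llvm/IR/DerivedTypes.h"
  #include <cassert>
  using namespace llvm;

  static bool isFloatVector(Type *Ty) {
    if (auto *VT = dyn_cast<VectorType>(Ty)) // may be null; must check
      return VT->getElementType()->isFloatTy();
    return false;
  }

  static bool isFloatVectorKnown(Type *Ty) {
    assert(Ty->isVectorTy() && "caller guarantees a vector type");
    return cast<VectorType>(Ty)->getElementType()->isFloatTy(); // asserts
  }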
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index b022101..e2fe065 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -95,16 +95,15 @@ static ExFunc lookupFunction(const Function *F) {
FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
- ExtName += "_" + F->getName().str();
+ ExtName += ("_" + F->getName()).str();
sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = (*FuncNames)[ExtName];
if (!FnPtr)
- FnPtr = (*FuncNames)["lle_X_" + F->getName().str()];
+ FnPtr = (*FuncNames)[("lle_X_" + F->getName()).str()];
if (!FnPtr) // Try calling a generic function... if it exists...
- FnPtr = (ExFunc)(intptr_t)
- sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
- F->getName().str());
+ FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ ("lle_X_" + F->getName()).str());
if (FnPtr)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
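The rewrite above moves the concatenations onto Twine: ("lle_X_" + F->getName()) builds a lazy expression and a single .str() call materializes one std::string, instead of first allocating a temporary via F->getName().str(). A sketch of the idiom:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include <string>

  std::string prefixedName(llvm::StringRef FnName) {
    // One std::string is produced; the + chain itself allocates nothing.
    return ("lle_X_" + FnName).str();
  }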
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2be9c59..0dc0463 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -108,7 +108,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
public:
explicit Interpreter(std::unique_ptr<Module> M);
- ~Interpreter();
+ ~Interpreter() override;
/// runAtExitHandlers - Run any functions registered by the program's calls to
/// atexit(3), which we intercept and store in AtExitHandlers.
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index f2d53f5..7e37afe 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCJIT.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/MCJIT.h"
@@ -15,18 +16,16 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MutexGuard.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -43,26 +42,35 @@ static struct RegisterJIT {
extern "C" void LLVMLinkInMCJIT() {
}
-ExecutionEngine *MCJIT::createJIT(std::unique_ptr<Module> M,
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MemMgr,
- std::unique_ptr<TargetMachine> TM) {
+ExecutionEngine*
+MCJIT::createJIT(std::unique_ptr<Module> M,
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM) {
// Try to register the program as a source of symbols to resolve against.
//
// FIXME: Don't do this here.
sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
- std::unique_ptr<RTDyldMemoryManager> MM = std::move(MemMgr);
- if (!MM)
- MM = std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager());
+ if (!MemMgr || !Resolver) {
+ auto RTDyldMM = std::make_shared<SectionMemoryManager>();
+ if (!MemMgr)
+ MemMgr = RTDyldMM;
+ if (!Resolver)
+ Resolver = RTDyldMM;
+ }
- return new MCJIT(std::move(M), std::move(TM), std::move(MM));
+ return new MCJIT(std::move(M), std::move(TM), std::move(MemMgr),
+ std::move(Resolver));
}
MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
- std::unique_ptr<RTDyldMemoryManager> MM)
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
: ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr),
- MemMgr(this, std::move(MM)), Dyld(&MemMgr), ObjCache(nullptr) {
+ MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)),
+ Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) {
// FIXME: We are managing our modules, so we do not want the base class
// ExecutionEngine to manage them as well. To avoid double destruction
// of the first (and only) module added in ExecutionEngine constructor
@@ -77,7 +85,7 @@ MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
Modules.clear();
OwnedModules.addModule(std::move(First));
- setDataLayout(TM->getSubtargetImpl()->getDataLayout());
+ setDataLayout(TM->getDataLayout());
RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
}
@@ -137,10 +145,9 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
// MCJIT instance, since these conditions are tested by our caller,
// generateCodeForModule.
- PassManager PM;
+ legacy::PassManager PM;
- M->setDataLayout(TM->getSubtargetImpl()->getDataLayout());
- PM.add(new DataLayoutPass());
+ M->setDataLayout(*TM->getDataLayout());
// The RuntimeDyld will take ownership of this shortly
SmallVector<char, 4096> ObjBufferSV;
@@ -224,7 +231,7 @@ void MCJIT::finalizeLoadedModules() {
Dyld.registerEHFrames();
// Set page permissions.
- MemMgr.finalizeMemory();
+ MemMgr->finalizeMemory();
}
// FIXME: Rename this.
@@ -256,11 +263,11 @@ void MCJIT::finalizeModule(Module *M) {
finalizeLoadedModules();
}
-uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) {
- Mangler Mang(TM->getSubtargetImpl()->getDataLayout());
+RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
+ Mangler Mang(TM->getDataLayout());
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, Name);
- return Dyld.getSymbolLoadAddress(FullName);
+ return Dyld.getSymbol(FullName);
}
Module *MCJIT::findModuleForSymbol(const std::string &Name,
@@ -287,14 +294,17 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
}
uint64_t MCJIT::getSymbolAddress(const std::string &Name,
- bool CheckFunctionsOnly)
-{
+ bool CheckFunctionsOnly) {
+ return findSymbol(Name, CheckFunctionsOnly).getAddress();
+}
+
+RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name,
+ bool CheckFunctionsOnly) {
MutexGuard locked(lock);
// First, check to see if we already have this symbol.
- uint64_t Addr = getExistingSymbolAddress(Name);
- if (Addr)
- return Addr;
+ if (auto Sym = findExistingSymbol(Name))
+ return Sym;
for (object::OwningBinary<object::Archive> &OB : Archives) {
object::Archive *A = OB.getBinary();
@@ -313,9 +323,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
// This causes the object file to be loaded.
addObjectFile(std::move(OF));
// The address should be here now.
- Addr = getExistingSymbolAddress(Name);
- if (Addr)
- return Addr;
+ if (auto Sym = findExistingSymbol(Name))
+ return Sym;
}
}
}
@@ -326,15 +335,18 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
generateCodeForModule(M);
// Check the RuntimeDyld table again, it should be there now.
- return getExistingSymbolAddress(Name);
+ return findExistingSymbol(Name);
}
// If a LazyFunctionCreator is installed, use it to get/create the function.
// FIXME: Should we instead have a LazySymbolCreator callback?
- if (LazyFunctionCreator)
- Addr = (uint64_t)LazyFunctionCreator(Name);
+ if (LazyFunctionCreator) {
+ auto Addr = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(LazyFunctionCreator(Name)));
+ return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
+ }
- return Addr;
+ return nullptr;
}
uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
@@ -357,7 +369,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
void *MCJIT::getPointerToFunction(Function *F) {
MutexGuard locked(lock);
- Mangler Mang(TM->getSubtargetImpl()->getDataLayout());
+ Mangler Mang(TM->getDataLayout());
SmallString<128> Name;
TM->getNameWithPrefix(Name, F, Mang);
@@ -386,7 +398,7 @@ void *MCJIT::getPointerToFunction(Function *F) {
//
// This is the accessor for the target address, so make sure to check the
// load address of the symbol, not the local address.
- return (void*)Dyld.getSymbolLoadAddress(Name);
+ return (void*)Dyld.getSymbol(Name).getAddress();
}
void MCJIT::runStaticConstructorsDestructorsInModulePtrSet(
@@ -410,7 +422,8 @@ Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E) {
for (; I != E; ++I) {
- if (Function *F = (*I)->getFunction(FnName))
+ Function *F = (*I)->getFunction(FnName);
+ if (F && !F->isDeclaration())
return F;
}
return nullptr;
@@ -530,7 +543,9 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
if (!isSymbolSearchingDisabled()) {
- void *ptr = MemMgr.getPointerToNamedFunction(Name, false);
+ void *ptr =
+ reinterpret_cast<void*>(
+ static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress()));
if (ptr)
return ptr;
}
@@ -568,7 +583,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj,
const RuntimeDyld::LoadedObjectInfo &L) {
MutexGuard locked(lock);
- MemMgr.notifyObjectLoaded(this, Obj);
+ MemMgr->notifyObjectLoaded(this, Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
EventListeners[I]->NotifyObjectEmitted(Obj, L);
}
@@ -580,15 +595,16 @@ void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) {
L->NotifyFreeingObject(Obj);
}
-uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
- uint64_t Result = ParentEngine->getSymbolAddress(Name, false);
+RuntimeDyld::SymbolInfo
+LinkingSymbolResolver::findSymbol(const std::string &Name) {
+ auto Result = ParentEngine.findSymbol(Name, false);
  // If the symbol wasn't found and it begins with an underscore, try again
// without the underscore.
if (!Result && Name[0] == '_')
- Result = ParentEngine->getSymbolAddress(Name.substr(1), false);
+ Result = ParentEngine.findSymbol(Name.substr(1), false);
if (Result)
return Result;
- if (ParentEngine->isSymbolSearchingDisabled())
- return 0;
- return ClientMM->getSymbolAddress(Name);
+ if (ParentEngine.isSymbolSearchingDisabled())
+ return nullptr;
+ return ClientResolver->findSymbol(Name);
}
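Throughout this file, bare uint64_t addresses give way to RuntimeDyld::SymbolInfo, which is testable in boolean context and carries JITSymbolFlags alongside the address. A sketch of consuming one:

  #include "llvm/ExecutionEngine/RuntimeDyld.h"
  #include <cstdint>

  std::uint64_t addressOrZero(llvm::RuntimeDyld::SymbolInfo Sym) {
    if (Sym) // a null SymbolInfo converts to false
      return Sym.getAddress();
    return 0;
  }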
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index f55dd60..59e9949 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,12 +10,13 @@
#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H
#define LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H
-#include "ObjectBuffer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/IR/Module.h"
@@ -26,59 +27,23 @@ class MCJIT;
// functions across modules that it owns. It aggregates the memory manager
// that is passed in to the MCJIT constructor and defers most functionality
// to that object.
-class LinkingMemoryManager : public RTDyldMemoryManager {
+class LinkingSymbolResolver : public RuntimeDyld::SymbolResolver {
public:
- LinkingMemoryManager(MCJIT *Parent,
- std::unique_ptr<RTDyldMemoryManager> MM)
- : ParentEngine(Parent), ClientMM(std::move(MM)) {}
+ LinkingSymbolResolver(MCJIT &Parent,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
+ : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {}
- uint64_t getSymbolAddress(const std::string &Name) override;
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override;
- // Functions deferred to client memory manager
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName);
- }
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- return ClientMM->allocateDataSection(Size, Alignment,
- SectionID, SectionName, IsReadOnly);
- }
-
- void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) override {
- return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
- }
-
- bool needsToReserveAllocationSpace() override {
- return ClientMM->needsToReserveAllocationSpace();
- }
-
- void notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &Obj) override {
- ClientMM->notifyObjectLoaded(EE, Obj);
- }
-
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- ClientMM->registerEHFrames(Addr, LoadAddr, Size);
- }
-
- void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
- }
-
- bool finalizeMemory(std::string *ErrMsg = nullptr) override {
- return ClientMM->finalizeMemory(ErrMsg);
+ // MCJIT doesn't support logical dylibs.
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return nullptr;
}
private:
- MCJIT *ParentEngine;
- std::unique_ptr<RTDyldMemoryManager> ClientMM;
+ MCJIT &ParentEngine;
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
};
// About Module states: added->loaded->finalized.
@@ -103,7 +68,8 @@ private:
class MCJIT : public ExecutionEngine {
MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
- std::unique_ptr<RTDyldMemoryManager> MemMgr);
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver);
typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet;
@@ -214,7 +180,8 @@ class MCJIT : public ExecutionEngine {
std::unique_ptr<TargetMachine> TM;
MCContext *Ctx;
- LinkingMemoryManager MemMgr;
+ std::shared_ptr<MCJITMemoryManager> MemMgr;
+ LinkingSymbolResolver Resolver;
RuntimeDyld Dyld;
std::vector<JITEventListener*> EventListeners;
@@ -238,7 +205,7 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator E);
public:
- ~MCJIT();
+ ~MCJIT() override;
/// @name ExecutionEngine interface implementation
/// @{
@@ -324,17 +291,22 @@ public:
MCJITCtor = createJIT;
}
- static ExecutionEngine *createJIT(std::unique_ptr<Module> M,
- std::string *ErrorStr,
- std::unique_ptr<RTDyldMemoryManager> MemMgr,
- std::unique_ptr<TargetMachine> TM);
+ static ExecutionEngine*
+ createJIT(std::unique_ptr<Module> M,
+ std::string *ErrorStr,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM);
// @}
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name,
+ bool CheckFunctionsOnly);
+ // DEPRECATED - Please use findSymbol instead.
// This is not directly exposed via the ExecutionEngine API, but it is
  // used by the LinkingSymbolResolver.
uint64_t getSymbolAddress(const std::string &Name,
- bool CheckFunctionsOnly);
+ bool CheckFunctionsOnly);
protected:
/// emitObject -- Generate a JITed object in memory from the specified module
@@ -348,7 +320,7 @@ protected:
const RuntimeDyld::LoadedObjectInfo &L);
void NotifyFreeingObject(const object::ObjectFile& Obj);
- uint64_t getExistingSymbolAddress(const std::string &Name);
+ RuntimeDyld::SymbolInfo findExistingSymbol(const std::string &Name);
Module *findModuleForSymbol(const std::string &Name,
bool CheckFunctionsOnly);
};
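With LinkingMemoryManager flattened into LinkingSymbolResolver, symbol resolution and memory management become separate seams: clients can now inject a RuntimeDyld::SymbolResolver without forwarding eight memory-manager hooks. A hedged sketch of a custom resolver (class name illustrative):

  #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
  #include "llvm/ExecutionEngine/RuntimeDyld.h"
  #include <string>

  class HostProcessResolver : public llvm::RuntimeDyld::SymbolResolver {
  public:
    llvm::RuntimeDyld::SymbolInfo
    findSymbol(const std::string &Name) override {
      if (uint64_t Addr =
              llvm::RTDyldMemoryManager::getSymbolAddressInProcess(Name))
        return llvm::RuntimeDyld::SymbolInfo(Addr,
                                             llvm::JITSymbolFlags::Exported);
      return nullptr;
    }
    // Like MCJIT above, no logical-dylib support.
    llvm::RuntimeDyld::SymbolInfo
    findSymbolInLogicalDylib(const std::string &Name) override {
      return nullptr;
    }
  };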
diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 9ab4003..23e7662 100644
--- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "EventListenerCommon.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
@@ -28,7 +27,6 @@
#include <fcntl.h>
using namespace llvm;
-using namespace llvm::jitprofiling;
using namespace llvm::object;
#define DEBUG_TYPE "oprofile-jit-event-listener"
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
new file mode 100644
index 0000000..b7220db
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -0,0 +1,102 @@
+//===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace orc {
+
+CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
+ : InitList(
+ GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
+ I((InitList && End) ? InitList->getNumOperands() : 0) {
+}
+
+bool CtorDtorIterator::operator==(const CtorDtorIterator &Other) const {
+ assert(InitList == Other.InitList && "Incomparable iterators.");
+ return I == Other.I;
+}
+
+bool CtorDtorIterator::operator!=(const CtorDtorIterator &Other) const {
+ return !(*this == Other);
+}
+
+CtorDtorIterator& CtorDtorIterator::operator++() {
+ ++I;
+ return *this;
+}
+
+CtorDtorIterator CtorDtorIterator::operator++(int) {
+ CtorDtorIterator Temp = *this;
+ ++I;
+ return Temp;
+}
+
+CtorDtorIterator::Element CtorDtorIterator::operator*() const {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(I));
+ assert(CS && "Unrecognized type in llvm.global_ctors/llvm.global_dtors");
+
+ Constant *FuncC = CS->getOperand(1);
+ Function *Func = nullptr;
+
+ // Extract function pointer, pulling off any casts.
+ while (FuncC) {
+ if (Function *F = dyn_cast_or_null<Function>(FuncC)) {
+ Func = F;
+ break;
+ } else if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(FuncC)) {
+ if (CE->isCast())
+ FuncC = dyn_cast_or_null<ConstantExpr>(CE->getOperand(0));
+ else
+ break;
+ } else {
+ // This isn't anything we recognize. Bail out with Func left set to null.
+ break;
+ }
+ }
+
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ Value *Data = CS->getOperand(2);
+ return Element(Priority->getZExtValue(), Func, Data);
+}
+
+iterator_range<CtorDtorIterator> getConstructors(const Module &M) {
+ const GlobalVariable *CtorsList = M.getNamedGlobal("llvm.global_ctors");
+ return make_range(CtorDtorIterator(CtorsList, false),
+ CtorDtorIterator(CtorsList, true));
+}
+
+iterator_range<CtorDtorIterator> getDestructors(const Module &M) {
+ const GlobalVariable *DtorsList = M.getNamedGlobal("llvm.global_dtors");
+ return make_range(CtorDtorIterator(DtorsList, false),
+ CtorDtorIterator(DtorsList, true));
+}
+
+void LocalCXXRuntimeOverrides::runDestructors() {
+ auto& CXXDestructorDataPairs = DSOHandleOverride;
+ for (auto &P : CXXDestructorDataPairs)
+ P.first(P.second);
+ CXXDestructorDataPairs.clear();
+}
+
+int LocalCXXRuntimeOverrides::CXAAtExitOverride(DestructorPtr Destructor,
+ void *Arg, void *DSOHandle) {
+ auto& CXXDestructorDataPairs =
+ *reinterpret_cast<CXXDestructorDataPairList*>(DSOHandle);
+ CXXDestructorDataPairs.push_back(std::make_pair(Destructor, Arg));
+ return 0;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
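getConstructors/getDestructors wrap the llvm.global_ctors and llvm.global_dtors arrays in a range of (priority, function, data) elements. A small sketch of walking the constructor list (field names taken from the Element built above):

  #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void listCtors(const Module &M) {
    for (const auto &E : orc::getConstructors(M))
      if (E.Func) // null when the entry wasn't a recognizable function
        outs() << E.Priority << ": " << E.Func->getName() << "\n";
  }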
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
new file mode 100644
index 0000000..4ed8730
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -0,0 +1,183 @@
+//===---- IndirectionUtils.cpp - Utilities for call indirection in Orc ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <set>
+#include <sstream>
+
+namespace llvm {
+namespace orc {
+
+Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) {
+ Constant *AddrIntVal =
+ ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr);
+ Constant *AddrPtrVal =
+ ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal,
+ PointerType::get(&FT, 0));
+ return AddrPtrVal;
+}
+
+GlobalVariable* createImplPointer(PointerType &PT, Module &M,
+ const Twine &Name, Constant *Initializer) {
+ if (!Initializer)
+ Initializer = Constant::getNullValue(&PT);
+ auto IP = new GlobalVariable(M, &PT, false, GlobalValue::ExternalLinkage,
+ Initializer, Name, nullptr,
+ GlobalValue::NotThreadLocal, 0, true);
+ IP->setVisibility(GlobalValue::HiddenVisibility);
+ return IP;
+}
+
+void makeStub(Function &F, GlobalVariable &ImplPointer) {
+ assert(F.isDeclaration() && "Can't turn a definition into a stub.");
+ assert(F.getParent() && "Function isn't in a module.");
+ Module &M = *F.getParent();
+ BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", &F);
+ IRBuilder<> Builder(EntryBlock);
+ LoadInst *ImplAddr = Builder.CreateLoad(&ImplPointer);
+ std::vector<Value*> CallArgs;
+ for (auto &A : F.args())
+ CallArgs.push_back(&A);
+ CallInst *Call = Builder.CreateCall(ImplAddr, CallArgs);
+ Call->setTailCall();
+ Call->setAttributes(F.getAttributes());
+ if (F.getReturnType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Call);
+}
+
+// Utility class for renaming global values and functions during partitioning.
+class GlobalRenamer {
+public:
+
+ static bool needsRenaming(const Value &New) {
+ if (!New.hasName() || New.getName().startswith("\01L"))
+ return true;
+ return false;
+ }
+
+ const std::string& getRename(const Value &Orig) {
+ // See if we have a name for this global.
+ {
+ auto I = Names.find(&Orig);
+ if (I != Names.end())
+ return I->second;
+ }
+
+ // Nope. Create a new one.
+ // FIXME: Use a more robust uniquing scheme. (This may blow up if the user
+ // writes a "__orc_anon[[:digit:]]*" method.)
+ unsigned ID = Names.size();
+ std::ostringstream NameStream;
+ NameStream << "__orc_anon" << ID++;
+ auto I = Names.insert(std::make_pair(&Orig, NameStream.str()));
+ return I.first->second;
+ }
+private:
+ DenseMap<const Value*, std::string> Names;
+};
+
+static void raiseVisibilityOnValue(GlobalValue &V, GlobalRenamer &R) {
+ if (V.hasLocalLinkage()) {
+ if (R.needsRenaming(V))
+ V.setName(R.getRename(V));
+ V.setLinkage(GlobalValue::ExternalLinkage);
+ V.setVisibility(GlobalValue::HiddenVisibility);
+ }
+ V.setUnnamedAddr(false);
+ assert(!R.needsRenaming(V) && "Invalid global name.");
+}
+
+void makeAllSymbolsExternallyAccessible(Module &M) {
+ GlobalRenamer Renamer;
+
+ for (auto &F : M)
+ raiseVisibilityOnValue(F, Renamer);
+
+ for (auto &GV : M.globals())
+ raiseVisibilityOnValue(GV, Renamer);
+}
+
+Function* cloneFunctionDecl(Module &Dst, const Function &F,
+ ValueToValueMapTy *VMap) {
+ assert(F.getParent() != &Dst && "Can't copy decl over existing function.");
+ Function *NewF =
+ Function::Create(cast<FunctionType>(F.getType()->getElementType()),
+ F.getLinkage(), F.getName(), &Dst);
+ NewF->copyAttributesFrom(&F);
+
+ if (VMap) {
+ (*VMap)[&F] = NewF;
+ auto NewArgI = NewF->arg_begin();
+ for (auto ArgI = F.arg_begin(), ArgE = F.arg_end(); ArgI != ArgE;
+ ++ArgI, ++NewArgI)
+ (*VMap)[ArgI] = NewArgI;
+ }
+
+ return NewF;
+}
+
+void moveFunctionBody(Function &OrigF, ValueToValueMapTy &VMap,
+ ValueMaterializer *Materializer,
+ Function *NewF) {
+ assert(!OrigF.isDeclaration() && "Nothing to move");
+ if (!NewF)
+ NewF = cast<Function>(VMap[&OrigF]);
+ else
+ assert(VMap[&OrigF] == NewF && "Incorrect function mapping in VMap.");
+ assert(NewF && "Function mapping missing from VMap.");
+ assert(NewF->getParent() != OrigF.getParent() &&
+ "moveFunctionBody should only be used to move bodies between "
+ "modules.");
+
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns,
+ "", nullptr, nullptr, Materializer);
+ OrigF.deleteBody();
+}
+
+GlobalVariable* cloneGlobalVariableDecl(Module &Dst, const GlobalVariable &GV,
+ ValueToValueMapTy *VMap) {
+ assert(GV.getParent() != &Dst && "Can't copy decl over existing global var.");
+ GlobalVariable *NewGV = new GlobalVariable(
+ Dst, GV.getType()->getElementType(), GV.isConstant(),
+ GV.getLinkage(), nullptr, GV.getName(), nullptr,
+ GV.getThreadLocalMode(), GV.getType()->getAddressSpace());
+ NewGV->copyAttributesFrom(&GV);
+ if (VMap)
+ (*VMap)[&GV] = NewGV;
+ return NewGV;
+}
+
+void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
+ ValueToValueMapTy &VMap,
+ ValueMaterializer *Materializer,
+ GlobalVariable *NewGV) {
+ assert(OrigGV.hasInitializer() && "Nothing to move");
+ if (!NewGV)
+ NewGV = cast<GlobalVariable>(VMap[&OrigGV]);
+ else
+ assert(VMap[&OrigGV] == NewGV &&
+ "Incorrect global variable mapping in VMap.");
+ assert(NewGV->getParent() != OrigGV.getParent() &&
+ "moveGlobalVariable should only be used to move initializers between "
+ "modules");
+
+ NewGV->setInitializer(MapValue(OrigGV.getInitializer(), VMap, RF_None,
+ nullptr, Materializer));
+}
+
+} // End namespace orc.
+} // End namespace llvm.
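createIRTypedAddress, createImplPointer, and makeStub compose into the basic Orc indirection recipe: materialize a hidden pointer to the real implementation, then turn a declaration into a tail-calling stub through it. A hedged sketch (the $stub_impl suffix is illustrative):

  #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  void emitStubFor(Module &M, Function &Decl, orc::TargetAddress ImplAddr) {
    Constant *Init =
        orc::createIRTypedAddress(*Decl.getFunctionType(), ImplAddr);
    GlobalVariable *ImplPtr = orc::createImplPointer(
        *cast<PointerType>(Decl.getType()), M,
        Decl.getName() + "$stub_impl", Init);
    orc::makeStub(Decl, *ImplPtr); // Decl must still be a declaration
  }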
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
new file mode 100644
index 0000000..48fd31e
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -0,0 +1,128 @@
+//===-------- OrcMCJITReplacement.cpp - Orc-based MCJIT replacement -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "OrcMCJITReplacement.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { llvm::orc::OrcMCJITReplacement::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInOrcMCJITReplacement() {}
+
+namespace llvm {
+namespace orc {
+
+GenericValue
+OrcMCJITReplacement::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ assert(F && "Function *F was null at entry to run()");
+
+ void *FPtr = getPointerToFunction(F);
+ assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+ FunctionType *FTy = F->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
+
+ assert((FTy->getNumParams() == ArgValues.size() ||
+ (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+ "Wrong number of arguments passed into function!");
+ assert(FTy->getNumParams() == ArgValues.size() &&
+ "This doesn't support passing arguments through varargs (yet)!");
+
+ // Handle some common cases first. These cases correspond to common `main'
+ // prototypes.
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+ switch (ArgValues.size()) {
+ case 3:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
+ int (*PF)(int, char **, const char **) =
+ (int (*)(int, char **, const char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1]),
+ (const char **)GVTOP(ArgValues[2])));
+ return rv;
+ }
+ break;
+ case 2:
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
+ int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;
+
+ // Call the function.
+ GenericValue rv;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+ (char **)GVTOP(ArgValues[1])));
+ return rv;
+ }
+ break;
+ case 1:
+ if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy(32)) {
+ GenericValue rv;
+ int (*PF)(int) = (int (*)(int))(intptr_t)FPtr;
+ rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+ return rv;
+ }
+ break;
+ }
+ }
+
+ // Handle cases where no arguments are passed first.
+ if (ArgValues.empty()) {
+ GenericValue rv;
+ switch (RetTy->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown return type for function call!");
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+ if (BitWidth == 1)
+ rv.IntVal = APInt(BitWidth, ((bool (*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 8)
+ rv.IntVal = APInt(BitWidth, ((char (*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 16)
+ rv.IntVal = APInt(BitWidth, ((short (*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 32)
+ rv.IntVal = APInt(BitWidth, ((int (*)())(intptr_t)FPtr)());
+ else if (BitWidth <= 64)
+ rv.IntVal = APInt(BitWidth, ((int64_t (*)())(intptr_t)FPtr)());
+ else
+ llvm_unreachable("Integer types > 64 bits not supported");
+ return rv;
+ }
+ case Type::VoidTyID:
+ rv.IntVal = APInt(32, ((int (*)())(intptr_t)FPtr)());
+ return rv;
+ case Type::FloatTyID:
+ rv.FloatVal = ((float (*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::DoubleTyID:
+ rv.DoubleVal = ((double (*)())(intptr_t)FPtr)();
+ return rv;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ llvm_unreachable("long double not supported yet");
+ case Type::PointerTyID:
+ return PTOGV(((void *(*)())(intptr_t)FPtr)());
+ }
+ }
+
+ llvm_unreachable("Full-featured argument passing not supported yet!");
+}
+
+} // End namespace orc.
+} // End namespace llvm.
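
The runFunction implementation above supports only a few main-like prototypes: it obtains the address of the JIT'd code and casts it to a matching C function pointer before calling it. The same cast-and-call pattern, reduced to a standalone sketch (the address is assumed to come from a JIT and to match the signature):

    #include <cassert>
    #include <cstdint>

    // Invoke JIT'd code with signature int(int, char **) through a plain
    // function pointer, mirroring the two-argument fast path above.
    static int callAsMain(uint64_t Addr, int Argc, char **Argv) {
      assert(Addr && "null code address");
      auto *PF =
          reinterpret_cast<int (*)(int, char **)>(static_cast<uintptr_t>(Addr));
      return PF(Argc, Argv);
    }
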
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
new file mode 100644
index 0000000..4023344
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -0,0 +1,356 @@
+//===---- OrcMCJITReplacement.h - Orc based MCJIT replacement ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Orc based MCJIT replacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
+#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/Object/Archive.h"
+
+namespace llvm {
+namespace orc {
+
+class OrcMCJITReplacement : public ExecutionEngine {
+
+ // OrcMCJITReplacement needs to do a little extra book-keeping to ensure that
+ // Orc's automatic finalization doesn't kick in earlier than MCJIT clients are
+ // expecting - see finalizeMemory.
+ class MCJITReplacementMemMgr : public MCJITMemoryManager {
+ public:
+ MCJITReplacementMemMgr(OrcMCJITReplacement &M,
+ std::shared_ptr<MCJITMemoryManager> ClientMM)
+ : M(M), ClientMM(std::move(ClientMM)) {}
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override {
+ uint8_t *Addr =
+ ClientMM->allocateCodeSection(Size, Alignment, SectionID,
+ SectionName);
+ M.SectionsAllocatedSinceLastLoad.insert(Addr);
+ return Addr;
+ }
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override {
+ uint8_t *Addr = ClientMM->allocateDataSection(Size, Alignment, SectionID,
+ SectionName, IsReadOnly);
+ M.SectionsAllocatedSinceLastLoad.insert(Addr);
+ return Addr;
+ }
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
+ uintptr_t DataSizeRW) override {
+ return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO,
+ DataSizeRW);
+ }
+
+ bool needsToReserveAllocationSpace() override {
+ return ClientMM->needsToReserveAllocationSpace();
+ }
+
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {
+ return ClientMM->registerEHFrames(Addr, LoadAddr, Size);
+ }
+
+ void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {
+ return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
+ }
+
+ void notifyObjectLoaded(ExecutionEngine *EE,
+ const object::ObjectFile &O) override {
+ return ClientMM->notifyObjectLoaded(EE, O);
+ }
+
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+ // Each set of objects loaded will be finalized exactly once, but since
+ // symbol lookup during relocation may recursively trigger the
+ // loading/relocation of other modules, and since we're forwarding all
+ // finalizeMemory calls to a single underlying memory manager, we need to
+ // defer forwarding the call on until all necessary objects have been
+ // loaded. Otherwise, during the relocation of a leaf object, we will end
+ // up finalizing memory, causing a crash further up the stack when we
+ // attempt to apply relocations to finalized memory.
+ // To avoid finalizing too early, look at how many objects have been
+ // loaded but not yet finalized. This is a bit of a hack that relies on
+ // the fact that we're lazily emitting object files: The only way you can
+ // get more than one set of objects loaded but not yet finalized is if
+ // they were loaded during relocation of another set.
+ if (M.UnfinalizedSections.size() == 1)
+ return ClientMM->finalizeMemory(ErrMsg);
+ return false;
+ }
+
+ private:
+ OrcMCJITReplacement &M;
+ std::shared_ptr<MCJITMemoryManager> ClientMM;
+ };
+
+ class LinkingResolver : public RuntimeDyld::SymbolResolver {
+ public:
+ LinkingResolver(OrcMCJITReplacement &M) : M(M) {}
+
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ return M.findMangledSymbol(Name);
+ }
+
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return M.ClientResolver->findSymbolInLogicalDylib(Name);
+ }
+
+ private:
+ OrcMCJITReplacement &M;
+ };
+
+private:
+
+ static ExecutionEngine *
+ createOrcMCJITReplacement(std::string *ErrorMsg,
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::unique_ptr<TargetMachine> TM) {
+ return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver),
+ std::move(TM));
+ }
+
+public:
+ static void Register() {
+ OrcMCJITReplacementCtor = createOrcMCJITReplacement;
+ }
+
+ OrcMCJITReplacement(
+ std::shared_ptr<MCJITMemoryManager> MemMgr,
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+ std::unique_ptr<TargetMachine> TM)
+ : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
+ Resolver(*this), ClientResolver(std::move(ClientResolver)),
+ Mang(this->TM->getDataLayout()),
+ NotifyObjectLoaded(*this), NotifyFinalized(*this),
+ ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
+ CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
+ LazyEmitLayer(CompileLayer) {
+ setDataLayout(this->TM->getDataLayout());
+ }
+
+ void addModule(std::unique_ptr<Module> M) override {
+
+ // If this module doesn't have a DataLayout attached then attach the
+ // default.
+ if (M->getDataLayout().isDefault())
+ M->setDataLayout(*getDataLayout());
+
+ Modules.push_back(std::move(M));
+ std::vector<Module *> Ms;
+ Ms.push_back(&*Modules.back());
+ LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver);
+ }
+
+ void addObjectFile(std::unique_ptr<object::ObjectFile> O) override {
+ std::vector<std::unique_ptr<object::ObjectFile>> Objs;
+ Objs.push_back(std::move(O));
+ ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+ }
+
+ void addObjectFile(object::OwningBinary<object::ObjectFile> O) override {
+ std::unique_ptr<object::ObjectFile> Obj;
+ std::unique_ptr<MemoryBuffer> Buf;
+ std::tie(Obj, Buf) = O.takeBinary();
+ std::vector<std::unique_ptr<object::ObjectFile>> Objs;
+ Objs.push_back(std::move(Obj));
+ auto H =
+ ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
+
+ std::vector<std::unique_ptr<MemoryBuffer>> Bufs;
+ Bufs.push_back(std::move(Buf));
+ ObjectLayer.takeOwnershipOfBuffers(H, std::move(Bufs));
+ }
+
+ void addArchive(object::OwningBinary<object::Archive> A) override {
+ Archives.push_back(std::move(A));
+ }
+
+ uint64_t getSymbolAddress(StringRef Name) {
+ return findSymbol(Name).getAddress();
+ }
+
+ RuntimeDyld::SymbolInfo findSymbol(StringRef Name) {
+ return findMangledSymbol(Mangle(Name));
+ }
+
+ void finalizeObject() override {
+ // Deprecated: this doesn't make sense for the new JIT and should be
+ // removed from ExecutionEngine when possible.
+ }
+
+ void mapSectionAddress(const void *LocalAddress,
+ uint64_t TargetAddress) override {
+ for (auto &P : UnfinalizedSections)
+ if (P.second.count(LocalAddress))
+ ObjectLayer.mapSectionAddress(P.first, LocalAddress, TargetAddress);
+ }
+
+ uint64_t getGlobalValueAddress(const std::string &Name) override {
+ return getSymbolAddress(Name);
+ }
+
+ uint64_t getFunctionAddress(const std::string &Name) override {
+ return getSymbolAddress(Name);
+ }
+
+ void *getPointerToFunction(Function *F) override {
+ uint64_t FAddr = getSymbolAddress(F->getName());
+ return reinterpret_cast<void *>(static_cast<uintptr_t>(FAddr));
+ }
+
+ void *getPointerToNamedFunction(StringRef Name,
+ bool AbortOnFailure = true) override {
+ uint64_t Addr = getSymbolAddress(Name);
+ if (!Addr && AbortOnFailure)
+ llvm_unreachable("Missing symbol!");
+ return reinterpret_cast<void *>(static_cast<uintptr_t>(Addr));
+ }
+
+ GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) override;
+
+ void setObjectCache(ObjectCache *NewCache) override {
+ CompileLayer.setObjectCache(NewCache);
+ }
+
+private:
+
+ RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
+ if (auto Sym = LazyEmitLayer.findSymbol(Name, false))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+ if (auto Sym = ClientResolver->findSymbol(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+ if (auto Sym = scanArchives(Name))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+
+ return nullptr;
+ }
+
+ JITSymbol scanArchives(StringRef Name) {
+ for (object::OwningBinary<object::Archive> &OB : Archives) {
+ object::Archive *A = OB.getBinary();
+ // Look for our symbols in each Archive
+ object::Archive::child_iterator ChildIt = A->findSym(Name);
+ if (ChildIt != A->child_end()) {
+ // FIXME: Support nested archives?
+ ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
+ ChildIt->getAsBinary();
+ if (ChildBinOrErr.getError())
+ continue;
+ std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
+ if (ChildBin->isObject()) {
+ std::vector<std::unique_ptr<object::ObjectFile>> ObjSet;
+ ObjSet.push_back(std::unique_ptr<object::ObjectFile>(
+ static_cast<object::ObjectFile *>(ChildBin.release())));
+ ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver);
+ if (auto Sym = ObjectLayer.findSymbol(Name, true))
+ return Sym;
+ }
+ }
+ }
+ return nullptr;
+ }
+
+ class NotifyObjectLoadedT {
+ public:
+ typedef std::vector<std::unique_ptr<object::ObjectFile>> ObjListT;
+ typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
+ LoadedObjInfoListT;
+
+ NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {}
+
+ void operator()(ObjectLinkingLayerBase::ObjSetHandleT H,
+ const ObjListT &Objects,
+ const LoadedObjInfoListT &Infos) const {
+ M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad);
+ M.SectionsAllocatedSinceLastLoad = SectionAddrSet();
+ assert(Objects.size() == Infos.size() &&
+ "Incorrect number of Infos for Objects.");
+ for (unsigned I = 0; I < Objects.size(); ++I)
+ M.MemMgr.notifyObjectLoaded(&M, *Objects[I]);
+ }
+
+ private:
+ OrcMCJITReplacement &M;
+ };
+
+ class NotifyFinalizedT {
+ public:
+ NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
+ void operator()(ObjectLinkingLayerBase::ObjSetHandleT H) {
+ M.UnfinalizedSections.erase(H);
+ }
+
+ private:
+ OrcMCJITReplacement &M;
+ };
+
+ std::string Mangle(StringRef Name) {
+ std::string MangledName;
+ {
+ raw_string_ostream MangledNameStream(MangledName);
+ Mang.getNameWithPrefix(MangledNameStream, Name);
+ }
+ return MangledName;
+ }
+
+ typedef ObjectLinkingLayer<NotifyObjectLoadedT> ObjectLayerT;
+ typedef IRCompileLayer<ObjectLayerT> CompileLayerT;
+ typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
+
+ std::unique_ptr<TargetMachine> TM;
+ MCJITReplacementMemMgr MemMgr;
+ LinkingResolver Resolver;
+ std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
+ Mangler Mang;
+
+ NotifyObjectLoadedT NotifyObjectLoaded;
+ NotifyFinalizedT NotifyFinalized;
+
+ ObjectLayerT ObjectLayer;
+ CompileLayerT CompileLayer;
+ LazyEmitLayerT LazyEmitLayer;
+
+ // We need to store ObjectLayerT::ObjSetHandleTs for each of the object sets
+ // that have been emitted but not yet finalized so that we can forward the
+ // mapSectionAddress calls appropriately.
+ typedef std::set<const void *> SectionAddrSet;
+ struct ObjSetHandleCompare {
+ bool operator()(ObjectLayerT::ObjSetHandleT H1,
+ ObjectLayerT::ObjSetHandleT H2) const {
+ return &*H1 < &*H2;
+ }
+ };
+ SectionAddrSet SectionsAllocatedSinceLastLoad;
+ std::map<ObjectLayerT::ObjSetHandleT, SectionAddrSet, ObjSetHandleCompare>
+ UnfinalizedSections;
+
+ std::vector<object::OwningBinary<object::Archive>> Archives;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
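
A note on the finalization book-keeping in the header above: finalizeMemory is forwarded only when exactly one object set remains unfinalized, because symbol resolution during relocation can recursively load further object sets before the outermost load completes. The counting discipline, isolated into a standalone sketch (the class and member names are illustrative, not LLVM API):

    #include <set>

    // Illustrative only: defer the real finalization until the outermost
    // of a chain of recursively-loaded object sets is being finalized.
    class DeferredFinalizer {
    public:
      void objectLoaded(int Handle) { Unfinalized.insert(Handle); }
      void objectFinalized(int Handle) { Unfinalized.erase(Handle); }

      bool finalizeMemory() {
        // More than one outstanding set means we are inside a recursive
        // load: defer. The last remaining set triggers the real work.
        if (Unfinalized.size() == 1)
          return doRealFinalize();
        return false;
      }

    private:
      bool doRealFinalize() { return true; } // stand-in for the client call
      std::set<int> Unfinalized;
    };
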
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
new file mode 100644
index 0000000..258868a
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -0,0 +1,138 @@
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include <array>
+
+using namespace llvm::orc;
+
+namespace {
+
+uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
+ TargetAddress CallbackID) {
+ return JCBM->executeCompileCallback(CallbackID);
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
+
+void OrcX86_64::insertResolverBlock(
+ Module &M, JITCompileCallbackManagerBase &JCBM) {
+
+ // Trampoline code-sequence length, used to get trampoline address from return
+ // address.
+ const unsigned X86_64_TrampolineLength = 6;
+
+ // List of x86-64 GPRs to save. Note: RBP is saved separately below.
+ std::array<const char *, 14> GPRs = {{
+ "rax", "rbx", "rcx", "rdx",
+ "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13",
+ "r14", "r15"
+ }};
+
+ // Address of the executeCompileCallback function.
+ uint64_t CallbackAddr =
+ static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(executeCompileCallback));
+
+ std::ostringstream AsmStream;
+ Triple TT(M.getTargetTriple());
+
+ // Switch to text section.
+ if (TT.getOS() == Triple::Darwin)
+ AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
+ << ".align 4, 0x90\n";
+ else
+ AsmStream << ".text\n"
+ << ".align 16, 0x90\n";
+
+ // Bake in a pointer to the callback manager immediately before the
+ // start of the resolver function.
+ AsmStream << "jit_callback_manager_addr:\n"
+ << " .quad " << &JCBM << "\n";
+
+ // Start the resolver function.
+ AsmStream << ResolverBlockName << ":\n"
+ << " pushq %rbp\n"
+ << " movq %rsp, %rbp\n";
+
+ // Store the GPRs.
+ for (const auto &GPR : GPRs)
+ AsmStream << " pushq %" << GPR << "\n";
+
+ // Store floating-point state with FXSAVE.
+ // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
+ // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
+ // an extra 64 bits of padding to the FXSave area.
+ unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
+ unsigned FXSaveSize = 512 + Padding;
+ AsmStream << " subq $" << FXSaveSize << ", %rsp\n"
+ << " fxsave64 (%rsp)\n"
+
+ // Load callback manager address, compute trampoline address, call JIT.
+ << " lea jit_callback_manager_addr(%rip), %rdi\n"
+ << " movq (%rdi), %rdi\n"
+ << " movq 0x8(%rbp), %rsi\n"
+ << " subq $" << X86_64_TrampolineLength << ", %rsi\n"
+ << " movabsq $" << CallbackAddr << ", %rax\n"
+ << " callq *%rax\n"
+
+ // Replace the return to the trampoline with the return address of the
+ // compiled function body.
+ << " movq %rax, 0x8(%rbp)\n"
+
+ // Restore the floating point state.
+ << " fxrstor64 (%rsp)\n"
+ << " addq $" << FXSaveSize << ", %rsp\n";
+
+ for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
+ AsmStream << " popq %" << GPR << "\n";
+
+ // Restore original RBP and return to compiled function body.
+ AsmStream << " popq %rbp\n"
+ << " retq\n";
+
+ M.appendModuleInlineAsm(AsmStream.str());
+}
+
+OrcX86_64::LabelNameFtor
+OrcX86_64::insertCompileCallbackTrampolines(Module &M,
+ TargetAddress ResolverBlockAddr,
+ unsigned NumCalls,
+ unsigned StartIndex) {
+ const char *ResolverBlockPtrName = "Lorc_resolve_block_addr";
+
+ std::ostringstream AsmStream;
+ Triple TT(M.getTargetTriple());
+
+ if (TT.getOS() == Triple::Darwin)
+ AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
+ << ".align 4, 0x90\n";
+ else
+ AsmStream << ".text\n"
+ << ".align 16, 0x90\n";
+
+ AsmStream << ResolverBlockPtrName << ":\n"
+ << " .quad " << ResolverBlockAddr << "\n";
+
+ auto GetLabelName =
+ [=](unsigned I) {
+ std::ostringstream LabelStream;
+ LabelStream << "orc_jcc_" << (StartIndex + I);
+ return LabelStream.str();
+ };
+
+ for (unsigned I = 0; I < NumCalls; ++I)
+ AsmStream << GetLabelName(I) << ":\n"
+ << " callq *" << ResolverBlockPtrName << "(%rip)\n";
+
+ M.appendModuleInlineAsm(AsmStream.str());
+
+ return GetLabelName;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
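
Each trampoline emitted above is a single 6-byte callq through Lorc_resolve_block_addr, which is why the resolver block can recover the trampoline's address as the return address found on the stack minus X86_64_TrampolineLength (the movq 0x8(%rbp), %rsi / subq $6, %rsi pair). The arithmetic, as a trivial sketch:

    #include <cstdint>

    // callq pushes the address of the next instruction, so the trampoline
    // starts at the return address minus the fixed 6-byte call sequence.
    static uint64_t trampolineFromReturnAddr(uint64_t RetAddr) {
      const unsigned X86_64_TrampolineLength = 6;
      return RetAddr - X86_64_TrampolineLength;
    }
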
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 304014e..24a3ec1 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -13,10 +13,12 @@
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "RuntimeDyldCheckerImpl.h"
+#include "RuntimeDyldCOFF.h"
#include "RuntimeDyldELF.h"
#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MutexGuard.h"
@@ -55,7 +57,8 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
unsigned BytesRemaining = S.Size;
if (StartPadding) {
- dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":";
+ dbgs() << "\n" << format("0x%016" PRIx64,
+ LoadAddr & ~(uint64_t)(ColsPerRow - 1)) << ":";
while (StartPadding--)
dbgs() << " ";
}
@@ -90,7 +93,7 @@ void RuntimeDyldImpl::resolveRelocations() {
// entry provides the section to which the relocation will be applied.
uint64_t Addr = Sections[i].LoadAddress;
DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t"
- << format("0x%x", Addr) << "\n");
+ << format("%p", (uintptr_t)Addr) << "\n");
DEBUG(dumpSectionMemory(Sections[i], "before relocations"));
resolveRelocationList(Relocations[i], Addr);
DEBUG(dumpSectionMemory(Sections[i], "after relocations"));
@@ -149,68 +152,65 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Compute the memory size required to load all sections to be loaded
// and pass this information to the memory manager
- if (MemMgr->needsToReserveAllocationSpace()) {
+ if (MemMgr.needsToReserveAllocationSpace()) {
uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0;
computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW);
- MemMgr->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
+ MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
}
// Used sections from the object file
ObjSectionToIDMap LocalSections;
// Common symbols requiring allocation, with their sizes and alignments
- CommonSymbolMap CommonSymbols;
- // Maximum required total memory to allocate all common symbols
- uint64_t CommonSize = 0;
+ CommonSymbolList CommonSymbols;
// Parse symbols
DEBUG(dbgs() << "Parse symbols:\n");
for (symbol_iterator I = Obj.symbol_begin(), E = Obj.symbol_end(); I != E;
++I) {
- object::SymbolRef::Type SymType;
- StringRef Name;
- Check(I->getType(SymType));
- Check(I->getName(Name));
-
uint32_t Flags = I->getFlags();
bool IsCommon = Flags & SymbolRef::SF_Common;
- if (IsCommon) {
- // Add the common symbols to a list. We'll allocate them all below.
- if (!GlobalSymbolTable.count(Name)) {
- uint32_t Align;
- Check(I->getAlignment(Align));
- uint64_t Size = 0;
- Check(I->getSize(Size));
- CommonSize += Size + Align;
- CommonSymbols[*I] = CommonSymbolInfo(Size, Align);
- }
- } else {
+ if (IsCommon)
+ CommonSymbols.push_back(*I);
+ else {
+ object::SymbolRef::Type SymType;
+ Check(I->getType(SymType));
+
if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown) {
+
+ StringRef Name;
uint64_t SectOffset;
- StringRef SectionData;
- section_iterator SI = Obj.section_end();
+ Check(I->getName(Name));
Check(getOffset(*I, SectOffset));
+ section_iterator SI = Obj.section_end();
Check(I->getSection(SI));
if (SI == Obj.section_end())
continue;
+ StringRef SectionData;
Check(SI->getContents(SectionData));
bool IsCode = SI->isText();
unsigned SectionID =
findOrEmitSection(Obj, *SI, IsCode, LocalSections);
- DEBUG(dbgs() << "\tOffset: " << format("%p", (uintptr_t)SectOffset)
- << " flags: " << Flags << " SID: " << SectionID);
- GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name
+ << " SID: " << SectionID << " Offset: "
+ << format("%p", (uintptr_t)SectOffset)
+ << " flags: " << Flags << "\n");
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
+ if (Flags & SymbolRef::SF_Weak)
+ RTDyldSymFlags |= JITSymbolFlags::Weak;
+ if (Flags & SymbolRef::SF_Exported)
+ RTDyldSymFlags |= JITSymbolFlags::Exported;
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags);
}
}
- DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
}
// Allocate common symbols
- if (CommonSize != 0)
- emitCommonSymbols(Obj, CommonSymbols, CommonSize, GlobalSymbolTable);
+ emitCommonSymbols(Obj, CommonSymbols);
// Parse and process relocations
DEBUG(dbgs() << "Parse relocations:\n");
@@ -220,6 +220,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
StubMap Stubs;
section_iterator RelocatedSection = SI->getRelocatedSection();
+ if (RelocatedSection == SE)
+ continue;
+
relocation_iterator I = SI->relocation_begin();
relocation_iterator E = SI->relocation_end();
@@ -267,6 +270,20 @@ static bool isRequiredForExecution(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return ELFObj->getSectionFlags(Section) & ELF::SHF_ALLOC;
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) {
+ const coff_section *CoffSection = COFFObj->getCOFFSection(Section);
+ // Avoid loading zero-sized COFF sections.
+ // In PE files, VirtualSize gives the section size, and SizeOfRawData
+ // may be zero for sections with content. In Obj files, SizeOfRawData
+ // gives the section size, and VirtualSize is always zero. Hence
+ // the need to check for both cases below.
+ bool HasContent = (CoffSection->VirtualSize > 0)
+ || (CoffSection->SizeOfRawData > 0);
+ bool IsDiscardable = CoffSection->Characteristics &
+ (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO);
+ return HasContent && !IsDiscardable;
+ }
+
assert(isa<MachOObjectFile>(Obj));
return true;
}
@@ -276,6 +293,15 @@ static bool isReadOnlyData(const SectionRef &Section) {
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return !(ELFObj->getSectionFlags(Section) &
(ELF::SHF_WRITE | ELF::SHF_EXECINSTR));
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
+ return ((COFFObj->getCOFFSection(Section)->Characteristics &
+ (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE))
+ ==
+ (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ));
+
assert(isa<MachOObjectFile>(Obj));
return false;
}
@@ -284,6 +310,9 @@ static bool isZeroInit(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return ELFObj->getSectionType(Section) == ELF::SHT_NOBITS;
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
+ return COFFObj->getCOFFSection(Section)->Characteristics &
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
auto *MachO = cast<MachOObjectFile>(Obj);
unsigned SectionType = MachO->getSectionType(Section);
@@ -332,19 +361,20 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
if (Name == ".eh_frame")
SectionSize += 4;
- if (SectionSize > 0) {
- // save the total size of the section
- if (IsCode) {
- CodeSectionSizes.push_back(SectionSize);
- } else if (IsReadOnly) {
- ROSectionSizes.push_back(SectionSize);
- } else {
- RWSectionSizes.push_back(SectionSize);
- }
- // update the max alignment
- if (Alignment > MaxAlignment) {
- MaxAlignment = Alignment;
- }
+ if (!SectionSize)
+ SectionSize = 1;
+
+ if (IsCode) {
+ CodeSectionSizes.push_back(SectionSize);
+ } else if (IsReadOnly) {
+ ROSectionSizes.push_back(SectionSize);
+ } else {
+ RWSectionSizes.push_back(SectionSize);
+ }
+
+ // update the max alignment
+ if (Alignment > MaxAlignment) {
+ MaxAlignment = Alignment;
}
}
}
@@ -442,38 +472,73 @@ void RuntimeDyldImpl::writeBytesUnaligned(uint64_t Value, uint8_t *Dst,
}
void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
- const CommonSymbolMap &CommonSymbols,
- uint64_t TotalSize,
- SymbolTableMap &SymbolTable) {
+ CommonSymbolList &CommonSymbols) {
+ if (CommonSymbols.empty())
+ return;
+
+ uint64_t CommonSize = 0;
+ CommonSymbolList SymbolsToAllocate;
+
+ DEBUG(dbgs() << "Processing common symbols...\n");
+
+ for (const auto &Sym : CommonSymbols) {
+ StringRef Name;
+ Check(Sym.getName(Name));
+
+ // Skip common symbols already defined elsewhere.
+ if (GlobalSymbolTable.count(Name) ||
+ Resolver.findSymbolInLogicalDylib(Name)) {
+ DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name
+ << "'\n");
+ continue;
+ }
+
+ uint32_t Align = 0;
+ uint64_t Size = 0;
+ Check(Sym.getAlignment(Align));
+ Check(Sym.getSize(Size));
+
+ CommonSize += Align + Size;
+ SymbolsToAllocate.push_back(Sym);
+ }
+
// Allocate memory for the section
unsigned SectionID = Sections.size();
- uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void *),
- SectionID, StringRef(), false);
+ uint8_t *Addr = MemMgr.allocateDataSection(CommonSize, sizeof(void *),
+ SectionID, StringRef(), false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
- Sections.push_back(SectionEntry("<common symbols>", Addr, TotalSize, 0));
- memset(Addr, 0, TotalSize);
+ Sections.push_back(SectionEntry("<common symbols>", Addr, CommonSize, 0));
+ memset(Addr, 0, CommonSize);
DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: "
- << format("%p", Addr) << " DataSize: " << TotalSize << "\n");
+ << format("%p", Addr) << " DataSize: " << CommonSize << "\n");
// Assign the address of each symbol
- for (CommonSymbolMap::const_iterator it = CommonSymbols.begin(),
- itEnd = CommonSymbols.end(); it != itEnd; ++it) {
- uint64_t Size = it->second.first;
- uint64_t Align = it->second.second;
+ for (auto &Sym : SymbolsToAllocate) {
+ uint32_t Align;
+ uint64_t Size;
StringRef Name;
- it->first.getName(Name);
+ Check(Sym.getAlignment(Align));
+ Check(Sym.getSize(Size));
+ Check(Sym.getName(Name));
if (Align) {
// This symbol has an alignment requirement.
uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align);
Addr += AlignOffset;
Offset += AlignOffset;
- DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
- << format("%p\n", Addr));
}
- SymbolTable[Name.data()] = SymbolLoc(SectionID, Offset);
+ uint32_t Flags = Sym.getFlags();
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
+ if (Flags & SymbolRef::SF_Weak)
+ RTDyldSymFlags |= JITSymbolFlags::Weak;
+ if (Flags & SymbolRef::SF_Exported)
+ RTDyldSymFlags |= JITSymbolFlags::Exported;
+ DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
+ << format("%p", Addr) << "\n");
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, Offset, RTDyldSymFlags);
Offset += Size;
Addr += Size;
}
@@ -483,7 +548,6 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
const SectionRef &Section, bool IsCode) {
StringRef data;
- Check(Section.getContents(data));
uint64_t Alignment64 = Section.getAlignment();
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
@@ -510,21 +574,26 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
uint8_t *Addr;
const char *pData = nullptr;
+ // Whether or not the section is loaded for execution, record the
+ // location of the unrelocated section data: we still process its
+ // relocations even when we are not applying them.
+ Check(Section.getContents(data));
+ // Virtual sections have no data in the object image, so leave pData = 0
+ if (!IsVirtual)
+ pData = data.data();
+
// Some sections, such as debug info, don't need to be loaded for execution.
// Leave those where they are.
if (IsRequired) {
Allocate = DataSize + PaddingSize + StubBufSize;
- Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID,
- Name)
- : MemMgr->allocateDataSection(Allocate, Alignment, SectionID,
- Name, IsReadOnly);
+ if (!Allocate)
+ Allocate = 1;
+ Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID,
+ Name)
+ : MemMgr.allocateDataSection(Allocate, Alignment, SectionID,
+ Name, IsReadOnly);
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
- // Virtual sections have no data in the object image, so leave pData = 0
- if (!IsVirtual)
- pData = data.data();
-
// Zero-initialize or copy the data from the image
if (IsZeroInit || IsVirtual)
memset(Addr, 0, DataSize);
@@ -589,14 +658,15 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
// Relocation by symbol. If the symbol is found in the global symbol table,
// create an appropriate section relocation. Otherwise, add it to
// ExternalSymbolRelocations.
- SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(SymbolName);
+ RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(SymbolName);
if (Loc == GlobalSymbolTable.end()) {
ExternalSymbolRelocations[SymbolName].push_back(RE);
} else {
// Copy the RE since we want to modify its addend.
RelocationEntry RECopy = RE;
- RECopy.Addend += Loc->second.second;
- Relocations[Loc->second.first].push_back(RECopy);
+ const auto &SymInfo = Loc->second;
+ RECopy.Addend += SymInfo.getOffset();
+ Relocations[SymInfo.getSectionID()].push_back(RECopy);
}
}
@@ -721,11 +791,11 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
resolveRelocationList(Relocs, 0);
} else {
uint64_t Addr = 0;
- SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name);
+ RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(Name);
if (Loc == GlobalSymbolTable.end()) {
- // This is an external symbol, try to get its address from
- // MemoryManager.
- Addr = MemMgr->getSymbolAddress(Name.data());
+ // This is an external symbol, try to get its address from the symbol
+ // resolver.
+ Addr = Resolver.findSymbol(Name.data()).getAddress();
// The call to getSymbolAddress may have caused additional modules to
// be loaded, which may have added new entries to the
// ExternalSymbolRelocations map. Consequently, we need to update our
@@ -736,8 +806,9 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
} else {
// We found the symbol in our global table. It was probably in a
// Module that we loaded previously.
- SymbolLoc SymLoc = Loc->second;
- Addr = getSectionLoadAddress(SymLoc.first) + SymLoc.second;
+ const auto &SymInfo = Loc->second;
+ Addr = getSectionLoadAddress(SymInfo.getSectionID()) +
+ SymInfo.getOffset();
}
// FIXME: Implement error handling that doesn't kill the host program!
@@ -745,7 +816,6 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
report_fatal_error("Program used external function '" + Name +
"' which could not be resolved!");
- updateGOTEntries(Name, Addr);
DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t"
<< format("0x%lx", Addr) << "\n");
// This list may have been updated when we called getSymbolAddress, so
@@ -770,7 +840,12 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
return 0;
}
-RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+void RuntimeDyld::MemoryManager::anchor() {}
+void RuntimeDyld::SymbolResolver::anchor() {}
+
+RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : MemMgr(MemMgr), Resolver(Resolver) {
// FIXME: There's a potential issue lurking here if a single instance of
// RuntimeDyld is used to load multiple objects. The current implementation
// associates a single memory manager with a RuntimeDyld instance. Even
@@ -778,26 +853,40 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
// they share a single memory manager. This can become a problem when page
// permissions are applied.
Dyld = nullptr;
- MM = mm;
ProcessAllSections = false;
Checker = nullptr;
}
RuntimeDyld::~RuntimeDyld() {}
+static std::unique_ptr<RuntimeDyldCOFF>
+createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldCOFF> Dyld =
+ RuntimeDyldCOFF::create(Arch, MM, Resolver);
+ Dyld->setProcessAllSections(ProcessAllSections);
+ Dyld->setRuntimeDyldChecker(Checker);
+ return Dyld;
+}
+
static std::unique_ptr<RuntimeDyldELF>
-createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections,
- RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM));
+createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM, Resolver));
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
}
static std::unique_ptr<RuntimeDyldMachO>
-createRuntimeDyldMachO(Triple::ArchType Arch, RTDyldMemoryManager *MM,
- bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldMachO> Dyld(RuntimeDyldMachO::create(Arch, MM));
+createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver,
+ bool ProcessAllSections,
+ RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldMachO> Dyld =
+ RuntimeDyldMachO::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
@@ -807,10 +896,14 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld) {
if (Obj.isELF())
- Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker);
+ Dyld = createRuntimeDyldELF(MemMgr, Resolver, ProcessAllSections, Checker);
else if (Obj.isMachO())
Dyld = createRuntimeDyldMachO(
- static_cast<Triple::ArchType>(Obj.getArch()), MM,
+ static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
+ ProcessAllSections, Checker);
+ else if (Obj.isCOFF())
+ Dyld = createRuntimeDyldCOFF(
+ static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
ProcessAllSections, Checker);
else
report_fatal_error("Incompatible object format!");
@@ -822,16 +915,16 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
return Dyld->loadObject(Obj);
}
-void *RuntimeDyld::getSymbolAddress(StringRef Name) const {
+void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
if (!Dyld)
return nullptr;
- return Dyld->getSymbolAddress(Name);
+ return Dyld->getSymbolLocalAddress(Name);
}
-uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) const {
+RuntimeDyld::SymbolInfo RuntimeDyld::getSymbol(StringRef Name) const {
if (!Dyld)
- return 0;
- return Dyld->getSymbolLoadAddress(Name);
+ return nullptr;
+ return Dyld->getSymbol(Name);
}
void RuntimeDyld::resolveRelocations() { Dyld->resolveRelocations(); }
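
The new COFF branches in the section predicates above classify sections purely from header fields: a section is required for execution when it has content (nonzero VirtualSize for PE images or SizeOfRawData for object files) and is not discardable, and it is read-only data when it is initialized data that is readable but not writable. A hedged sketch of the same flag logic over plain integers, with constants mirroring the COFF characteristics named above:

    #include <cstdint>

    // Subset of COFF section characteristic bits referenced above.
    static const uint32_t MemDiscardable = 0x02000000; // IMAGE_SCN_MEM_DISCARDABLE
    static const uint32_t LnkInfo        = 0x00000200; // IMAGE_SCN_LNK_INFO
    static const uint32_t CntInitData    = 0x00000040; // IMAGE_SCN_CNT_INITIALIZED_DATA
    static const uint32_t MemRead        = 0x40000000; // IMAGE_SCN_MEM_READ
    static const uint32_t MemWrite       = 0x80000000; // IMAGE_SCN_MEM_WRITE

    static bool requiredForExecution(uint32_t VirtualSize, uint32_t SizeOfRawData,
                                     uint32_t Characteristics) {
      bool HasContent = VirtualSize > 0 || SizeOfRawData > 0;
      bool IsDiscardable = Characteristics & (MemDiscardable | LnkInfo);
      return HasContent && !IsDiscardable;
    }

    static bool readOnlyData(uint32_t Characteristics) {
      const uint32_t Mask = CntInitData | MemRead | MemWrite;
      return (Characteristics & Mask) == (CntInitData | MemRead);
    }
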
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
new file mode 100644
index 0000000..c8d3d22
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -0,0 +1,88 @@
+//===-- RuntimeDyldCOFF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of COFF support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RuntimeDyldCOFF.h"
+#include "Targets/RuntimeDyldCOFFX86_64.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+#define DEBUG_TYPE "dyld"
+
+namespace {
+
+class LoadedCOFFObjectInfo
+ : public RuntimeDyld::LoadedObjectInfoHelper<LoadedCOFFObjectInfo> {
+public:
+ LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
+ unsigned EndIdx)
+ : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
+
+ OwningBinary<ObjectFile>
+ getObjectForDebug(const ObjectFile &Obj) const override {
+ return OwningBinary<ObjectFile>();
+ }
+};
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<RuntimeDyldCOFF>
+llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver) {
+ switch (Arch) {
+ default:
+ llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
+ break;
+ case Triple::x86_64:
+ return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
+ }
+}
+
+std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
+RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
+ unsigned SectionStartIdx, SectionEndIdx;
+ std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O);
+ return llvm::make_unique<LoadedCOFFObjectInfo>(*this, SectionStartIdx,
+ SectionEndIdx);
+}
+
+uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) {
+ uint64_t Address;
+ if (Sym.getAddress(Address))
+ return UnknownAddressOrSize;
+
+ if (Address == UnknownAddressOrSize)
+ return UnknownAddressOrSize;
+
+ const ObjectFile *Obj = Sym.getObject();
+ section_iterator SecI(Obj->section_end());
+ if (Sym.getSection(SecI))
+ return UnknownAddressOrSize;
+
+ if (SecI == Obj->section_end())
+ return UnknownAddressOrSize;
+
+ uint64_t SectionAddress = SecI->getAddress();
+ return Address - SectionAddress;
+}
+
+bool RuntimeDyldCOFF::isCompatibleFile(const object::ObjectFile &Obj) const {
+ return Obj.isCOFF();
+}
+
+} // namespace llvm
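
getSymbolOffset above reduces an absolute symbol address to a section-relative offset by subtracting the containing section's address; a section ID plus this offset is how the global symbol table records locations. The arithmetic, with illustrative addresses:

    #include <cstdint>

    // A symbol's section-relative offset is its absolute address minus
    // the address of its containing section.
    static uint64_t symbolOffset(uint64_t SymbolAddr, uint64_t SectionAddr) {
      return SymbolAddr - SectionAddr; // e.g. 0x140001010 - 0x140001000 == 0x10
    }
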
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
new file mode 100644
index 0000000..32b8fa2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -0,0 +1,50 @@
+//===-- RuntimeDyldCOFF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_COFF_H
+#define LLVM_RUNTIME_DYLD_COFF_H
+
+#include "RuntimeDyldImpl.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define DEBUG_TYPE "dyld"
+
+using namespace llvm;
+
+namespace llvm {
+
+// Common base class for COFF dynamic linker support.
+// Concrete subclasses for each target can be found in ./Targets.
+class RuntimeDyldCOFF : public RuntimeDyldImpl {
+
+public:
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
+ loadObject(const object::ObjectFile &Obj) override;
+ bool isCompatibleFile(const object::ObjectFile &Obj) const override;
+
+ static std::unique_ptr<RuntimeDyldCOFF>
+ create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
+
+protected:
+ RuntimeDyldCOFF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver) {}
+ uint64_t getSymbolOffset(const SymbolRef &Sym);
+};
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index f3e5c77..957571b 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -260,9 +260,7 @@ private:
<< "'. Instruction has only "
<< format("%i", Inst.getNumOperands())
<< " operands.\nInstruction is:\n ";
- Inst.dump_pretty(ErrMsgStream,
- Checker.Disassembler->getContext().getAsmInfo(),
- Checker.InstPrinter);
+ Inst.dump_pretty(ErrMsgStream, Checker.InstPrinter);
return std::make_pair(EvalResult(ErrMsgStream.str()), "");
}
@@ -272,9 +270,7 @@ private:
raw_string_ostream ErrMsgStream(ErrMsg);
ErrMsgStream << "Operand '" << format("%i", OpIdx) << "' of instruction '"
<< Symbol << "' is not an immediate.\nInstruction is:\n ";
- Inst.dump_pretty(ErrMsgStream,
- Checker.Disassembler->getContext().getAsmInfo(),
- Checker.InstPrinter);
+ Inst.dump_pretty(ErrMsgStream, Checker.InstPrinter);
return std::make_pair(EvalResult(ErrMsgStream.str()), "");
}
@@ -314,7 +310,7 @@ private:
"");
uint64_t SymbolAddr = PCtx.IsInsideLoad
- ? Checker.getSymbolLinkerAddr(Symbol)
+ ? Checker.getSymbolLocalAddr(Symbol)
: Checker.getSymbolRemoteAddr(Symbol);
uint64_t NextPC = SymbolAddr + InstSize;
@@ -441,7 +437,7 @@ private:
// The value for the symbol depends on the context we're evaluating in:
// Inside a load this is the address in the linker's memory, outside a
// load it's the address in the target processes memory.
- uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLinkerAddr(Symbol)
+ uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLocalAddr(Symbol)
: Checker.getSymbolRemoteAddr(Symbol);
// Looks like a plain symbol reference.
@@ -731,18 +727,18 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
}
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
- return getRTDyld().getSymbolAddress(Symbol) != nullptr;
+ return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr;
}
-uint64_t RuntimeDyldCheckerImpl::getSymbolLinkerAddr(StringRef Symbol) const {
+uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
return static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(getRTDyld().getSymbolAddress(Symbol)));
+ reinterpret_cast<uintptr_t>(getRTDyld().getSymbolLocalAddress(Symbol)));
}
uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
- if (uint64_t InternalSymbolAddr = getRTDyld().getSymbolLoadAddress(Symbol))
- return InternalSymbolAddr;
- return getRTDyld().MemMgr->getSymbolAddress(Symbol);
+ if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
+ return InternalSymbol.getAddress();
+ return getRTDyld().Resolver.findSymbol(Symbol).getAddress();
}
uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
@@ -850,14 +846,16 @@ std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubAddrFor(
StringRef
RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const {
- RuntimeDyldImpl::SymbolTableMap::const_iterator pos =
+ RTDyldSymbolTable::const_iterator pos =
getRTDyld().GlobalSymbolTable.find(Name);
if (pos == getRTDyld().GlobalSymbolTable.end())
return StringRef();
- RuntimeDyldImpl::SymbolLoc Loc = pos->second;
- uint8_t *SectionAddr = getRTDyld().getSectionAddress(Loc.first);
- return StringRef(reinterpret_cast<const char *>(SectionAddr) + Loc.second,
- getRTDyld().Sections[Loc.first].Size - Loc.second);
+ const auto &SymInfo = pos->second;
+ uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID());
+ return StringRef(reinterpret_cast<const char *>(SectionAddr) +
+ SymInfo.getOffset(),
+ getRTDyld().Sections[SymInfo.getSectionID()].Size -
+ SymInfo.getOffset());
}
void RuntimeDyldCheckerImpl::registerSection(
@@ -887,9 +885,10 @@ void RuntimeDyldCheckerImpl::registerStubMap(
// If this is a (Section, Offset) pair, do a reverse lookup in the
// global symbol table to find the name.
for (auto &GSTEntry : getRTDyld().GlobalSymbolTable) {
- if (GSTEntry.second.first == StubMapEntry.first.SectionID &&
- GSTEntry.second.second ==
- static_cast<uint64_t>(StubMapEntry.first.Offset)) {
+ const auto &SymInfo = GSTEntry.second;
+ if (SymInfo.getSectionID() == StubMapEntry.first.SectionID &&
+ SymInfo.getOffset() ==
+ static_cast<uint64_t>(StubMapEntry.first.Offset)) {
SymbolName = GSTEntry.first();
break;
}
@@ -930,6 +929,6 @@ bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
std::pair<uint64_t, std::string>
RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName,
- bool LinkerAddress) {
- return Impl->getSectionAddr(FileName, SectionName, LinkerAddress);
+ bool LocalAddress) {
+ return Impl->getSectionAddr(FileName, SectionName, LocalAddress);
}
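
The getSymbolLinkerAddr to getSymbolLocalAddr rename above makes the two address spaces explicit: the local address is where RuntimeDyld wrote the section bytes in this process, while the remote (load) address is where the target will execute them, and expression evaluation chooses between them based on IsInsideLoad. A small sketch of that selection (the struct and field names are illustrative):

    #include <cstdint>

    struct SymbolAddrs {
      uint64_t Local;  // where the linker's copy of the bytes lives
      uint64_t Remote; // where the code will run in the target
    };

    // Inside a load expression we inspect the linker's copy; otherwise we
    // want the target's execution address, mirroring IsInsideLoad above.
    static uint64_t pickAddr(const SymbolAddrs &S, bool IsInsideLoad) {
      return IsInsideLoad ? S.Local : S.Remote;
    }
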
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index de20c1e..69d2a7d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -19,6 +19,7 @@ class RuntimeDyldCheckerImpl {
friend class RuntimeDyldChecker;
friend class RuntimeDyldImpl;
friend class RuntimeDyldCheckerExprEval;
+ friend class RuntimeDyldELF;
public:
RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
@@ -42,7 +43,7 @@ private:
RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
bool isSymbolValid(StringRef Symbol) const;
- uint64_t getSymbolLinkerAddr(StringRef Symbol) const;
+ uint64_t getSymbolLocalAddr(StringRef Symbol) const;
uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 2664a10..95421b3 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "RuntimeDyldELF.h"
+#include "RuntimeDyldCheckerImpl.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -103,11 +104,12 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
sym->st_value = static_cast<addr_type>(Addr);
}
-class LoadedELFObjectInfo : public RuntimeDyld::LoadedObjectInfo {
+class LoadedELFObjectInfo
+ : public RuntimeDyld::LoadedObjectInfoHelper<LoadedELFObjectInfo> {
public:
LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
unsigned EndIdx)
- : RuntimeDyld::LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {}
+ : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
OwningBinary<ObjectFile>
getObjectForDebug(const ObjectFile &Obj) const override;
@@ -183,32 +185,30 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const {
namespace llvm {
-RuntimeDyldELF::RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {}
RuntimeDyldELF::~RuntimeDyldELF() {}
void RuntimeDyldELF::registerEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = UnregisteredEHFrameSections[i];
uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
size_t EHFrameSize = Sections[EHFrameSID].Size;
- MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
RegisteredEHFrameSections.push_back(EHFrameSID);
}
UnregisteredEHFrameSections.clear();
}
void RuntimeDyldELF::deregisterEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) {
SID EHFrameSID = RegisteredEHFrameSections[i];
uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
size_t EHFrameSize = Sections[EHFrameSID].Size;
- MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
}
RegisteredEHFrameSections.clear();
}
@@ -247,40 +247,18 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
<< format("%p\n", Section.Address + Offset));
break;
}
- case ELF::R_X86_64_GOTPCREL: {
- // findGOTEntry returns the 'G + GOT' part of the relocation calculation
- // based on the load/target address of the GOT (not the current/local addr).
- uint64_t GOTAddr = findGOTEntry(Value, SymOffset);
- uint64_t FinalAddress = Section.LoadAddress + Offset;
- // The processRelocationRef method combines the symbol offset and the addend
- // and in most cases that's what we want. For this relocation type, we need
- // the raw addend, so we subtract the symbol offset to get it.
- int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress;
- assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
- int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
- support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
- break;
- }
case ELF::R_X86_64_PC32: {
- // Get the placeholder value from the generated object since
- // a previous relocation attempt may have overwritten the loaded version
- support::ulittle32_t::ref Placeholder(
- (void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- int64_t RealOffset = Placeholder + Value + Addend - FinalAddress;
- assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
+ int64_t RealOffset = Value + Addend - FinalAddress;
+ assert(isInt<32>(RealOffset));
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
break;
}
case ELF::R_X86_64_PC64: {
- // Get the placeholder value from the generated object since
- // a previous relocation attempt may have overwritten the loaded version
- support::ulittle64_t::ref Placeholder(
- (void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- support::ulittle64_t::ref(Section.Address + Offset) =
- Placeholder + Value + Addend - FinalAddress;
+ int64_t RealOffset = Value + Addend - FinalAddress;
+ support::ulittle64_t::ref(Section.Address + Offset) = RealOffset;
break;
}
}
@@ -291,21 +269,12 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
uint32_t Type, int32_t Addend) {
switch (Type) {
case ELF::R_386_32: {
- // Get the placeholder value from the generated object since
- // a previous relocation attempt may have overwritten the loaded version
- support::ulittle32_t::ref Placeholder(
- (void *)(Section.ObjAddress + Offset));
- support::ulittle32_t::ref(Section.Address + Offset) =
- Placeholder + Value + Addend;
+ support::ulittle32_t::ref(Section.Address + Offset) = Value + Addend;
break;
}
case ELF::R_386_PC32: {
- // Get the placeholder value from the generated object since
- // a previous relocation attempt may have overwritten the loaded version
- support::ulittle32_t::ref Placeholder(
- (void *)(Section.ObjAddress + Offset));
uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
- uint32_t RealOffset = Placeholder + Value + Addend - FinalAddress;
+ uint32_t RealOffset = Value + Addend - FinalAddress;
support::ulittle32_t::ref(Section.Address + Offset) = RealOffset;
break;
}
@@ -354,8 +323,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
uint64_t BranchImm = Value + Addend - FinalAddress;
// "Check that -2^27 <= result < 2^27".
- assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) &&
- static_cast<int64_t>(BranchImm) < (1LL << 27));
+ assert(isInt<28>(BranchImm));
// AArch64 code is emitted with .rela relocations. The data already in any
// bits affected by the relocation on entry is garbage.
@@ -418,9 +386,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
((Value + Addend) & ~0xfffULL) - (FinalAddress & ~0xfffULL);
// Check that -2^32 <= X < 2^32
- assert(static_cast<int64_t>(Result) >= (-1LL << 32) &&
- static_cast<int64_t>(Result) < (1LL << 32) &&
- "overflow check failed for relocation");
+ assert(isInt<33>(Result) && "overflow check failed for relocation");
// AArch64 code is emitted with .rela relocations. The data already in any
// bits affected by the relocation on entry is garbage.
@@ -462,8 +428,6 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
uint64_t Offset, uint32_t Value,
uint32_t Type, int32_t Addend) {
// TODO: Add Thumb relocations.
- uint32_t *Placeholder =
- reinterpret_cast<uint32_t *>(Section.ObjAddress + Offset);
uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset);
uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
Value += Addend;
@@ -480,39 +444,27 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
case ELF::R_ARM_NONE:
break;
- // Write a 32bit value to relocation address, taking into account the
- // implicit addend encoded in the target.
case ELF::R_ARM_PREL31:
case ELF::R_ARM_TARGET1:
case ELF::R_ARM_ABS32:
- *TargetPtr = *Placeholder + Value;
+ *TargetPtr = Value;
break;
- // Write first 16 bit of 32 bit value to the mov instruction.
- // Last 4 bit should be shifted.
+ // Write first 16 bit of 32 bit value to the mov instruction.
+ // Last 4 bit should be shifted.
case ELF::R_ARM_MOVW_ABS_NC:
- // We are not expecting any other addend in the relocation address.
- // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2
- // non-contiguous fields.
- assert((*Placeholder & 0x000F0FFF) == 0);
- Value = Value & 0xFFFF;
- *TargetPtr = *Placeholder | (Value & 0xFFF);
- *TargetPtr |= ((Value >> 12) & 0xF) << 16;
- break;
- // Write last 16 bit of 32 bit value to the mov instruction.
- // Last 4 bit should be shifted.
case ELF::R_ARM_MOVT_ABS:
- // We are not expecting any other addend in the relocation address.
- // Use 0x000F0FFF for the same reason as R_ARM_MOVW_ABS_NC.
- assert((*Placeholder & 0x000F0FFF) == 0);
-
- Value = (Value >> 16) & 0xFFFF;
- *TargetPtr = *Placeholder | (Value & 0xFFF);
+ if (Type == ELF::R_ARM_MOVW_ABS_NC)
+ Value = Value & 0xFFFF;
+ else if (Type == ELF::R_ARM_MOVT_ABS)
+ Value = (Value >> 16) & 0xFFFF;
+ *TargetPtr &= ~0x000F0FFF;
+ *TargetPtr |= Value & 0xFFF;
*TargetPtr |= ((Value >> 12) & 0xF) << 16;
break;
- // Write 24 bit relative value to the branch instruction.
+ // Write 24 bit relative value to the branch instruction.
case ELF::R_ARM_PC24: // Fall through.
case ELF::R_ARM_CALL: // Fall through.
- case ELF::R_ARM_JUMP24: {
+ case ELF::R_ARM_JUMP24:
int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
RelValue = (RelValue & 0x03FFFFFC) >> 2;
assert((*TargetPtr & 0xFFFFFF) == 0xFFFFFE);
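
For the ARM branch relocations above, the target is encoded PC-relative with the ARM pipeline's 8-byte bias, then divided by 4 (branch targets are word-aligned) to fit the instruction's 24-bit immediate field. A worked sketch with illustrative addresses:

    #include <cstdint>

    // Encode an ARM B/BL immediate: byte offset relative to PC + 8,
    // masked to the field and right-shifted by 2 for word alignment.
    static uint32_t encodeARMBranchImm(uint32_t Target, uint32_t FinalAddress) {
      int32_t RelValue = static_cast<int32_t>(Target - FinalAddress - 8);
      return (static_cast<uint32_t>(RelValue) & 0x03FFFFFC) >> 2;
    }
    // e.g. Target 0x8100 from FinalAddress 0x8000: RelValue == 0xF8 and
    // the encoded immediate is 0x3E.
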
@@ -520,21 +472,11 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
*TargetPtr |= RelValue;
break;
}
- case ELF::R_ARM_PRIVATE_0:
- // This relocation is reserved by the ARM ELF ABI for internal use. We
- // appropriate it here to act as an R_ARM_ABS32 without any addend for use
- // in the stubs created during JIT (which can't put an addend into the
- // original object file).
- *TargetPtr = Value;
- break;
- }
}
void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
uint64_t Offset, uint32_t Value,
uint32_t Type, int32_t Addend) {
- uint32_t *Placeholder =
- reinterpret_cast<uint32_t *>(Section.ObjAddress + Offset);
uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset);
Value += Addend;
@@ -549,30 +491,17 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
llvm_unreachable("Not implemented relocation type!");
break;
case ELF::R_MIPS_32:
- *TargetPtr = Value + (*Placeholder);
+ *TargetPtr = Value;
break;
case ELF::R_MIPS_26:
- *TargetPtr = ((*Placeholder) & 0xfc000000) | ((Value & 0x0fffffff) >> 2);
+ *TargetPtr = ((*TargetPtr) & 0xfc000000) | ((Value & 0x0fffffff) >> 2);
break;
case ELF::R_MIPS_HI16:
// Get the higher 16-bits. Also add 1 if bit 15 is 1.
- Value += ((*Placeholder) & 0x0000ffff) << 16;
*TargetPtr =
- ((*Placeholder) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff);
+ ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff);
break;
case ELF::R_MIPS_LO16:
- Value += ((*Placeholder) & 0x0000ffff);
- *TargetPtr = ((*Placeholder) & 0xffff0000) | (Value & 0xffff);
- break;
- case ELF::R_MIPS_UNUSED1:
- // Similar to ELF::R_ARM_PRIVATE_0, R_MIPS_UNUSED1 and R_MIPS_UNUSED2
- // are used for internal JIT purpose. These relocations are similar to
- // R_MIPS_HI16 and R_MIPS_LO16, but they do not take any addend into
- // account.
- *TargetPtr =
- ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff);
- break;
- case ELF::R_MIPS_UNUSED2:
*TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff);
break;
}
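
The (Value + 0x8000) >> 16 in R_MIPS_HI16 is the standard %hi carry adjustment: a lui/addiu pair reassembles an address as (HI << 16) + signext(LO), so HI must round up whenever bit 15 of the value is set. A self-contained check of that identity:

#include <cassert>
#include <cstdint>

int main() {
  // Split an address into %hi/%lo the way lui/addiu reassemble it.
  uint32_t Value = 0x0001FFF8;            // low half has bit 15 set
  uint32_t Hi = (Value + 0x8000) >> 16;   // rounds up to 0x0002
  int16_t Lo = (int16_t)(Value & 0xFFFF); // sign-extends to -8
  // lui places Hi << 16; addiu then adds the sign-extended Lo back.
  assert((uint32_t)((Hi << 16) + Lo) == Value);
  return 0;
}
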
@@ -898,6 +827,18 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
}
}
+void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID,
+                                                uint64_t Offset) const {
+ return (void*)(Sections[SectionID].ObjAddress + Offset);
+}
+
+void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset,
+                                             unsigned RelType,
+                                             RelocationValueRef Value) {
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+}
+
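
These two helpers carry the main idea of the refactoring: the implicit addend is read once from the original object bytes (the placeholder) while the relocation is processed and folded into RelocationEntry::Addend, so resolve-time code can store the final value instead of read-modify-writing the target word. A condensed model of the pattern, with simplified stand-in types (names here are illustrative):

#include <cstdint>

// Hypothetical, reduced stand-in for SectionEntry.
struct Section { uint8_t *ObjAddress; };

// Read the implicit addend out of the original object image once, at
// processRelocationRef() time...
uint32_t readPlaceholder(const Section &S, uint64_t Offset) {
  return *reinterpret_cast<uint32_t *>(S.ObjAddress + Offset);
}

// ...and fold it into the entry's addend; resolveRelocation() can then
// simply overwrite the target word.
void processAbs32(const Section &S, uint64_t Offset, int64_t &Addend) {
  Addend += readPlaceholder(S, Offset);
}

int main() {
  uint8_t Obj[8] = {0x10, 0, 0, 0}; // implicit addend 0x10 (little-endian host)
  Section S{Obj};
  int64_t Addend = 4;
  processAbs32(S, 0, Addend);
  return Addend == 0x14 ? 0 : 1;    // folded in once, up front
}
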
relocation_iterator RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI,
const ObjectFile &Obj,
@@ -920,15 +861,16 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
SymbolRef::Type SymType = SymbolRef::ST_Unknown;
// Search for the symbol in the global symbol table
- SymbolTableMap::const_iterator gsi = GlobalSymbolTable.end();
+ RTDyldSymbolTable::const_iterator gsi = GlobalSymbolTable.end();
if (Symbol != Obj.symbol_end()) {
gsi = GlobalSymbolTable.find(TargetName.data());
Symbol->getType(SymType);
}
if (gsi != GlobalSymbolTable.end()) {
- Value.SectionID = gsi->second.first;
- Value.Offset = gsi->second.second;
- Value.Addend = gsi->second.second + Addend;
+ const auto &SymInfo = gsi->second;
+ Value.SectionID = SymInfo.getSectionID();
+ Value.Offset = SymInfo.getOffset();
+ Value.Addend = SymInfo.getOffset() + Addend;
} else {
switch (SymType) {
case SymbolRef::ST_Debug: {
@@ -1014,80 +956,103 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
0);
Section.StubOffset += getMaxStubSize();
}
- } else if (Arch == Triple::arm &&
- (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL ||
- RelType == ELF::R_ARM_JUMP24)) {
- // This is an ARM branch relocation, need to use a stub function.
- DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
- SectionEntry &Section = Sections[SectionID];
+ } else if (Arch == Triple::arm) {
+ if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL ||
+ RelType == ELF::R_ARM_JUMP24) {
+      // This is an ARM branch relocation; we need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+ SectionEntry &Section = Sections[SectionID];
- // Look for an existing stub.
- StubMap::const_iterator i = Stubs.find(Value);
- if (i != Stubs.end()) {
- resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second,
- RelType, 0);
- DEBUG(dbgs() << " Stub function found\n");
- } else {
- // Create a new stub function.
- DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second,
+ RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr =
createStubFunction(Section.Address + Section.StubOffset);
- RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
- ELF::R_ARM_PRIVATE_0, Value.Addend);
- if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
- else
- addRelocationForSection(RE, Value.SectionID);
-
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset, RelType,
- 0);
- Section.StubOffset += getMaxStubSize();
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
+ ELF::R_ARM_ABS32, Value.Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ resolveRelocation(Section, Offset,
+ (uint64_t)Section.Address + Section.StubOffset, RelType,
+ 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else {
+ uint32_t *Placeholder =
+ reinterpret_cast<uint32_t*>(computePlaceholderAddress(SectionID, Offset));
+ if (RelType == ELF::R_ARM_PREL31 || RelType == ELF::R_ARM_TARGET1 ||
+ RelType == ELF::R_ARM_ABS32) {
+ Value.Addend += *Placeholder;
+ } else if (RelType == ELF::R_ARM_MOVW_ABS_NC || RelType == ELF::R_ARM_MOVT_ABS) {
+        // See "ELF for the ARM Architecture": the initial addend is encoded
+        // in the instruction's split imm4:imm12 immediate field.
+ Value.Addend += (int16_t)((*Placeholder & 0xFFF) | (((*Placeholder >> 16) & 0xF) << 12));
+ }
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
}
- } else if ((Arch == Triple::mipsel || Arch == Triple::mips) &&
- RelType == ELF::R_MIPS_26) {
- // This is an Mips branch relocation, need to use a stub function.
- DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
- SectionEntry &Section = Sections[SectionID];
- uint8_t *Target = Section.Address + Offset;
- uint32_t *TargetAddress = (uint32_t *)Target;
+  } else if (Arch == Triple::mipsel || Arch == Triple::mips) {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(computePlaceholderAddress(SectionID, Offset));
+ if (RelType == ELF::R_MIPS_26) {
+      // This is a MIPS branch relocation; we need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
+ SectionEntry &Section = Sections[SectionID];
- // Extract the addend from the instruction.
- uint32_t Addend = ((*TargetAddress) & 0x03ffffff) << 2;
+ // Extract the addend from the instruction.
+      // We shift up by two since the Value will be shifted down again
+      // when the relocation is applied.
+ uint32_t Addend = ((*Placeholder) & 0x03ffffff) << 2;
- Value.Addend += Addend;
+ Value.Addend += Addend;
- // Look up for existing stub.
- StubMap::const_iterator i = Stubs.find(Value);
- if (i != Stubs.end()) {
- RelocationEntry RE(SectionID, Offset, RelType, i->second);
- addRelocationForSection(RE, SectionID);
- DEBUG(dbgs() << " Stub function found\n");
- } else {
- // Create a new stub function.
- DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
+ // Look up for existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ RelocationEntry RE(SectionID, Offset, RelType, i->second);
+ addRelocationForSection(RE, SectionID);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr =
createStubFunction(Section.Address + Section.StubOffset);
- // Creating Hi and Lo relocations for the filled stub instructions.
- RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address,
- ELF::R_MIPS_UNUSED1, Value.Addend);
- RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4,
- ELF::R_MIPS_UNUSED2, Value.Addend);
+ // Creating Hi and Lo relocations for the filled stub instructions.
+ RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address,
+ ELF::R_MIPS_HI16, Value.Addend);
+ RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4,
+ ELF::R_MIPS_LO16, Value.Addend);
- if (Value.SymbolName) {
- addRelocationForSymbol(REHi, Value.SymbolName);
- addRelocationForSymbol(RELo, Value.SymbolName);
- } else {
- addRelocationForSection(REHi, Value.SectionID);
- addRelocationForSection(RELo, Value.SectionID);
- }
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REHi, Value.SymbolName);
+ addRelocationForSymbol(RELo, Value.SymbolName);
+      } else {
+ addRelocationForSection(REHi, Value.SectionID);
+ addRelocationForSection(RELo, Value.SectionID);
+ }
- RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset);
- addRelocationForSection(RE, SectionID);
- Section.StubOffset += getMaxStubSize();
+ RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset);
+ addRelocationForSection(RE, SectionID);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else {
+ if (RelType == ELF::R_MIPS_HI16)
+ Value.Addend += ((*Placeholder) & 0x0000ffff) << 16;
+ else if (RelType == ELF::R_MIPS_LO16)
+ Value.Addend += ((*Placeholder) & 0x0000ffff);
+ else if (RelType == ELF::R_MIPS_32)
+ Value.Addend += *Placeholder;
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
}
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
@@ -1127,7 +1092,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
RangeOverflow = true;
}
}
- if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) {
+ if (SymType == SymbolRef::ST_Unknown || RangeOverflow) {
// It is an external symbol (SymbolRef::ST_Unknown) or within a range
// larger than 24-bits.
StubMap::const_iterator i = Stubs.find(Value);
@@ -1286,93 +1251,95 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
Addend);
else
resolveRelocation(Section, Offset, StubAddress, RelType, Addend);
- } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_PLT32) {
- // The way the PLT relocations normally work is that the linker allocates
- // the
- // PLT and this relocation makes a PC-relative call into the PLT. The PLT
- // entry will then jump to an address provided by the GOT. On first call,
- // the
- // GOT address will point back into PLT code that resolves the symbol. After
- // the first call, the GOT entry points to the actual function.
- //
- // For local functions we're ignoring all of that here and just replacing
- // the PLT32 relocation type with PC32, which will translate the relocation
- // into a PC-relative call directly to the function. For external symbols we
- // can't be sure the function will be within 2^32 bytes of the call site, so
- // we need to create a stub, which calls into the GOT. This case is
- // equivalent to the usual PLT implementation except that we use the stub
- // mechanism in RuntimeDyld (which puts stubs at the end of the section)
- // rather than allocating a PLT section.
- if (Value.SymbolName) {
- // This is a call to an external function.
- // Look for an existing stub.
- SectionEntry &Section = Sections[SectionID];
- StubMap::const_iterator i = Stubs.find(Value);
- uintptr_t StubAddress;
- if (i != Stubs.end()) {
+ } else if (Arch == Triple::x86_64) {
+ if (RelType == ELF::R_X86_64_PLT32) {
+      // The way the PLT relocations normally work is that the linker
+      // allocates the PLT and this relocation makes a PC-relative call into
+      // the PLT. The PLT entry will then jump to an address provided by the
+      // GOT. On first call, the GOT address will point back into PLT code
+      // that resolves the symbol. After the first call, the GOT entry points
+      // to the actual function.
+ //
+ // For local functions we're ignoring all of that here and just replacing
+ // the PLT32 relocation type with PC32, which will translate the relocation
+ // into a PC-relative call directly to the function. For external symbols we
+ // can't be sure the function will be within 2^32 bytes of the call site, so
+ // we need to create a stub, which calls into the GOT. This case is
+ // equivalent to the usual PLT implementation except that we use the stub
+ // mechanism in RuntimeDyld (which puts stubs at the end of the section)
+ // rather than allocating a PLT section.
+ if (Value.SymbolName) {
+ // This is a call to an external function.
+ // Look for an existing stub.
+ SectionEntry &Section = Sections[SectionID];
+ StubMap::const_iterator i = Stubs.find(Value);
+ uintptr_t StubAddress;
+ if (i != Stubs.end()) {
StubAddress = uintptr_t(Section.Address) + i->second;
DEBUG(dbgs() << " Stub function found\n");
- } else {
+ } else {
// Create a new stub function (equivalent to a PLT entry).
DEBUG(dbgs() << " Create a new stub function\n");
uintptr_t BaseAddress = uintptr_t(Section.Address);
uintptr_t StubAlignment = getStubAlignment();
StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) &
- -StubAlignment;
+ -StubAlignment;
unsigned StubOffset = StubAddress - BaseAddress;
Stubs[Value] = StubOffset;
createStubFunction((uint8_t *)StubAddress);
- // Create a GOT entry for the external function.
- GOTEntries.push_back(Value);
-
- // Make our stub function a relative call to the GOT entry.
- RelocationEntry RE(SectionID, StubOffset + 2, ELF::R_X86_64_GOTPCREL,
- -4);
- addRelocationForSymbol(RE, Value.SymbolName);
-
// Bump our stub offset counter
Section.StubOffset = StubOffset + getMaxStubSize();
+
+ // Allocate a GOT Entry
+ uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+
+ // The load of the GOT address has an addend of -4
+ resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4);
+
+ // Fill in the value of the symbol we're targeting into the GOT
+          addRelocationForSymbol(
+              computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64),
+              Value.SymbolName);
+ }
+
+ // Make the target call a call into the stub table.
+ resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32,
+ Addend);
+ } else {
+ RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend,
+ Value.Offset);
+ addRelocationForSection(RE, Value.SectionID);
}
+ } else if (RelType == ELF::R_X86_64_GOTPCREL) {
+ uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend);
- // Make the target call a call into the stub table.
- resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32,
- Addend);
+ // Fill in the value of the symbol we're targeting into the GOT
+ RelocationEntry RE = computeGOTOffsetRE(SectionID, GOTOffset, Value.Offset, ELF::R_X86_64_64);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ } else if (RelType == ELF::R_X86_64_PC32) {
+ Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset));
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
+ } else if (RelType == ELF::R_X86_64_PC64) {
+ Value.Addend += support::ulittle64_t::ref(computePlaceholderAddress(SectionID, Offset));
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
} else {
- RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend,
- Value.Offset);
- addRelocationForSection(RE, Value.SectionID);
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
}
} else {
- if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) {
- GOTEntries.push_back(Value);
+ if (Arch == Triple::x86) {
+ Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset));
}
- RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset);
- if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
- else
- addRelocationForSection(RE, Value.SectionID);
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
}
return ++RelI;
}
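
Taken together, the external R_X86_64_PLT32 path above builds a two-level indirection: the call site gets a PC32 to a stub, the stub loads its destination from a GOT slot (hence the relocation at StubOffset + 2 with a -4 bias), and the slot receives an R_X86_64_64 holding the symbol's final address. A sketch of the stub bytes this relies on, assuming the ff 25 "jmp *disp32(%rip)" form createStubFunction emits on x86-64:

#include <cstdint>
#include <cstring>

// Illustrative: lay down an x86-64 stub "jmp *disp32(%rip)" (ff 25 <disp32>).
// disp32 must be GOTSlotAddr - (StubAddr + 6): the displacement field starts
// 2 bytes into the stub and is measured from the end of the 6-byte
// instruction, which is exactly what resolving a PC32 at StubOffset + 2 with
// addend GOTOffset - 4 produces.
void emitGOTStub(uint8_t *Stub, int32_t Disp32) {
  Stub[0] = 0xFF;
  Stub[1] = 0x25;
  std::memcpy(Stub + 2, &Disp32, sizeof(Disp32));
}

int main() {
  uint8_t Stub[6];
  emitGOTStub(Stub, /*Disp32=*/0x100); // GOT slot 0x100 bytes past stub end
  return (Stub[0] == 0xFF && Stub[1] == 0x25) ? 0 : 1;
}
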
-void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) {
-
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator it;
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator end = GOTs.end();
-
- for (it = GOTs.begin(); it != end; ++it) {
- GOTRelocations &GOTEntries = it->second;
- for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (GOTEntries[i].SymbolName != nullptr &&
- GOTEntries[i].SymbolName == Name) {
- GOTEntries[i].Offset = Addr;
- }
- }
- }
-}
-
size_t RuntimeDyldELF::getGOTEntrySize() {
// We don't use the GOT in all of these cases, but it's essentially free
// to put them all here.
@@ -1399,83 +1366,53 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
return Result;
}
-uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) {
-
- const size_t GOTEntrySize = getGOTEntrySize();
-
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator it;
- SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator end =
- GOTs.end();
-
- int GOTIndex = -1;
- for (it = GOTs.begin(); it != end; ++it) {
- SID GOTSectionID = it->first;
- const GOTRelocations &GOTEntries = it->second;
-
- // Find the matching entry in our vector.
- uint64_t SymbolOffset = 0;
- for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (!GOTEntries[i].SymbolName) {
- if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress &&
- GOTEntries[i].Offset == Offset) {
- GOTIndex = i;
- SymbolOffset = GOTEntries[i].Offset;
- break;
- }
- } else {
- // GOT entries for external symbols use the addend as the address when
- // the external symbol has been resolved.
- if (GOTEntries[i].Offset == LoadAddress) {
- GOTIndex = i;
- // Don't use the Addend here. The relocation handler will use it.
- break;
- }
- }
- }
-
- if (GOTIndex != -1) {
- if (GOTEntrySize == sizeof(uint64_t)) {
- uint64_t *LocalGOTAddr = (uint64_t *)getSectionAddress(GOTSectionID);
- // Fill in this entry with the address of the symbol being referenced.
- LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset;
- } else {
- uint32_t *LocalGOTAddr = (uint32_t *)getSectionAddress(GOTSectionID);
- // Fill in this entry with the address of the symbol being referenced.
- LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset);
- }
-
- // Calculate the load address of this entry
- return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize);
- }
+uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no) {
+  (void)SectionID; // The GOT section is the same for all sections in the object file.
+ if (GOTSectionID == 0) {
+ GOTSectionID = Sections.size();
+ // Reserve a section id. We'll allocate the section later
+ // once we know the total size
+ Sections.push_back(SectionEntry(".got", 0, 0, 0));
}
+ uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize();
+ CurrentGOTIndex += no;
+ return StartOffset;
+}
+
+void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID,
+                                                uint64_t Offset,
+                                                uint64_t GOTOffset) {
+  // Fill in the relative address of the GOT entry into the stub.
+ RelocationEntry GOTRE(SectionID, Offset, ELF::R_X86_64_PC32, GOTOffset);
+ addRelocationForSection(GOTRE, GOTSectionID);
+}
- assert(GOTIndex != -1 && "Unable to find requested GOT entry.");
- return 0;
+RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID,
+                                                   uint64_t GOTOffset,
+                                                   uint64_t SymbolOffset,
+                                                   uint32_t Type) {
+  (void)SectionID; // The GOT section is the same for all sections in the object file.
+ return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset);
}
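
These three helpers replace the old scan-based findGOTEntry with bump allocation: slots are merely counted while relocations are processed, and the backing memory is created once in finalizeLoad below. A reduced standalone model of the bookkeeping, with the class state cut down to the two counters:

#include <cassert>
#include <cstdint>

struct GOTAllocator {
  unsigned GOTSectionID = 0;    // tentative id; 0 means "not reserved yet"
  unsigned CurrentGOTIndex = 0; // slots handed out so far
  unsigned NextSectionID = 1;   // stand-in for Sections.size()
  static const unsigned EntrySize = 8; // GOT entry size on x86-64

  // Reserve 'no' consecutive slots and return the byte offset of the first.
  uint64_t allocate(unsigned no) {
    if (GOTSectionID == 0)
      GOTSectionID = NextSectionID++; // reserve an id; memory comes later
    uint64_t StartOffset = CurrentGOTIndex * (uint64_t)EntrySize;
    CurrentGOTIndex += no;
    return StartOffset;
  }
};

int main() {
  GOTAllocator GOT;
  assert(GOT.allocate(1) == 0); // first slot at offset 0
  assert(GOT.allocate(2) == 8); // next request starts right behind it
  // finalizeLoad() would now allocate CurrentGOTIndex * EntrySize bytes.
  assert(GOT.CurrentGOTIndex * GOTAllocator::EntrySize == 24);
  return 0;
}
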
void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
// If necessary, allocate the global offset table
- if (MemMgr) {
- // Allocate the GOT if necessary
- size_t numGOTEntries = GOTEntries.size();
- if (numGOTEntries != 0) {
- // Allocate memory for the section
- unsigned SectionID = Sections.size();
- size_t TotalSize = numGOTEntries * getGOTEntrySize();
- uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(),
- SectionID, ".got", false);
- if (!Addr)
- report_fatal_error("Unable to allocate memory for GOT!");
-
- GOTs.push_back(std::make_pair(SectionID, GOTEntries));
- Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0));
- // For now, initialize all GOT entries to zero. We'll fill them in as
- // needed when GOT-based relocations are applied.
- memset(Addr, 0, TotalSize);
- }
- } else {
- report_fatal_error("Unable to allocate memory for GOT!");
+ if (GOTSectionID != 0) {
+ // Allocate memory for the section
+ size_t TotalSize = CurrentGOTIndex * getGOTEntrySize();
+ uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(),
+ GOTSectionID, ".got", false);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for GOT!");
+
+ Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0);
+
+ if (Checker)
+ Checker->registerSection(Obj.getFileName(), GOTSectionID);
+
+ // For now, initialize all GOT entries to zero. We'll fill them in as
+ // needed when GOT-based relocations are applied.
+ memset(Addr, 0, TotalSize);
}
// Look for and record the EH frame section.
@@ -1489,6 +1426,9 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
break;
}
}
+
+ GOTSectionID = 0;
+ CurrentGOTIndex = 0;
}
bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index b4414b0..9a4a863 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -20,16 +20,6 @@
using namespace llvm;
namespace llvm {
-namespace {
-// Helper for extensive error checking in debug builds.
-std::error_code Check(std::error_code Err) {
- if (Err) {
- report_fatal_error(Err.message());
- }
- return Err;
-}
-
-} // end anonymous namespace
class RuntimeDyldELF : public RuntimeDyldImpl {
@@ -90,16 +80,39 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
- uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset);
size_t getGOTEntrySize();
- void updateGOTEntries(StringRef Name, uint64_t Addr) override;
+ SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; }
+
+  // Allocate the requested number ("no") of GOT entries for use in the given section.
+ uint64_t allocateGOTEntries(unsigned SectionID, unsigned no);
+
+  // Resolve the relative address of GOTOffset in SectionID and place
+  // it at the given Offset.
+ void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset,
+ uint64_t GOTOffset);
+
+ // For a GOT entry referenced from SectionID, compute a relocation entry
+ // that will place the final resolved value in the GOT slot
+ RelocationEntry computeGOTOffsetRE(unsigned SectionID,
+ uint64_t GOTOffset,
+ uint64_t SymbolOffset,
+                                     uint32_t Type);
+
+ // Compute the address in memory where we can find the placeholder
+ void *computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const;
+
+  // Split out the common case of creating the RelocationEntry when the
+  // relocation requires no particular advanced processing.
+  void processSimpleRelocation(unsigned SectionID, uint64_t Offset,
+                               unsigned RelType, RelocationValueRef Value);
+
+ // The tentative ID for the GOT section
+ unsigned GOTSectionID;
- // Relocation entries for symbols whose position-independent offset is
- // updated in a global offset table.
- typedef SmallVector<RelocationValueRef, 2> GOTRelocations;
- GOTRelocations GOTEntries; // List of entries requiring finalization.
- SmallVector<std::pair<SID, GOTRelocations>, 8> GOTs; // Allocated tables.
+ // Records the current number of allocated slots in the GOT
+ // (This would be equivalent to GOTEntries.size() were it not for relocations
+ // that consume more than one slot)
+ unsigned CurrentGOTIndex;
// When a module is loaded we save the SectionID of the EH frame section
// in a table until we receive a request to register all unregistered
@@ -108,8 +121,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
SmallVector<SID, 2> RegisteredEHFrameSections;
public:
- RuntimeDyldELF(RTDyldMemoryManager *mm);
- virtual ~RuntimeDyldELF();
+ RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
+ ~RuntimeDyldELF() override;
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 2f3e3a8..90e61a5 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/Object/ObjectFile.h"
@@ -36,6 +37,14 @@ using namespace llvm::object;
namespace llvm {
+ // Helper for extensive error checking in debug builds.
+inline std::error_code Check(std::error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+}
+
class Twine;
/// SectionEntry - represents a section emitted into memory by the dynamic
@@ -43,7 +52,7 @@ class Twine;
class SectionEntry {
public:
/// Name - section name.
- StringRef Name;
+ std::string Name;
/// Address - address in the linker's memory where the section resides.
uint8_t *Address;
@@ -156,12 +165,34 @@ public:
}
};
+/// @brief Symbol info for RuntimeDyld.
+class SymbolTableEntry : public JITSymbolBase {
+public:
+ SymbolTableEntry()
+ : JITSymbolBase(JITSymbolFlags::None), Offset(0), SectionID(0) {}
+
+ SymbolTableEntry(unsigned SectionID, uint64_t Offset, JITSymbolFlags Flags)
+ : JITSymbolBase(Flags), Offset(Offset), SectionID(SectionID) {}
+
+ unsigned getSectionID() const { return SectionID; }
+ uint64_t getOffset() const { return Offset; }
+
+private:
+ uint64_t Offset;
+ unsigned SectionID;
+};
+
+typedef StringMap<SymbolTableEntry> RTDyldSymbolTable;
+
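
SymbolTableEntry replaces the old (SectionID, offset) pair and, via JITSymbolBase, also carries the symbol's JITSymbolFlags. A plain-C++ analogue of the new table shape (the real code uses llvm::StringMap; these types are simplified stand-ins):

#include <cstdint>
#include <string>
#include <unordered_map>

// Reduced analogue of RTDyldSymbolTable.
struct SymEntry {
  unsigned SectionID;
  uint64_t Offset;
  unsigned Flags; // stand-in for JITSymbolFlags
};
typedef std::unordered_map<std::string, SymEntry> SymbolTable;

int main() {
  SymbolTable Tab;
  Tab["main"] = SymEntry{1, 0x40, 0}; // one lookup now yields all three fields
  SymbolTable::const_iterator It = Tab.find("main");
  return (It != Tab.end() && It->second.SectionID == 1) ? 0 : 1;
}
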
class RuntimeDyldImpl {
friend class RuntimeDyld::LoadedObjectInfo;
friend class RuntimeDyldCheckerImpl;
protected:
// The MemoryManager to load objects into.
- RTDyldMemoryManager *MemMgr;
+ RuntimeDyld::MemoryManager &MemMgr;
+
+ // The symbol resolver to use for external symbols.
+ RuntimeDyld::SymbolResolver &Resolver;
// Attached RuntimeDyldChecker instance. Null if no instance attached.
RuntimeDyldCheckerImpl *Checker;
@@ -172,22 +203,17 @@ protected:
SectionList Sections;
typedef unsigned SID; // Type for SectionIDs
-#define RTDYLD_INVALID_SECTION_ID ((SID)(-1))
+#define RTDYLD_INVALID_SECTION_ID ((RuntimeDyldImpl::SID)(-1))
// Keep a map of sections from object file to the SectionID which
// references it.
typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
- // A global symbol table for symbols from all loaded modules. Maps the
- // symbol name to a (SectionID, offset in section) pair.
- typedef std::pair<unsigned, uintptr_t> SymbolLoc;
- typedef StringMap<SymbolLoc> SymbolTableMap;
- SymbolTableMap GlobalSymbolTable;
+ // A global symbol table for symbols from all loaded modules.
+ RTDyldSymbolTable GlobalSymbolTable;
- // Pair representing the size and alignment requirement for a common symbol.
- typedef std::pair<unsigned, unsigned> CommonSymbolInfo;
-  // Keep a map of common symbols to their info pairs
+  // Keep a list of common symbols that still need to be emitted.
- typedef std::map<SymbolRef, CommonSymbolInfo> CommonSymbolMap;
+ typedef std::vector<SymbolRef> CommonSymbolList;
// For each symbol, keep a list of relocations based on it. Anytime
// its address is reassigned (the JIT re-compiled the function, e.g.),
@@ -287,9 +313,7 @@ protected:
/// \brief Given the common symbols discovered in the object file, emit a
/// new section for them and update the symbol mappings in the object and
/// symbol table.
- void emitCommonSymbols(const ObjectFile &Obj,
- const CommonSymbolMap &CommonSymbols,
- uint64_t TotalSize, SymbolTableMap &SymbolTable);
+ void emitCommonSymbols(const ObjectFile &Obj, CommonSymbolList &CommonSymbols);
/// \brief Emits section data from the object file to the MemoryManager.
/// \param IsCode if it's true then allocateCodeSection() will be
@@ -337,10 +361,6 @@ protected:
/// \brief Resolve relocations to external symbols.
void resolveExternalSymbols();
- /// \brief Update GOT entries for external symbols.
- // The base class does nothing. ELF overrides this.
- virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {}
-
// \brief Compute an upper bound of the memory that is required to load all
// sections
void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize,
@@ -354,8 +374,10 @@ protected:
std::pair<unsigned, unsigned> loadObjectImpl(const object::ObjectFile &Obj);
public:
- RuntimeDyldImpl(RTDyldMemoryManager *mm)
- : MemMgr(mm), Checker(nullptr), ProcessAllSections(false), HasError(false) {
+ RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr),
+ ProcessAllSections(false), HasError(false) {
}
virtual ~RuntimeDyldImpl();
@@ -371,24 +393,26 @@ public:
virtual std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &Obj) = 0;
- uint8_t* getSymbolAddress(StringRef Name) const {
+ uint8_t* getSymbolLocalAddress(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
+ RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
if (pos == GlobalSymbolTable.end())
return nullptr;
- SymbolLoc Loc = pos->second;
- return getSectionAddress(Loc.first) + Loc.second;
+ const auto &SymInfo = pos->second;
+ return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
}
- uint64_t getSymbolLoadAddress(StringRef Name) const {
+ RuntimeDyld::SymbolInfo getSymbol(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
+ RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
if (pos == GlobalSymbolTable.end())
- return 0;
- SymbolLoc Loc = pos->second;
- return getSectionLoadAddress(Loc.first) + Loc.second;
+ return nullptr;
+ const auto &SymEntry = pos->second;
+ uint64_t TargetAddr =
+ getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset();
+ return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags());
}
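
The rename from getSymbolAddress/getSymbolLoadAddress to getSymbolLocalAddress/getSymbol makes the two address spaces explicit: where the section's bytes sit in this process versus where they will execute in the target. A toy illustration (addresses invented):

#include <cassert>
#include <cstdint>

int main() {
  // One section: its bytes live at HostBase in this process but will run at
  // TargetBase in the target (e.g. a remote JIT).
  uint8_t HostBuf[64] = {0};
  uint8_t *HostBase = HostBuf;   // plays SectionEntry::Address
  uint64_t TargetBase = 0x10000; // plays SectionEntry::LoadAddress
  uint64_t SymOffset = 8;        // symbol's offset within the section

  uint8_t *LocalAddr = HostBase + SymOffset;  // what getSymbolLocalAddress gives
  uint64_t LoadAddr = TargetBase + SymOffset; // what getSymbol() reports
  assert(LocalAddr == HostBuf + 8 && LoadAddr == 0x10008);
  return 0;
}
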
void resolveRelocations();
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index d75be28..d4a680d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -26,11 +26,12 @@ using namespace llvm::object;
namespace {
-class LoadedMachOObjectInfo : public RuntimeDyld::LoadedObjectInfo {
+class LoadedMachOObjectInfo
+ : public RuntimeDyld::LoadedObjectInfoHelper<LoadedMachOObjectInfo> {
public:
LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
unsigned EndIdx)
- : RuntimeDyld::LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {}
+ : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {}
OwningBinary<ObjectFile>
getObjectForDebug(const ObjectFile &Obj) const override {
@@ -64,17 +65,18 @@ RelocationValueRef RuntimeDyldMachO::getRelocationValueRef(
symbol_iterator Symbol = RI->getSymbol();
StringRef TargetName;
Symbol->getName(TargetName);
- SymbolTableMap::const_iterator SI =
+ RTDyldSymbolTable::const_iterator SI =
GlobalSymbolTable.find(TargetName.data());
if (SI != GlobalSymbolTable.end()) {
- Value.SectionID = SI->second.first;
- Value.Offset = SI->second.second + RE.Addend;
+ const auto &SymInfo = SI->second;
+ Value.SectionID = SymInfo.getSectionID();
+ Value.Offset = SymInfo.getOffset() + RE.Addend;
} else {
Value.SymbolName = TargetName.data();
Value.Offset = RE.Addend;
}
} else {
- SectionRef Sec = Obj.getRelocationSection(RelInfo);
+ SectionRef Sec = Obj.getAnyRelocationSection(RelInfo);
bool IsCode = Sec.isText();
Value.SectionID = findOrEmitSection(Obj, Sec, IsCode, ObjSectionToID);
uint64_t Addr = Sec.getAddress();
@@ -252,8 +254,6 @@ static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
template <typename Impl>
void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
- if (!MemMgr)
- return;
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i];
if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID ||
@@ -276,22 +276,28 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
P = processFDE(P, DeltaForText, DeltaForEH);
} while (P != End);
- MemMgr->registerEHFrames(EHFrame->Address, EHFrame->LoadAddress,
- EHFrame->Size);
+ MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress,
+ EHFrame->Size);
}
UnregisteredEHFrameSections.clear();
}
std::unique_ptr<RuntimeDyldMachO>
-RuntimeDyldMachO::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) {
+RuntimeDyldMachO::create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver) {
switch (Arch) {
default:
llvm_unreachable("Unsupported target for RuntimeDyldMachO.");
break;
- case Triple::arm: return make_unique<RuntimeDyldMachOARM>(MM);
- case Triple::aarch64: return make_unique<RuntimeDyldMachOAArch64>(MM);
- case Triple::x86: return make_unique<RuntimeDyldMachOI386>(MM);
- case Triple::x86_64: return make_unique<RuntimeDyldMachOX86_64>(MM);
+ case Triple::arm:
+ return make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
+ case Triple::aarch64:
+ return make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
+ case Triple::x86:
+ return make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
+ case Triple::x86_64:
+ return make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver);
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index f8bfc03..45a94ba 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -49,7 +49,9 @@ protected:
// EH frame sections with the memory manager.
SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections;
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+ RuntimeDyldMachO(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldImpl(MemMgr, Resolver) {}
/// This convenience method uses memcpy to extract a contiguous addend (the
/// addend size and offset are taken from the corresponding fields of the RE).
@@ -114,8 +116,10 @@ protected:
public:
/// Create a RuntimeDyldMachO instance for the given target architecture.
- static std::unique_ptr<RuntimeDyldMachO> create(Triple::ArchType Arch,
- RTDyldMemoryManager *mm);
+ static std::unique_ptr<RuntimeDyldMachO>
+ create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver);
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
@@ -142,7 +146,9 @@ private:
int64_t DeltaForEH);
public:
- RuntimeDyldMachOCRTPBase(RTDyldMemoryManager *mm) : RuntimeDyldMachO(mm) {}
+ RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachO(MemMgr, Resolver) {}
void finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) override;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
new file mode 100644
index 0000000..478665e
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -0,0 +1,216 @@
+//===-- RuntimeDyldCOFFX86_64.h --- COFF/X86_64 specific code ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF x86_64 support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFX86_64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFX86_64_H
+
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "../RuntimeDyldCOFF.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF {
+
+private:
+ // When a module is loaded we save the SectionID of the unwind
+ // sections in a table until we receive a request to register all
+  // unregistered EH frame sections with the memory manager.
+ SmallVector<SID, 2> UnregisteredEHFrameSections;
+ SmallVector<SID, 2> RegisteredEHFrameSections;
+
+public:
+ RuntimeDyldCOFFX86_64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldCOFF(MM, Resolver) {}
+
+ unsigned getMaxStubSize() override {
+ return 6; // 2-byte jmp instruction + 32-bit relative address
+ }
+
+ // The target location for the relocation is described by RE.SectionID and
+ // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
+ // SectionEntry has three members describing its location.
+ // SectionEntry::Address is the address at which the section has been loaded
+ // into memory in the current (host) process. SectionEntry::LoadAddress is
+ // the address that the section will have in the target process.
+ // SectionEntry::ObjAddress is the address of the bits for this section in the
+ // original emitted object image (also in the current address space).
+ //
+ // Relocations will be applied as if the section were loaded at
+ // SectionEntry::LoadAddress, but they will be applied at an address based
+ // on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer
+ // to Target memory contents if they are required for value calculations.
+ //
+ // The Value parameter here is the load address of the symbol for the
+ // relocation to be applied. For relocations which refer to symbols in the
+ // current object Value will be the LoadAddress of the section in which
+ // the symbol resides (RE.Addend provides additional information about the
+ // symbol location). For external symbols, Value will be the address of the
+ // symbol in the target address space.
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *Target = Section.Address + RE.Offset;
+
+ switch (RE.RelType) {
+
+ case COFF::IMAGE_REL_AMD64_REL32:
+ case COFF::IMAGE_REL_AMD64_REL32_1:
+ case COFF::IMAGE_REL_AMD64_REL32_2:
+ case COFF::IMAGE_REL_AMD64_REL32_3:
+ case COFF::IMAGE_REL_AMD64_REL32_4:
+ case COFF::IMAGE_REL_AMD64_REL32_5: {
+ uint32_t *TargetAddress = (uint32_t *)Target;
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ // Delta is the distance from the start of the reloc to the end of the
+ // instruction with the reloc.
+ uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32);
+ Value -= FinalAddress + Delta;
+ uint64_t Result = Value + RE.Addend;
+ assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow");
+ assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow");
+ *TargetAddress = Result;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR32NB: {
+ // Note ADDR32NB requires a well-established notion of
+ // image base. This address must be less than or equal
+ // to every section's load address, and all sections must be
+ // within a 32 bit offset from the base.
+ //
+ // For now we just set these to zero.
+ uint32_t *TargetAddress = (uint32_t *)Target;
+ *TargetAddress = 0;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR64: {
+ uint64_t *TargetAddress = (uint64_t *)Target;
+ *TargetAddress = Value + RE.Addend;
+ break;
+ }
+
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ }
+ }
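
The Delta above generalizes the usual PC-relative bias: IMAGE_REL_AMD64_REL32_N means the 32-bit field ends N bytes before the instruction does, so the reference point is FieldAddress + 4 + N. A quick check of the arithmetic:

#include <cassert>
#include <cstdint>

// REL32_N relocation kinds are consecutive, so the bias is 4 + N.
uint64_t rel32Bias(unsigned N /* 0 for REL32 .. 5 for REL32_5 */) {
  return 4 + N;
}

int main() {
  // For REL32_4 the stored value must be Target - (FieldAddress + 8).
  uint64_t FieldAddress = 0x2000, Target = 0x3000;
  int32_t Stored = (int32_t)(Target - (FieldAddress + rel32Bias(4)));
  assert(Stored == 0x0FF8);
  return 0;
}
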
+
+ relocation_iterator processRelocationRef(unsigned SectionID,
+ relocation_iterator RelI,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ StubMap &Stubs) override {
+ // If possible, find the symbol referred to in the relocation,
+ // and the section that contains it.
+ symbol_iterator Symbol = RelI->getSymbol();
+ if (Symbol == Obj.symbol_end())
+ report_fatal_error("Unknown symbol in relocation");
+ section_iterator SecI(Obj.section_end());
+ Symbol->getSection(SecI);
+ // If there is no section, this must be an external reference.
+ const bool IsExtern = SecI == Obj.section_end();
+
+ // Determine the Addend used to adjust the relocation value.
+ uint64_t RelType;
+ Check(RelI->getType(RelType));
+ uint64_t Offset;
+ Check(RelI->getOffset(Offset));
+ uint64_t Addend = 0;
+ SectionEntry &Section = Sections[SectionID];
+ uintptr_t ObjTarget = Section.ObjAddress + Offset;
+
+ switch (RelType) {
+
+ case COFF::IMAGE_REL_AMD64_REL32:
+ case COFF::IMAGE_REL_AMD64_REL32_1:
+ case COFF::IMAGE_REL_AMD64_REL32_2:
+ case COFF::IMAGE_REL_AMD64_REL32_3:
+ case COFF::IMAGE_REL_AMD64_REL32_4:
+ case COFF::IMAGE_REL_AMD64_REL32_5:
+ case COFF::IMAGE_REL_AMD64_ADDR32NB: {
+ uint32_t *Displacement = (uint32_t *)ObjTarget;
+ Addend = *Displacement;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR64: {
+ uint64_t *Displacement = (uint64_t *)ObjTarget;
+ Addend = *Displacement;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ StringRef TargetName;
+ Symbol->getName(TargetName);
+
+ DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
+ << " RelType: " << RelType << " TargetName: " << TargetName
+ << " Addend " << Addend << "\n");
+
+ if (IsExtern) {
+ RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ addRelocationForSymbol(RE, TargetName);
+ } else {
+ bool IsCode = SecI->isText();
+ unsigned TargetSectionID =
+ findOrEmitSection(Obj, *SecI, IsCode, ObjSectionToID);
+ uint64_t TargetOffset = getSymbolOffset(*Symbol);
+ RelocationEntry RE(SectionID, Offset, RelType, TargetOffset + Addend);
+ addRelocationForSection(RE, TargetSectionID);
+ }
+
+ return ++RelI;
+ }
+
+ unsigned getStubAlignment() override { return 1; }
+ void registerEHFrames() override {
+ for (auto const &EHFrameSID : UnregisteredEHFrameSections) {
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
+ size_t EHFrameSize = Sections[EHFrameSID].Size;
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ RegisteredEHFrameSections.push_back(EHFrameSID);
+ }
+ UnregisteredEHFrameSections.clear();
+ }
+ void deregisterEHFrames() override {
+ // Stub
+ }
+ void finalizeLoad(const ObjectFile &Obj,
+ ObjSectionToIDMap &SectionMap) override {
+ // Look for and record the EH frame section IDs.
+ for (const auto &SectionPair : SectionMap) {
+ const SectionRef &Section = SectionPair.first;
+ StringRef Name;
+ Check(Section.getName(Name));
+ // Note unwind info is split across .pdata and .xdata, so this
+ // may not be sufficiently general for all users.
+ if (Name == ".xdata") {
+ UnregisteredEHFrameSections.push_back(SectionPair.second);
+ }
+ }
+ }
+};
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 196fa62..99fd6e3 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -23,8 +23,9 @@ public:
typedef uint64_t TargetPtrT;
- RuntimeDyldMachOAArch64(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOAArch64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 09e430e..09e51f2 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -25,7 +25,9 @@ public:
typedef uint32_t TargetPtrT;
- RuntimeDyldMachOARM(RTDyldMemoryManager *MM) : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOARM(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 67d7027..053f90c 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -22,8 +22,9 @@ public:
typedef uint32_t TargetPtrT;
- RuntimeDyldMachOI386(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOI386(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 0; }
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 0734017..4b3b01b 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -22,8 +22,9 @@ public:
typedef uint64_t TargetPtrT;
- RuntimeDyldMachOX86_64(RTDyldMemoryManager *MM)
- : RuntimeDyldMachOCRTPBase(MM) {}
+ RuntimeDyldMachOX86_64(RuntimeDyld::MemoryManager &MM,
+ RuntimeDyld::SymbolResolver &Resolver)
+ : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index 5986084..5986084 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
index e6679cf..57f6e08 100644
--- a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -49,21 +49,19 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
// Adjust the triple to match what the user requested.
const Target *TheTarget = nullptr;
if (!MArch.empty()) {
- for (TargetRegistry::iterator it = TargetRegistry::begin(),
- ie = TargetRegistry::end(); it != ie; ++it) {
- if (MArch == it->getName()) {
- TheTarget = &*it;
- break;
- }
- }
+ auto I = std::find_if(
+ TargetRegistry::targets().begin(), TargetRegistry::targets().end(),
+ [&](const Target &T) { return MArch == T.getName(); });
- if (!TheTarget) {
+ if (I == TargetRegistry::targets().end()) {
if (ErrorStr)
*ErrorStr = "No available targets are compatible with this -march, "
"see -version for the available targets.\n";
return nullptr;
}
+ TheTarget = &*I;
+
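
The replaced iterator loop is the generic find-by-name-in-range idiom; the same shape works against any container of targets. A miniature with stand-in types:

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct Target {
  std::string Name;
  const std::string &getName() const { return Name; }
};

int main() {
  std::vector<Target> Targets = {{"x86-64"}, {"aarch64"}};
  std::string MArch = "aarch64";
  auto I = std::find_if(Targets.begin(), Targets.end(),
                        [&](const Target &T) { return MArch == T.getName(); });
  assert(I != Targets.end() && I->getName() == "aarch64");
  return 0;
}
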
// Adjust the triple to match (if known), otherwise stick with the
// requested/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index c494d6c..1089cb5 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -14,9 +14,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AsmWriter.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -31,13 +31,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
using namespace llvm;
@@ -275,6 +278,15 @@ static const Module *getModuleFromVal(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
+
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
+ for (const User *U : MAV->users())
+ if (isa<Instruction>(U))
+ if (const Module *M = getModuleFromVal(U))
+ return M;
+ return nullptr;
+ }
+
return nullptr;
}
@@ -378,7 +390,27 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
}
-namespace llvm {
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &) = delete;
+ void operator=(const TypePrinting&) = delete;
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ TypeFinder NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+ TypePrinting() = default;
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // namespace
void TypePrinting::incorporateTypes(const Module &M) {
NamedTypes.run(M, false);
@@ -508,6 +540,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
OS << '>';
}
+namespace {
//===----------------------------------------------------------------------===//
// SlotTracker Class: Enumerate slot numbers for unnamed values
//===----------------------------------------------------------------------===//
@@ -525,6 +558,7 @@ private:
/// TheFunction - The function for which we are holding slot numbers.
const Function* TheFunction;
bool FunctionProcessed;
+ bool ShouldInitializeAllMetadata;
/// mMap - The slot map for the module level data.
ValueMap mMap;
@@ -542,10 +576,20 @@ private:
DenseMap<AttributeSet, unsigned> asMap;
unsigned asNext;
public:
- /// Construct from a module
- explicit SlotTracker(const Module *M);
+ /// Construct from a module.
+ ///
+ /// If \c ShouldInitializeAllMetadata, initializes all metadata in all
+ /// functions, giving correct numbering for metadata referenced only from
+ /// within a function (even if no functions have been initialized).
+ explicit SlotTracker(const Module *M,
+ bool ShouldInitializeAllMetadata = false);
/// Construct from a function, starting out in incorporation state.
- explicit SlotTracker(const Function *F);
+ ///
+ /// If \c ShouldInitializeAllMetadata, initializes all metadata in all
+ /// functions, giving correct numbering for metadata referenced only from
+ /// within a function (even if no functions have been initialized).
+ explicit SlotTracker(const Function *F,
+ bool ShouldInitializeAllMetadata = false);
/// Return the slot number of the specified value in its type
/// plane. If something is not in the SlotTracker, return -1.
@@ -606,11 +650,18 @@ private:
/// Add all of the functions arguments, basic blocks, and instructions.
void processFunction();
- SlotTracker(const SlotTracker &) LLVM_DELETED_FUNCTION;
- void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION;
+ /// Add all of the metadata from a function.
+ void processFunctionMetadata(const Function &F);
+
+ /// Add all of the metadata from an instruction.
+ void processInstructionMetadata(const Instruction &I);
+
+ SlotTracker(const SlotTracker &) = delete;
+ void operator=(const SlotTracker &) = delete;
};
+} // namespace
-SlotTracker *createSlotTracker(const Module *M) {
+static SlotTracker *createSlotTracker(const Module *M) {
return new SlotTracker(M);
}
@@ -645,15 +696,18 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
-SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), mNext(0),
+SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata)
+ : TheModule(M), TheFunction(nullptr), FunctionProcessed(false),
+ ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
fNext(0), mdnNext(0), asNext(0) {}
// Function level constructor. Causes the contents of the Module and the one
// function provided to be added to the slot table.
-SlotTracker::SlotTracker(const Function *F)
+SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata)
: TheModule(F ? F->getParent() : nullptr), TheFunction(F),
- FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) {}
+ FunctionProcessed(false),
+ ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
+ fNext(0), mdnNext(0), asNext(0) {}
inline void SlotTracker::initialize() {
if (TheModule) {
@@ -692,6 +746,9 @@ void SlotTracker::processModule() {
// Add all the unnamed functions to the table.
CreateModuleSlot(I);
+ if (ShouldInitializeAllMetadata)
+ processFunctionMetadata(*I);
+
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
@@ -715,46 +772,30 @@ void SlotTracker::processFunction() {
ST_DEBUG("Inserting Instructions:\n");
- SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst;
-
// Add all of the basic blocks and instructions with no names.
- for (Function::const_iterator BB = TheFunction->begin(),
- E = TheFunction->end(); BB != E; ++BB) {
- if (!BB->hasName())
- CreateFunctionSlot(BB);
-
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
- ++I) {
- if (!I->getType()->isVoidTy() && !I->hasName())
- CreateFunctionSlot(I);
-
- // Intrinsics can directly use metadata. We allow direct calls to any
- // llvm.foo function here, because the target may not be linked into the
- // optimizer.
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction())
- if (F->isIntrinsic())
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (auto *V = dyn_cast_or_null<MetadataAsValue>(I->getOperand(i)))
- if (MDNode *N = dyn_cast<MDNode>(V->getMetadata()))
- CreateMetadataSlot(N);
+ for (auto &BB : *TheFunction) {
+ if (!BB.hasName())
+ CreateFunctionSlot(&BB);
+
+ processFunctionMetadata(*TheFunction);
+
+ for (auto &I : BB) {
+ if (!I.getType()->isVoidTy() && !I.hasName())
+ CreateFunctionSlot(&I);
+ // We allow direct calls to any llvm.foo function here, because the
+ // target may not be linked into the optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
// Add all the call attributes to the table.
AttributeSet Attrs = CI->getAttributes().getFnAttributes();
if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
CreateAttributeSetSlot(Attrs);
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
// Add all the call attributes to the table.
AttributeSet Attrs = II->getAttributes().getFnAttributes();
if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
CreateAttributeSetSlot(Attrs);
}
-
- // Process metadata attached with this instruction.
- I->getAllMetadata(MDForInst);
- for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
- CreateMetadataSlot(MDForInst[i].second);
- MDForInst.clear();
}
}
@@ -763,6 +804,35 @@ void SlotTracker::processFunction() {
ST_DEBUG("end processFunction!\n");
}
+void SlotTracker::processFunctionMetadata(const Function &F) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ for (auto &BB : F) {
+ F.getAllMetadata(MDs);
+ for (auto &MD : MDs)
+ CreateMetadataSlot(MD.second);
+
+ for (auto &I : BB)
+ processInstructionMetadata(I);
+ }
+}
+
+void SlotTracker::processInstructionMetadata(const Instruction &I) {
+ // Process metadata used directly by intrinsics.
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (Function *F = CI->getCalledFunction())
+ if (F->isIntrinsic())
+ for (auto &Op : I.operands())
+ if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op))
+ if (MDNode *N = dyn_cast<MDNode>(V->getMetadata()))
+ CreateMetadataSlot(N);
+
+ // Process metadata attached to this instruction.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I.getAllMetadata(MDs);
+ for (auto &MD : MDs)
+ CreateMetadataSlot(MD.second);
+}
+
/// Clean up after incorporating a function. This is the only way to get out of
/// the function incorporation state that affects get*Slot/Create*Slot. Function
/// incorporation state is indicated by TheFunction != 0.
@@ -1010,7 +1080,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
(StrVal[1] >= '0' && StrVal[1] <= '9'))) {
// Reparse stringized version!
if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
- Out << StrVal.str();
+ Out << StrVal;
return;
}
}
@@ -1223,6 +1293,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << ' ' << getPredicateText(CE->getPredicate());
Out << " (";
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
+ TypePrinter.print(
+ cast<PointerType>(GEP->getPointerOperandType()->getScalarType())
+ ->getElementType(),
+ Out);
+ Out << ", ";
+ }
+
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
@@ -1275,52 +1353,474 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
namespace {
struct FieldSeparator {
bool Skip;
- FieldSeparator() : Skip(true) {}
+ const char *Sep;
+ FieldSeparator(const char *Sep = ", ") : Skip(true), Sep(Sep) {}
};
raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
if (FS.Skip) {
FS.Skip = false;
return OS;
}
- return OS << ", ";
+ return OS << FS.Sep;
}
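
FieldSeparator prints nothing the first time it is streamed and the separator every time after, so callers never special-case the first field. A minimal standalone re-implementation of the idiom against std::ostream, for illustration only:

    #include <iostream>

    // Skip the separator on first use, emit it on every later use.
    struct Sep {
      bool Skip = true;
      const char *Text;
      explicit Sep(const char *Text = ", ") : Text(Text) {}
    };

    std::ostream &operator<<(std::ostream &OS, Sep &S) {
      if (S.Skip) {
        S.Skip = false;
        return OS;
      }
      return OS << S.Text;
    }

    int main() {
      Sep FS; // default ", " separator
      std::cout << "!DILocation(";
      std::cout << FS << "line: 7";
      std::cout << FS << "scope: !1";
      std::cout << ")\n"; // prints: !DILocation(line: 7, scope: !1)
      return 0;
    }

The newly parameterized separator is what lets printDIFlags below reuse the same idiom with " | " between flag names.
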
+struct MDFieldPrinter {
+ raw_ostream &Out;
+ FieldSeparator FS;
+ TypePrinting *TypePrinter;
+ SlotTracker *Machine;
+ const Module *Context;
+
+ explicit MDFieldPrinter(raw_ostream &Out)
+ : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {}
+ MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context)
+ : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
+ }
+ void printTag(const DINode *N);
+ void printString(StringRef Name, StringRef Value,
+ bool ShouldSkipEmpty = true);
+ void printMetadata(StringRef Name, const Metadata *MD,
+ bool ShouldSkipNull = true);
+ template <class IntTy>
+ void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true);
+ void printBool(StringRef Name, bool Value);
+ void printDIFlags(StringRef Name, unsigned Flags);
+ template <class IntTy, class Stringifier>
+ void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString,
+ bool ShouldSkipZero = true);
+};
} // end namespace
-static void writeMDLocation(raw_ostream &Out, const MDLocation *DL,
+void MDFieldPrinter::printTag(const DINode *N) {
+ Out << FS << "tag: ";
+ if (const char *Tag = dwarf::TagString(N->getTag()))
+ Out << Tag;
+ else
+ Out << N->getTag();
+}
+
+void MDFieldPrinter::printString(StringRef Name, StringRef Value,
+ bool ShouldSkipEmpty) {
+ if (ShouldSkipEmpty && Value.empty())
+ return;
+
+ Out << FS << Name << ": \"";
+ PrintEscapedString(Value, Out);
+ Out << "\"";
+}
+
+static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (!MD) {
+ Out << "null";
+ return;
+ }
+ WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
+}
+
+void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
+ bool ShouldSkipNull) {
+ if (ShouldSkipNull && !MD)
+ return;
+
+ Out << FS << Name << ": ";
+ writeMetadataAsOperand(Out, MD, TypePrinter, Machine, Context);
+}
+
+template <class IntTy>
+void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) {
+ if (ShouldSkipZero && !Int)
+ return;
+
+ Out << FS << Name << ": " << Int;
+}
+
+void MDFieldPrinter::printBool(StringRef Name, bool Value) {
+ Out << FS << Name << ": " << (Value ? "true" : "false");
+}
+
+void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) {
+ if (!Flags)
+ return;
+
+ Out << FS << Name << ": ";
+
+ SmallVector<unsigned, 8> SplitFlags;
+ unsigned Extra = DINode::splitFlags(Flags, SplitFlags);
+
+ FieldSeparator FlagsFS(" | ");
+ for (unsigned F : SplitFlags) {
+ const char *StringF = DINode::getFlagString(F);
+ assert(StringF && "Expected valid flag");
+ Out << FlagsFS << StringF;
+ }
+ if (Extra || SplitFlags.empty())
+ Out << FlagsFS << Extra;
+}
+
+template <class IntTy, class Stringifier>
+void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
+ Stringifier toString, bool ShouldSkipZero) {
+ if (!Value)
+ return;
+
+ Out << FS << Name << ": ";
+ if (const char *S = toString(Value))
+ Out << S;
+ else
+ Out << Value;
+}
+
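
The helpers above share one elision rule: a field holding its default (zero, empty string, null metadata) is skipped unless the caller passes ShouldSkip* = false, which keeps the specialized node syntax short. A simplified standalone sketch of printInt's behavior; the real DILocation writer would also emit the mandatory scope field, omitted here for brevity:

    #include <iostream>

    // Simplified sketch of printInt's elision rule.
    void printInt(std::ostream &OS, bool &First, const char *Name, int Value,
                  bool ShouldSkipZero = true) {
      if (ShouldSkipZero && !Value)
        return;
      OS << (First ? "" : ", ") << Name << ": " << Value;
      First = false;
    }

    int main() {
      bool First = true;
      std::cout << "!DILocation(";
      printInt(std::cout, First, "line", 7, /*ShouldSkipZero=*/false);
      printInt(std::cout, First, "column", 0); // skipped: zero and skippable
      std::cout << ")\n"; // prints: !DILocation(line: 7)
      return 0;
    }
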
+static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!GenericDINode(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("header", N->getHeader());
+ if (N->getNumDwarfOperands()) {
+ Out << Printer.FS << "operands: {";
+ FieldSeparator IFS;
+ for (auto &I : N->dwarf_operands()) {
+ Out << IFS;
+ writeMetadataAsOperand(Out, I, TypePrinter, Machine, Context);
+ }
+ Out << "}";
+ }
+ Out << ")";
+}
+
+static void writeDILocation(raw_ostream &Out, const DILocation *DL,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
- Out << "!MDLocation(";
- FieldSeparator FS;
+ Out << "!DILocation(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
// Always output the line, since 0 is a relevant and important value for it.
- Out << FS << "line: " << DL->getLine();
- if (DL->getColumn())
- Out << FS << "column: " << DL->getColumn();
- Out << FS << "scope: ";
- WriteAsOperandInternal(Out, DL->getScope(), TypePrinter, Machine, Context);
- if (DL->getInlinedAt()) {
- Out << FS << "inlinedAt: ";
- WriteAsOperandInternal(Out, DL->getInlinedAt(), TypePrinter, Machine,
- Context);
+ Printer.printInt("line", DL->getLine(), /* ShouldSkipZero */ false);
+ Printer.printInt("column", DL->getColumn());
+ Printer.printMetadata("scope", DL->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("inlinedAt", DL->getRawInlinedAt());
+ Out << ")";
+}
+
+static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
+ TypePrinting *, SlotTracker *, const Module *) {
+ Out << "!DISubrange(";
+ MDFieldPrinter Printer(Out);
+ Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false);
+ Printer.printInt("lowerBound", N->getLowerBound());
+ Out << ")";
+}
+
+static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
+ TypePrinting *, SlotTracker *, const Module *) {
+ Out << "!DIEnumerator(";
+ MDFieldPrinter Printer(Out);
+ Printer.printString("name", N->getName(), /* ShouldSkipEmpty */ false);
+ Printer.printInt("value", N->getValue(), /* ShouldSkipZero */ false);
+ Out << ")";
+}
+
+static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
+ TypePrinting *, SlotTracker *, const Module *) {
+ Out << "!DIBasicType(";
+ MDFieldPrinter Printer(Out);
+ if (N->getTag() != dwarf::DW_TAG_base_type)
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printDwarfEnum("encoding", N->getEncoding(),
+ dwarf::AttributeEncodingString);
+ Out << ")";
+}
+
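
For a sense of what this writer family emits, with every field populated a basic type and a file node render roughly as follows; the values are made up, and any zero or empty field would simply disappear from the output:

    #include <iostream>

    // Illustrative renderings only; not output captured from the patch.
    int main() {
      std::cout << "!DIBasicType(name: \"int\", size: 32, align: 32, "
                   "encoding: DW_ATE_signed)\n";
      std::cout << "!DIFile(filename: \"t.c\", directory: \"/tmp\")\n";
      return 0;
    }
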
+static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIDerivedType(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getRawScope());
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("baseType", N->getRawBaseType(),
+ /* ShouldSkipNull */ false);
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printInt("offset", N->getOffsetInBits());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("extraData", N->getRawExtraData());
+ Out << ")";
+}
+
+static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context) {
+ Out << "!DICompositeType(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getRawScope());
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("baseType", N->getRawBaseType());
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printInt("offset", N->getOffsetInBits());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("elements", N->getRawElements());
+ Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(),
+ dwarf::LanguageString);
+ Printer.printMetadata("vtableHolder", N->getRawVTableHolder());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
+ Printer.printString("identifier", N->getIdentifier());
+ Out << ")";
+}
+
+static void writeDISubroutineType(raw_ostream &Out, const DISubroutineType *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context) {
+ Out << "!DISubroutineType(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("types", N->getRawTypeArray(),
+ /* ShouldSkipNull */ false);
+ Out << ")";
+}
+
+static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
+ SlotTracker *, const Module *) {
+ Out << "!DIFile(";
+ MDFieldPrinter Printer(Out);
+ Printer.printString("filename", N->getFilename(),
+ /* ShouldSkipEmpty */ false);
+ Printer.printString("directory", N->getDirectory(),
+ /* ShouldSkipEmpty */ false);
+ Out << ")";
+}
+
+static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DICompileUnit(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printDwarfEnum("language", N->getSourceLanguage(),
+ dwarf::LanguageString, /* ShouldSkipZero */ false);
+ Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
+ Printer.printString("producer", N->getProducer());
+ Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printString("flags", N->getFlags());
+ Printer.printInt("runtimeVersion", N->getRuntimeVersion(),
+ /* ShouldSkipZero */ false);
+ Printer.printString("splitDebugFilename", N->getSplitDebugFilename());
+ Printer.printInt("emissionKind", N->getEmissionKind(),
+ /* ShouldSkipZero */ false);
+ Printer.printMetadata("enums", N->getRawEnumTypes());
+ Printer.printMetadata("retainedTypes", N->getRawRetainedTypes());
+ Printer.printMetadata("subprograms", N->getRawSubprograms());
+ Printer.printMetadata("globals", N->getRawGlobalVariables());
+ Printer.printMetadata("imports", N->getRawImportedEntities());
+ Printer.printInt("dwoId", N->getDWOId());
+ Out << ")";
+}
+
+static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DISubprogram(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printString("linkageName", N->getLinkageName());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getRawType());
+ Printer.printBool("isLocal", N->isLocalToUnit());
+ Printer.printBool("isDefinition", N->isDefinition());
+ Printer.printInt("scopeLine", N->getScopeLine());
+ Printer.printMetadata("containingType", N->getRawContainingType());
+ Printer.printDwarfEnum("virtuality", N->getVirtuality(),
+ dwarf::VirtualityString);
+ Printer.printInt("virtualIndex", N->getVirtualIndex());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printMetadata("function", N->getRawFunction());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
+ Printer.printMetadata("declaration", N->getRawDeclaration());
+ Printer.printMetadata("variables", N->getRawVariables());
+ Out << ")";
+}
+
+static void writeDILexicalBlock(raw_ostream &Out, const DILexicalBlock *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DILexicalBlock(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printInt("column", N->getColumn());
+ Out << ")";
+}
+
+static void writeDILexicalBlockFile(raw_ostream &Out,
+ const DILexicalBlockFile *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DILexicalBlockFile(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("discriminator", N->getDiscriminator(),
+ /* ShouldSkipZero */ false);
+ Out << ")";
+}
+
+static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DINamespace(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Out << ")";
+}
+
+static void writeDITemplateTypeParameter(raw_ostream &Out,
+ const DITemplateTypeParameter *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DITemplateTypeParameter(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false);
+ Out << ")";
+}
+
+static void writeDITemplateValueParameter(raw_ostream &Out,
+ const DITemplateValueParameter *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DITemplateValueParameter(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ if (N->getTag() != dwarf::DW_TAG_template_value_parameter)
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("type", N->getRawType());
+ Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false);
+ Out << ")";
+}
+
+static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context) {
+ Out << "!DIGlobalVariable(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printString("linkageName", N->getLinkageName());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getRawType());
+ Printer.printBool("isLocal", N->isLocalToUnit());
+ Printer.printBool("isDefinition", N->isDefinition());
+ Printer.printMetadata("variable", N->getRawVariable());
+ Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
+ Out << ")";
+}
+
+static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context) {
+ Out << "!DILocalVariable(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printInt("arg", N->getArg(),
+ /* ShouldSkipZero */
+ N->getTag() == dwarf::DW_TAG_auto_variable);
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getRawType());
+ Printer.printDIFlags("flags", N->getFlags());
+ Out << ")";
+}
+
+static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIExpression(";
+ FieldSeparator FS;
+ if (N->isValid()) {
+ for (auto I = N->expr_op_begin(), E = N->expr_op_end(); I != E; ++I) {
+ const char *OpStr = dwarf::OperationEncodingString(I->getOp());
+ assert(OpStr && "Expected valid opcode");
+
+ Out << FS << OpStr;
+ for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
+ Out << FS << I->getArg(A);
+ }
+ } else {
+ for (const auto &I : N->getElements())
+ Out << FS << I;
}
Out << ")";
}
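
writeDIExpression has two paths: a well-formed expression prints DWARF operation mnemonics with their arguments inline, while a malformed one falls back to the raw element values so broken IR stays visible. A standalone sketch of both renderings, using opcode values from the DWARF standard:

    #include <iostream>
    #include <vector>

    // Valid path prints mnemonics plus arguments; the fallback prints the
    // raw elements of the same expression.
    int main() {
      const std::vector<unsigned> Elements = {0x06 /*DW_OP_deref*/,
                                              0x22 /*DW_OP_plus*/, 8};
      std::cout << "!DIExpression(DW_OP_deref, DW_OP_plus, 8)\n"; // valid path
      std::cout << "!DIExpression(";
      for (size_t I = 0; I != Elements.size(); ++I)
        std::cout << (I ? ", " : "") << Elements[I];
      std::cout << ")\n"; // fallback path: !DIExpression(6, 34, 8)
      return 0;
    }
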
+static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIObjCProperty(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printString("setter", N->getSetterName());
+ Printer.printString("getter", N->getGetterName());
+ Printer.printInt("attributes", N->getAttributes());
+ Printer.printMetadata("type", N->getRawType());
+ Out << ")";
+}
+
+static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context) {
+ Out << "!DIImportedEntity(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("entity", N->getRawEntity());
+ Printer.printInt("line", N->getLine());
+ Out << ")";
+}
+
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinting *TypePrinter,
SlotTracker *Machine,
const Module *Context) {
- assert(isa<UniquableMDNode>(Node) && "Expected uniquable MDNode");
-
- auto *Uniquable = cast<UniquableMDNode>(Node);
- if (Uniquable->isDistinct())
+ if (Node->isDistinct())
Out << "distinct ";
+ else if (Node->isTemporary())
+ Out << "<temporary!> "; // Handle broken code.
- switch (Uniquable->getMetadataID()) {
+ switch (Node->getMetadataID()) {
default:
llvm_unreachable("Expected uniquable MDNode");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
+#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
- write##CLASS(Out, cast<CLASS>(Uniquable), TypePrinter, Machine, Context); \
+ write##CLASS(Out, cast<CLASS>(Node), TypePrinter, Machine, Context); \
break;
#include "llvm/IR/Metadata.def"
}
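
The switch above is stamped out by the X-macro list in llvm/IR/Metadata.def; the rename from HANDLE_UNIQUABLE_LEAF to HANDLE_MDNODE_LEAF reflects that every leaf node kind now flows through a single macro. A self-contained sketch of the pattern with a stand-in list macro (the names here are illustrative, not the real .def contents):

    #include <cstdio>

    // Stand-in for the .def file: one entry per node kind.
    #define MDNODE_KINDS(X) X(DILocation) X(DISubrange) X(DIEnumerator)

    // First expansion: build the kind enum.
    #define HANDLE_MDNODE_LEAF(CLASS) CLASS##Kind,
    enum Kind { MDNODE_KINDS(HANDLE_MDNODE_LEAF) };
    #undef HANDLE_MDNODE_LEAF

    // Second expansion: build the dispatch switch.
    static const char *writerName(Kind K) {
      switch (K) {
    #define HANDLE_MDNODE_LEAF(CLASS)                                          \
      case CLASS##Kind:                                                        \
        return "write" #CLASS;
      MDNODE_KINDS(HANDLE_MDNODE_LEAF)
    #undef HANDLE_MDNODE_LEAF
      }
      return "unknown";
    }

    int main() {
      std::printf("%s\n", writerName(DISubrangeKind)); // writeDISubrange
      return 0;
    }
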
@@ -1440,6 +1940,77 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context);
}
+namespace {
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ std::unique_ptr<SlotTracker> ModuleSlotTracker;
+ SlotTracker &Machine;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+ SetVector<const Comdat *> Comdats;
+ bool ShouldPreserveUseListOrder;
+ UseListOrderStack UseListOrders;
+ SmallVector<StringRef, 8> MDNames;
+
+public:
+ /// Construct an AssemblyWriter with an external SlotTracker
+ AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder = false);
+
+ /// Construct an AssemblyWriter with an internally allocated SlotTracker
+ AssemblyWriter(formatted_raw_ostream &o, const Module *M,
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder = false);
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+  void writeParamOperand(const Value *Operand, AttributeSet Attrs,
+                         unsigned Idx);
+ void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+ void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope);
+
+ void writeAllMDNodes();
+ void writeMDNode(unsigned Slot, const MDNode *Node);
+ void writeAllAttributeGroups();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printComdat(const Comdat *C);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstructionLine(const Instruction &I);
+ void printInstruction(const Instruction &I);
+
+ void printUseListOrder(const UseListOrder &Order);
+ void printUseLists(const Function *F);
+
+private:
+ void init();
+
+ /// \brief Print out metadata attachments.
+ void printMetadataAttachments(
+ const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
+ StringRef Separator);
+
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+
+ // printGCRelocateComment - print comment after call to the gc.relocate
+ // intrinsic indicating base and derived pointer names.
+ void printGCRelocateComment(const Value &V);
+};
+} // namespace
+
void AssemblyWriter::init() {
if (!TheModule)
return;
@@ -1452,23 +2023,23 @@ void AssemblyWriter::init() {
Comdats.insert(C);
}
-
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M,
- AssemblyAnnotationWriter *AAW)
- : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW) {
+ const Module *M, AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder)
+ : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
init();
}
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW)
- : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)),
- Machine(*ModuleSlotTracker), AnnotationWriter(AAW) {
+ AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder)
+ : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)),
+ Machine(*ModuleSlotTracker), AnnotationWriter(AAW),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
init();
}
-AssemblyWriter::~AssemblyWriter() { }
-
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
Out << "<null operand!>";
@@ -1553,7 +2124,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
void AssemblyWriter::printModule(const Module *M) {
Machine.initialize();
- if (shouldPreserveAssemblyUseListOrder())
+ if (ShouldPreserveUseListOrder)
UseListOrders = predictUseListOrder(M);
if (!M->getModuleIdentifier().empty() &&
@@ -1982,6 +2553,10 @@ void AssemblyWriter::printFunction(const Function *F) {
writeOperand(F->getPrologueData(), true);
}
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ F->getAllMetadata(MDs);
+ printMetadataAttachments(MDs, " ");
+
if (F->isDeclaration()) {
Out << '\n';
} else {
@@ -2073,10 +2648,26 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) {
Out << '\n';
}
+/// printGCRelocateComment - print comment after call to the gc.relocate
+/// intrinsic indicating base and derived pointer names.
+void AssemblyWriter::printGCRelocateComment(const Value &V) {
+ assert(isGCRelocate(&V));
+ GCRelocateOperands GCOps(cast<Instruction>(&V));
+
+ Out << " ; (";
+ writeOperand(GCOps.getBasePtr(), false);
+ Out << ", ";
+ writeOperand(GCOps.getDerivedPtr(), false);
+ Out << ")";
+}
+
/// printInfoComment - Print a little comment after the instruction indicating
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
+ if (isGCRelocate(&V))
+ printGCRelocateComment(V);
+
if (AnnotationWriter)
AnnotationWriter->printInfoComment(V, Out);
}
@@ -2228,8 +2819,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
Operand = CI->getCalledValue();
- PointerType *PTy = cast<PointerType>(Operand->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ FunctionType *FTy = cast<FunctionType>(CI->getFunctionType());
Type *RetTy = FTy->getReturnType();
const AttributeSet &PAL = CI->getAttributes();
@@ -2241,15 +2831,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// and if the return type is not a pointer to a function.
//
Out << ' ';
- if (!FTy->isVarArg() &&
- (!RetTy->isPointerTy() ||
- !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
- TypePrinter.print(RetTy, Out);
- Out << ' ';
- writeOperand(Operand, false);
- } else {
- writeOperand(Operand, true);
- }
+ TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
Out << '(';
for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
if (op > 0)
@@ -2269,8 +2853,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Operand = II->getCalledValue();
- PointerType *PTy = cast<PointerType>(Operand->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ FunctionType *FTy = cast<FunctionType>(II->getFunctionType());
Type *RetTy = FTy->getReturnType();
const AttributeSet &PAL = II->getAttributes();
@@ -2288,15 +2871,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// and if the return type is not a pointer to a function.
//
Out << ' ';
- if (!FTy->isVarArg() &&
- (!RetTy->isPointerTy() ||
- !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
- TypePrinter.print(RetTy, Out);
- Out << ' ';
- writeOperand(Operand, false);
- } else {
- writeOperand(Operand, true);
- }
+ TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
Out << '(';
for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
if (op)
@@ -2318,7 +2895,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (AI->isUsedWithInAlloca())
Out << "inalloca ";
TypePrinter.print(AI->getAllocatedType(), Out);
- if (!AI->getArraySize() || AI->isArrayAllocation()) {
+
+ // Explicitly write the array size if the code is broken, if it's an array
+ // allocation, or if the type is not canonical for scalar allocations. The
+ // latter case prevents the type from mutating when round-tripping through
+ // assembly.
+ if (!AI->getArraySize() || AI->isArrayAllocation() ||
+ !AI->getArraySize()->getType()->isIntegerTy(32)) {
Out << ", ";
writeOperand(AI->getArraySize(), true);
}
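
The new predicate is easy to state on its own: the size operand must stay explicit whenever dropping it could change meaning on reparse, because a bare "alloca i32" reparses with an implicit i32 1 size. A standalone restatement covering the three cases:

    #include <cassert>

    // Print ", <size>" when the code is broken (null size), when it is a
    // real array allocation, or when the size operand is not the
    // canonical i32.
    static bool mustPrintArraySize(bool HasSize, bool IsArrayAlloc,
                                   bool SizeIsInt32) {
      return !HasSize || IsArrayAlloc || !SizeIsInt32;
    }

    int main() {
      assert(mustPrintArraySize(true, false, false));  // "alloca i32, i64 1"
      assert(!mustPrintArraySize(true, false, true));  // "alloca i32"
      assert(mustPrintArraySize(true, true, true));    // "alloca i32, i32 4"
      return 0;
    }
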
@@ -2340,6 +2923,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
TypePrinter.print(I.getType(), Out);
} else if (Operand) { // Print the normal way.
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(GEP->getSourceElementType(), Out);
+ Out << ',';
+ } else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(LI->getType(), Out);
+ Out << ',';
+ }
// PrintAllTypes - Instructions who have operands of all the same type
// omit the type from all but the first operand. If the instruction has
@@ -2398,50 +2990,37 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print Metadata info.
SmallVector<std::pair<unsigned, MDNode *>, 4> InstMD;
I.getAllMetadata(InstMD);
- if (!InstMD.empty()) {
- SmallVector<StringRef, 8> MDNames;
- I.getType()->getContext().getMDKindNames(MDNames);
- for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
- unsigned Kind = InstMD[i].first;
- if (Kind < MDNames.size()) {
- Out << ", !" << MDNames[Kind];
- } else {
- Out << ", !<unknown kind #" << Kind << ">";
- }
- Out << ' ';
- WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
- TheModule);
- }
- }
+ printMetadataAttachments(InstMD, ", ");
+
+ // Print a nice comment.
printInfoComment(I);
}
-static void WriteMDNodeComment(const MDNode *Node,
- formatted_raw_ostream &Out) {
- if (Node->getNumOperands() < 1)
- return;
-
- Metadata *Op = Node->getOperand(0);
- if (!Op || !isa<MDString>(Op))
+void AssemblyWriter::printMetadataAttachments(
+ const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
+ StringRef Separator) {
+ if (MDs.empty())
return;
- DIDescriptor Desc(Node);
- if (!Desc.Verify())
- return;
+ if (MDNames.empty())
+ TheModule->getMDKindNames(MDNames);
- unsigned Tag = Desc.getTag();
- Out.PadToColumn(50);
- if (dwarf::TagString(Tag)) {
- Out << "; ";
- Desc.print(Out);
- } else if (Tag == dwarf::DW_TAG_user_base) {
- Out << "; [ DW_TAG_user_base ]";
+ for (const auto &I : MDs) {
+ unsigned Kind = I.first;
+ Out << Separator;
+ if (Kind < MDNames.size())
+ Out << "!" << MDNames[Kind];
+ else
+ Out << "!<unknown kind #" << Kind << ">";
+ Out << ' ';
+ WriteAsOperandInternal(Out, I.second, &TypePrinter, &Machine, TheModule);
}
}
void AssemblyWriter::writeMDNode(unsigned Slot, const MDNode *Node) {
Out << '!' << Slot << " = ";
printMDNodeBody(Node);
+ Out << "\n";
}
void AssemblyWriter::writeAllMDNodes() {
@@ -2458,8 +3037,6 @@ void AssemblyWriter::writeAllMDNodes() {
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
- WriteMDNodeComment(Node, Out);
- Out << "\n";
}
void AssemblyWriter::writeAllAttributeGroups() {
@@ -2476,8 +3053,6 @@ void AssemblyWriter::writeAllAttributeGroups() {
<< I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
}
-} // namespace llvm
-
void AssemblyWriter::printUseListOrder(const UseListOrder &Order) {
bool IsInFunction = Machine.getFunction();
if (IsInFunction)
@@ -2521,10 +3096,18 @@ void AssemblyWriter::printUseLists(const Function *F) {
// External Interface declarations
//===----------------------------------------------------------------------===//
-void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this->getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this->getParent(), AAW);
+ W.printFunction(this);
+}
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
+ bool ShouldPreserveUseListOrder) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, this, AAW);
+ AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder);
W.printModule(this);
}
@@ -2572,11 +3155,24 @@ void Type::print(raw_ostream &OS) const {
}
}
+static bool isReferencingMDNode(const Instruction &I) {
+ if (const auto *CI = dyn_cast<CallInst>(&I))
+ if (Function *F = CI->getCalledFunction())
+ if (F->isIntrinsic())
+ for (auto &Op : I.operands())
+ if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op))
+ if (isa<MDNode>(V->getMetadata()))
+ return true;
+ return false;
+}
+
void Value::print(raw_ostream &ROS) const {
formatted_raw_ostream OS(ROS);
if (const Instruction *I = dyn_cast<Instruction>(this)) {
const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr;
- SlotTracker SlotTable(F);
+ SlotTracker SlotTable(
+ F,
+ /* ShouldInitializeAllMetadata */ isReferencingMDNode(*I));
AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
W.printInstruction(*I);
} else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
@@ -2584,7 +3180,8 @@ void Value::print(raw_ostream &ROS) const {
AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
W.printBasicBlock(BB);
} else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- SlotTracker SlotTable(GV->getParent());
+ SlotTracker SlotTable(GV->getParent(),
+ /* ShouldInitializeAllMetadata */ isa<Function>(GV));
AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
W.printGlobal(V);
@@ -2593,7 +3190,7 @@ void Value::print(raw_ostream &ROS) const {
else
W.printAlias(cast<GlobalAlias>(GV));
} else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) {
- V->getMetadata()->print(ROS);
+ V->getMetadata()->print(ROS, getModuleFromVal(V));
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
@@ -2609,8 +3206,9 @@ void Value::print(raw_ostream &ROS) const {
void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) const {
// Fast path: Don't construct and populate a TypePrinting object if we
// won't be needing any types printed.
- if (!PrintType && ((!isa<Constant>(this) && !isa<MetadataAsValue>(this)) ||
- hasName() || isa<GlobalValue>(this))) {
+ bool IsMetadata = isa<MetadataAsValue>(this);
+ if (!PrintType && ((!isa<Constant>(this) && !IsMetadata) || hasName() ||
+ isa<GlobalValue>(this))) {
WriteAsOperandInternal(O, this, nullptr, nullptr, M);
return;
}
@@ -2626,51 +3224,62 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons
O << ' ';
}
- WriteAsOperandInternal(O, this, &TypePrinter, nullptr, M);
+ SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ IsMetadata);
+ WriteAsOperandInternal(O, this, &TypePrinter, &Machine, M);
}
-void Metadata::print(raw_ostream &ROS) const {
+static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
+ const Module *M, bool OnlyAsOperand) {
formatted_raw_ostream OS(ROS);
- if (auto *N = dyn_cast<MDNode>(this)) {
- SlotTracker SlotTable(static_cast<Function *>(nullptr));
- AssemblyWriter W(OS, SlotTable, nullptr, nullptr);
- W.printMDNodeBody(N);
+ auto *N = dyn_cast<MDNode>(&MD);
+ TypePrinting TypePrinter;
+ SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ N);
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+
+ WriteAsOperandInternal(OS, &MD, &TypePrinter, &Machine, M,
+ /* FromValue */ true);
+ if (OnlyAsOperand || !N)
return;
- }
- printAsOperand(OS);
+
+ OS << " = ";
+ WriteMDNodeBodyInternal(OS, N, &TypePrinter, &Machine, M);
}
-void Metadata::printAsOperand(raw_ostream &ROS, bool PrintType,
- const Module *M) const {
- formatted_raw_ostream OS(ROS);
+void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const {
+ printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ true);
+}
- std::unique_ptr<TypePrinting> TypePrinter;
- if (PrintType) {
- TypePrinter.reset(new TypePrinting);
- if (M)
- TypePrinter->incorporateTypes(*M);
- }
- WriteAsOperandInternal(OS, this, TypePrinter.get(), nullptr, M,
- /* FromValue */ true);
+void Metadata::print(raw_ostream &OS, const Module *M) const {
+ printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ false);
}
// Value::dump - allow easy printing of Values from the debugger.
+LLVM_DUMP_METHOD
void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
// Type::dump - allow easy printing of Types from the debugger.
+LLVM_DUMP_METHOD
void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
// Module::dump() - Allow printing of Modules from the debugger.
+LLVM_DUMP_METHOD
void Module::dump() const { print(dbgs(), nullptr); }
// \brief Allow printing of Comdats from the debugger.
+LLVM_DUMP_METHOD
void Comdat::dump() const { print(dbgs()); }
// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+LLVM_DUMP_METHOD
void NamedMDNode::dump() const { print(dbgs()); }
-void Metadata::dump() const {
- print(dbgs());
+LLVM_DUMP_METHOD
+void Metadata::dump() const { dump(nullptr); }
+
+LLVM_DUMP_METHOD
+void Metadata::dump(const Module *M) const {
+ print(dbgs(), M);
dbgs() << '\n';
}
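
Threading the module through lets metadata printing resolve slot numbers and named types consistently instead of working blind. A hedged usage sketch against the entry points above; it assumes an LLVM tree containing this patch, with MD and M supplied by the caller:

    #include "llvm/IR/Metadata.h"
    #include "llvm/Support/raw_ostream.h"

    // Print one node both ways (sketch only; output shapes are examples).
    void printNode(const llvm::MDNode *MD, const llvm::Module *M) {
      MD->printAsOperand(llvm::errs(), M); // just the reference, e.g. "!7"
      llvm::errs() << "\n";
      MD->print(llvm::errs(), M); // reference plus body: "!7 = !DILocation(...)"
      llvm::errs() << "\n";
    }
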
diff --git a/contrib/llvm/lib/IR/AsmWriter.h b/contrib/llvm/lib/IR/AsmWriter.h
deleted file mode 100644
index 60da5ad..0000000
--- a/contrib/llvm/lib/IR/AsmWriter.h
+++ /dev/null
@@ -1,129 +0,0 @@
-//===-- llvm/IR/AsmWriter.h - Printing LLVM IR as an assembly file - C++ --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This files defines the interface for the AssemblyWriter class used to print
-// LLVM IR and various helper classes that are used in printing.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_IR_ASMWRITER_H
-#define LLVM_LIB_IR_ASMWRITER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/IR/UseListOrder.h"
-#include "llvm/Support/FormattedStream.h"
-
-namespace llvm {
-
-class BasicBlock;
-class Function;
-class GlobalValue;
-class Comdat;
-class Module;
-class NamedMDNode;
-class Value;
-class SlotTracker;
-
-/// Create a new SlotTracker for a Module
-SlotTracker *createSlotTracker(const Module *M);
-
-//===----------------------------------------------------------------------===//
-// TypePrinting Class: Type printing machinery
-//===----------------------------------------------------------------------===//
-
-class TypePrinting {
- TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION;
- void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION;
-public:
-
- /// NamedTypes - The named types that are used by the current module.
- TypeFinder NamedTypes;
-
- /// NumberedTypes - The numbered types, along with their value.
- DenseMap<StructType*, unsigned> NumberedTypes;
-
-
- TypePrinting() {}
- ~TypePrinting() {}
-
- void incorporateTypes(const Module &M);
-
- void print(Type *Ty, raw_ostream &OS);
-
- void printStructBody(StructType *Ty, raw_ostream &OS);
-};
-
-class AssemblyWriter {
-protected:
- formatted_raw_ostream &Out;
- const Module *TheModule;
-
-private:
- std::unique_ptr<SlotTracker> ModuleSlotTracker;
- SlotTracker &Machine;
- TypePrinting TypePrinter;
- AssemblyAnnotationWriter *AnnotationWriter;
- SetVector<const Comdat *> Comdats;
- UseListOrderStack UseListOrders;
-
-public:
- /// Construct an AssemblyWriter with an external SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M, AssemblyAnnotationWriter *AAW);
-
- /// Construct an AssemblyWriter with an internally allocated SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW);
-
- virtual ~AssemblyWriter();
-
- void printMDNodeBody(const MDNode *MD);
- void printNamedMDNode(const NamedMDNode *NMD);
-
- void printModule(const Module *M);
-
- void writeOperand(const Value *Op, bool PrintType);
- void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
- void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
- void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope);
-
- void writeAllMDNodes();
- void writeMDNode(unsigned Slot, const MDNode *Node);
- void writeAllAttributeGroups();
-
- void printTypeIdentities();
- void printGlobal(const GlobalVariable *GV);
- void printAlias(const GlobalAlias *GV);
- void printComdat(const Comdat *C);
- void printFunction(const Function *F);
- void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
- void printBasicBlock(const BasicBlock *BB);
- void printInstructionLine(const Instruction &I);
- void printInstruction(const Instruction &I);
-
- void printUseListOrder(const UseListOrder &Order);
- void printUseLists(const Function *F);
-
-private:
- void init();
-
- // printInfoComment - Print a little comment after the instruction indicating
- // which slot it occupies.
- void printInfoComment(const Value &V);
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
index 0448dc17..dbd7d63 100644
--- a/contrib/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -33,8 +33,8 @@ class AttributeImpl : public FoldingSetNode {
unsigned char KindID; ///< Holds the AttrEntryKind of the attribute
// AttributesImpl is uniqued, these should not be publicly available.
- void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION;
- AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+ void operator=(const AttributeImpl &) = delete;
+ AttributeImpl(const AttributeImpl &) = delete;
protected:
enum AttrEntryKind {
@@ -115,10 +115,10 @@ class IntAttributeImpl : public EnumAttributeImpl {
public:
IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val)
: EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) {
- assert(
- (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
- Kind == Attribute::Dereferenceable) &&
- "Wrong kind for int attribute!");
+ assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
+ Kind == Attribute::Dereferenceable ||
+ Kind == Attribute::DereferenceableOrNull) &&
+ "Wrong kind for int attribute!");
}
uint64_t getValue() const { return Val; }
@@ -151,8 +151,8 @@ class AttributeSetNode : public FoldingSetNode {
}
// AttributesSetNode is uniqued, these should not be publicly available.
- void operator=(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
- AttributeSetNode(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
+ void operator=(const AttributeSetNode &) = delete;
+ AttributeSetNode(const AttributeSetNode &) = delete;
public:
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
@@ -166,6 +166,7 @@ public:
unsigned getAlignment() const;
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
+ uint64_t getDereferenceableOrNullBytes() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
@@ -199,8 +200,8 @@ class AttributeSetImpl : public FoldingSetNode {
}
// AttributesSet is uniqued, these should not be publicly available.
- void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
- AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
+ void operator=(const AttributeSetImpl &) = delete;
+ AttributeSetImpl(const AttributeSetImpl &) = delete;
public:
AttributeSetImpl(LLVMContext &C,
ArrayRef<std::pair<unsigned, AttributeSetNode *> > Attrs)
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index 04545ea..fef05c8 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -94,6 +94,12 @@ Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context,
return get(Context, Dereferenceable, Bytes);
}
+Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
+ uint64_t Bytes) {
+ assert(Bytes && "Bytes must be non-zero.");
+ return get(Context, DereferenceableOrNull, Bytes);
+}
+
//===----------------------------------------------------------------------===//
// Attribute Accessor Methods
//===----------------------------------------------------------------------===//
@@ -170,6 +176,13 @@ uint64_t Attribute::getDereferenceableBytes() const {
return pImpl->getValueAsInt();
}
+uint64_t Attribute::getDereferenceableOrNullBytes() const {
+ assert(hasAttribute(Attribute::DereferenceableOrNull) &&
+         "Trying to get dereferenceable_or_null bytes from "
+         "non-dereferenceable_or_null attribute!");
+ return pImpl->getValueAsInt();
+}
+
std::string Attribute::getAsString(bool InAttrGrp) const {
if (!pImpl) return "";
@@ -181,6 +194,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "builtin";
if (hasAttribute(Attribute::ByVal))
return "byval";
+ if (hasAttribute(Attribute::Convergent))
+ return "convergent";
if (hasAttribute(Attribute::InAlloca))
return "inalloca";
if (hasAttribute(Attribute::InlineHint))
@@ -263,9 +278,9 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}
- if (hasAttribute(Attribute::StackAlignment)) {
+ auto AttrWithBytesToString = [&](const char *Name) {
std::string Result;
- Result += "alignstack";
+ Result += Name;
if (InAttrGrp) {
Result += "=";
Result += utostr(getValueAsInt());
@@ -275,21 +290,16 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
Result += ")";
}
return Result;
- }
+ };
- if (hasAttribute(Attribute::Dereferenceable)) {
- std::string Result;
- Result += "dereferenceable";
- if (InAttrGrp) {
- Result += "=";
- Result += utostr(getValueAsInt());
- } else {
- Result += "(";
- Result += utostr(getValueAsInt());
- Result += ")";
- }
- return Result;
- }
+ if (hasAttribute(Attribute::StackAlignment))
+ return AttrWithBytesToString("alignstack");
+
+ if (hasAttribute(Attribute::Dereferenceable))
+ return AttrWithBytesToString("dereferenceable");
+
+ if (hasAttribute(Attribute::DereferenceableOrNull))
+ return AttrWithBytesToString("dereferenceable_or_null");
// Convert target-dependent attributes to strings of the form:
//
@@ -298,12 +308,12 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
//
if (isStringAttribute()) {
std::string Result;
- Result += '\"' + getKindAsString().str() + '"';
+ Result += (Twine('"') + getKindAsString() + Twine('"')).str();
StringRef Val = pImpl->getValueAsString();
if (Val.empty()) return Result;
- Result += "=\"" + Val.str() + '"';
+ Result += ("=\"" + Val + Twine('"')).str();
return Result;
}
@@ -426,8 +436,14 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::InAlloca: return 1ULL << 43;
case Attribute::NonNull: return 1ULL << 44;
case Attribute::JumpTable: return 1ULL << 45;
+ case Attribute::Convergent: return 1ULL << 46;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
+ break;
+ case Attribute::DereferenceableOrNull:
+ llvm_unreachable("dereferenceable_or_null attribute not supported in raw "
+ "format");
+ break;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -519,6 +535,13 @@ uint64_t AttributeSetNode::getDereferenceableBytes() const {
return 0;
}
+uint64_t AttributeSetNode::getDereferenceableOrNullBytes() const {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::DereferenceableOrNull))
+ return I->getDereferenceableOrNullBytes();
+ return 0;
+}
+
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
std::string Str;
for (iterator I = begin(), E = end(); I != E; ++I) {
@@ -663,6 +686,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
Attrs.push_back(std::make_pair(Index,
Attribute::getWithDereferenceableBytes(C,
B.getDereferenceableBytes())));
+ else if (Kind == Attribute::DereferenceableOrNull)
+ Attrs.push_back(
+ std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes(
+ C, B.getDereferenceableOrNullBytes())));
else
Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
}
@@ -795,12 +822,10 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
if (!pImpl) return AttributeSet();
if (!Attrs.pImpl) return *this;
-#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
assert(!Attrs.hasAttribute(Index, Attribute::Alignment) &&
"Attempt to change alignment!");
-#endif
// Add the attribute slots before the one we're trying to add.
SmallVector<AttributeSet, 4> AttrSet;
@@ -835,6 +860,57 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
return get(C, AttrSet);
}
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &Attrs) const {
+ if (!pImpl) return AttributeSet();
+
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!");
+
+ // Add the attribute slots before the one we're trying to add.
+ SmallVector<AttributeSet, 4> AttrSet;
+ uint64_t NumAttrs = pImpl->getNumAttributes();
+ AttributeSet AS;
+ uint64_t LastIndex = 0;
+ for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
+ if (getSlotIndex(I) >= Index) {
+ if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
+ break;
+ }
+ LastIndex = I + 1;
+ AttrSet.push_back(getSlotAttributes(I));
+ }
+
+ // Now remove the attribute from the correct slot. There may already be an
+ // AttributeSet there.
+ AttrBuilder B(AS, Index);
+ B.remove(Attrs);
+
+ AttrSet.push_back(AttributeSet::get(C, Index, B));
+
+ // Add the remaining attribute slots.
+ for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
+ AttrSet.push_back(getSlotAttributes(I));
+
+ return get(C, AttrSet);
+}
+
+AttributeSet AttributeSet::addDereferenceableAttr(LLVMContext &C, unsigned Index,
+ uint64_t Bytes) const {
+ llvm::AttrBuilder B;
+ B.addDereferenceableAttr(Bytes);
+ return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+}
+
+AttributeSet AttributeSet::addDereferenceableOrNullAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const {
+ llvm::AttrBuilder B;
+ B.addDereferenceableOrNullAttr(Bytes);
+ return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+}
+
//===----------------------------------------------------------------------===//
// AttributeSet Accessor Methods
//===----------------------------------------------------------------------===//
@@ -925,6 +1001,11 @@ uint64_t AttributeSet::getDereferenceableBytes(unsigned Index) const {
return ASN ? ASN->getDereferenceableBytes() : 0;
}
+uint64_t AttributeSet::getDereferenceableOrNullBytes(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getDereferenceableOrNullBytes() : 0;
+}
+
std::string AttributeSet::getAsString(unsigned Index,
bool InAttrGrp) const {
AttributeSetNode *ASN = getAttributes(Index);
@@ -1004,7 +1085,8 @@ void AttributeSet::dump() const {
//===----------------------------------------------------------------------===//
AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
- : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) {
+ : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
+ DerefOrNullBytes(0) {
AttributeSetImpl *pImpl = AS.pImpl;
if (!pImpl) return;
@@ -1021,7 +1103,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
void AttrBuilder::clear() {
Attrs.reset();
- Alignment = StackAlignment = DerefBytes = 0;
+ Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
}
AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1048,6 +1130,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
StackAlignment = Attr.getStackAlignment();
else if (Kind == Attribute::Dereferenceable)
DerefBytes = Attr.getDereferenceableBytes();
+ else if (Kind == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = Attr.getDereferenceableOrNullBytes();
return *this;
}
@@ -1066,6 +1150,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
StackAlignment = 0;
else if (Val == Attribute::Dereferenceable)
DerefBytes = 0;
+ else if (Val == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = 0;
return *this;
}
@@ -1092,6 +1178,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
StackAlignment = 0;
else if (Kind == Attribute::Dereferenceable)
DerefBytes = 0;
+ else if (Kind == Attribute::DereferenceableOrNull)
+ DerefOrNullBytes = 0;
} else {
assert(Attr.isStringAttribute() && "Invalid attribute type!");
std::map<std::string, std::string>::iterator
@@ -1142,6 +1230,15 @@ AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) {
return *this;
}
+AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) {
+ if (Bytes == 0)
+ return *this;
+
+ Attrs[Attribute::DereferenceableOrNull] = true;
+ DerefOrNullBytes = Bytes;
+ return *this;
+}
+
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
// FIXME: What if both have alignments, but they don't match?!
if (!Alignment)
@@ -1153,15 +1250,52 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
if (!DerefBytes)
DerefBytes = B.DerefBytes;
+ if (!DerefOrNullBytes)
+ DerefOrNullBytes = B.DerefOrNullBytes;
+
Attrs |= B.Attrs;
- for (td_const_iterator I = B.TargetDepAttrs.begin(),
- E = B.TargetDepAttrs.end(); I != E; ++I)
- TargetDepAttrs[I->first] = I->second;
+ for (auto I : B.td_attrs())
+ TargetDepAttrs[I.first] = I.second;
+
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
+ // FIXME: What if both have alignments, but they don't match?!
+ if (B.Alignment)
+ Alignment = 0;
+
+ if (B.StackAlignment)
+ StackAlignment = 0;
+
+ if (B.DerefBytes)
+ DerefBytes = 0;
+
+ if (B.DerefOrNullBytes)
+ DerefOrNullBytes = 0;
+
+ Attrs &= ~B.Attrs;
+
+ for (auto I : B.td_attrs())
+ TargetDepAttrs.erase(I.first);
return *this;
}
+bool AttrBuilder::overlaps(const AttrBuilder &B) const {
+ // First check if any of the target independent attributes overlap.
+ if ((Attrs & B.Attrs).any())
+ return true;
+
+ // Then check if any target dependent ones do.
+ for (auto I : td_attrs())
+ if (B.contains(I.first))
+ return true;
+
+ return false;
+}
+
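
remove() is the set-difference counterpart of merge(), and overlaps() is the matching intersection test; on the target-independent side both reduce to plain bit operations. A standalone sketch of those semantics, with illustrative bit positions:

    #include <bitset>
    #include <cassert>

    // Model the target-independent attribute bit vector.
    int main() {
      std::bitset<64> Attrs, B;
      Attrs.set(3); Attrs.set(5);  // e.g. readonly, nonnull
      B.set(5);                    // e.g. nonnull
      assert((Attrs & B).any());   // overlaps(B) == true
      Attrs &= ~B;                 // remove(B)
      assert(Attrs.test(3) && !Attrs.test(5));
      return 0;
    }
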
bool AttrBuilder::contains(StringRef A) const {
return TargetDepAttrs.find(A) != TargetDepAttrs.end();
}
@@ -1218,7 +1352,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
I = Attribute::AttrKind(I + 1)) {
- if (I == Attribute::Dereferenceable)
+ if (I == Attribute::Dereferenceable ||
+ I == Attribute::DereferenceableOrNull)
continue;
if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
Attrs[I] = true;
@@ -1238,7 +1373,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
//===----------------------------------------------------------------------===//
/// \brief Which attributes cannot be applied to a type.
-AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
+AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
AttrBuilder Incompatible;
if (!Ty->isIntegerTy())
@@ -1254,10 +1389,11 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
.addAttribute(Attribute::NoCapture)
.addAttribute(Attribute::NonNull)
.addDereferenceableAttr(1) // the int here is ignored
+ .addDereferenceableOrNullAttr(1) // the int here is ignored
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::StructRet)
.addAttribute(Attribute::InAlloca);
- return AttributeSet::get(Ty->getContext(), Index, Incompatible);
+ return Incompatible;
}
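
With typeIncompatible now returning an AttrBuilder, callers can feed it straight into the new removeAttributes overload. A hedged sketch combining the APIs added above; it assumes an LLVM tree containing this patch, and the byte count is arbitrary:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    // Add dereferenceable_or_null(16) at Index, then strip whatever
    // typeIncompatible reports for Ty (sketch, not library code).
    llvm::AttributeSet adjustForType(llvm::LLVMContext &C,
                                     llvm::AttributeSet AS, unsigned Index,
                                     llvm::Type *Ty) {
      llvm::AttrBuilder B;
      B.addDereferenceableOrNullAttr(16);
      AS = AS.addAttributes(C, Index, llvm::AttributeSet::get(C, Index, B));
      return AS.removeAttributes(C, Index,
                                 llvm::AttributeFuncs::typeIncompatible(Ty));
    }
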
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index e3544df..70a5518 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the auto-upgrade helper functions
+// This file implements the auto-upgrade helper functions.
+// This is where deprecated IR intrinsics and other IR features are updated to
+// current specifications.
//
//===----------------------------------------------------------------------===//
@@ -107,19 +109,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
- case 'd': {
- if (Name.startswith("dbg.declare") && F->arg_size() == 2) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare);
- return true;
- }
- if (Name.startswith("dbg.value") && F->arg_size() == 3) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
- return true;
- }
- break;
- }
case 'o':
// We only need to change the name to match the mangling including the
@@ -141,6 +130,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.avx2.pcmpeq.") ||
Name.startswith("x86.avx2.pcmpgt.") ||
Name.startswith("x86.avx.vpermil.") ||
+ Name == "x86.avx.vinsertf128.pd.256" ||
+ Name == "x86.avx.vinsertf128.ps.256" ||
+ Name == "x86.avx.vinsertf128.si.256" ||
+ Name == "x86.avx2.vinserti128" ||
+ Name == "x86.avx.vextractf128.pd.256" ||
+ Name == "x86.avx.vextractf128.ps.256" ||
+ Name == "x86.avx.vextractf128.si.256" ||
+ Name == "x86.avx2.vextracti128" ||
Name == "x86.avx.movnt.dq.256" ||
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
@@ -148,6 +145,23 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.vbroadcast.ss" ||
Name == "x86.avx.vbroadcast.ss.256" ||
Name == "x86.avx.vbroadcast.sd.256" ||
+ Name == "x86.sse2.psll.dq" ||
+ Name == "x86.sse2.psrl.dq" ||
+ Name == "x86.avx2.psll.dq" ||
+ Name == "x86.avx2.psrl.dq" ||
+ Name == "x86.sse2.psll.dq.bs" ||
+ Name == "x86.sse2.psrl.dq.bs" ||
+ Name == "x86.avx2.psll.dq.bs" ||
+ Name == "x86.avx2.psrl.dq.bs" ||
+ Name == "x86.sse41.pblendw" ||
+ Name == "x86.sse41.blendpd" ||
+ Name == "x86.sse41.blendps" ||
+ Name == "x86.avx.blend.pd.256" ||
+ Name == "x86.avx.blend.ps.256" ||
+ Name == "x86.avx2.pblendw" ||
+ Name == "x86.avx2.pblendd.128" ||
+ Name == "x86.avx2.pblendd.256" ||
+ Name == "x86.avx2.vbroadcasti128" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -161,17 +175,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.ptestnzc")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
- // Several blend and other instructions with maskes used the wrong number of
+ // Several blend and other instructions with masks used the wrong number of
// bits.
- if (Name == "x86.sse41.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
- NewFn);
- if (Name == "x86.sse41.blendpd")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
- NewFn);
- if (Name == "x86.sse41.blendps")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
- NewFn);
if (Name == "x86.sse41.insertps")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
@@ -184,24 +189,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
- if (Name == "x86.avx.blend.pd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_pd_256, NewFn);
- if (Name == "x86.avx.blend.ps.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_ps_256, NewFn);
if (Name == "x86.avx.dp.ps.256")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
- if (Name == "x86.avx2.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
- NewFn);
- if (Name == "x86.avx2.pblendd.128")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_128, NewFn);
- if (Name == "x86.avx2.pblendd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_256, NewFn);
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
@@ -243,9 +233,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
- if (unsigned id = F->getIntrinsicID())
- F->setAttributes(Intrinsic::getAttributes(F->getContext(),
- (Intrinsic::ID)id));
+ if (Intrinsic::ID id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
@@ -254,21 +243,78 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
return false;
}
-static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
- if (!DbgNode || Elt >= DbgNode->getNumOperands())
- return nullptr;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt));
+// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
+// to byte shuffles.
+static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
+ Value *Op, unsigned NumLanes,
+ unsigned Shift) {
+ // Each lane is 16 bytes.
+ unsigned NumElts = NumLanes * 16;
+
+ // Bitcast from a 64-bit element type to a byte element type.
+ Op = Builder.CreateBitCast(Op,
+ VectorType::get(Type::getInt8Ty(C), NumElts),
+ "cast");
+ // We'll be shuffling in zeroes.
+ Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
+
+ // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
+ // we'll just return the zero vector.
+ if (Shift < 16) {
+ SmallVector<Constant*, 32> Idxs;
+ // 256-bit version is split into two 16-byte lanes.
+ for (unsigned l = 0; l != NumElts; l += 16)
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = NumElts + i - Shift;
+ if (Idx < NumElts)
+ Idx -= NumElts - 16; // end of lane, switch operand.
+ Idxs.push_back(Builder.getInt32(Idx + l));
+ }
+
+ Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
+ }
+
+ // Bitcast back to a 64-bit element type.
+ return Builder.CreateBitCast(Res,
+ VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
+ "cast");
}
-static MetadataAsValue *getExpression(Value *VarOperand, Function *F) {
- // Old-style DIVariables have an optional expression as the 8th element.
- DIExpression Expr(getNodeField(
- cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8));
- if (!Expr) {
- DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
- Expr = DIB.createExpression();
+// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
+// to byte shuffles.
+static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
+ Value *Op, unsigned NumLanes,
+ unsigned Shift) {
+ // Each lane is 16 bytes.
+ unsigned NumElts = NumLanes * 16;
+
+ // Bitcast from a 64-bit element type to a byte element type.
+ Op = Builder.CreateBitCast(Op,
+ VectorType::get(Type::getInt8Ty(C), NumElts),
+ "cast");
+ // We'll be shuffling in zeroes.
+ Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
+
+ // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
+ // we'll just return the zero vector.
+ if (Shift < 16) {
+ SmallVector<Constant*, 32> Idxs;
+ // 256-bit version is split into two 16-byte lanes.
+ for (unsigned l = 0; l != NumElts; l += 16)
+ for (unsigned i = 0; i != 16; ++i) {
+ unsigned Idx = i + Shift;
+ if (Idx >= 16)
+ Idx += NumElts - 16; // end of lane, switch operand.
+ Idxs.push_back(Builder.getInt32(Idx + l));
+ }
+
+ Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
}
- return MetadataAsValue::get(F->getContext(), Expr);
+
+ // Bitcast back to a 64-bit element type.
+ return Builder.CreateBitCast(Res,
+ VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
+ "cast");
}
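To make the lane/index arithmetic in the two helpers above concrete, here is a dependency-free sketch (hypothetical inputs: one 128-bit lane, byte shift of 4) that reproduces the PSRLDQ mask; indices 4..15 select the surviving source bytes and 16..19 select zeros from the splat operand:

#include <cstdio>

int main() {
  const unsigned NumLanes = 1, Shift = 4; // hypothetical inputs
  const unsigned NumElts = NumLanes * 16; // each lane is 16 bytes
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
      if (Idx >= 16)
        Idx += NumElts - 16; // past end of lane: select from the zero vector
      printf("%u ", Idx + l);
    }
  printf("\n"); // prints: 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  return 0;
}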
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
@@ -361,21 +407,22 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
- else if (Name.startswith("true"))
- Imm = 6;
else if (Name.startswith("false"))
+ Imm = 6;
+ else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
- Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
- CI->getArgOperand(1), Builder.getInt8(Imm));
+ Rep =
+ Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
+ Builder.getInt8(Imm)});
} else if (Name == "llvm.x86.sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
- Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
+ Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
// Replace broadcasts with a series of insertelements.
@@ -384,12 +431,149 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned EltNum = VecTy->getVectorNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
EltTy->getPointerTo());
- Value *Load = Builder.CreateLoad(Cast);
+ Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
Rep = UndefValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
+ } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
+ // Replace vbroadcasts with a vector shuffle.
+ Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
+ PointerType::getUnqual(VT));
+ Value *Load = Builder.CreateLoad(VT, Op);
+ const int Idxs[4] = { 0, 1, 0, 1 };
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ Idxs);
+ } else if (Name == "llvm.x86.sse2.psll.dq") {
+ // 128-bit shift left specified in bits.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
+ Shift / 8); // Shift is in bits.
+ } else if (Name == "llvm.x86.sse2.psrl.dq") {
+ // 128-bit shift right specified in bits.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
+ Shift / 8); // Shift is in bits.
+ } else if (Name == "llvm.x86.avx2.psll.dq") {
+ // 256-bit shift left specified in bits.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
+ Shift / 8); // Shift is in bits.
+ } else if (Name == "llvm.x86.avx2.psrl.dq") {
+ // 256-bit shift right specified in bits.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
+ Shift / 8); // Shift is in bits.
+ } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
+ // 128-bit shift left specified in bytes.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
+ Shift);
+ } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
+ // 128-bit shift right specified in bytes.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
+ Shift);
+ } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
+ // 256-bit shift left specified in bytes.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
+ Shift);
+ } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
+ // 256-bit shift right specified in bytes.
+ unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
+ Shift);
+ } else if (Name == "llvm.x86.sse41.pblendw" ||
+ Name == "llvm.x86.sse41.blendpd" ||
+ Name == "llvm.x86.sse41.blendps" ||
+ Name == "llvm.x86.avx.blend.pd.256" ||
+ Name == "llvm.x86.avx.blend.ps.256" ||
+ Name == "llvm.x86.avx2.pblendw" ||
+ Name == "llvm.x86.avx2.pblendd.128" ||
+ Name == "llvm.x86.avx2.pblendd.256") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ SmallVector<Constant*, 16> Idxs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
+ Idxs.push_back(Builder.getInt32(Idx));
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+ } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
+ Name == "llvm.x86.avx.vinsertf128.ps.256" ||
+ Name == "llvm.x86.avx.vinsertf128.si.256" ||
+ Name == "llvm.x86.avx2.vinserti128") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Mask off the high bits of the immediate value; hardware ignores those.
+ Imm = Imm & 1;
+
+ // Extend the second operand into a vector that is twice as big.
+ Value *UndefV = UndefValue::get(Op1->getType());
+ SmallVector<Constant*, 8> Idxs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Idxs.push_back(Builder.getInt32(i));
+ }
+ Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
+
+ // Insert the second operand into the first operand.
+
+ // Note that there is no guarantee that instruction lowering will actually
+ // produce a vinsertf128 instruction for the created shuffles. In
+ // particular, the 0 immediate case involves no lane changes, so it can
+ // be handled as a blend.
+
+ // Example of shuffle mask for 32-bit elements:
+ // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  //  Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+
+ SmallVector<Constant*, 8> Idxs2;
+ // The low half of the result is either the low half of the 1st operand
+ // or the low half of the 2nd operand (the inserted vector).
+ for (unsigned i = 0; i != NumElts / 2; ++i) {
+ unsigned Idx = Imm ? i : (i + NumElts);
+ Idxs2.push_back(Builder.getInt32(Idx));
+ }
+ // The high half of the result is either the low half of the 2nd operand
+ // (the inserted vector) or the high half of the 1st operand.
+ for (unsigned i = NumElts / 2; i != NumElts; ++i) {
+ unsigned Idx = Imm ? (i + NumElts / 2) : i;
+ Idxs2.push_back(Builder.getInt32(Idx));
+ }
+ Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
+ } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
+ Name == "llvm.x86.avx.vextractf128.ps.256" ||
+ Name == "llvm.x86.avx.vextractf128.si.256" ||
+ Name == "llvm.x86.avx2.vextracti128") {
+ Value *Op0 = CI->getArgOperand(0);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Mask off the high bits of the immediate value; hardware ignores those.
+ Imm = Imm & 1;
+
+ // Get indexes for either the high half or low half of the input vector.
+ SmallVector<Constant*, 4> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = Imm ? (i + NumElts) : i;
+ Idxs[i] = Builder.getInt32(Idx);
+ }
+
+ Value *UndefV = UndefValue::get(Op0->getType());
+ Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
@@ -434,7 +618,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- std::string Name = CI->getName().str();
+ std::string Name = CI->getName();
if (!Name.empty())
CI->setName(Name + ".old");
@@ -442,52 +626,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
- // Upgrade debug intrinsics to use an additional DIExpression argument.
- case Intrinsic::dbg_declare: {
- auto NewCI =
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- getExpression(CI->getArgOperand(1), F), Name);
- NewCI->setDebugLoc(CI->getDebugLoc());
- CI->replaceAllUsesWith(NewCI);
- CI->eraseFromParent();
- return;
- }
- case Intrinsic::dbg_value: {
- auto NewCI = Builder.CreateCall4(
- NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- getExpression(CI->getArgOperand(2), F), Name);
- NewCI->setDebugLoc(CI->getDebugLoc());
- CI->replaceAllUsesWith(NewCI);
- CI->eraseFromParent();
- return;
- }
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
"Mismatch between function args and call args");
- CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
- Builder.getFalse(), Name));
+ CI->replaceAllUsesWith(Builder.CreateCall(
+ NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
CI->eraseFromParent();
return;
case Intrinsic::objectsize:
- CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
- CI->getArgOperand(0),
- CI->getArgOperand(1),
- Name));
+ CI->replaceAllUsesWith(Builder.CreateCall(
+ NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
CI->eraseFromParent();
return;
case Intrinsic::ctpop: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
CI->eraseFromParent();
return;
}
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
- Name));
+ CI->replaceAllUsesWith(
+ Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
CI->eraseFromParent();
return;
@@ -505,34 +668,22 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Old intrinsic, add bitcasts
Value *Arg1 = CI->getArgOperand(1);
- Value *BC0 =
- Builder.CreateBitCast(Arg0,
- VectorType::get(Type::getInt64Ty(C), 2),
- "cast");
- Value *BC1 =
- Builder.CreateBitCast(Arg1,
- VectorType::get(Type::getInt64Ty(C), 2),
- "cast");
+ Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
+
+ Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
+ Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
- CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
+ CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
return;
}
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_dp_ps_256:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
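The blend upgrades earlier in this hunk replace an immediate-controlled blend with a shufflevector whose mask is derived bit-by-bit from the immediate. A dependency-free sketch of that derivation, using hypothetical values (4 elements, Imm = 0b0101):

#include <cstdio>

int main() {
  const unsigned NumElts = 4, Imm = 0x5; // hypothetical blendps-style case
  for (unsigned i = 0; i != NumElts; ++i) {
    // Bit i set: take element i from the second operand (indices NumElts..).
    unsigned Idx = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
    printf("%u ", Idx);
  }
  printf("\n"); // prints: 4 1 6 3
  return 0;
}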
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 98a3062..70ae3c3 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -29,10 +29,6 @@ ValueSymbolTable *BasicBlock::getValueSymbolTable() {
return nullptr;
}
-const DataLayout *BasicBlock::getDataLayout() const {
- return getParent()->getDataLayout();
-}
-
LLVMContext &BasicBlock::getContext() const {
return getType()->getContext();
}
@@ -98,18 +94,18 @@ void BasicBlock::removeFromParent() {
getParent()->getBasicBlockList().remove(this);
}
-void BasicBlock::eraseFromParent() {
- getParent()->getBasicBlockList().erase(this);
+iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
+ return getParent()->getBasicBlockList().erase(this);
}
-/// moveBefore - Unlink this basic block from its current function and
+/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right before MovePos.
void BasicBlock::moveBefore(BasicBlock *MovePos) {
MovePos->getParent()->getBasicBlockList().splice(MovePos,
getParent()->getBasicBlockList(), this);
}
-/// moveAfter - Unlink this basic block from its current function and
+/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) {
Function::iterator I = MovePos;
@@ -117,6 +113,13 @@ void BasicBlock::moveAfter(BasicBlock *MovePos) {
getParent()->getBasicBlockList(), this);
}
+const Module *BasicBlock::getModule() const {
+ return getParent()->getParent();
+}
+
+Module *BasicBlock::getModule() {
+ return getParent()->getParent();
+}
TerminatorInst *BasicBlock::getTerminator() {
if (InstList.empty()) return nullptr;
@@ -210,7 +213,7 @@ void BasicBlock::dropAllReferences() {
I->dropAllReferences();
}
-/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// If this basic block has a single predecessor block,
/// return the block, otherwise return a null pointer.
BasicBlock *BasicBlock::getSinglePredecessor() {
pred_iterator PI = pred_begin(this), E = pred_end(this);
@@ -220,7 +223,7 @@ BasicBlock *BasicBlock::getSinglePredecessor() {
return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
-/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// If this basic block has a unique predecessor block,
/// return the block, otherwise return a null pointer.
/// Note that unique predecessor doesn't mean single edge, there can be
/// multiple edges from the unique predecessor to this block (for example
@@ -239,7 +242,29 @@ BasicBlock *BasicBlock::getUniquePredecessor() {
return PredBB;
}
-/// removePredecessor - This method is used to notify a BasicBlock that the
+BasicBlock *BasicBlock::getSingleSuccessor() {
+ succ_iterator SI = succ_begin(this), E = succ_end(this);
+ if (SI == E) return nullptr; // no successors
+ BasicBlock *TheSucc = *SI;
+ ++SI;
+ return (SI == E) ? TheSucc : nullptr /* multiple successors */;
+}
+
+BasicBlock *BasicBlock::getUniqueSuccessor() {
+  succ_iterator SI = succ_begin(this), E = succ_end(this);
+  if (SI == E) return nullptr; // No successors
+  BasicBlock *SuccBB = *SI;
+  ++SI;
+  for (; SI != E; ++SI) {
+    if (*SI != SuccBB)
+      return nullptr;
+    // The same successor appears multiple times in the successor list.
+    // This is OK.
+  }
+  return SuccBB;
+}
+
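The new successor accessors mirror the existing single/unique predecessor pair: a conditional branch whose two edges target the same block has no single successor but does have a unique one. A minimal usage sketch (assumes a BasicBlock *BB ending in such a branch):

// BB ends in: br i1 %c, label %T, label %T  -- two edges, one target.
BasicBlock *S = BB->getSingleSuccessor(); // nullptr: more than one edge
BasicBlock *U = BB->getUniqueSuccessor(); // %T: every edge agrees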
+/// This method is used to notify a BasicBlock that the
/// specified Predecessor of the block is no longer able to reach it. This is
/// actually not used to update the Predecessor list, but is actually used to
/// update the PHI nodes that reside in the block. Note that this should be
@@ -316,7 +341,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
}
-/// splitBasicBlock - This splits a basic block into two at the specified
+/// This splits a basic block into two at the specified
/// instruction. Note that all instructions BEFORE the specified iterator stay
/// as part of the original basic block, an unconditional branch is added to
/// the new BB, and the rest of the instructions in the BB are moved to the new
@@ -387,14 +412,13 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
}
}
-/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
+/// Return true if this basic block is a landing pad. I.e., it's
/// the destination of the 'unwind' edge of an invoke instruction.
bool BasicBlock::isLandingPad() const {
return isa<LandingPadInst>(getFirstNonPHI());
}
-/// getLandingPadInst() - Return the landingpad instruction associated with
-/// the landing pad.
+/// Return the landingpad instruction associated with the landing pad.
LandingPadInst *BasicBlock::getLandingPadInst() {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index 39d9a1d..3f64c43 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -132,7 +132,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (ElTy == DPTy->getElementType())
// This GEP is inbounds because all indices are zero.
- return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
+ return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(),
+ V, IdxList);
}
// Handle casts from one vector constant to another. We know that the src
@@ -169,7 +170,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
// be the same. Consequently, we just fold to V.
return V;
- if (DestTy->isFloatingPointTy())
+ // See note below regarding the PPC_FP128 restriction.
+ if (DestTy->isFloatingPointTy() && !DestTy->isPPC_FP128Ty())
return ConstantFP::get(DestTy->getContext(),
APFloat(DestTy->getFltSemantics(),
CI->getValue()));
@@ -179,9 +181,19 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
}
// Handle ConstantFP input: FP -> Integral.
- if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) {
+ // PPC_FP128 is really the sum of two consecutive doubles, where the first
+ // double is always stored first in memory, regardless of the target
+ // endianness. The memory layout of i128, however, depends on the target
+ // endianness, and so we can't fold this without target endianness
+ // information. This should instead be handled by
+ // Analysis/ConstantFolding.cpp
+ if (FP->getType()->isPPC_FP128Ty())
+ return nullptr;
+
return ConstantInt::get(FP->getContext(),
FP->getValueAPF().bitcastToAPInt());
+ }
return nullptr;
}
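For every other FP type the fold is a pure bit reinterpretation, which is why no target information is needed there. A hand-computed illustration using the APFloat constructor form seen later in this patch:

APInt Bits(32, 0x40490FDBu);          // IEEE-754 single bit pattern
APFloat F(APFloat::IEEEsingle, Bits); // ~3.14159265f
// The reverse direction is FP->getValueAPF().bitcastToAPInt().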
@@ -620,8 +632,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
- Type *Ty =
- cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ GEPOperator *GEPO = cast<GEPOperator>(CE);
+ Type *Ty = GEPO->getSourceElementType();
if (CE->getNumOperands() == 2) {
// Handle a sizeof-like expression.
Constant *Idx = CE->getOperand(1);
@@ -777,11 +789,10 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
return UndefValue::get(Val->getType()->getVectorElementType());
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
- uint64_t Index = CIdx->getZExtValue();
// ee({w,x,y,z}, wrong_value) -> undef
- if (Index >= Val->getType()->getVectorNumElements())
+ if (CIdx->uge(Val->getType()->getVectorNumElements()))
return UndefValue::get(Val->getType()->getVectorElementType());
- return Val->getAggregateElement(Index);
+ return Val->getAggregateElement(CIdx->getZExtValue());
}
return nullptr;
}
@@ -789,23 +800,30 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
Constant *Elt,
Constant *Idx) {
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Val->getType());
+
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return nullptr;
- const APInt &IdxVal = CIdx->getValue();
-
+
+ unsigned NumElts = Val->getType()->getVectorNumElements();
+ if (CIdx->uge(NumElts))
+ return UndefValue::get(Val->getType());
+
SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(Val->getContext(), 32);
- for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
+ Result.reserve(NumElts);
+ auto *Ty = Type::getInt32Ty(Val->getContext());
+ uint64_t IdxVal = CIdx->getZExtValue();
+ for (unsigned i = 0; i != NumElts; ++i) {
if (i == IdxVal) {
Result.push_back(Elt);
continue;
}
- Constant *C =
- ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i));
+ Constant *C = ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i));
Result.push_back(C);
}
-
+
return ConstantVector::get(Result);
}
@@ -1273,15 +1291,17 @@ static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
return -2; // don't know!
- // Ok, we have two differing integer indices. Sign extend them to be the same
- // type. Long is always big enough, so we use it.
- if (!C1->getType()->isIntegerTy(64))
- C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+ // We cannot compare the indices if they don't fit in an int64_t.
+ if (cast<ConstantInt>(C1)->getValue().getActiveBits() > 64 ||
+ cast<ConstantInt>(C2)->getValue().getActiveBits() > 64)
+ return -2; // don't know!
- if (!C2->getType()->isIntegerTy(64))
- C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
+  // Ok, we have two differing integer indices that both fit in an int64_t.
+  // Sign-extend both to int64_t and compare the values.
+ int64_t C1Val = cast<ConstantInt>(C1)->getSExtValue();
+ int64_t C2Val = cast<ConstantInt>(C2)->getSExtValue();
- if (C1 == C2) return 0; // They are equal
+ if (C1Val == C2Val) return 0; // They are equal
// If the type being indexed over is really just a zero sized type, there is
// no pointer difference being made here.
@@ -1290,8 +1310,7 @@ static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
// If they are really different, now that they are the same type, then we
// found a difference!
- if (cast<ConstantInt>(C1)->getSExtValue() <
- cast<ConstantInt>(C2)->getSExtValue())
+ if (C1Val < C2Val)
return -1;
else
return 1;
@@ -1317,7 +1336,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
if (!isa<ConstantExpr>(V1)) {
if (!isa<ConstantExpr>(V2)) {
- // We distilled thisUse the standard constant folder for a few cases
+ // Simple case, use the standard constant folder.
ConstantInt *R = nullptr;
R = dyn_cast<ConstantInt>(
ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
@@ -1366,7 +1385,7 @@ static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
if (GV->hasExternalWeakLinkage() || GV->hasWeakAnyLinkage())
return true;
if (const auto *GVar = dyn_cast<GlobalVariable>(GV)) {
- Type *Ty = GVar->getType()->getPointerElementType();
+ Type *Ty = GVar->getValueType();
// A global with opaque type might end up being zero sized.
if (!Ty->isSized())
return true;
@@ -1655,15 +1674,22 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Handle some degenerate cases first
if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ CmpInst::Predicate Predicate = CmpInst::Predicate(pred);
+ bool isIntegerPredicate = ICmpInst::isIntPredicate(Predicate);
// For EQ and NE, we can always pick a value for the undef to make the
// predicate pass or fail, so we can return undef.
- // Also, if both operands are undef, we can return undef.
- if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
- (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ // Also, if both operands are undef, we can return undef for int comparison.
+ if (ICmpInst::isEquality(Predicate) || (isIntegerPredicate && C1 == C2))
return UndefValue::get(ResultTy);
- // Otherwise, pick the same value as the non-undef operand, and fold
- // it to true or false.
- return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+
+ // Otherwise, for integer compare, pick the same value as the non-undef
+ // operand, and fold it to true or false.
+ if (isIntegerPredicate)
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+
+  // Choosing NaN for the undef will always make unordered comparisons
+  // succeed and ordered comparisons fail.
+ return ConstantInt::get(ResultTy, CmpInst::isUnordered(Predicate));
}
// icmp eq/ne(null,GV) -> false/true
@@ -1779,7 +1805,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantVector::get(ResElts);
}
- if (C1->getType()->isFloatingPointTy()) {
+ if (C1->getType()->isFloatingPointTy() &&
+ // Only call evaluateFCmpRelation if we have a constant expr to avoid
+ // infinite recursive loop
+ (isa<ConstantExpr>(C1) || isa<ConstantExpr>(C2))) {
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
switch (evaluateFCmpRelation(C1, C2)) {
default: llvm_unreachable("Unknown relation!");
@@ -1999,7 +2028,7 @@ static bool isIndexInRangeOfSequentialType(const SequentialType *STy,
}
template<typename IndexTy>
-static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
bool inBounds,
ArrayRef<IndexTy> Idxs) {
if (Idxs.empty()) return C;
@@ -2009,7 +2038,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (isa<UndefValue>(C)) {
PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ Type *Ty = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs);
assert(Ty && "Invalid indices for GEP!");
return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
}
@@ -2023,7 +2053,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
}
if (isNull) {
PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ Type *Ty = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs);
assert(Ty && "Invalid indices for GEP!");
return ConstantPointerNull::get(PointerType::get(Ty,
Ptr->getAddressSpace()));
@@ -2069,8 +2100,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (PerformFold) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
- for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
- NewIndices.push_back(CE->getOperand(i));
+ NewIndices.append(CE->op_begin() + 1, CE->op_end() - 1);
// Add the last index of the source with the first index of the new GEP.
// Make sure to handle the case when they are actually different types.
@@ -2079,9 +2109,15 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (!Idx0->isNullValue()) {
Type *IdxTy = Combined->getType();
if (IdxTy != Idx0->getType()) {
- Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
- Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
- Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
+ unsigned CommonExtendedWidth =
+ std::max(IdxTy->getIntegerBitWidth(),
+ Idx0->getType()->getIntegerBitWidth());
+ CommonExtendedWidth = std::max(CommonExtendedWidth, 64U);
+
+ Type *CommonTy =
+ Type::getIntNTy(IdxTy->getContext(), CommonExtendedWidth);
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, CommonTy);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, CommonTy);
Combined = ConstantExpr::get(Instruction::Add, C1, C2);
} else {
Combined =
@@ -2091,10 +2127,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
NewIndices.push_back(Combined);
NewIndices.append(Idxs.begin() + 1, Idxs.end());
- return
- ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
- inBounds &&
- cast<GEPOperator>(CE)->isInBounds());
+ return ConstantExpr::getGetElementPtr(
+ cast<GEPOperator>(CE)->getSourceElementType(), CE->getOperand(0),
+ NewIndices, inBounds && cast<GEPOperator>(CE)->isInBounds());
}
}
@@ -2119,8 +2154,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (SrcArrayTy && DstArrayTy
&& SrcArrayTy->getElementType() == DstArrayTy->getElementType()
&& SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
- return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
- Idxs, inBounds);
+ return ConstantExpr::getGetElementPtr(
+ SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds);
}
}
}
@@ -2130,9 +2165,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// factored out into preceding dimensions.
bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
- Type *Ty = C->getType();
- Type *Prev = nullptr;
- for (unsigned i = 0, e = Idxs.size(); i != e;
+ Type *Ty = PointeeTy;
+ Type *Prev = C->getType();
+ for (unsigned i = 1, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty))
@@ -2154,14 +2189,20 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+ unsigned CommonExtendedWidth =
+ std::max(PrevIdx->getType()->getIntegerBitWidth(),
+ Div->getType()->getIntegerBitWidth());
+ CommonExtendedWidth = std::max(CommonExtendedWidth, 64U);
+
       // Before adding, extend both operands to the common width to avoid
// overflow trouble.
- if (!PrevIdx->getType()->isIntegerTy(64))
- PrevIdx = ConstantExpr::getSExt(PrevIdx,
- Type::getInt64Ty(Div->getContext()));
- if (!Div->getType()->isIntegerTy(64))
- Div = ConstantExpr::getSExt(Div,
- Type::getInt64Ty(Div->getContext()));
+ if (!PrevIdx->getType()->isIntegerTy(CommonExtendedWidth))
+ PrevIdx = ConstantExpr::getSExt(
+ PrevIdx,
+ Type::getIntNTy(Div->getContext(), CommonExtendedWidth));
+ if (!Div->getType()->isIntegerTy(CommonExtendedWidth))
+ Div = ConstantExpr::getSExt(
+ Div, Type::getIntNTy(Div->getContext(), CommonExtendedWidth));
NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
} else {
@@ -2180,7 +2221,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (!NewIdxs.empty()) {
for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
- return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
+ return ConstantExpr::getGetElementPtr(PointeeTy, C, NewIdxs, inBounds);
}
// If all indices are known integers and normalized, we can do a simple
@@ -2188,7 +2229,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (!Unknown && !inBounds)
if (auto *GV = dyn_cast<GlobalVariable>(C))
if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs))
- return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
+ return ConstantExpr::getInBoundsGetElementPtr(PointeeTy, C, Idxs);
return nullptr;
}
@@ -2196,11 +2237,27 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
bool inBounds,
ArrayRef<Constant *> Idxs) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+ return ConstantFoldGetElementPtrImpl(
+ cast<PointerType>(C->getType()->getScalarType())->getElementType(), C,
+ inBounds, Idxs);
}
Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
bool inBounds,
ArrayRef<Value *> Idxs) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+ return ConstantFoldGetElementPtrImpl(
+ cast<PointerType>(C->getType()->getScalarType())->getElementType(), C,
+ inBounds, Idxs);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C,
+ bool inBounds,
+ ArrayRef<Constant *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C,
+ bool inBounds,
+ ArrayRef<Value *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs);
}
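A hedged usage sketch of the new explicit-type overloads (assumes a pointer-typed Constant *C and an index list Idxs in scope): the legacy overloads above now derive the pointee type and forward, so the two calls below are equivalent:

// Legacy form: pointee type re-derived from C's pointer type.
Constant *A = ConstantFoldGetElementPtr(C, /*inBounds=*/true, Idxs);
// New form: the source element type is passed explicitly.
Type *Ty =
    cast<PointerType>(C->getType()->getScalarType())->getElementType();
Constant *B = ConstantFoldGetElementPtr(Ty, C, /*inBounds=*/true, Idxs);
// A and B fold to the same constant.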
diff --git a/contrib/llvm/lib/IR/ConstantFold.h b/contrib/llvm/lib/IR/ConstantFold.h
index a516abe..42a9c6b 100644
--- a/contrib/llvm/lib/IR/ConstantFold.h
+++ b/contrib/llvm/lib/IR/ConstantFold.h
@@ -51,6 +51,10 @@ namespace llvm {
ArrayRef<Constant *> Idxs);
Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
ArrayRef<Value *> Idxs);
+ Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
+ ArrayRef<Constant *> Idxs);
+ Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
+ ArrayRef<Value *> Idxs);
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp
index f8e9ba4..91095cf 100644
--- a/contrib/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm/lib/IR/ConstantRange.cpp
@@ -49,14 +49,15 @@ ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U)
"Lower == Upper, but they aren't min or max value!");
}
-ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
- const ConstantRange &CR) {
+ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
+ const ConstantRange &CR) {
if (CR.isEmptySet())
return CR;
uint32_t W = CR.getBitWidth();
switch (Pred) {
- default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()");
+ default:
+ llvm_unreachable("Invalid ICmp predicate to makeAllowedICmpRegion()");
case CmpInst::ICMP_EQ:
return CR;
case CmpInst::ICMP_NE:
@@ -114,6 +115,16 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
}
}
+ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred,
+ const ConstantRange &CR) {
+  // Follows from De Morgan's laws:
+ //
+ // ~(~A union ~B) == A intersect B.
+ //
+ return makeAllowedICmpRegion(CmpInst::getInversePredicate(Pred), CR)
+ .inverse();
+}
+
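A hand-computed instance of the identity, assuming 8-bit operands: for a value x that must satisfy x ult y for every y in [8, 16), x has to stay below the range minimum:

ConstantRange CR(APInt(8, 8), APInt(8, 16));
ConstantRange Sat =
    ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_ULT, CR);
// Allowed region of the inverse predicate (uge) is the wrapped set [8, 0);
// its inverse, and thus Sat, is [0, 8).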
/// isFullSet - Return true if this set contains all of the elements possible
/// for this data-type
bool ConstantRange::isFullSet() const {
@@ -587,6 +598,13 @@ ConstantRange::multiply(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+  // Multiplication is signedness-independent. However, different ranges can be
+ // obtained depending on how the input ranges are treated. These different
+ // ranges are all conservatively correct, but one might be better than the
+ // other. We calculate two ranges; one treating the inputs as unsigned
+ // and the other signed, then return the smallest of these ranges.
+
+ // Unsigned range first.
APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
@@ -594,7 +612,26 @@ ConstantRange::multiply(const ConstantRange &Other) const {
ConstantRange Result_zext = ConstantRange(this_min * Other_min,
this_max * Other_max + 1);
- return Result_zext.truncate(getBitWidth());
+ ConstantRange UR = Result_zext.truncate(getBitWidth());
+
+ // Now the signed range. Because we could be dealing with negative numbers
+ // here, the lower bound is the smallest of the cartesian product of the
+ // lower and upper ranges; for example:
+ // [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6.
+ // Similarly for the upper bound, swapping min for max.
+
+ this_min = getSignedMin().sext(getBitWidth() * 2);
+ this_max = getSignedMax().sext(getBitWidth() * 2);
+ Other_min = Other.getSignedMin().sext(getBitWidth() * 2);
+ Other_max = Other.getSignedMax().sext(getBitWidth() * 2);
+
+ auto L = {this_min * Other_min, this_min * Other_max,
+ this_max * Other_min, this_max * Other_max};
+ auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+ ConstantRange Result_sext(std::min(L, Compare), std::max(L, Compare) + 1);
+ ConstantRange SR = Result_sext.truncate(getBitWidth());
+
+ return UR.getSetSize().ult(SR.getSetSize()) ? UR : SR;
}
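A hand-computed example of why keeping both estimates pays off, assuming 8-bit ranges: [-2, 4) x [3, 5) wraps when viewed as unsigned (it covers both 0 and 255), so the unsigned estimate degrades to the full set, while the signed candidates {-6, -8, 9, 12} give the tight range [-8, 13):

ConstantRange A(APInt(8, -2, /*isSigned=*/true), APInt(8, 4));
ConstantRange B(APInt(8, 3), APInt(8, 5));
ConstantRange P = A.multiply(B);
// Unsigned estimate: full set. Signed estimate: [-8, 13). P is the latter.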
ConstantRange
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 44052b2..fb83ebb 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -663,6 +663,17 @@ Constant *ConstantFP::get(Type *Ty, StringRef Str) {
return C;
}
+Constant *ConstantFP::getNaN(Type *Ty, bool Negative, unsigned Type) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ APFloat NaN = APFloat::getNaN(Semantics, Negative, Type);
+ Constant *C = get(Ty->getContext(), NaN);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
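A short usage sketch for the new helper (assumes an LLVMContext &Ctx in scope; both trailing arguments are spelled out since any declaration defaults are not shown here):

// Scalar quiet NaN of type float.
Constant *QNaN =
    ConstantFP::getNaN(Type::getFloatTy(Ctx), /*Negative=*/false, /*Type=*/0);
// <4 x float>: the same NaN splatted across the vector.
Constant *VNaN = ConstantFP::getNaN(
    VectorType::get(Type::getFloatTy(Ctx), 4), /*Negative=*/false, /*Type=*/0);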
Constant *ConstantFP::getNegativeZero(Type *Ty) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true);
@@ -911,23 +922,25 @@ Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant*> V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts;
+ SmallVector<uint32_t, 16> Elts;
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToFloat());
+ Elts.push_back(
+ CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
else
break;
if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
+ return ConstantDataArray::getFP(C->getContext(), Elts);
} else if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts;
+ SmallVector<uint64_t, 16> Elts;
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToDouble());
+ Elts.push_back(
+ CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
else
break;
if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
+ return ConstantDataArray::getFP(C->getContext(), Elts);
}
}
}
@@ -1097,23 +1110,25 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts;
+ SmallVector<uint32_t, 16> Elts;
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToFloat());
+ Elts.push_back(
+ CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
else
break;
if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
+ return ConstantDataVector::getFP(C->getContext(), Elts);
} else if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts;
+ SmallVector<uint64_t, 16> Elts;
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToDouble());
+ Elts.push_back(
+ CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
else
break;
if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
+ return ConstantDataVector::getFP(C->getContext(), Elts);
}
}
}
@@ -1211,11 +1226,9 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
bool OnlyIfReduced) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
- bool AnyChange = Ty != getType();
- for (unsigned i = 0; i != Ops.size(); ++i)
- AnyChange |= Ops[i] != getOperand(i);
- if (!AnyChange) // No operands changed, return self.
+ // If no operands changed return self.
+ if (Ty == getType() && std::equal(Ops.begin(), Ops.end(), op_begin()))
return const_cast<ConstantExpr*>(this);
Type *OnlyIfReducedTy = OnlyIfReduced ? Ty : nullptr;
@@ -1250,7 +1263,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2],
OnlyIfReducedTy);
case Instruction::GetElementPtr:
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1),
cast<GEPOperator>(this)->isInBounds(),
OnlyIfReducedTy);
case Instruction::ICmp:
@@ -1923,7 +1936,7 @@ Constant *ConstantExpr::getSizeOf(Type* Ty) {
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -1937,7 +1950,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
- Constant *GEP = getGetElementPtr(NullPtr, Indices);
+ Constant *GEP = getGetElementPtr(AligningTy, NullPtr, Indices);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -1955,7 +1968,7 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
FieldNo
};
Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
return getPtrToInt(GEP,
Type::getInt64Ty(Ty->getContext()));
}
@@ -1999,19 +2012,24 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2,
return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}
-Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
- bool InBounds, Type *OnlyIfReducedTy) {
- assert(C->getType()->isPtrOrPtrVectorTy() &&
- "Non-pointer type for constant GetElementPtr expression");
+Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
+ ArrayRef<Value *> Idxs, bool InBounds,
+ Type *OnlyIfReducedTy) {
+ if (!Ty)
+ Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType();
+ else
+ assert(
+ Ty ==
+ cast<PointerType>(C->getType()->getScalarType())->getContainedType(0u));
- if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
+ if (Constant *FC = ConstantFoldGetElementPtr(Ty, C, InBounds, Idxs))
return FC; // Fold a few common cases.
// Get the result type of the getelementptr!
- Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
- assert(Ty && "GEP indices invalid!");
+ Type *DestTy = GetElementPtrInst::getIndexedType(Ty, Idxs);
+ assert(DestTy && "GEP indices invalid!");
unsigned AS = C->getType()->getPointerAddressSpace();
- Type *ReqTy = Ty->getPointerTo(AS);
+ Type *ReqTy = DestTy->getPointerTo(AS);
if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
@@ -2032,7 +2050,8 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
ArgVec.push_back(cast<Constant>(Idxs[i]));
}
const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- InBounds ? GEPOperator::IsInBounds : 0);
+ InBounds ? GEPOperator::IsInBounds : 0, None,
+ Ty);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
@@ -2362,19 +2381,22 @@ const char *ConstantExpr::getOpcodeName() const {
return Instruction::getOpcodeName(getOpcode());
}
-
-
-GetElementPtrConstantExpr::
-GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::GetElementPtr,
- OperandTraits<GetElementPtrConstantExpr>::op_end(this)
- - (IdxList.size()+1), IdxList.size()+1) {
- OperandList[0] = C;
+GetElementPtrConstantExpr::GetElementPtrConstantExpr(
+ Type *SrcElementTy, Constant *C, ArrayRef<Constant *> IdxList, Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this) -
+ (IdxList.size() + 1),
+ IdxList.size() + 1),
+ SrcElementTy(SrcElementTy) {
+ Op<0>() = C;
for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
OperandList[i+1] = IdxList[i];
}
+Type *GetElementPtrConstantExpr::getSourceElementType() const {
+ return SrcElementTy;
+}
+
//===----------------------------------------------------------------------===//
// ConstantData* implementations
@@ -2544,7 +2566,31 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+}
+
+/// getFP() constructors - Return a constant of array type whose element
+/// count matches the ArrayRef and whose floating-point element type matches
+/// the bit width of the passed-in elements (i.e. half for 16 bits, float
+/// for 32 bits, double for 64 bits). Note that this can return a
+/// ConstantAggregateZero object.
+Constant *ConstantDataArray::getFP(LLVMContext &Context,
+ ArrayRef<uint16_t> Elts) {
+  Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+}
+Constant *ConstantDataArray::getFP(LLVMContext &Context,
+ ArrayRef<uint32_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+}
+Constant *ConstantDataArray::getFP(LLVMContext &Context,
+ ArrayRef<uint64_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
}
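The call sites converted earlier in this patch show the intended pattern for the new getFP() entry points: carry float payloads as raw bit patterns and let the integer width select the element type. A condensed sketch (assumes an LLVMContext &Ctx):

// 32-bit patterns produce an [N x float] constant data array.
SmallVector<uint32_t, 4> Bits;
for (float F : {1.0f, 2.0f})
  Bits.push_back(APFloat(F).bitcastToAPInt().getLimitedValue());
Constant *CDA = ConstantDataArray::getFP(Ctx, Bits);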
/// getString - This method constructs a CDS and initializes it with a text
@@ -2597,7 +2643,31 @@ Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
+}
+
+/// getFP() constructors - Return a constant of vector type whose element
+/// count matches the ArrayRef and whose floating-point element type matches
+/// the bit width of the passed-in elements (i.e. half for 16 bits, float
+/// for 32 bits, double for 64 bits). Note that this can return a
+/// ConstantAggregateZero object.
+Constant *ConstantDataVector::getFP(LLVMContext &Context,
+ ArrayRef<uint16_t> Elts) {
+ Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty);
+}
+Constant *ConstantDataVector::getFP(LLVMContext &Context,
+ ArrayRef<uint32_t> Elts) {
+ Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty);
+}
+Constant *ConstantDataVector::getFP(LLVMContext &Context,
+ ArrayRef<uint64_t> Elts) {
+ Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty);
}
Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
@@ -2623,13 +2693,14 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
- return get(V->getContext(), Elts);
+ SmallVector<uint32_t, 16> Elts(
+ NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
+ return getFP(V->getContext(), Elts);
}
if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts(NumElts,
- CFP->getValueAPF().convertToDouble());
- return get(V->getContext(), Elts);
+ SmallVector<uint64_t, 16> Elts(
+ NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue());
+ return getFP(V->getContext(), Elts);
}
}
return ConstantVector::getSplat(NumElts, V);
@@ -2667,13 +2738,13 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
default:
llvm_unreachable("Accessor can only be used when element is float/double!");
case Type::FloatTyID: {
- const float *FloatPrt = reinterpret_cast<const float *>(EltPtr);
- return APFloat(*const_cast<float *>(FloatPrt));
- }
+ auto EltVal = *reinterpret_cast<const uint32_t *>(EltPtr);
+ return APFloat(APFloat::IEEEsingle, APInt(32, EltVal));
+ }
case Type::DoubleTyID: {
- const double *DoublePtr = reinterpret_cast<const double *>(EltPtr);
- return APFloat(*const_cast<double *>(DoublePtr));
- }
+ auto EltVal = *reinterpret_cast<const uint64_t *>(EltPtr);
+ return APFloat(APFloat::IEEEdouble, APInt(64, EltVal));
+ }
}
}
@@ -2918,10 +2989,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
}
Instruction *ConstantExpr::getAsInstruction() {
- SmallVector<Value*,4> ValueOperands;
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- ValueOperands.push_back(cast<Value>(I));
-
+ SmallVector<Value *, 4> ValueOperands(op_begin(), op_end());
ArrayRef<Value*> Ops(ValueOperands);
switch (getOpcode()) {
@@ -2953,12 +3021,14 @@ Instruction *ConstantExpr::getAsInstruction() {
case Instruction::ShuffleVector:
return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (cast<GEPOperator>(this)->isInBounds())
- return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1));
- else
- return GetElementPtrInst::Create(Ops[0], Ops.slice(1));
-
+ case Instruction::GetElementPtr: {
+ const auto *GO = cast<GEPOperator>(this);
+ if (GO->isInBounds())
+ return GetElementPtrInst::CreateInBounds(GO->getSourceElementType(),
+ Ops[0], Ops.slice(1));
+ return GetElementPtrInst::Create(GO->getSourceElementType(), Ops[0],
+ Ops.slice(1));
+ }
case Instruction::ICmp:
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index 571dec2..f3ddcd7 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -34,7 +34,7 @@ namespace llvm {
/// behind the scenes to implement unary constant exprs.
class UnaryConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -51,7 +51,7 @@ public:
/// behind the scenes to implement binary constant exprs.
class BinaryConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
@@ -72,7 +72,7 @@ public:
/// behind the scenes to implement select constant exprs.
class SelectConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly three operands
void *operator new(size_t s) {
@@ -93,14 +93,14 @@ public:
/// extractelement constant exprs.
class ExtractElementConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
return User::operator new(s, 2);
}
ExtractElementConstantExpr(Constant *C1, Constant *C2)
- : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
Instruction::ExtractElement, &Op<0>(), 2) {
Op<0>() = C1;
Op<1>() = C2;
@@ -114,14 +114,14 @@ public:
/// insertelement constant exprs.
class InsertElementConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly three operands
void *operator new(size_t s) {
return User::operator new(s, 3);
}
InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
&Op<0>(), 3) {
Op<0>() = C1;
Op<1>() = C2;
@@ -136,7 +136,7 @@ public:
/// shufflevector constant exprs.
class ShuffleVectorConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly three operands
void *operator new(size_t s) {
@@ -146,7 +146,7 @@ public:
: ConstantExpr(VectorType::get(
cast<VectorType>(C1->getType())->getElementType(),
cast<VectorType>(C3->getType())->getNumElements()),
- Instruction::ShuffleVector,
+ Instruction::ShuffleVector,
&Op<0>(), 3) {
Op<0>() = C1;
Op<1>() = C2;
@@ -161,7 +161,7 @@ public:
/// extractvalue constant exprs.
class ExtractValueConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -186,7 +186,7 @@ public:
/// insertvalue constant exprs.
class InsertValueConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly one operand
void *operator new(size_t s) {
@@ -207,23 +207,32 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
-
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr : public ConstantExpr {
+ Type *SrcElementTy;
void anchor() override;
- GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
- Type *DestTy);
+ GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C,
+ ArrayRef<Constant *> IdxList, Type *DestTy);
+
public:
static GetElementPtrConstantExpr *Create(Constant *C,
ArrayRef<Constant*> IdxList,
Type *DestTy,
unsigned Flags) {
- GetElementPtrConstantExpr *Result =
- new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ return Create(
+ cast<PointerType>(C->getType()->getScalarType())->getElementType(), C,
+ IdxList, DestTy, Flags);
+ }
+ static GetElementPtrConstantExpr *Create(Type *SrcElementTy, Constant *C,
+ ArrayRef<Constant *> IdxList,
+ Type *DestTy, unsigned Flags) {
+ GetElementPtrConstantExpr *Result = new (IdxList.size() + 1)
+ GetElementPtrConstantExpr(SrcElementTy, C, IdxList, DestTy);
Result->SubclassOptionalData = Flags;
return Result;
}
+ Type *getSourceElementType() const;
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -233,7 +242,7 @@ public:
// needed in order to store the predicate value for these instructions.
class CompareConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t, unsigned) = delete;
public:
// allocate space for exactly two operands
void *operator new(size_t s) {
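LLVM_DELETED_FUNCTION was a portability macro for compilers without C++11 deleted functions; these hunks swap in the keyword now that every supported host compiler has it. The resulting pattern, reduced to a self-contained sketch (needs <new> and <cstddef>):

    struct TwoOperandNode {
      // Forbid the placement form; only the fixed-arity allocator is legal.
      void *operator new(size_t, unsigned) = delete;
      void *operator new(size_t S) { return ::operator new(S); }
    };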
@@ -251,65 +260,54 @@ public:
};
template <>
-struct OperandTraits<UnaryConstantExpr> :
- public FixedNumOperandTraits<UnaryConstantExpr, 1> {
-};
+struct OperandTraits<UnaryConstantExpr>
+ : public FixedNumOperandTraits<UnaryConstantExpr, 1> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
template <>
-struct OperandTraits<BinaryConstantExpr> :
- public FixedNumOperandTraits<BinaryConstantExpr, 2> {
-};
+struct OperandTraits<BinaryConstantExpr>
+ : public FixedNumOperandTraits<BinaryConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
template <>
-struct OperandTraits<SelectConstantExpr> :
- public FixedNumOperandTraits<SelectConstantExpr, 3> {
-};
+struct OperandTraits<SelectConstantExpr>
+ : public FixedNumOperandTraits<SelectConstantExpr, 3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
template <>
-struct OperandTraits<ExtractElementConstantExpr> :
- public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
-};
+struct OperandTraits<ExtractElementConstantExpr>
+ : public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
template <>
-struct OperandTraits<InsertElementConstantExpr> :
- public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
-};
+struct OperandTraits<InsertElementConstantExpr>
+ : public FixedNumOperandTraits<InsertElementConstantExpr, 3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
template <>
-struct OperandTraits<ShuffleVectorConstantExpr> :
- public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
-};
+struct OperandTraits<ShuffleVectorConstantExpr>
+ : public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
template <>
-struct OperandTraits<ExtractValueConstantExpr> :
- public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
-};
+struct OperandTraits<ExtractValueConstantExpr>
+ : public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
template <>
-struct OperandTraits<InsertValueConstantExpr> :
- public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
-};
+struct OperandTraits<InsertValueConstantExpr>
+ : public FixedNumOperandTraits<InsertValueConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
template <>
-struct OperandTraits<GetElementPtrConstantExpr> :
- public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
-};
+struct OperandTraits<GetElementPtrConstantExpr>
+ : public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
-
template <>
-struct OperandTraits<CompareConstantExpr> :
- public FixedNumOperandTraits<CompareConstantExpr, 2> {
-};
+struct OperandTraits<CompareConstantExpr>
+ : public FixedNumOperandTraits<CompareConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
template <class ConstantClass> struct ConstantAggrKeyType;
@@ -420,13 +418,16 @@ struct ConstantExprKeyType {
uint16_t SubclassData;
ArrayRef<Constant *> Ops;
ArrayRef<unsigned> Indexes;
+ Type *ExplicitTy;
ConstantExprKeyType(unsigned Opcode, ArrayRef<Constant *> Ops,
unsigned short SubclassData = 0,
unsigned short SubclassOptionalData = 0,
- ArrayRef<unsigned> Indexes = None)
+ ArrayRef<unsigned> Indexes = None,
+ Type *ExplicitTy = nullptr)
: Opcode(Opcode), SubclassOptionalData(SubclassOptionalData),
- SubclassData(SubclassData), Ops(Ops), Indexes(Indexes) {}
+ SubclassData(SubclassData), Ops(Ops), Indexes(Indexes),
+ ExplicitTy(ExplicitTy) {}
ConstantExprKeyType(ArrayRef<Constant *> Operands, const ConstantExpr *CE)
: Opcode(CE->getOpcode()),
SubclassOptionalData(CE->getRawSubclassOptionalData()),
@@ -497,8 +498,11 @@ struct ConstantExprKeyType {
case Instruction::ExtractValue:
return new ExtractValueConstantExpr(Ops[0], Indexes, Ty);
case Instruction::GetElementPtr:
- return GetElementPtrConstantExpr::Create(Ops[0], Ops.slice(1), Ty,
- SubclassOptionalData);
+ return GetElementPtrConstantExpr::Create(
+ ExplicitTy ? ExplicitTy
+ : cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops[0], Ops.slice(1), Ty, SubclassOptionalData);
case Instruction::ICmp:
return new CompareConstantExpr(Ty, Instruction::ICmp, SubclassData,
Ops[0], Ops[1]);
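ExplicitTy lets an interned GEP expression carry its source element type; when it is absent, the type is rederived from the pointer operand exactly as before. Both caller spellings, with ArrTy, GV, Idx0 and Idx1 as assumed values:

    Constant *Indices[] = {Idx0, Idx1};
    Constant *E1 = ConstantExpr::getGetElementPtr(ArrTy, GV, Indices);   // explicit
    Constant *E2 = ConstantExpr::getGetElementPtr(nullptr, GV, Indices); // rederived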
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index 753d9c2..9557cda 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -26,8 +26,8 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -310,6 +310,9 @@ LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
}
+LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt128Ty(*unwrap(C));
+}
LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
return wrap(IntegerType::get(*unwrap(C), NumBits));
}
@@ -329,6 +332,9 @@ LLVMTypeRef LLVMInt32Type(void) {
LLVMTypeRef LLVMInt64Type(void) {
return LLVMInt64TypeInContext(LLVMGetGlobalContext());
}
+LLVMTypeRef LLVMInt128Type(void) {
+ return LLVMInt128TypeInContext(LLVMGetGlobalContext());
+}
LLVMTypeRef LLVMIntType(unsigned NumBits) {
return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
}
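The new entry points mirror the existing fixed-width integer accessors, in both per-context and global-context flavours:

    #include "llvm-c/Core.h"

    LLVMContextRef Ctx = LLVMContextCreate();
    LLVMTypeRef I128 = LLVMInt128TypeInContext(Ctx); // i128 in a given context
    LLVMTypeRef I128G = LLVMInt128Type();            // i128 in the global context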
@@ -1153,8 +1159,8 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
- IdxList));
+ return wrap(ConstantExpr::getGetElementPtr(
+ nullptr, unwrap<Constant>(ConstantVal), IdxList));
}
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
@@ -1163,7 +1169,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
Constant* Val = unwrap<Constant>(ConstantVal);
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList));
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
@@ -1628,9 +1634,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name) {
auto *PTy = cast<PointerType>(unwrap(Ty));
- return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- GlobalValue::ExternalLinkage, Name,
- unwrap<GlobalObject>(Aliasee), unwrap(M)));
+ return wrap(GlobalAlias::create(PTy, GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap(M)));
}
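GlobalAlias::create now takes the alias's pointer type directly and accepts any Constant aliasee, which is what lets the C shim forward an arbitrary constant. A sketch, assuming F is an existing Function in module M:

    auto *PTy = cast<PointerType>(F->getType());
    GlobalAlias *GA = GlobalAlias::create(PTy, GlobalValue::ExternalLinkage,
                                          "my_alias", F, &M);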
/*--.. Operations on functions .............................................--*/
@@ -2181,14 +2186,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
MDNode *Loc =
L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr;
- unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(Loc));
}
LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
LLVMContext &Context = unwrap(Builder)->getContext();
return wrap(MetadataAsValue::get(
- Context,
- unwrap(Builder)->getCurrentDebugLocation().getAsMDNode(Context)));
+ Context, unwrap(Builder)->getCurrentDebugLocation().getAsMDNode()));
}
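DebugLoc is now a thin wrapper over the location node, so the getFromDILocation/getAsMDNode(Context) pair collapses into a constructor and a context-free accessor. A round-trip sketch, with Inst and Builder assumed:

    MDNode *N = Inst->getDebugLoc().getAsMDNode(); // no LLVMContext needed now
    Builder.SetCurrentDebugLocation(DebugLoc(N));  // construct straight from the node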
void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
@@ -2506,19 +2510,20 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
+ return wrap(unwrap(B)->CreateGEP(nullptr, unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
+ return wrap(
+ unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name) {
- return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+ return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name));
}
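The IRBuilder GEP helpers likewise grew a leading type parameter; passing nullptr preserves the old derive-from-the-pointer behaviour, which is all these shims need. The equivalent C++, with B, Ptr and Idx assumed:

    Value *V1 = B.CreateGEP(nullptr, Ptr, Idx, "gep");            // type rederived
    Value *V2 = B.CreateStructGEP(nullptr, Ptr, /*Idx=*/1, "f1"); // field #1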
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
@@ -2845,11 +2850,11 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
/*===-- Pass Manager ------------------------------------------------------===*/
LLVMPassManagerRef LLVMCreatePassManager() {
- return wrap(new PassManager());
+ return wrap(new legacy::PassManager());
}
LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
- return wrap(new FunctionPassManager(unwrap(M)));
+ return wrap(new legacy::FunctionPassManager(unwrap(M)));
}
LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
@@ -2858,19 +2863,19 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
}
LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
- return unwrap<PassManager>(PM)->run(*unwrap(M));
+ return unwrap<legacy::PassManager>(PM)->run(*unwrap(M));
}
LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doInitialization();
+ return unwrap<legacy::FunctionPassManager>(FPM)->doInitialization();
}
LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
- return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+ return unwrap<legacy::FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
}
LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doFinalization();
+ return unwrap<legacy::FunctionPassManager>(FPM)->doFinalization();
}
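The classic pipeline now lives in the llvm::legacy namespace; the wrappers above change only which class they instantiate. Minimal C++ use (createVerifierPass stands in for any module pass; M is an existing Module):

    #include "llvm/IR/LegacyPassManager.h"

    legacy::PassManager PM;
    PM.add(createVerifierPass());
    PM.run(M);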
void LLVMDisposePassManager(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 856bb3c..b1925ea 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -25,15 +25,24 @@ using namespace llvm::dwarf;
namespace {
class HeaderBuilder {
+ /// \brief Whether there are any fields yet.
+ ///
+ /// Note that this is not equivalent to \c Chars.empty(), since \a concat()
+ /// may have been called already with an empty string.
+ bool IsEmpty;
SmallVector<char, 256> Chars;
public:
- explicit HeaderBuilder(Twine T) { T.toVector(Chars); }
- HeaderBuilder(const HeaderBuilder &X) : Chars(X.Chars) {}
- HeaderBuilder(HeaderBuilder &&X) : Chars(std::move(X.Chars)) {}
+ HeaderBuilder() : IsEmpty(true) {}
+ HeaderBuilder(const HeaderBuilder &X) : IsEmpty(X.IsEmpty), Chars(X.Chars) {}
+ HeaderBuilder(HeaderBuilder &&X)
+ : IsEmpty(X.IsEmpty), Chars(std::move(X.Chars)) {}
template <class Twineable> HeaderBuilder &concat(Twineable &&X) {
- Chars.push_back(0);
+ if (IsEmpty)
+ IsEmpty = false;
+ else
+ Chars.push_back(0);
Twine(X).toVector(Chars);
return *this;
}
@@ -43,7 +52,7 @@ public:
}
static HeaderBuilder get(unsigned Tag) {
- return HeaderBuilder("0x" + Twine::utohexstr(Tag));
+ return HeaderBuilder().concat("0x" + Twine::utohexstr(Tag));
}
};
}
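The IsEmpty flag exists because header fields are NUL-separated: the first concat() must not emit a leading separator, and seeding from an empty builder instead of a Twine makes that case explicit. For example:

    auto HB = HeaderBuilder::get(dwarf::DW_TAG_member); // header holds "0xd"
    HB.concat("name").concat(7); // each later field is preceded by one '\0'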
@@ -54,24 +63,18 @@ DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes)
DeclareFn(nullptr), ValueFn(nullptr),
AllowUnresolvedNodes(AllowUnresolvedNodes) {}
-static bool isUnresolved(MDNode *N) {
- return N &&
- (isa<MDNodeFwdDecl>(N) || !cast<UniquableMDNode>(N)->isResolved());
-}
-
void DIBuilder::trackIfUnresolved(MDNode *N) {
- if (!AllowUnresolvedNodes) {
- assert(!isUnresolved(N) && "Cannot handle unresolved nodes");
+ if (!N)
return;
- }
- if (isUnresolved(N))
- UnresolvedNodes.emplace_back(N);
- return;
+ if (N->isResolved())
+ return;
+
+ assert(AllowUnresolvedNodes && "Cannot handle unresolved nodes");
+ UnresolvedNodes.emplace_back(N);
}
void DIBuilder::finalize() {
- DIArray Enums = getOrCreateArray(AllEnumTypes);
- DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+ TempEnumTypes->replaceAllUsesWith(MDTuple::get(VMContext, AllEnumTypes));
SmallVector<Metadata *, 16> RetainValues;
// Declarations and definitions of the same type may be retained. Some
@@ -82,36 +85,30 @@ void DIBuilder::finalize() {
for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
if (RetainSet.insert(AllRetainTypes[I]).second)
RetainValues.push_back(AllRetainTypes[I]);
- DIArray RetainTypes = getOrCreateArray(RetainValues);
- DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
-
- DIArray SPs = getOrCreateArray(AllSubprograms);
- DIType(TempSubprograms).replaceAllUsesWith(SPs);
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (MDNode *Temp = SP.getVariablesNodes()) {
- SmallVector<Metadata *, 4> Variables;
- for (Metadata *V : PreservedVariables.lookup(SP))
- Variables.push_back(V);
- DIArray AV = getOrCreateArray(Variables);
- DIType(Temp).replaceAllUsesWith(AV);
+ TempRetainTypes->replaceAllUsesWith(MDTuple::get(VMContext, RetainValues));
+
+ DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
+ TempSubprograms->replaceAllUsesWith(SPs.get());
+ for (auto *SP : SPs) {
+ if (MDTuple *Temp = SP->getVariables().get()) {
+ const auto &PV = PreservedVariables.lookup(SP);
+ SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
+ DINodeArray AV = getOrCreateArray(Variables);
+ TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
}
}
- DIArray GVs = getOrCreateArray(AllGVs);
- DIType(TempGVs).replaceAllUsesWith(GVs);
+ TempGVs->replaceAllUsesWith(MDTuple::get(VMContext, AllGVs));
- SmallVector<Metadata *, 16> RetainValuesI;
- for (unsigned I = 0, E = AllImportedModules.size(); I < E; I++)
- RetainValuesI.push_back(AllImportedModules[I]);
- DIArray IMs = getOrCreateArray(RetainValuesI);
- DIType(TempImportedModules).replaceAllUsesWith(IMs);
+ TempImportedModules->replaceAllUsesWith(MDTuple::get(
+ VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
+ AllImportedModules.end())));
// Now that all temp nodes have been replaced or deleted, resolve remaining
// cycles.
for (const auto &N : UnresolvedNodes)
- if (N)
- cast<UniquableMDNode>(N)->resolveCycles();
+ if (N && !N->isResolved())
+ N->resolveCycles();
UnresolvedNodes.clear();
// Can't handle unresolved nodes anymore.
@@ -119,58 +116,38 @@ void DIBuilder::finalize() {
}
/// If N is a compile unit, return NULL; otherwise return N.
-static MDNode *getNonCompileUnitScope(MDNode *N) {
- if (DIDescriptor(N).isCompileUnit())
+static DIScope *getNonCompileUnitScope(DIScope *N) {
+ if (!N || isa<DICompileUnit>(N))
return nullptr;
- return N;
-}
-
-static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
- StringRef Directory) {
- assert(!Filename.empty() && "Unable to create file without name");
- Metadata *Pair[] = {MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory)};
- return MDNode::get(VMContext, Pair);
+ return cast<DIScope>(N);
}
-DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
- StringRef Directory,
- StringRef Producer, bool isOptimized,
- StringRef Flags, unsigned RunTimeVer,
- StringRef SplitName,
- DebugEmissionKind Kind,
- bool EmitDebugInfo) {
+DICompileUnit *DIBuilder::createCompileUnit(
+ unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
+ DebugEmissionKind Kind, uint64_t DWOId, bool EmitDebugInfo) {
- assert(((Lang <= dwarf::DW_LANG_OCaml && Lang >= dwarf::DW_LANG_C89) ||
+ assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
"Invalid Language tag");
assert(!Filename.empty() &&
"Unable to create compile unit without filename");
- Metadata *TElts[] = {HeaderBuilder::get(DW_TAG_base_type).get(VMContext)};
- TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
-
- TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
-
- TempSubprograms = MDNode::getTemporary(VMContext, TElts);
-
- TempGVs = MDNode::getTemporary(VMContext, TElts);
- TempImportedModules = MDNode::getTemporary(VMContext, TElts);
-
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_compile_unit)
- .concat(Lang)
- .concat(Producer)
- .concat(isOptimized)
- .concat(Flags)
- .concat(RunTimeVer)
- .concat(SplitName)
- .concat(Kind)
- .get(VMContext),
- createFilePathPair(VMContext, Filename, Directory),
- TempEnumTypes, TempRetainTypes, TempSubprograms, TempGVs,
- TempImportedModules};
-
- MDNode *CUNode = MDNode::get(VMContext, Elts);
+ // TODO: Once we make DICompileUnit distinct, stop using temporaries here
+ // (just start with operands assigned to nullptr).
+ TempEnumTypes = MDTuple::getTemporary(VMContext, None);
+ TempRetainTypes = MDTuple::getTemporary(VMContext, None);
+ TempSubprograms = MDTuple::getTemporary(VMContext, None);
+ TempGVs = MDTuple::getTemporary(VMContext, None);
+ TempImportedModules = MDTuple::getTemporary(VMContext, None);
+
+ // TODO: Switch to getDistinct(). We never want to merge compile units based
+ // on contents.
+ DICompileUnit *CUNode = DICompileUnit::get(
+ VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer,
+ isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes.get(),
+ TempRetainTypes.get(), TempSubprograms.get(), TempGVs.get(),
+ TempImportedModules.get(), DWOId);
// Create a named metadata so that it is easier to find cu in a module.
// Note that we only generate this when the caller wants to actually
@@ -183,785 +160,474 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
}
trackIfUnresolved(CUNode);
- return DICompileUnit(CUNode);
+ return CUNode;
}
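DIBuilder entry points now hand back the specialized node types directly. A typical construction sequence against the new signatures (a sketch; the argument values are illustrative and the trailing parameters are assumed to keep their defaults):

    DIBuilder DIB(M); // M is an existing llvm::Module
    DICompileUnit *CU =
        DIB.createCompileUnit(dwarf::DW_LANG_C99, "a.c", "/tmp", "my-compiler",
                              /*isOptimized=*/false, /*Flags=*/"",
                              /*RunTimeVer=*/0);
    DIFile *File = DIB.createFile("a.c", "/tmp");
    DIB.finalize();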
-static DIImportedEntity
-createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context,
+static DIImportedEntity *
+createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
Metadata *NS, unsigned Line, StringRef Name,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
- const MDNode *R;
- Metadata *Elts[] = {HeaderBuilder::get(Tag).concat(Line).concat(Name).get(C),
- Context, NS};
- R = MDNode::get(C, Elts);
- DIImportedEntity M(R);
- assert(M.Verify() && "Imported module should be valid");
- AllImportedModules.emplace_back(M.get());
+ auto *M = DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), Line, Name);
+ AllImportedModules.emplace_back(M);
return M;
}
-DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
- DINameSpace NS,
- unsigned Line) {
+DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
+ DINamespace *NS,
+ unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, Line, StringRef(), AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
- DIImportedEntity NS,
- unsigned Line) {
+DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
+ DIImportedEntity *NS,
+ unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, Line, StringRef(), AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
- DIDescriptor Decl,
- unsigned Line, StringRef Name) {
+DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
+ DINode *Decl,
+ unsigned Line,
+ StringRef Name) {
// Make sure to use the unique identifier based metadata reference for
// types that have one.
- Metadata *V =
- Decl.isType() ? static_cast<Metadata *>(DIType(Decl).getRef()) : Decl;
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, V, Line, Name,
+ Context, DINodeRef::get(Decl), Line, Name,
AllImportedModules);
}
-DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
- DIImportedEntity Imp,
- unsigned Line, StringRef Name) {
- return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Imp, Line, Name, AllImportedModules);
-}
-
-DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_file_type).get(VMContext),
- createFilePathPair(VMContext, Filename, Directory)};
- return DIFile(MDNode::get(VMContext, Elts));
+DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ return DIFile::get(VMContext, Filename, Directory);
}
-DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
+DIEnumerator *DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
assert(!Name.empty() && "Unable to create enumerator without name");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_enumerator)
- .concat(Name)
- .concat(Val)
- .get(VMContext)};
- return DIEnumerator(MDNode::get(VMContext, Elts));
+ return DIEnumerator::get(VMContext, Val, Name);
}
-DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) {
+DIBasicType *DIBuilder::createUnspecifiedType(StringRef Name) {
assert(!Name.empty() && "Unable to create type without name");
- // Unspecified types are encoded in DIBasicType format. Line number, filename,
- // size, alignment, offset and flags are always empty here.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_unspecified_type)
- .concat(Name)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .get(VMContext),
- nullptr, // Filename
- nullptr // Unused
- };
- return DIBasicType(MDNode::get(VMContext, Elts));
-}
-
-DIBasicType DIBuilder::createNullPtrType() {
+ return DIBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name);
+}
+
+DIBasicType *DIBuilder::createNullPtrType() {
return createUnspecifiedType("decltype(nullptr)");
}
-DIBasicType
-DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding) {
+DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
assert(!Name.empty() && "Unable to create type without name");
- // Basic types are encoded in DIBasicType format. Line number, filename,
- // offset and flags are always empty here.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_base_type)
- .concat(Name)
- .concat(0) // Line
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(Encoding)
- .get(VMContext),
- nullptr, // Filename
- nullptr // Unused
- };
- return DIBasicType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
- // Qualified types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- FromTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType
-DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits, StringRef Name) {
- // Pointer types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_pointer_type)
- .concat(Name)
- .concat(0) // Line
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- PointeeTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType
-DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base,
- uint64_t SizeInBits, uint64_t AlignInBits) {
- // Pointer types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_ptr_to_member_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(SizeInBits) // Size
- .concat(AlignInBits) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- PointeeTy.getRef(), Base.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
- assert(RTy.isType() && "Unable to create reference type");
- // References are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // TheCU,
- RTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo, DIDescriptor Context) {
- // typedefs are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_typedef)
- .concat(Name)
- .concat(LineNo)
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- Ty.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
- // typedefs are encoded in DIDerivedType format.
- assert(Ty.isType() && "Invalid type!");
- assert(FriendTy.isType() && "Invalid friend type!");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_friend)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, Ty.getRef(), FriendTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
- uint64_t BaseOffset,
- unsigned Flags) {
- assert(Ty.isType() && "Unable to create inheritance");
- // TAG_inheritance is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_inheritance)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(BaseOffset)
- .concat(Flags)
- .get(VMContext),
- nullptr, Ty.getRef(), BaseTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty) {
- // TAG_member is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- Ty.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-static Metadata *getConstantOrNull(Constant *C) {
+ return DIBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
+ AlignInBits, Encoding);
+}
+
+DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
+ return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
+ DITypeRef::get(FromTy), 0, 0, 0, 0);
+}
+
+DIDerivedType *DIBuilder::createPointerType(DIType *PointeeTy,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ StringRef Name) {
+ // FIXME: Why is there a name here?
+ return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
+ nullptr, 0, nullptr, DITypeRef::get(PointeeTy),
+ SizeInBits, AlignInBits, 0, 0);
+}
+
+DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
+ DIType *Base,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits) {
+ return DIDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
+ nullptr, 0, nullptr, DITypeRef::get(PointeeTy),
+ SizeInBits, AlignInBits, 0, 0,
+ DITypeRef::get(Base));
+}
+
+DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy) {
+ assert(RTy && "Unable to create reference type");
+ return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
+ DITypeRef::get(RTy), 0, 0, 0, 0);
+}
+
+DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ DIScope *Context) {
+ return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
+ LineNo,
+ DIScopeRef::get(getNonCompileUnitScope(Context)),
+ DITypeRef::get(Ty), 0, 0, 0, 0);
+}
+
+DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
+ assert(Ty && "Invalid type!");
+ assert(FriendTy && "Invalid friend type!");
+ return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0,
+ DITypeRef::get(Ty), DITypeRef::get(FriendTy), 0, 0,
+ 0, 0);
+}
+
+DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
+ uint64_t BaseOffset,
+ unsigned Flags) {
+ assert(Ty && "Unable to create inheritance");
+ return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
+ 0, DITypeRef::get(Ty), DITypeRef::get(BaseTy), 0, 0,
+ BaseOffset, Flags);
+}
+
+DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags, DIType *Ty) {
+ return DIDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)), DITypeRef::get(Ty),
+ SizeInBits, AlignInBits, OffsetInBits, Flags);
+}
+
+static ConstantAsMetadata *getConstantOrNull(Constant *C) {
if (C)
return ConstantAsMetadata::get(C);
return nullptr;
}
-DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope,
- StringRef Name, DIFile File,
- unsigned LineNumber, DIType Ty,
- unsigned Flags,
- llvm::Constant *Val) {
- // TAG_member is encoded in DIDerivedType format.
- Flags |= DIDescriptor::FlagStaticMember;
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- Ty.getRef(), getConstantOrNull(Val)};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
- unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, MDNode *PropertyNode) {
- // TAG_member is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(File), Ty,
- PropertyNode};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-DIObjCProperty
-DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber,
+DIDerivedType *DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name,
+ DIFile *File,
+ unsigned LineNumber,
+ DIType *Ty, unsigned Flags,
+ llvm::Constant *Val) {
+ Flags |= DINode::FlagStaticMember;
+ return DIDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)), DITypeRef::get(Ty), 0, 0,
+ 0, Flags, getConstantOrNull(Val));
+}
+
+DIDerivedType *DIBuilder::createObjCIVar(StringRef Name, DIFile *File,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType *Ty, MDNode *PropertyNode) {
+ return DIDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(File)), DITypeRef::get(Ty),
+ SizeInBits, AlignInBits, OffsetInBits, Flags, PropertyNode);
+}
+
+DIObjCProperty *
+DIBuilder::createObjCProperty(StringRef Name, DIFile *File, unsigned LineNumber,
StringRef GetterName, StringRef SetterName,
- unsigned PropertyAttributes, DIType Ty) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_APPLE_property)
- .concat(Name)
- .concat(LineNumber)
- .concat(GetterName)
- .concat(SetterName)
- .concat(PropertyAttributes)
- .get(VMContext),
- File, Ty};
- return DIObjCProperty(MDNode::get(VMContext, Elts));
-}
-
-DITemplateTypeParameter
-DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_template_type_parameter)
- .concat(Name)
- .concat(LineNo)
- .concat(ColumnNo)
- .get(VMContext),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- Ty.getRef(), File};
- return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
-}
-
-static DITemplateValueParameter createTemplateValueParameterHelper(
- LLVMContext &VMContext, unsigned Tag, DIDescriptor Context, StringRef Name,
- DIType Ty, Metadata *MD, MDNode *File, unsigned LineNo, unsigned ColumnNo) {
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag).concat(Name).concat(LineNo).concat(ColumnNo).get(
- VMContext),
- DIScope(getNonCompileUnitScope(Context)).getRef(), Ty.getRef(), MD, File};
- return DITemplateValueParameter(MDNode::get(VMContext, Elts));
-}
-
-DITemplateValueParameter
-DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, Constant *Val, MDNode *File,
- unsigned LineNo, unsigned ColumnNo) {
+ unsigned PropertyAttributes, DIType *Ty) {
+ return DIObjCProperty::get(VMContext, Name, File, LineNumber, GetterName,
+ SetterName, PropertyAttributes, Ty);
+}
+
+DITemplateTypeParameter *
+DIBuilder::createTemplateTypeParameter(DIScope *Context, StringRef Name,
+ DIType *Ty) {
+ assert((!Context || isa<DICompileUnit>(Context)) && "Expected compile unit");
+ return DITemplateTypeParameter::get(VMContext, Name, DITypeRef::get(Ty));
+}
+
+static DITemplateValueParameter *
+createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag,
+ DIScope *Context, StringRef Name, DIType *Ty,
+ Metadata *MD) {
+ assert((!Context || isa<DICompileUnit>(Context)) && "Expected compile unit");
+ return DITemplateValueParameter::get(VMContext, Tag, Name, DITypeRef::get(Ty),
+ MD);
+}
+
+DITemplateValueParameter *
+DIBuilder::createTemplateValueParameter(DIScope *Context, StringRef Name,
+ DIType *Ty, Constant *Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_template_value_parameter, Context, Name, Ty,
- getConstantOrNull(Val), File, LineNo, ColumnNo);
+ getConstantOrNull(Val));
}
-DITemplateValueParameter
-DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, StringRef Val,
- MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
+DITemplateValueParameter *
+DIBuilder::createTemplateTemplateParameter(DIScope *Context, StringRef Name,
+ DIType *Ty, StringRef Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty,
- MDString::get(VMContext, Val), File, LineNo, ColumnNo);
+ MDString::get(VMContext, Val));
}
-DITemplateValueParameter
-DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name,
- DIType Ty, DIArray Val,
- MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
+DITemplateValueParameter *
+DIBuilder::createTemplateParameterPack(DIScope *Context, StringRef Name,
+ DIType *Ty, DINodeArray Val) {
return createTemplateValueParameterHelper(
VMContext, dwarf::DW_TAG_GNU_template_parameter_pack, Context, Name, Ty,
- Val, File, LineNo, ColumnNo);
+ Val.get());
}
-DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements,
- DIType VTableHolder,
- MDNode *TemplateParams,
- StringRef UniqueIdentifier) {
- assert((!Context || Context.isScope() || Context.isType()) &&
+DICompositeType *DIBuilder::createClassType(
+ DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, DIType *DerivedFrom, DINodeArray Elements,
+ DIType *VTableHolder, MDNode *TemplateParams, StringRef UniqueIdentifier) {
+ assert((!Context || isa<DIScope>(Context)) &&
"createClassType should be called with a valid Context");
- // TAG_class_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_class_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .concat(0)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
- DerivedFrom.getRef(), Elements, VTableHolder.getRef(), TemplateParams,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
- assert(R.isCompositeType() &&
- "createClassType should return a DICompositeType");
+
+ auto *R = DICompositeType::get(
+ VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Context)),
+ DITypeRef::get(DerivedFrom), SizeInBits, AlignInBits, OffsetInBits, Flags,
+ Elements, 0, DITypeRef::get(VTableHolder),
+ cast_or_null<MDTuple>(TemplateParams), UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
+ trackIfUnresolved(R);
return R;
}
-DICompositeType DIBuilder::createStructType(DIDescriptor Context,
- StringRef Name, DIFile File,
- unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements,
- unsigned RunTimeLang,
- DIType VTableHolder,
- StringRef UniqueIdentifier) {
- // TAG_structure_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_structure_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0)
- .concat(Flags)
- .concat(RunTimeLang)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
- DerivedFrom.getRef(), Elements, VTableHolder.getRef(), nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
- assert(R.isCompositeType() &&
- "createStructType should return a DICompositeType");
+DICompositeType *DIBuilder::createStructType(
+ DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ DIType *DerivedFrom, DINodeArray Elements, unsigned RunTimeLang,
+ DIType *VTableHolder, StringRef UniqueIdentifier) {
+ auto *R = DICompositeType::get(
+ VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Context)),
+ DITypeRef::get(DerivedFrom), SizeInBits, AlignInBits, 0, Flags, Elements,
+ RunTimeLang, DITypeRef::get(VTableHolder), nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
+ trackIfUnresolved(R);
return R;
}
-DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Flags,
- DIArray Elements,
- unsigned RunTimeLang,
- StringRef UniqueIdentifier) {
- // TAG_union_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_union_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(Flags)
- .concat(RunTimeLang)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
- nullptr, Elements, nullptr, nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
+DICompositeType *DIBuilder::createUnionType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ DINodeArray Elements, unsigned RunTimeLang, StringRef UniqueIdentifier) {
+ auto *R = DICompositeType::get(
+ VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits,
+ AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr,
+ UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
+ trackIfUnresolved(R);
return R;
}
-DISubroutineType DIBuilder::createSubroutineType(DIFile File,
- DITypeArray ParameterTypes,
- unsigned Flags) {
- // TAG_subroutine_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_subroutine_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(Flags) // Flags
- .concat(0)
- .get(VMContext),
- nullptr, nullptr, nullptr, ParameterTypes, nullptr, nullptr,
- nullptr // Type Identifer
- };
- return DISubroutineType(MDNode::get(VMContext, Elts));
-}
-
-DICompositeType DIBuilder::createEnumerationType(
- DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
- DIType UnderlyingType, StringRef UniqueIdentifier) {
- // TAG_enumeration_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_enumeration_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(0)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
- UnderlyingType.getRef(), Elements, nullptr, nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType CTy(MDNode::get(VMContext, Elts));
+DISubroutineType *DIBuilder::createSubroutineType(DIFile *File,
+ DITypeRefArray ParameterTypes,
+ unsigned Flags) {
+ return DISubroutineType::get(VMContext, Flags, ParameterTypes);
+}
+
+DICompositeType *DIBuilder::createEnumerationType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements,
+ DIType *UnderlyingType, StringRef UniqueIdentifier) {
+ auto *CTy = DICompositeType::get(
+ VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)),
+ DITypeRef::get(UnderlyingType), SizeInBits, AlignInBits, 0, 0, Elements,
+ 0, nullptr, nullptr, UniqueIdentifier);
AllEnumTypes.push_back(CTy);
if (!UniqueIdentifier.empty())
retainType(CTy);
+ trackIfUnresolved(CTy);
return CTy;
}
-DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
- // TAG_array_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_array_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(Size)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(0)
- .get(VMContext),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- Ty.getRef(), Subscripts, nullptr, nullptr,
- nullptr // Type Identifer
- };
- return DICompositeType(MDNode::get(VMContext, Elts));
-}
-
-DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
- // A vector is an array type with the FlagVector flag applied.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_array_type)
- .concat("")
- .concat(0) // Line
- .concat(Size)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIType::FlagVector)
- .concat(0)
- .get(VMContext),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- Ty.getRef(), Subscripts, nullptr, nullptr,
- nullptr // Type Identifer
- };
- return DICompositeType(MDNode::get(VMContext, Elts));
-}
-
-static HeaderBuilder setTypeFlagsInHeader(StringRef Header,
- unsigned FlagsToSet) {
- DIHeaderFieldIterator I(Header);
- std::advance(I, 6);
-
- unsigned Flags;
- if (I->getAsInteger(0, Flags))
- Flags = 0;
- Flags |= FlagsToSet;
-
- return HeaderBuilder(Twine(I.getPrefix())).concat(Flags).concat(
- I.getSuffix());
-}
-
-static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty,
- unsigned FlagsToSet) {
- SmallVector<Metadata *, 9> Elts;
- MDNode *N = Ty;
- assert(N && "Unexpected input DIType!");
- // Update header field.
- Elts.push_back(setTypeFlagsInHeader(Ty.getHeader(), FlagsToSet).get(Context));
- for (unsigned I = 1, E = N->getNumOperands(); I != E; ++I)
- Elts.push_back(N->getOperand(I));
-
- return DIType(MDNode::get(Context, Elts));
-}
-
-DIType DIBuilder::createArtificialType(DIType Ty) {
- if (Ty.isArtificial())
+DICompositeType *DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType *Ty,
+ DINodeArray Subscripts) {
+ auto *R = DICompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
+ nullptr, 0, nullptr, DITypeRef::get(Ty), Size,
+ AlignInBits, 0, 0, Subscripts, 0, nullptr);
+ trackIfUnresolved(R);
+ return R;
+}
+
+DICompositeType *DIBuilder::createVectorType(uint64_t Size,
+ uint64_t AlignInBits, DIType *Ty,
+ DINodeArray Subscripts) {
+ auto *R =
+ DICompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0,
+ nullptr, DITypeRef::get(Ty), Size, AlignInBits, 0,
+ DINode::FlagVector, Subscripts, 0, nullptr);
+ trackIfUnresolved(R);
+ return R;
+}
+
+static DIType *createTypeWithFlags(LLVMContext &Context, DIType *Ty,
+ unsigned FlagsToSet) {
+ auto NewTy = Ty->clone();
+ NewTy->setFlags(NewTy->getFlags() | FlagsToSet);
+ return MDNode::replaceWithUniqued(std::move(NewTy));
+}
+
+DIType *DIBuilder::createArtificialType(DIType *Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
+ if (Ty->isArtificial())
return Ty;
- return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial);
+ return createTypeWithFlags(VMContext, Ty, DINode::FlagArtificial);
}
-DIType DIBuilder::createObjectPointerType(DIType Ty) {
- if (Ty.isObjectPointer())
+DIType *DIBuilder::createObjectPointerType(DIType *Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
+ if (Ty->isObjectPointer())
return Ty;
- unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial;
+ unsigned Flags = DINode::FlagObjectPointer | DINode::FlagArtificial;
return createTypeWithFlags(VMContext, Ty, Flags);
}
-void DIBuilder::retainType(DIType T) { AllRetainTypes.emplace_back(T); }
-
-DIBasicType DIBuilder::createUnspecifiedParameter() {
- return DIBasicType();
+void DIBuilder::retainType(DIType *T) {
+ assert(T && "Expected non-null type");
+ AllRetainTypes.emplace_back(T);
}
-DICompositeType
-DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
- DIFile F, unsigned Line, unsigned RuntimeLang,
+DIBasicType *DIBuilder::createUnspecifiedParameter() { return nullptr; }
+
+DICompositeType *
+DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIScope *Scope,
+ DIFile *F, unsigned Line, unsigned RuntimeLang,
uint64_t SizeInBits, uint64_t AlignInBits,
StringRef UniqueIdentifier) {
- // Create a temporary MDNode.
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(Line)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIDescriptor::FlagFwdDecl)
- .concat(RuntimeLang)
- .get(VMContext),
- F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
- DIArray(), nullptr,
- nullptr, // TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- MDNode *Node = MDNode::get(VMContext, Elts);
- DICompositeType RetTy(Node);
- assert(RetTy.isCompositeType() &&
- "createForwardDecl result should be a DIType");
+ // FIXME: Define in terms of createReplaceableForwardDecl() by calling
+ // replaceWithUniqued().
+ auto *RetTy = DICompositeType::get(
+ VMContext, Tag, Name, F, Line,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits,
+ AlignInBits, 0, DINode::FlagFwdDecl, nullptr, RuntimeLang, nullptr,
+ nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(RetTy);
+ trackIfUnresolved(RetTy);
return RetTy;
}
-DICompositeType DIBuilder::createReplaceableForwardDecl(
- unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line,
+DICompositeType *DIBuilder::createReplaceableCompositeType(
+ unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
- StringRef UniqueIdentifier) {
- // Create a temporary MDNode.
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(Line)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIDescriptor::FlagFwdDecl)
- .concat(RuntimeLang)
- .get(VMContext),
- F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
- DIArray(), nullptr,
- nullptr, // TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType RetTy(MDNode::getTemporary(VMContext, Elts));
- assert(RetTy.isCompositeType() &&
- "createReplaceableForwardDecl result should be a DIType");
+ unsigned Flags, StringRef UniqueIdentifier) {
+ auto *RetTy = DICompositeType::getTemporary(
+ VMContext, Tag, Name, F, Line,
+ DIScopeRef::get(getNonCompileUnitScope(Scope)), nullptr,
+ SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang,
+ nullptr, nullptr, UniqueIdentifier)
+ .release();
if (!UniqueIdentifier.empty())
retainType(RetTy);
+ trackIfUnresolved(RetTy);
return RetTy;
}
-DIArray DIBuilder::getOrCreateArray(ArrayRef<Metadata *> Elements) {
- return DIArray(MDNode::get(VMContext, Elements));
+DINodeArray DIBuilder::getOrCreateArray(ArrayRef<Metadata *> Elements) {
+ return MDTuple::get(VMContext, Elements);
}
-DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) {
+DITypeRefArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) {
SmallVector<llvm::Metadata *, 16> Elts;
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] && isa<MDNode>(Elements[i]))
- Elts.push_back(DIType(cast<MDNode>(Elements[i])).getRef());
+ Elts.push_back(DITypeRef::get(cast<DIType>(Elements[i])));
else
Elts.push_back(Elements[i]);
}
- return DITypeArray(MDNode::get(VMContext, Elts));
+ return DITypeRefArray(MDNode::get(VMContext, Elts));
}
-DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subrange_type)
- .concat(Lo)
- .concat(Count)
- .get(VMContext)};
-
- return DISubrange(MDNode::get(VMContext, Elts));
+DISubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
+ return DISubrange::get(VMContext, Count, Lo);
}
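Note the argument swap: the builder takes (Lo, Count) while DISubrange::get stores (Count, Lo). A ten-element dimension starting at zero:

    DISubrange *Dim = DIB.getOrCreateSubrange(/*Lo=*/0, /*Count=*/10);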
-static DIGlobalVariable createGlobalVariableHelper(
- LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F, unsigned LineNumber, DITypeRef Ty,
- bool isLocalToUnit, Constant *Val, MDNode *Decl, bool isDefinition,
- std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) {
-
- MDNode *TheCtx = getNonCompileUnitScope(Context);
- if (DIScope(TheCtx).isCompositeType()) {
- assert(!DICompositeType(TheCtx).getIdentifier() &&
+static void checkGlobalVariableScope(DIScope *Context) {
+#ifndef NDEBUG
+ if (auto *CT =
+ dyn_cast_or_null<DICompositeType>(getNonCompileUnitScope(Context)))
+ assert(CT->getIdentifier().empty() &&
"Context of a global variable should not be a type with identifier");
- }
+#endif
+}
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_variable)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNumber)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .get(VMContext),
- TheCtx, F, Ty, getConstantOrNull(Val),
- DIDescriptor(Decl)};
+DIGlobalVariable *DIBuilder::createGlobalVariable(
+ DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
+ unsigned LineNumber, DIType *Ty, bool isLocalToUnit, Constant *Val,
+ MDNode *Decl) {
+ checkGlobalVariableScope(Context);
- return DIGlobalVariable(CreateFunc(Elts));
+ auto *N = DIGlobalVariable::get(VMContext, cast_or_null<DIScope>(Context),
+ Name, LinkageName, F, LineNumber,
+ DITypeRef::get(Ty), isLocalToUnit, true, Val,
+ cast_or_null<DIDerivedType>(Decl));
+ AllGVs.push_back(N);
+ return N;
}
-DIGlobalVariable DIBuilder::createGlobalVariable(
- DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
- unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
+DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
+ DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
+ unsigned LineNumber, DIType *Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
- return createGlobalVariableHelper(
- VMContext, Context, Name, LinkageName, F, LineNumber, Ty, isLocalToUnit,
- Val, Decl, true, [&](ArrayRef<Metadata *> Elts) -> MDNode *{
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return Node;
- });
-}
-
-DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl(
- DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
- unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
- MDNode *Decl) {
- return createGlobalVariableHelper(VMContext, Context, Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, Val, Decl,
- false, [&](ArrayRef<Metadata *> Elts) {
- return MDNode::getTemporary(VMContext, Elts);
- });
-}
-
-DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name, DIFile File,
- unsigned LineNo, DITypeRef Ty,
- bool AlwaysPreserve, unsigned Flags,
- unsigned ArgNo) {
- DIDescriptor Context(getNonCompileUnitScope(Scope));
- assert((!Context || Context.isScope()) &&
- "createLocalVariable should be called with a valid Context");
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(LineNo | (ArgNo << 24))
- .concat(Flags)
- .get(VMContext),
- getNonCompileUnitScope(Scope), File, Ty};
- MDNode *Node = MDNode::get(VMContext, Elts);
+ checkGlobalVariableScope(Context);
+
+ return DIGlobalVariable::getTemporary(
+ VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
+ LineNumber, DITypeRef::get(Ty), isLocalToUnit, false, Val,
+ cast_or_null<DIDerivedType>(Decl))
+ .release();
+}
+
+DILocalVariable *DIBuilder::createLocalVariable(
+ unsigned Tag, DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
+ DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
+ // FIXME: Why getNonCompileUnitScope()?
+ // FIXME: Why is "!Context" okay here?
+  // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
+ // the only valid scopes)?
+ DIScope *Context = getNonCompileUnitScope(Scope);
+
+ auto *Node = DILocalVariable::get(
+ VMContext, Tag, cast_or_null<DILocalScope>(Context), Name, File, LineNo,
+ DITypeRef::get(Ty), ArgNo, Flags);
if (AlwaysPreserve) {
    // The optimizer may remove local variables. If there is an interest
    // in preserving variable info in such a situation then stash it in a
    // named mdnode.
- DISubprogram Fn(getDISubprogram(Scope));
+ DISubprogram *Fn = getDISubprogram(Scope);
assert(Fn && "Missing subprogram for local variable");
PreservedVariables[Fn].emplace_back(Node);
}
- DIVariable RetVar(Node);
- assert(RetVar.isVariable() &&
- "createLocalVariable should return a valid DIVariable");
- return RetVar;
+ return Node;
}
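
At this point in the API's history a formal parameter is still distinguished by DWARF tag plus argument number; a sketch, assuming SP, File, and IntTy exist:

    auto *Param = DIB.createLocalVariable(dwarf::DW_TAG_arg_variable, SP, "x",
                                          File, /*LineNo=*/3, IntTy,
                                          /*AlwaysPreserve=*/true, /*Flags=*/0,
                                          /*ArgNo=*/1);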
-DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Addr) {
- auto Header = HeaderBuilder::get(DW_TAG_expression);
- for (int64_t I : Addr)
- Header.concat(I);
- Metadata *Elts[] = {Header.get(VMContext)};
- return DIExpression(MDNode::get(VMContext, Elts));
+DIExpression *DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
+ return DIExpression::get(VMContext, Addr);
}
-DIExpression DIBuilder::createPieceExpression(unsigned OffsetInBytes,
- unsigned SizeInBytes) {
- int64_t Addr[] = {dwarf::DW_OP_piece, OffsetInBytes, SizeInBytes};
+DIExpression *DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
+  // TODO: Remove the callers of this signed version and delete it.
+ SmallVector<uint64_t, 8> Addr(Signed.begin(), Signed.end());
return createExpression(Addr);
}
-DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParams, MDNode *Decl) {
+DIExpression *DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
+ unsigned SizeInBytes) {
+ uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes};
+ return DIExpression::get(VMContext, Addr);
+}
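
A short sketch of both expression entry points (the operand values are illustrative only):

    // The variable lives behind one level of indirection.
    uint64_t DerefOps[] = {dwarf::DW_OP_deref};
    DIExpression *Deref = DIB.createExpression(DerefOps);

    // Describe the first four bytes of a variable split across locations.
    DIExpression *Piece =
        DIB.createBitPieceExpression(/*OffsetInBytes=*/0, /*SizeInBytes=*/4);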
+
+DISubprogram *DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
+ StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags,
+ bool isOptimized, Function *Fn,
+ MDNode *TParams, MDNode *Decl) {
  // dragonegg does not generate identifiers for types, so using an empty map
// to resolve the context should be fine.
DITypeIdentifierMap EmptyMap;
@@ -970,161 +636,84 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
Flags, isOptimized, Fn, TParams, Decl);
}
-static DISubprogram createFunctionHelper(
- LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File, unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl, MDNode *Vars,
- std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+DISubprogram *DIBuilder::createFunction(DIScope *Context, StringRef Name,
+ StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags,
+ bool isOptimized, Function *Fn,
+ MDNode *TParams, MDNode *Decl) {
+ assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNo)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .concat(0)
- .concat(0)
- .concat(Flags)
- .concat(isOptimized)
- .concat(ScopeLine)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(), Ty,
- nullptr, getConstantOrNull(Fn), TParams, Decl, Vars};
-
- DISubprogram S(CreateFunc(Elts));
- assert(S.isSubprogram() &&
- "createFunction should return a valid DISubprogram");
- return S;
-}
-
-
-DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParams, MDNode *Decl) {
- return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
- LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, Flags, isOptimized, Fn, TParams, Decl,
- MDNode::getTemporary(VMContext, None),
- [&](ArrayRef<Metadata *> Elts) -> MDNode *{
- MDNode *Node = MDNode::get(VMContext, Elts);
- // Create a named metadata so that we
- // do not lose this mdnode.
- if (isDefinition)
- AllSubprograms.push_back(Node);
- return Node;
- });
-}
-
-DISubprogram
-DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File,
- unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParams, MDNode *Decl) {
- return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
- LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, Flags, isOptimized, Fn, TParams, Decl,
- nullptr, [&](ArrayRef<Metadata *> Elts) {
- return MDNode::getTemporary(VMContext, Elts);
- });
-}
-
-DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F,
- unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned VK, unsigned VIndex,
- DIType VTableHolder, unsigned Flags,
- bool isOptimized, Function *Fn,
- MDNode *TParam) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+ auto *Node = DISubprogram::get(
+ VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name,
+ LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
+ nullptr, 0, 0, Flags, isOptimized, Fn, cast_or_null<MDTuple>(TParams),
+ cast_or_null<DISubprogram>(Decl),
+ MDTuple::getTemporary(VMContext, None).release());
+
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ trackIfUnresolved(Node);
+ return Node;
+}
+
+DISubprogram *DIBuilder::createTempFunctionFwdDecl(
+ DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+ bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
+ Function *Fn, MDNode *TParams, MDNode *Decl) {
+ return DISubprogram::getTemporary(
+ VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name,
+ LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition,
+ ScopeLine, nullptr, 0, 0, Flags, isOptimized, Fn,
+ cast_or_null<MDTuple>(TParams), cast_or_null<DISubprogram>(Decl),
+ nullptr)
+ .release();
+}
+
+DISubprogram *
+DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName,
+ DIFile *F, unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition, unsigned VK,
+ unsigned VIndex, DIType *VTableHolder, unsigned Flags,
+ bool isOptimized, Function *Fn, MDNode *TParam) {
+ assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNo)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .concat(VK)
- .concat(VIndex)
- .concat(Flags)
- .concat(isOptimized)
- .concat(LineNo)
- // FIXME: Do we want to use different scope/lines?
- .get(VMContext),
- F.getFileNode(), DIScope(Context).getRef(), Ty,
- VTableHolder.getRef(), getConstantOrNull(Fn), TParam,
- nullptr, nullptr};
- MDNode *Node = MDNode::get(VMContext, Elts);
+ // FIXME: Do we want to use different scope/lines?
+ auto *SP = DISubprogram::get(
+ VMContext, DIScopeRef::get(cast<DIScope>(Context)), Name, LinkageName, F,
+ LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
+ DITypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn,
+ cast_or_null<MDTuple>(TParam), nullptr, nullptr);
+
if (isDefinition)
- AllSubprograms.push_back(Node);
- DISubprogram S(Node);
- assert(S.isSubprogram() && "createMethod should return a valid DISubprogram");
- return S;
-}
-
-DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNo) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_namespace)
- .concat(Name)
- .concat(LineNo)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(Scope)};
- DINameSpace R(MDNode::get(VMContext, Elts));
- assert(R.Verify() &&
- "createNameSpace should return a verifiable DINameSpace");
- return R;
+ AllSubprograms.push_back(SP);
+ trackIfUnresolved(SP);
+ return SP;
}
-DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File,
- unsigned Discriminator) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
- .concat(Discriminator)
- .get(VMContext),
- File.getFileNode(), Scope};
- DILexicalBlockFile R(MDNode::get(VMContext, Elts));
- assert(
- R.Verify() &&
- "createLexicalBlockFile should return a verifiable DILexicalBlockFile");
- return R;
+DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNo) {
+ return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name,
+ LineNo);
}
-DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
- unsigned Line, unsigned Col) {
- // FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing.
- // I believe the right way is to have a self-referential element in the node.
- // Also: why do we bother with line/column - they're not used and the
- // documentation (SourceLevelDebugging.rst) claims the line/col are necessary
- // for uniquing, yet then we have this other solution (because line/col were
- // inadequate) anyway. Remove all 3 and replace them with a self-reference.
-
- // Defeat MDNode uniquing for lexical blocks by using unique id.
- static unsigned int unique_id = 0;
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
- .concat(Line)
- .concat(Col)
- .concat(unique_id++)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(Scope)};
- DILexicalBlock R(MDNode::get(VMContext, Elts));
- assert(R.Verify() &&
- "createLexicalBlock should return a verifiable DILexicalBlock");
- return R;
+DILexicalBlockFile *DIBuilder::createLexicalBlockFile(DIScope *Scope,
+ DIFile *File,
+ unsigned Discriminator) {
+ return DILexicalBlockFile::get(VMContext, Scope, File, Discriminator);
+}
+
+DILexicalBlock *DIBuilder::createLexicalBlock(DIScope *Scope, DIFile *File,
+ unsigned Line, unsigned Col) {
+ // Make these distinct, to avoid merging two lexical blocks on the same
+ // file/line/column.
+ return DILexicalBlock::getDistinct(VMContext, getNonCompileUnitScope(Scope),
+ File, Line, Col);
}
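
Because getDistinct bypasses uniquing, two blocks opened at the same position remain separate nodes; a quick sketch, assuming SP and File exist:

    auto *B1 = DIB.createLexicalBlock(SP, File, /*Line=*/10, /*Col=*/1);
    auto *B2 = DIB.createLexicalBlock(SP, File, /*Line=*/10, /*Col=*/1);
    assert(B1 != B2 && "distinct nodes are never merged");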
static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) {
@@ -1132,11 +721,19 @@ static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) {
return MetadataAsValue::get(VMContext, ValueAsMetadata::get(V));
}
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- DIExpression Expr,
+static Instruction *withDebugLoc(Instruction *I, const DILocation *DL) {
+ I->setDebugLoc(const_cast<DILocation *>(DL));
+ return I;
+}
+
+Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
+ DIExpression *Expr, const DILocation *DL,
Instruction *InsertBefore) {
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.declare");
+ assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
@@ -1145,14 +742,17 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertBefore), DL);
}
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- DIExpression Expr,
+Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
+ DIExpression *Expr, const DILocation *DL,
BasicBlock *InsertAtEnd) {
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.declare");
+ assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
@@ -1165,18 +765,21 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
// If this block already has a terminator then insert this intrinsic
// before the terminator.
if (TerminatorInst *T = InsertAtEnd->getTerminator())
- return CallInst::Create(DeclareFn, Args, "", T);
- else
- return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", T), DL);
+ return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertAtEnd), DL);
}
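
The debug location is now a required argument instead of being attached to the returned call by the caller; a hedged sketch, assuming Ctx, Scope, Alloca, Var, and the block Entry exist:

    DIB.insertDeclare(Alloca, Var, DIB.createExpression(),
                      DILocation::get(Ctx, /*Line=*/7, /*Column=*/0, Scope),
                      Entry);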
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- DIExpression Expr,
+ DILocalVariable *VarInfo,
+ DIExpression *Expr,
+ const DILocation *DL,
Instruction *InsertBefore) {
assert(V && "no value passed to dbg.value");
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.value");
+ assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.value");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
@@ -1186,16 +789,20 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
ConstantInt::get(Type::getInt64Ty(VMContext), Offset),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(ValueFn, Args, "", InsertBefore);
+ return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertBefore), DL);
}
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- DIExpression Expr,
+ DILocalVariable *VarInfo,
+ DIExpression *Expr,
+ const DILocation *DL,
BasicBlock *InsertAtEnd) {
assert(V && "no value passed to dbg.value");
- assert(VarInfo.isVariable() &&
- "empty or invalid DIVariable passed to dbg.value");
+ assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.value");
+ assert(DL && "Expected debug loc");
+ assert(DL->getScope()->getSubprogram() ==
+ VarInfo->getScope()->getSubprogram() &&
+ "Expected matching subprograms");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
@@ -1205,27 +812,40 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
ConstantInt::get(Type::getInt64Ty(VMContext), Offset),
MetadataAsValue::get(VMContext, VarInfo),
MetadataAsValue::get(VMContext, Expr)};
- return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+
+ return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertAtEnd), DL);
}
-void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHolder) {
- T.setContainingType(VTableHolder);
+void DIBuilder::replaceVTableHolder(DICompositeType *&T,
+ DICompositeType *VTableHolder) {
+ {
+ TypedTrackingMDRef<DICompositeType> N(T);
+ N->replaceVTableHolder(DITypeRef::get(VTableHolder));
+ T = N.get();
+ }
// If this didn't create a self-reference, just return.
if (T != VTableHolder)
return;
- // Look for unresolved operands. T has dropped RAUW support and is already
- // marked resolved, orphaning any cycles underneath it.
- assert(T->isResolved() && "Expected self-reference to be resolved");
- for (const MDOperand &O : T->operands())
- if (auto *N = dyn_cast_or_null<MDNode>(O))
- trackIfUnresolved(N);
-}
-
-void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements,
- DIArray TParams) {
- T.setArrays(Elements, TParams);
+ // Look for unresolved operands. T will drop RAUW support, orphaning any
+ // cycles underneath it.
+ if (T->isResolved())
+ for (const MDOperand &O : T->operands())
+ if (auto *N = dyn_cast_or_null<MDNode>(O))
+ trackIfUnresolved(N);
+}
+
+void DIBuilder::replaceArrays(DICompositeType *&T, DINodeArray Elements,
+ DINodeArray TParams) {
+ {
+ TypedTrackingMDRef<DICompositeType> N(T);
+ if (Elements)
+ N->replaceElements(Elements);
+ if (TParams)
+ N->replaceTemplateParams(DITemplateParameterArray(TParams));
+ T = N.get();
+ }
// If T isn't resolved, there's no problem.
if (!T->isResolved())
@@ -1235,7 +855,7 @@ void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements,
// arrays explicitly if they're unresolved, or else the cycles will be
// orphaned.
if (Elements)
- trackIfUnresolved(Elements);
+ trackIfUnresolved(Elements.get());
if (TParams)
- trackIfUnresolved(TParams);
+ trackIfUnresolved(TParams.get());
}
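
The replace* helpers above support two-phase construction of self-referential types: create a replaceable temporary (as with the forward-decl helper near the top of this file's changes), build members that may point back at it, then splice the finished arrays in. A sketch under those assumptions, with Tmp the temporary DICompositeType* and Members the finished member list:

    DIB.replaceArrays(Tmp, DIB.getOrCreateArray(Members));
    // Tmp now holds the member tuple; unresolved cycles stay tracked.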
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index cde3937..4d867ef 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -33,11 +33,6 @@
#include <cstdlib>
using namespace llvm;
-// Handle the Pass registration stuff necessary to use DataLayout's.
-
-INITIALIZE_PASS(DataLayoutPass, "datalayout", "Data Layout", false, true)
-char DataLayoutPass::ID = 0;
-
//===----------------------------------------------------------------------===//
// Support for StructLayout
//===----------------------------------------------------------------------===//
@@ -155,8 +150,8 @@ DataLayout::InvalidPointerElem = { 0U, 0U, 0U, ~0U };
const char *DataLayout::getManglingComponent(const Triple &T) {
if (T.isOSBinFormatMachO())
return "-m:o";
- if (T.isOSWindows() && T.getArch() == Triple::x86 && T.isOSBinFormatCOFF())
- return "-m:w";
+ if (T.isOSWindows() && T.isOSBinFormatCOFF())
+ return T.getArch() == Triple::x86 ? "-m:x" : "-m:w";
return "-m:e";
}
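
With this change 32-bit Windows/COFF gets its own mangling letter while other COFF targets keep "-m:w"; an illustrative (hypothetical) check:

    Triple T("i686-pc-windows-msvc");
    assert(StringRef(DataLayout::getManglingComponent(T)) == "-m:x");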
@@ -221,6 +216,7 @@ static unsigned inBytes(unsigned Bits) {
}
void DataLayout::parseSpecifier(StringRef Desc) {
+ StringRepresentation = Desc;
while (!Desc.empty()) {
// Split at '-'.
std::pair<StringRef, StringRef> Split = split(Desc, '-');
@@ -259,6 +255,8 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing size specification for pointer in datalayout string");
Split = split(Rest, ':');
unsigned PointerMemSize = inBytes(getInt(Tok));
+ if (!PointerMemSize)
+ report_fatal_error("Invalid pointer size of 0 bytes");
// ABI alignment.
if (Rest.empty())
@@ -266,12 +264,18 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing alignment specification for pointer in datalayout string");
Split = split(Rest, ':');
unsigned PointerABIAlign = inBytes(getInt(Tok));
+ if (!isPowerOf2_64(PointerABIAlign))
+ report_fatal_error(
+ "Pointer ABI alignment must be a power of 2");
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
if (!Rest.empty()) {
Split = split(Rest, ':');
PointerPrefAlign = inBytes(getInt(Tok));
+ if (!isPowerOf2_64(PointerPrefAlign))
+ report_fatal_error(
+ "Pointer preferred alignment must be a power of 2");
}
setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
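
These parse-time checks turn previously unchecked (or assert-only) conditions into hard errors. Hypothetical inputs that now abort via report_fatal_error:

    DataLayout Bad1("e-p:0:64");  // pointer size of 0 bytes
    DataLayout Bad2("e-p:64:24"); // ABI alignment of 3 bytes, not a power of 2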
@@ -304,6 +308,9 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing alignment specification in datalayout string");
Split = split(Rest, ':');
unsigned ABIAlign = inBytes(getInt(Tok));
+ if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
+ report_fatal_error(
+ "ABI alignment specification must be >0 for non-aggregate types");
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
@@ -352,7 +359,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
ManglingMode = MM_Mips;
break;
case 'w':
- ManglingMode = MM_WINCOFF;
+ ManglingMode = MM_WinCOFF;
+ break;
+ case 'x':
+ ManglingMode = MM_WinCOFFX86;
break;
}
break;
@@ -367,13 +377,7 @@ DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
init(M);
}
-void DataLayout::init(const Module *M) {
- const DataLayout *Other = M->getDataLayout();
- if (Other)
- *this = *Other;
- else
- reset("");
-}
+void DataLayout::init(const Module *M) { *this = M->getDataLayout(); }
bool DataLayout::operator==(const DataLayout &Other) const {
bool Ret = BigEndian == Other.BigEndian &&
@@ -381,16 +385,28 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
Alignments == Other.Alignments && Pointers == Other.Pointers;
- assert(Ret == (getStringRepresentation() == Other.getStringRepresentation()));
+  // Note: getStringRepresentation() might differ; it is not canonicalized.
return Ret;
}
void
DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
- assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
+ if (!isUInt<24>(bit_width))
+ report_fatal_error("Invalid bit width, must be a 24bit integer");
+ if (!isUInt<16>(abi_align))
+ report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
+ if (!isUInt<16>(pref_align))
+ report_fatal_error("Invalid preferred alignment, must be a 16bit integer");
+ if (abi_align != 0 && !isPowerOf2_64(abi_align))
+ report_fatal_error("Invalid ABI alignment, must be a power of 2");
+ if (pref_align != 0 && !isPowerOf2_64(pref_align))
+ report_fatal_error("Invalid preferred alignment, must be a power of 2");
+
+ if (pref_align < abi_align)
+ report_fatal_error(
+ "Preferred alignment cannot be less than the ABI alignment");
+
for (LayoutAlignElem &Elem : Alignments) {
if (Elem.AlignType == (unsigned)align_type &&
Elem.TypeBitWidth == bit_width) {
@@ -416,7 +432,10 @@ DataLayout::findPointerLowerBound(uint32_t AddressSpace) {
void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
unsigned PrefAlign,
uint32_t TypeByteWidth) {
- assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
+ if (PrefAlign < ABIAlign)
+ report_fatal_error(
+ "Preferred alignment cannot be less than the ABI alignment");
+
PointersTy::iterator I = findPointerLowerBound(AddrSpace);
if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign,
@@ -463,9 +482,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// If we didn't find an integer alignment, fall back on most conservative.
if (AlignType == INTEGER_ALIGN) {
BestMatchIdx = LargestInt;
- } else {
- assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
-
+ } else if (AlignType == VECTOR_ALIGN) {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
@@ -478,6 +495,19 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
}
}
+ // If we still couldn't find a reasonable default alignment, fall back
+ // to a simple heuristic that the alignment is the first power of two
+ // greater-or-equal to the store size of the type. This is a reasonable
+  // approximation of reality, and if the user wanted something less
+  // conservative, they should have specified it explicitly in the data
+ // layout.
+ if (BestMatchIdx == -1) {
+ unsigned Align = getTypeStoreSize(Ty);
+ if (Align & (Align-1))
+ Align = NextPowerOf2(Align);
+ return Align;
+ }
+
// Since we got a "best match" index, just return it.
return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
: Alignments[BestMatchIdx].PrefAlign;
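
A worked instance of the fallback for a type with no explicit entry and a 6-byte store size:

    unsigned Align = DL.getTypeStoreSize(Ty); // 6
    if (Align & (Align - 1))                  // 6 & 5 = 4, nonzero
      Align = NextPowerOf2(Align);            // rounds up to 8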
@@ -541,68 +571,6 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
return L;
}
-std::string DataLayout::getStringRepresentation() const {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << (BigEndian ? "E" : "e");
-
- switch (ManglingMode) {
- case MM_None:
- break;
- case MM_ELF:
- OS << "-m:e";
- break;
- case MM_MachO:
- OS << "-m:o";
- break;
- case MM_WINCOFF:
- OS << "-m:w";
- break;
- case MM_Mips:
- OS << "-m:m";
- break;
- }
-
- for (const PointerAlignElem &PI : Pointers) {
- // Skip default.
- if (PI.AddressSpace == 0 && PI.ABIAlign == 8 && PI.PrefAlign == 8 &&
- PI.TypeByteWidth == 8)
- continue;
-
- OS << "-p";
- if (PI.AddressSpace) {
- OS << PI.AddressSpace;
- }
- OS << ":" << PI.TypeByteWidth*8 << ':' << PI.ABIAlign*8;
- if (PI.PrefAlign != PI.ABIAlign)
- OS << ':' << PI.PrefAlign*8;
- }
-
- for (const LayoutAlignElem &AI : Alignments) {
- if (std::find(std::begin(DefaultAlignments), std::end(DefaultAlignments),
- AI) != std::end(DefaultAlignments))
- continue;
- OS << '-' << (char)AI.AlignType;
- if (AI.TypeBitWidth)
- OS << AI.TypeBitWidth;
- OS << ':' << AI.ABIAlign*8;
- if (AI.ABIAlign != AI.PrefAlign)
- OS << ':' << AI.PrefAlign*8;
- }
-
- if (!LegalIntWidths.empty()) {
- OS << "-n" << (unsigned)LegalIntWidths[0];
-
- for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
- OS << ':' << (unsigned)LegalIntWidths[i];
- }
-
- if (StackNaturalAlign)
- OS << "-S" << StackNaturalAlign*8;
-
- return OS.str();
-}
unsigned DataLayout::getPointerABIAlignment(unsigned AS) const {
PointersTy::const_iterator I = findPointerLowerBound(AS);
@@ -818,18 +786,3 @@ unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
return Log2_32(getPreferredAlignment(GV));
}
-DataLayoutPass::DataLayoutPass() : ImmutablePass(ID), DL("") {
- initializeDataLayoutPassPass(*PassRegistry::getPassRegistry());
-}
-
-DataLayoutPass::~DataLayoutPass() {}
-
-bool DataLayoutPass::doInitialization(Module &M) {
- DL.init(&M);
- return false;
-}
-
-bool DataLayoutPass::doFinalization(Module &M) {
- DL.reset("");
- return false;
-}
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 71b4394..2e161c2 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
@@ -32,914 +33,62 @@
using namespace llvm;
using namespace llvm::dwarf;
-//===----------------------------------------------------------------------===//
-// DIDescriptor
-//===----------------------------------------------------------------------===//
-
-bool DIDescriptor::Verify() const {
- return DbgNode &&
- (DIDerivedType(DbgNode).Verify() ||
- DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() ||
- DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() ||
- DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() ||
- DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() ||
- DILexicalBlock(DbgNode).Verify() ||
- DILexicalBlockFile(DbgNode).Verify() ||
- DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
- DIObjCProperty(DbgNode).Verify() ||
- DITemplateTypeParameter(DbgNode).Verify() ||
- DITemplateValueParameter(DbgNode).Verify() ||
- DIImportedEntity(DbgNode).Verify() || DIExpression(DbgNode).Verify());
-}
-
-static Metadata *getField(const MDNode *DbgNode, unsigned Elt) {
- if (!DbgNode || Elt >= DbgNode->getNumOperands())
- return nullptr;
- return DbgNode->getOperand(Elt);
-}
-
-static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
- return dyn_cast_or_null<MDNode>(getField(DbgNode, Elt));
-}
-
-static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
- if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
- return MDS->getString();
- return StringRef();
-}
-
-StringRef DIDescriptor::getStringField(unsigned Elt) const {
- return ::getStringField(DbgNode, Elt);
-}
-
-uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (auto *C = getConstantField(Elt))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return CI->getZExtValue();
-
- return 0;
-}
-
-int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
- if (auto *C = getConstantField(Elt))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return CI->getZExtValue();
-
- return 0;
-}
-
-DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
- MDNode *Field = getNodeField(DbgNode, Elt);
- return DIDescriptor(Field);
-}
-
-GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- return dyn_cast_or_null<GlobalVariable>(getConstantField(Elt));
-}
-
-Constant *DIDescriptor::getConstantField(unsigned Elt) const {
- if (!DbgNode)
- return nullptr;
-
- if (Elt < DbgNode->getNumOperands())
- if (auto *C =
- dyn_cast_or_null<ConstantAsMetadata>(DbgNode->getOperand(Elt)))
- return C->getValue();
+DISubprogram *llvm::getDISubprogram(const MDNode *Scope) {
+ if (auto *LocalScope = dyn_cast_or_null<DILocalScope>(Scope))
+ return LocalScope->getSubprogram();
return nullptr;
}
-Function *DIDescriptor::getFunctionField(unsigned Elt) const {
- return dyn_cast_or_null<Function>(getConstantField(Elt));
-}
-
-void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
- if (!DbgNode)
- return;
-
- if (Elt < DbgNode->getNumOperands()) {
- MDNode *Node = const_cast<MDNode *>(DbgNode);
- Node->replaceOperandWith(Elt, F ? ConstantAsMetadata::get(F) : nullptr);
- }
-}
-
-static unsigned DIVariableInlinedAtIndex = 4;
-MDNode *DIVariable::getInlinedAt() const {
- return getNodeField(DbgNode, DIVariableInlinedAtIndex);
-}
-
-/// \brief Return the size reported by the variable's type.
-unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) {
- DIType Ty = getType().resolve(Map);
- // Follow derived types until we reach a type that
- // reports back a size.
- while (Ty.isDerivedType() && !Ty.getSizeInBits()) {
- DIDerivedType DT(&*Ty);
- Ty = DT.getTypeDerivedFrom().resolve(Map);
- }
- assert(Ty.getSizeInBits() && "type with size 0");
- return Ty.getSizeInBits();
-}
-
-uint64_t DIExpression::getElement(unsigned Idx) const {
- unsigned I = Idx + 1;
- assert(I < getNumHeaderFields() &&
- "non-existing complex address element requested");
- return getHeaderFieldAs<int64_t>(I);
-}
-
-bool DIExpression::isVariablePiece() const {
- return getNumElements() && getElement(0) == dwarf::DW_OP_piece;
-}
-
-uint64_t DIExpression::getPieceOffset() const {
- assert(isVariablePiece());
- return getElement(1);
-}
-
-uint64_t DIExpression::getPieceSize() const {
- assert(isVariablePiece());
- return getElement(2);
-}
-
-//===----------------------------------------------------------------------===//
-// Predicates
-//===----------------------------------------------------------------------===//
-
-bool DIDescriptor::isSubroutineType() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subroutine_type;
-}
-
-bool DIDescriptor::isBasicType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_base_type:
- case dwarf::DW_TAG_unspecified_type:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isDerivedType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_typedef:
- case dwarf::DW_TAG_pointer_type:
- case dwarf::DW_TAG_ptr_to_member_type:
- case dwarf::DW_TAG_reference_type:
- case dwarf::DW_TAG_rvalue_reference_type:
- case dwarf::DW_TAG_const_type:
- case dwarf::DW_TAG_volatile_type:
- case dwarf::DW_TAG_restrict_type:
- case dwarf::DW_TAG_member:
- case dwarf::DW_TAG_inheritance:
- case dwarf::DW_TAG_friend:
- return true;
- default:
- // CompositeTypes are currently modelled as DerivedTypes.
- return isCompositeType();
- }
-}
-
-bool DIDescriptor::isCompositeType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_array_type:
- case dwarf::DW_TAG_structure_type:
- case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_enumeration_type:
- case dwarf::DW_TAG_subroutine_type:
- case dwarf::DW_TAG_class_type:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isVariable() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_auto_variable:
- case dwarf::DW_TAG_arg_variable:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isType() const {
- return isBasicType() || isCompositeType() || isDerivedType();
-}
-
-bool DIDescriptor::isSubprogram() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
-}
-
-bool DIDescriptor::isGlobalVariable() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
- getTag() == dwarf::DW_TAG_constant);
-}
-
-bool DIDescriptor::isScope() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_compile_unit:
- case dwarf::DW_TAG_lexical_block:
- case dwarf::DW_TAG_subprogram:
- case dwarf::DW_TAG_namespace:
- case dwarf::DW_TAG_file_type:
- return true;
- default:
- break;
- }
- return isType();
-}
-
-bool DIDescriptor::isTemplateTypeParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
-}
-
-bool DIDescriptor::isTemplateValueParameter() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_template_value_parameter ||
- getTag() == dwarf::DW_TAG_GNU_template_template_param ||
- getTag() == dwarf::DW_TAG_GNU_template_parameter_pack);
-}
-
-bool DIDescriptor::isCompileUnit() const {
- return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
-}
-
-bool DIDescriptor::isFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_file_type;
-}
-
-bool DIDescriptor::isNameSpace() const {
- return DbgNode && getTag() == dwarf::DW_TAG_namespace;
-}
-
-bool DIDescriptor::isLexicalBlockFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 2;
-}
-
-bool DIDescriptor::isLexicalBlock() const {
- // FIXME: There are always exactly 4 header fields in DILexicalBlock, but
- // something relies on this returning true for DILexicalBlockFile.
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- DbgNode->getNumOperands() == 3 &&
- (getNumHeaderFields() == 2 || getNumHeaderFields() == 4);
-}
-
-bool DIDescriptor::isSubrange() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
-}
-
-bool DIDescriptor::isEnumerator() const {
- return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
-}
-
-bool DIDescriptor::isObjCProperty() const {
- return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
-}
-
-bool DIDescriptor::isImportedEntity() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_imported_module ||
- getTag() == dwarf::DW_TAG_imported_declaration);
-}
-
-bool DIDescriptor::isExpression() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_expression);
-}
-
-//===----------------------------------------------------------------------===//
-// Simple Descriptor Constructors and other Methods
-//===----------------------------------------------------------------------===//
-
-void DIDescriptor::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
-
- assert(DbgNode && "Trying to replace an unverified type!");
-
- // Since we use a TrackingVH for the node, its easy for clients to manufacture
- // legitimate situations where they want to replaceAllUsesWith() on something
- // which, due to uniquing, has merged with the source. We shield clients from
- // this detail by allowing a value to be replaced with replaceAllUsesWith()
- // itself.
- const MDNode *DN = D;
- if (DbgNode == DN) {
- SmallVector<Metadata *, 10> Ops(DbgNode->getNumOperands());
- for (size_t i = 0; i != Ops.size(); ++i)
- Ops[i] = DbgNode->getOperand(i);
- DN = MDNode::get(VMContext, Ops);
- }
-
- auto *Node = cast<MDNodeFwdDecl>(const_cast<MDNode *>(DbgNode));
- Node->replaceAllUsesWith(const_cast<MDNode *>(DN));
- MDNode::deleteTemporary(Node);
- DbgNode = DN;
-}
-
-void DIDescriptor::replaceAllUsesWith(MDNode *D) {
- assert(DbgNode && "Trying to replace an unverified type!");
- assert(DbgNode != D && "This replacement should always happen");
- auto *Node = cast<MDNodeFwdDecl>(const_cast<MDNode *>(DbgNode));
- Node->replaceAllUsesWith(D);
- MDNode::deleteTemporary(Node);
-}
-
-bool DICompileUnit::Verify() const {
- if (!isCompileUnit())
- return false;
-
- // Don't bother verifying the compilation directory or producer string
- // as those could be empty.
- if (getFilename().empty())
- return false;
-
- return DbgNode->getNumOperands() == 7 && getNumHeaderFields() == 8;
-}
-
-bool DIObjCProperty::Verify() const {
- if (!isObjCProperty())
- return false;
-
- // Don't worry about the rest of the strings for now.
- return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 6;
-}
-
-/// \brief Check if a field at position Elt of a MDNode is a MDNode.
-///
-/// We currently allow an empty string and an integer.
-/// But we don't allow a non-empty string in a MDNode field.
-static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
- // FIXME: This function should return true, if the field is null or the field
- // is indeed a MDNode: return !Fld || isa<MDNode>(Fld).
- Metadata *Fld = getField(DbgNode, Elt);
- if (Fld && isa<MDString>(Fld) && !cast<MDString>(Fld)->getString().empty())
- return false;
- return true;
-}
-
-/// \brief Check if a field at position Elt of a MDNode is a MDString.
-static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) {
- Metadata *Fld = getField(DbgNode, Elt);
- return !Fld || isa<MDString>(Fld);
-}
-
-/// \brief Check if a value can be a reference to a type.
-static bool isTypeRef(const Metadata *MD) {
- if (!MD)
- return true;
- if (auto *S = dyn_cast<MDString>(MD))
- return !S->getString().empty();
- if (auto *N = dyn_cast<MDNode>(MD))
- return DIType(N).isType();
- return false;
-}
-
-/// \brief Check if referenced field might be a type.
-static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) {
- return isTypeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
-}
-
-/// \brief Check if a value can be a ScopeRef.
-static bool isScopeRef(const Metadata *MD) {
- if (!MD)
- return true;
- if (auto *S = dyn_cast<MDString>(MD))
- return !S->getString().empty();
- return isa<MDNode>(MD);
-}
-
-/// \brief Check if a field at position Elt of a MDNode can be a ScopeRef.
-static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) {
- return isScopeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
-}
-
-bool DIType::Verify() const {
- if (!isType())
- return false;
- // Make sure Context @ field 2 is MDNode.
- if (!fieldIsScopeRef(DbgNode, 2))
- return false;
-
- // FIXME: Sink this into the various subclass verifies.
- uint16_t Tag = getTag();
- if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
- Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
- Tag != dwarf::DW_TAG_ptr_to_member_type &&
- Tag != dwarf::DW_TAG_reference_type &&
- Tag != dwarf::DW_TAG_rvalue_reference_type &&
- Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type &&
- Tag != dwarf::DW_TAG_enumeration_type &&
- Tag != dwarf::DW_TAG_subroutine_type &&
- Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
- getFilename().empty())
- return false;
-
- // DIType is abstract, it should be a BasicType, a DerivedType or
- // a CompositeType.
- if (isBasicType())
- return DIBasicType(DbgNode).Verify();
- else if (isCompositeType())
- return DICompositeType(DbgNode).Verify();
- else if (isDerivedType())
- return DIDerivedType(DbgNode).Verify();
- else
- return false;
-}
-
-bool DIBasicType::Verify() const {
- return isBasicType() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 8;
-}
-
-bool DIDerivedType::Verify() const {
- // Make sure DerivedFrom @ field 3 is TypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
- if (getTag() == dwarf::DW_TAG_ptr_to_member_type)
- // Make sure ClassType @ field 4 is a TypeRef.
- if (!fieldIsTypeRef(DbgNode, 4))
- return false;
-
- return isDerivedType() && DbgNode->getNumOperands() >= 4 &&
- DbgNode->getNumOperands() <= 8 && getNumHeaderFields() >= 7 &&
- getNumHeaderFields() <= 8;
-}
-
-bool DICompositeType::Verify() const {
- if (!isCompositeType())
- return false;
-
- // Make sure DerivedFrom @ field 3 and ContainingType @ field 5 are TypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
- if (!fieldIsTypeRef(DbgNode, 5))
- return false;
-
- // Make sure the type identifier at field 7 is MDString, it can be null.
- if (!fieldIsMDString(DbgNode, 7))
- return false;
-
- // A subroutine type can't be both & and &&.
- if (isLValueReference() && isRValueReference())
- return false;
-
- return DbgNode->getNumOperands() == 8 && getNumHeaderFields() == 8;
-}
-
-bool DISubprogram::Verify() const {
- if (!isSubprogram())
- return false;
-
- // Make sure context @ field 2 is a ScopeRef and type @ field 3 is a MDNode.
- if (!fieldIsScopeRef(DbgNode, 2))
- return false;
- if (!fieldIsMDNode(DbgNode, 3))
- return false;
- // Containing type @ field 4.
- if (!fieldIsTypeRef(DbgNode, 4))
- return false;
-
- // A subprogram can't be both & and &&.
- if (isLValueReference() && isRValueReference())
- return false;
-
- // If a DISubprogram has an llvm::Function*, then scope chains from all
- // instructions within the function should lead to this DISubprogram.
- if (auto *F = getFunction()) {
- for (auto &BB : *F) {
- for (auto &I : BB) {
- DebugLoc DL = I.getDebugLoc();
- if (DL.isUnknown())
- continue;
-
- MDNode *Scope = nullptr;
- MDNode *IA = nullptr;
- // walk the inlined-at scopes
- while ((IA = DL.getInlinedAt()))
- DL = DebugLoc::getFromDILocation(IA);
- DL.getScopeAndInlinedAt(Scope, IA);
- if (!Scope)
- return false;
- assert(!IA);
- while (!DIDescriptor(Scope).isSubprogram()) {
- DILexicalBlockFile D(Scope);
- Scope = D.isLexicalBlockFile()
- ? D.getScope()
- : DebugLoc::getFromDILexicalBlock(Scope).getScope();
- assert(Scope && "lexical block file has no scope");
- }
- if (!DISubprogram(Scope).describes(F))
- return false;
- }
- }
- }
- return DbgNode->getNumOperands() == 9 && getNumHeaderFields() == 12;
-}
-
-bool DIGlobalVariable::Verify() const {
- if (!isGlobalVariable())
- return false;
-
- if (getDisplayName().empty())
- return false;
- // Make sure context @ field 1 is an MDNode.
- if (!fieldIsMDNode(DbgNode, 1))
- return false;
- // Make sure that type @ field 3 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
- // Make sure StaticDataMemberDeclaration @ field 5 is MDNode.
- if (!fieldIsMDNode(DbgNode, 5))
- return false;
-
- return DbgNode->getNumOperands() == 6 && getNumHeaderFields() == 7;
-}
-
-bool DIVariable::Verify() const {
- if (!isVariable())
- return false;
-
- // Make sure context @ field 1 is an MDNode.
- if (!fieldIsMDNode(DbgNode, 1))
- return false;
- // Make sure that type @ field 3 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
-
- // Check the number of header fields, which is common between complex and
- // simple variables.
- if (getNumHeaderFields() != 4)
- return false;
-
- // Variable without an inline location.
- if (DbgNode->getNumOperands() == 4)
- return true;
-
- // Variable with an inline location.
- return getInlinedAt() != nullptr && DbgNode->getNumOperands() == 5;
-}
-
-bool DIExpression::Verify() const {
- // Empty DIExpressions may be represented as a nullptr.
- if (!DbgNode)
- return true;
-
- return isExpression() && DbgNode->getNumOperands() == 1;
-}
-
-bool DILocation::Verify() const {
- return DbgNode && isa<MDLocation>(DbgNode);
-}
-
-bool DINameSpace::Verify() const {
- if (!isNameSpace())
- return false;
- return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 3;
-}
-
-MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); }
-
-bool DIFile::Verify() const {
- return isFile() && DbgNode->getNumOperands() == 2;
-}
-
-bool DIEnumerator::Verify() const {
- return isEnumerator() && DbgNode->getNumOperands() == 1 &&
- getNumHeaderFields() == 3;
-}
-
-bool DISubrange::Verify() const {
- return isSubrange() && DbgNode->getNumOperands() == 1 &&
- getNumHeaderFields() == 3;
-}
-
-bool DILexicalBlock::Verify() const {
- return isLexicalBlock() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 4;
-}
-
-bool DILexicalBlockFile::Verify() const {
- return isLexicalBlockFile() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 2;
-}
-
-bool DITemplateTypeParameter::Verify() const {
- return isTemplateTypeParameter() && DbgNode->getNumOperands() == 4 &&
- getNumHeaderFields() == 4;
-}
-
-bool DITemplateValueParameter::Verify() const {
- return isTemplateValueParameter() && DbgNode->getNumOperands() == 5 &&
- getNumHeaderFields() == 4;
-}
-
-bool DIImportedEntity::Verify() const {
- return isImportedEntity() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 3;
-}
-
-MDNode *DIDerivedType::getObjCProperty() const {
- return getNodeField(DbgNode, 4);
-}
-
-MDString *DICompositeType::getIdentifier() const {
- return cast_or_null<MDString>(getField(DbgNode, 7));
-}
-
-#ifndef NDEBUG
-static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) {
- for (unsigned i = 0; i != LHS->getNumOperands(); ++i) {
- // Skip the 'empty' list (that's a single i32 0, rather than truly empty).
- if (i == 0 && mdconst::hasa<ConstantInt>(LHS->getOperand(i)))
- continue;
- const MDNode *E = cast<MDNode>(LHS->getOperand(i));
- bool found = false;
- for (unsigned j = 0; !found && j != RHS->getNumOperands(); ++j)
- found = (E == cast<MDNode>(RHS->getOperand(j)));
- assert(found && "Losing a member during member list replacement");
- }
-}
-#endif
-
-void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) {
- TrackingMDNodeRef N(*this);
- if (Elements) {
-#ifndef NDEBUG
- // Check that the new list of members contains all the old members as well.
- if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(4)))
- VerifySubsetOf(El, Elements);
-#endif
- N->replaceOperandWith(4, Elements);
- }
- if (TParams)
- N->replaceOperandWith(6, TParams);
- DbgNode = N;
-}
-
-DIScopeRef DIScope::getRef() const {
- if (!isCompositeType())
- return DIScopeRef(*this);
- DICompositeType DTy(DbgNode);
- if (!DTy.getIdentifier())
- return DIScopeRef(*this);
- return DIScopeRef(DTy.getIdentifier());
-}
-
-void DICompositeType::setContainingType(DICompositeType ContainingType) {
- TrackingMDNodeRef N(*this);
- N->replaceOperandWith(5, ContainingType.getRef());
- DbgNode = N;
-}
-
-bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
- assert(CurFn && "Invalid function");
- if (!getContext().isSubprogram())
- return false;
- // This variable is not inlined function argument if its scope
- // does not describe current function.
- return !DISubprogram(getContext()).describes(CurFn);
-}
-
-bool DISubprogram::describes(const Function *F) {
- assert(F && "Invalid function");
- if (F == getFunction())
- return true;
- StringRef Name = getLinkageName();
- if (Name.empty())
- Name = getName();
- if (F->getName() == Name)
- return true;
- return false;
-}
-
-MDNode *DISubprogram::getVariablesNodes() const {
- return getNodeField(DbgNode, 8);
-}
-
-DIArray DISubprogram::getVariables() const {
- return DIArray(getNodeField(DbgNode, 8));
-}
-
-Metadata *DITemplateValueParameter::getValue() const {
- return DbgNode->getOperand(3);
-}
-
-DIScopeRef DIScope::getContext() const {
-
- if (isType())
- return DIType(DbgNode).getContext();
-
- if (isSubprogram())
- return DIScopeRef(DISubprogram(DbgNode).getContext());
-
- if (isLexicalBlock())
- return DIScopeRef(DILexicalBlock(DbgNode).getContext());
-
- if (isLexicalBlockFile())
- return DIScopeRef(DILexicalBlockFile(DbgNode).getContext());
-
- if (isNameSpace())
- return DIScopeRef(DINameSpace(DbgNode).getContext());
-
- assert((isFile() || isCompileUnit()) && "Unhandled type of scope.");
- return DIScopeRef(nullptr);
-}
-
-StringRef DIScope::getName() const {
- if (isType())
- return DIType(DbgNode).getName();
- if (isSubprogram())
- return DISubprogram(DbgNode).getName();
- if (isNameSpace())
- return DINameSpace(DbgNode).getName();
- assert((isLexicalBlock() || isLexicalBlockFile() || isFile() ||
- isCompileUnit()) &&
- "Unhandled type of scope.");
- return StringRef();
-}
-
-StringRef DIScope::getFilename() const {
- if (!DbgNode)
- return StringRef();
- return ::getStringField(getNodeField(DbgNode, 1), 0);
-}
-
-StringRef DIScope::getDirectory() const {
- if (!DbgNode)
- return StringRef();
- return ::getStringField(getNodeField(DbgNode, 1), 1);
-}
-
-DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 2));
-}
-
-DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 3));
-}
-
-DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 4));
-}
-
-DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 5));
-}
-
-DIArray DICompileUnit::getImportedEntities() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 6));
-}
-
-void DICompileUnit::replaceSubprograms(DIArray Subprograms) {
- assert(Verify() && "Expected compile unit");
- if (Subprograms == getSubprograms())
- return;
-
- const_cast<MDNode *>(DbgNode)->replaceOperandWith(4, Subprograms);
-}
-
-void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) {
- assert(Verify() && "Expected compile unit");
- if (GlobalVariables == getGlobalVariables())
- return;
-
- const_cast<MDNode *>(DbgNode)->replaceOperandWith(5, GlobalVariables);
-}
-
-DILocation DILocation::copyWithNewScope(LLVMContext &Ctx,
- DILexicalBlockFile NewScope) {
- assert(Verify());
- assert(NewScope && "Expected valid scope");
-
- const auto *Old = cast<MDLocation>(DbgNode);
- return DILocation(MDLocation::get(Ctx, Old->getLine(), Old->getColumn(),
- NewScope, Old->getInlinedAt()));
-}
-
-unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) {
- std::pair<const char *, unsigned> Key(getFilename().data(), getLineNumber());
- return ++Ctx.pImpl->DiscriminatorTable[Key];
-}
-
-DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
- LLVMContext &VMContext) {
- assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- if (!InlinedScope)
- return cleanseInlinedVariable(DV, VMContext);
-
- // Insert inlined scope.
- SmallVector<Metadata *, 8> Elts;
- for (unsigned I = 0, E = DIVariableInlinedAtIndex; I != E; ++I)
- Elts.push_back(DV->getOperand(I));
- Elts.push_back(InlinedScope);
-
- DIVariable Inlined(MDNode::get(VMContext, Elts));
- assert(Inlined.Verify() && "Expected to create a DIVariable");
- return Inlined;
-}
-
-DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
- assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- if (!DIVariable(DV).getInlinedAt())
- return DIVariable(DV);
-
- // Remove inlined scope.
- SmallVector<Metadata *, 8> Elts;
- for (unsigned I = 0, E = DIVariableInlinedAtIndex; I != E; ++I)
- Elts.push_back(DV->getOperand(I));
-
- DIVariable Cleansed(MDNode::get(VMContext, Elts));
- assert(Cleansed.Verify() && "Expected to create a DIVariable");
- return Cleansed;
-}
-
-DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
- DIDescriptor D(Scope);
- if (D.isSubprogram())
- return DISubprogram(Scope);
-
- if (D.isLexicalBlockFile())
- return getDISubprogram(DILexicalBlockFile(Scope).getContext());
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(Scope).getContext());
-
- return DISubprogram();
-}
-
-DISubprogram llvm::getDISubprogram(const Function *F) {
+DISubprogram *llvm::getDISubprogram(const Function *F) {
// We look for the first instr that has a debug annotation leading back to F.
for (auto &BB : *F) {
auto Inst = std::find_if(BB.begin(), BB.end(), [](const Instruction &Inst) {
- return !Inst.getDebugLoc().isUnknown();
+ return Inst.getDebugLoc();
});
if (Inst == BB.end())
continue;
DebugLoc DLoc = Inst->getDebugLoc();
- const MDNode *Scope = DLoc.getScopeNode();
- DISubprogram Subprogram = getDISubprogram(Scope);
- return Subprogram.describes(F) ? Subprogram : DISubprogram();
+ const MDNode *Scope = DLoc.getInlinedAtScope();
+ auto *Subprogram = getDISubprogram(Scope);
+ return Subprogram->describes(F) ? Subprogram : nullptr;
}
- return DISubprogram();
+ return nullptr;
}
-DICompositeType llvm::getDICompositeType(DIType T) {
- if (T.isCompositeType())
- return DICompositeType(T);
+DICompositeTypeBase *llvm::getDICompositeType(DIType *T) {
+ if (auto *C = dyn_cast_or_null<DICompositeTypeBase>(T))
+ return C;
- if (T.isDerivedType()) {
+ if (auto *D = dyn_cast_or_null<DIDerivedTypeBase>(T)) {
// This function is currently used by dragonegg and dragonegg does
    // not generate identifiers for types, so using an empty map to resolve
// DerivedFrom should be fine.
DITypeIdentifierMap EmptyMap;
- return getDICompositeType(
- DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap));
+ return getDICompositeType(D->getBaseType().resolve(EmptyMap));
}
- return DICompositeType();
+ return nullptr;
}
DITypeIdentifierMap
llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
DITypeIdentifierMap Map;
for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) {
- DICompileUnit CU(CU_Nodes->getOperand(CUi));
- DIArray Retain = CU.getRetainedTypes();
- for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) {
- if (!Retain.getElement(Ti).isCompositeType())
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(CUi));
+ DINodeArray Retain = CU->getRetainedTypes();
+ for (unsigned Ti = 0, Te = Retain.size(); Ti != Te; ++Ti) {
+ if (!isa<DICompositeType>(Retain[Ti]))
continue;
- DICompositeType Ty(Retain.getElement(Ti));
- if (MDString *TypeId = Ty.getIdentifier()) {
+ auto *Ty = cast<DICompositeType>(Retain[Ti]);
+ if (MDString *TypeId = Ty->getRawIdentifier()) {
// Definition has priority over declaration.
        // Try to insert (TypeId, Ty) into the Map.
std::pair<DITypeIdentifierMap::iterator, bool> P =
Map.insert(std::make_pair(TypeId, Ty));
// If TypeId already exists in Map and this is a definition, replace
// whatever we had (declaration or definition) with the definition.
- if (!P.second && !Ty.isForwardDecl())
+ if (!P.second && !Ty->isForwardDecl())
P.first->second = Ty;
}
}
@@ -974,168 +123,146 @@ void DebugInfoFinder::processModule(const Module &M) {
InitializeTypeMap(M);
if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
addCompileUnit(CU);
- DIArray GVs = CU.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
+ for (auto *DIG : CU->getGlobalVariables()) {
if (addGlobalVariable(DIG)) {
- processScope(DIG.getContext());
- processType(DIG.getType().resolve(TypeIdentifierMap));
+ processScope(DIG->getScope());
+ processType(DIG->getType().resolve(TypeIdentifierMap));
}
}
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- processSubprogram(DISubprogram(SPs.getElement(i)));
- DIArray EnumTypes = CU.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- processType(DIType(EnumTypes.getElement(i)));
- DIArray RetainedTypes = CU.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- processType(DIType(RetainedTypes.getElement(i)));
- DIArray Imports = CU.getImportedEntities();
- for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) {
- DIImportedEntity Import = DIImportedEntity(Imports.getElement(i));
- DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap);
- if (Entity.isType())
- processType(DIType(Entity));
- else if (Entity.isSubprogram())
- processSubprogram(DISubprogram(Entity));
- else if (Entity.isNameSpace())
- processScope(DINameSpace(Entity).getContext());
+ for (auto *SP : CU->getSubprograms())
+ processSubprogram(SP);
+ for (auto *ET : CU->getEnumTypes())
+ processType(ET);
+ for (auto *RT : CU->getRetainedTypes())
+ processType(RT);
+ for (auto *Import : CU->getImportedEntities()) {
+ auto *Entity = Import->getEntity().resolve(TypeIdentifierMap);
+ if (auto *T = dyn_cast<DIType>(Entity))
+ processType(T);
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity))
+ processSubprogram(SP);
+ else if (auto *NS = dyn_cast<DINamespace>(Entity))
+ processScope(NS->getScope());
}
}
}
}
-void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
+void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
if (!Loc)
return;
InitializeTypeMap(M);
- processScope(Loc.getScope());
- processLocation(M, Loc.getOrigLocation());
+ processScope(Loc->getScope());
+ processLocation(M, Loc->getInlinedAt());
}
-void DebugInfoFinder::processType(DIType DT) {
+void DebugInfoFinder::processType(DIType *DT) {
if (!addType(DT))
return;
- processScope(DT.getContext().resolve(TypeIdentifierMap));
- if (DT.isCompositeType()) {
- DICompositeType DCT(DT);
- processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
- if (DT.isSubroutineType()) {
- DITypeArray DTA = DISubroutineType(DT).getTypeArray();
- for (unsigned i = 0, e = DTA.getNumElements(); i != e; ++i)
- processType(DTA.getElement(i).resolve(TypeIdentifierMap));
+ processScope(DT->getScope().resolve(TypeIdentifierMap));
+ if (auto *DCT = dyn_cast<DICompositeTypeBase>(DT)) {
+ processType(DCT->getBaseType().resolve(TypeIdentifierMap));
+ if (auto *ST = dyn_cast<DISubroutineType>(DCT)) {
+ for (DITypeRef Ref : ST->getTypeArray())
+ processType(Ref.resolve(TypeIdentifierMap));
return;
}
- DIArray DA = DCT.getElements();
- for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
- DIDescriptor D = DA.getElement(i);
- if (D.isType())
- processType(DIType(D));
- else if (D.isSubprogram())
- processSubprogram(DISubprogram(D));
+ for (Metadata *D : DCT->getElements()) {
+ if (auto *T = dyn_cast<DIType>(D))
+ processType(T);
+ else if (auto *SP = dyn_cast<DISubprogram>(D))
+ processSubprogram(SP);
}
- } else if (DT.isDerivedType()) {
- DIDerivedType DDT(DT);
- processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
+ } else if (auto *DDT = dyn_cast<DIDerivedTypeBase>(DT)) {
+ processType(DDT->getBaseType().resolve(TypeIdentifierMap));
}
}
-void DebugInfoFinder::processScope(DIScope Scope) {
- if (Scope.isType()) {
- DIType Ty(Scope);
+void DebugInfoFinder::processScope(DIScope *Scope) {
+ if (!Scope)
+ return;
+ if (auto *Ty = dyn_cast<DIType>(Scope)) {
processType(Ty);
return;
}
- if (Scope.isCompileUnit()) {
- addCompileUnit(DICompileUnit(Scope));
+ if (auto *CU = dyn_cast<DICompileUnit>(Scope)) {
+ addCompileUnit(CU);
return;
}
- if (Scope.isSubprogram()) {
- processSubprogram(DISubprogram(Scope));
+ if (auto *SP = dyn_cast<DISubprogram>(Scope)) {
+ processSubprogram(SP);
return;
}
if (!addScope(Scope))
return;
- if (Scope.isLexicalBlock()) {
- DILexicalBlock LB(Scope);
- processScope(LB.getContext());
- } else if (Scope.isLexicalBlockFile()) {
- DILexicalBlockFile LBF = DILexicalBlockFile(Scope);
- processScope(LBF.getScope());
- } else if (Scope.isNameSpace()) {
- DINameSpace NS(Scope);
- processScope(NS.getContext());
+ if (auto *LB = dyn_cast<DILexicalBlockBase>(Scope)) {
+ processScope(LB->getScope());
+ } else if (auto *NS = dyn_cast<DINamespace>(Scope)) {
+ processScope(NS->getScope());
}
}
-void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
if (!addSubprogram(SP))
return;
- processScope(SP.getContext().resolve(TypeIdentifierMap));
- processType(SP.getType());
- DIArray TParams = SP.getTemplateParams();
- for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) {
- DIDescriptor Element = TParams.getElement(I);
- if (Element.isTemplateTypeParameter()) {
- DITemplateTypeParameter TType(Element);
- processScope(TType.getContext().resolve(TypeIdentifierMap));
- processType(TType.getType().resolve(TypeIdentifierMap));
- } else if (Element.isTemplateValueParameter()) {
- DITemplateValueParameter TVal(Element);
- processScope(TVal.getContext().resolve(TypeIdentifierMap));
- processType(TVal.getType().resolve(TypeIdentifierMap));
+ processScope(SP->getScope().resolve(TypeIdentifierMap));
+ processType(SP->getType());
+ for (auto *Element : SP->getTemplateParams()) {
+ if (auto *TType = dyn_cast<DITemplateTypeParameter>(Element)) {
+ processType(TType->getType().resolve(TypeIdentifierMap));
+ } else if (auto *TVal = dyn_cast<DITemplateValueParameter>(Element)) {
+ processType(TVal->getType().resolve(TypeIdentifierMap));
}
}
}
void DebugInfoFinder::processDeclare(const Module &M,
const DbgDeclareInst *DDI) {
- MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ auto *N = dyn_cast<MDNode>(DDI->getVariable());
if (!N)
return;
InitializeTypeMap(M);
- DIDescriptor DV(N);
- if (!DV.isVariable())
+ auto *DV = dyn_cast<DILocalVariable>(N);
+ if (!DV)
return;
if (!NodesSeen.insert(DV).second)
return;
- processScope(DIVariable(N).getContext());
- processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
+ processScope(DV->getScope());
+ processType(DV->getType().resolve(TypeIdentifierMap));
}
void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
- MDNode *N = dyn_cast<MDNode>(DVI->getVariable());
+ auto *N = dyn_cast<MDNode>(DVI->getVariable());
if (!N)
return;
InitializeTypeMap(M);
- DIDescriptor DV(N);
- if (!DV.isVariable())
+ auto *DV = dyn_cast<DILocalVariable>(N);
+ if (!DV)
return;
if (!NodesSeen.insert(DV).second)
return;
- processScope(DIVariable(N).getContext());
- processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
+ processScope(DV->getScope());
+ processType(DV->getType().resolve(TypeIdentifierMap));
}
-bool DebugInfoFinder::addType(DIType DT) {
+bool DebugInfoFinder::addType(DIType *DT) {
if (!DT)
return false;
if (!NodesSeen.insert(DT).second)
return false;
- TYs.push_back(DT);
+ TYs.push_back(const_cast<DIType *>(DT));
return true;
}
-bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+bool DebugInfoFinder::addCompileUnit(DICompileUnit *CU) {
if (!CU)
return false;
if (!NodesSeen.insert(CU).second)
@@ -1145,7 +272,7 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
return true;
}
-bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable *DIG) {
if (!DIG)
return false;
@@ -1156,7 +283,7 @@ bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
return true;
}
-bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+bool DebugInfoFinder::addSubprogram(DISubprogram *SP) {
if (!SP)
return false;
@@ -1167,7 +294,7 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
return true;
}
-bool DebugInfoFinder::addScope(DIScope Scope) {
+bool DebugInfoFinder::addScope(DIScope *Scope) {
if (!Scope)
return false;
  // FIXME: The OCaml binding generates a scope with no content; we treat it
@@ -1180,284 +307,17 @@ bool DebugInfoFinder::addScope(DIScope Scope) {
return true;
}
-//===----------------------------------------------------------------------===//
-// DIDescriptor: dump routines for all descriptors.
-//===----------------------------------------------------------------------===//
-
-void DIDescriptor::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-
-void DIDescriptor::print(raw_ostream &OS) const {
- if (!DbgNode)
- return;
-
- if (const char *Tag = dwarf::TagString(getTag()))
- OS << "[ " << Tag << " ]";
-
- if (this->isSubrange()) {
- DISubrange(DbgNode).printInternal(OS);
- } else if (this->isCompileUnit()) {
- DICompileUnit(DbgNode).printInternal(OS);
- } else if (this->isFile()) {
- DIFile(DbgNode).printInternal(OS);
- } else if (this->isEnumerator()) {
- DIEnumerator(DbgNode).printInternal(OS);
- } else if (this->isBasicType()) {
- DIType(DbgNode).printInternal(OS);
- } else if (this->isDerivedType()) {
- DIDerivedType(DbgNode).printInternal(OS);
- } else if (this->isCompositeType()) {
- DICompositeType(DbgNode).printInternal(OS);
- } else if (this->isSubprogram()) {
- DISubprogram(DbgNode).printInternal(OS);
- } else if (this->isGlobalVariable()) {
- DIGlobalVariable(DbgNode).printInternal(OS);
- } else if (this->isVariable()) {
- DIVariable(DbgNode).printInternal(OS);
- } else if (this->isObjCProperty()) {
- DIObjCProperty(DbgNode).printInternal(OS);
- } else if (this->isNameSpace()) {
- DINameSpace(DbgNode).printInternal(OS);
- } else if (this->isScope()) {
- DIScope(DbgNode).printInternal(OS);
- } else if (this->isExpression()) {
- DIExpression(DbgNode).printInternal(OS);
- }
-}
-
-void DISubrange::printInternal(raw_ostream &OS) const {
- int64_t Count = getCount();
- if (Count != -1)
- OS << " [" << getLo() << ", " << Count - 1 << ']';
- else
- OS << " [unbounded]";
-}
-
-void DIScope::printInternal(raw_ostream &OS) const {
- OS << " [" << getDirectory() << "/" << getFilename() << ']';
-}
-
-void DICompileUnit::printInternal(raw_ostream &OS) const {
- DIScope::printInternal(OS);
- OS << " [";
- unsigned Lang = getLanguage();
- if (const char *LangStr = dwarf::LanguageString(Lang))
- OS << LangStr;
- else
- (OS << "lang 0x").write_hex(Lang);
- OS << ']';
-}
-
-void DIEnumerator::printInternal(raw_ostream &OS) const {
- OS << " [" << getName() << " :: " << getEnumValue() << ']';
-}
-
-void DIType::printInternal(raw_ostream &OS) const {
- if (!DbgNode)
- return;
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << "]";
-
- // TODO: Print context?
-
- OS << " [line " << getLineNumber() << ", size " << getSizeInBits()
- << ", align " << getAlignInBits() << ", offset " << getOffsetInBits();
- if (isBasicType())
- if (const char *Enc =
- dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
- OS << ", enc " << Enc;
- OS << "]";
-
- if (isPrivate())
- OS << " [private]";
- else if (isProtected())
- OS << " [protected]";
- else if (isPublic())
- OS << " [public]";
-
- if (isArtificial())
- OS << " [artificial]";
-
- if (isForwardDecl())
- OS << " [decl]";
- else if (getTag() == dwarf::DW_TAG_structure_type ||
- getTag() == dwarf::DW_TAG_union_type ||
- getTag() == dwarf::DW_TAG_enumeration_type ||
- getTag() == dwarf::DW_TAG_class_type)
- OS << " [def]";
- if (isVector())
- OS << " [vector]";
- if (isStaticMember())
- OS << " [static]";
-
- if (isLValueReference())
- OS << " [reference]";
-
- if (isRValueReference())
- OS << " [rvalue reference]";
-}
-
-void DIDerivedType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- OS << " [from " << getTypeDerivedFrom().getName() << ']';
-}
-
-void DICompositeType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- DIArray A = getElements();
- OS << " [" << A.getNumElements() << " elements]";
-}
-
-void DINameSpace::printInternal(raw_ostream &OS) const {
- StringRef Name = getName();
- if (!Name.empty())
- OS << " [" << Name << ']';
-
- OS << " [line " << getLineNumber() << ']';
-}
-
-void DISubprogram::printInternal(raw_ostream &OS) const {
- // TODO : Print context
- OS << " [line " << getLineNumber() << ']';
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-
- if (getScopeLineNumber() != getLineNumber())
- OS << " [scope " << getScopeLineNumber() << "]";
-
- if (isPrivate())
- OS << " [private]";
- else if (isProtected())
- OS << " [protected]";
- else if (isPublic())
- OS << " [public]";
-
- if (isLValueReference())
- OS << " [reference]";
-
- if (isRValueReference())
- OS << " [rvalue reference]";
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-}
-
-void DIGlobalVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-
- // TODO : Print context
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-}
-
-void DIVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-}
-
-void DIExpression::printInternal(raw_ostream &OS) const {
- for (unsigned I = 0; I < getNumElements(); ++I) {
- uint64_t OpCode = getElement(I);
- OS << " [" << OperationEncodingString(OpCode);
- switch (OpCode) {
- case DW_OP_plus: {
- OS << " " << getElement(++I);
- break;
- }
- case DW_OP_piece: {
- unsigned Offset = getElement(++I);
- unsigned Size = getElement(++I);
- OS << " offset=" << Offset << ", size=" << Size;
- break;
- }
- case DW_OP_deref:
- // No arguments.
- break;
- default:
- // Else bail out early. This may be a line table entry.
- OS << "Unknown]";
- return;
- }
- OS << "]";
- }
-}
-
-void DIObjCProperty::printInternal(raw_ostream &OS) const {
- StringRef Name = getObjCPropertyName();
- if (!Name.empty())
- OS << " [" << Name << ']';
-
- OS << " [line " << getLineNumber() << ", properties " << getUnsignedField(6)
- << ']';
-}
-
-static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
- const LLVMContext &Ctx) {
- if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(Ctx));
- assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope.");
- // Omit the directory, because it's likely to be long and uninteresting.
- CommentOS << Scope.getFilename();
- CommentOS << ':' << DL.getLine();
- if (DL.getCol() != 0)
- CommentOS << ':' << DL.getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
- if (!InlinedAtDL.isUnknown()) {
- CommentOS << " @[ ";
- printDebugLoc(InlinedAtDL, CommentOS, Ctx);
- CommentOS << " ]";
- }
- }
-}
-
-void DIVariable::printExtendedName(raw_ostream &OS) const {
- const LLVMContext &Ctx = DbgNode->getContext();
- StringRef Res = getName();
- if (!Res.empty())
- OS << Res << "," << getLineNumber();
- if (MDNode *InlinedAt = getInlinedAt()) {
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown()) {
- OS << " @[";
- printDebugLoc(InlinedAtDL, OS, Ctx);
- OS << "]";
+bool llvm::stripDebugInfo(Function &F) {
+ bool Changed = false;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (I.getDebugLoc()) {
+ Changed = true;
+ I.setDebugLoc(DebugLoc());
+ }
}
}
-}
-
-template <> DIRef<DIScope>::DIRef(const Metadata *V) : Val(V) {
- assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode");
-}
-template <> DIRef<DIType>::DIRef(const Metadata *V) : Val(V) {
- assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode");
-}
-
-template <>
-DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const {
- return DIScopeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
-}
-template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const {
- return DITypeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
+ return Changed;
}
bool llvm::StripDebugInfo(Module &M) {
@@ -1493,41 +353,34 @@ bool llvm::StripDebugInfo(Module &M) {
}
}
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (!BI->getDebugLoc().isUnknown()) {
- Changed = true;
- BI->setDebugLoc(DebugLoc());
- }
- }
+ for (Function &F : M)
+ Changed |= stripDebugInfo(F);
+
+ if (GVMaterializer *Materializer = M.getMaterializer())
+ Materializer->setStripDebugInfo();
return Changed;
}
unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
- if (auto *Val = mdconst::extract_or_null<ConstantInt>(
+ if (auto *Val = mdconst::dyn_extract_or_null<ConstantInt>(
M.getModuleFlag("Debug Info Version")))
return Val->getZExtValue();
return 0;
}
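
The "Debug Info Version" flag queried here is an ordinary module flag. A minimal round-trip sketch, assuming the usual Module API and the DEBUG_METADATA_VERSION constant from llvm/IR/Metadata.h:

  M.addModuleFlag(llvm::Module::Warning, "Debug Info Version",
                  DEBUG_METADATA_VERSION);
  unsigned V = getDebugMetadataVersionFromModule(M); // 0 if the flag is absent
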
-llvm::DenseMap<const llvm::Function *, llvm::DISubprogram>
+DenseMap<const llvm::Function *, DISubprogram *>
llvm::makeSubprogramMap(const Module &M) {
- DenseMap<const Function *, DISubprogram> R;
+ DenseMap<const Function *, DISubprogram *> R;
NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return R;
for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit CUNode(N);
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (Function *F = SP.getFunction())
+ auto *CUNode = cast<DICompileUnit>(N);
+ for (auto *SP : CUNode->getSubprograms()) {
+ if (Function *F = SP->getFunction())
R.insert(std::make_pair(F, SP));
}
}
diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
new file mode 100644
index 0000000..8b9857d
--- /dev/null
+++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -0,0 +1,551 @@
+//===- DebugInfoMetadata.cpp - Implement debug info metadata --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the debug info Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "LLVMContextImpl.h"
+#include "MetadataImpl.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
+ unsigned Column, ArrayRef<Metadata *> MDs)
+ : MDNode(C, DILocationKind, Storage, MDs) {
+ assert((MDs.size() == 1 || MDs.size() == 2) &&
+ "Expected a scope and optional inlined-at");
+
+ // Set line and column.
+ assert(Column < (1u << 16) && "Expected 16-bit column");
+
+ SubclassData32 = Line;
+ SubclassData16 = Column;
+}
+
+static void adjustColumn(unsigned &Column) {
+ // Set to unknown on overflow. We only have 16 bits to play with here.
+ if (Column >= (1u << 16))
+ Column = 0;
+}
+
+DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
+ unsigned Column, Metadata *Scope,
+ Metadata *InlinedAt, StorageType Storage,
+ bool ShouldCreate) {
+ // Fixup column.
+ adjustColumn(Column);
+
+ assert(Scope && "Expected scope");
+ if (Storage == Uniqued) {
+ if (auto *N =
+ getUniqued(Context.pImpl->DILocations,
+ DILocationInfo::KeyTy(Line, Column, Scope, InlinedAt)))
+ return N;
+ if (!ShouldCreate)
+ return nullptr;
+ } else {
+ assert(ShouldCreate && "Expected non-uniqued nodes to always be created");
+ }
+
+ SmallVector<Metadata *, 2> Ops;
+ Ops.push_back(Scope);
+ if (InlinedAt)
+ Ops.push_back(InlinedAt);
+ return storeImpl(new (Ops.size())
+ DILocation(Context, Storage, Line, Column, Ops),
+ Storage, Context.pImpl->DILocations);
+}
+
+unsigned DILocation::computeNewDiscriminator() const {
+ // FIXME: This seems completely wrong.
+ //
+ // 1. If two modules are generated in the same context, then the second
+ // Module will get different discriminators than it would have if it were
+ // generated in its own context.
+ // 2. If this function is called after round-tripping to bitcode instead of
+  //    before, it will give a different (and potentially incorrect!) return
+  //    value.
+ //
+ // The discriminator should instead be calculated from local information
+ // where it's actually needed. This logic should be moved to
+ // AddDiscriminators::runOnFunction(), where it doesn't pollute the
+ // LLVMContext.
+ std::pair<const char *, unsigned> Key(getFilename().data(), getLine());
+ return ++getContext().pImpl->DiscriminatorTable[Key];
+}
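
A sketch of the intended call site (the FIXME above argues this logic belongs in AddDiscriminators); the surrounding names and accessors are illustrative, not part of this patch:

  // Give a second statement on the same file:line its own scope so a
  // sample profiler can tell the two copies apart.
  unsigned D = DIL->computeNewDiscriminator();
  auto *NewScope = DILexicalBlockFile::get(Ctx, DIL->getScope(),
                                           DIL->getScope()->getFile(), D);
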
+
+unsigned DINode::getFlag(StringRef Flag) {
+ return StringSwitch<unsigned>(Flag)
+#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
+#include "llvm/IR/DebugInfoFlags.def"
+ .Default(0);
+}
+
+const char *DINode::getFlagString(unsigned Flag) {
+ switch (Flag) {
+ default:
+ return "";
+#define HANDLE_DI_FLAG(ID, NAME) \
+ case Flag##NAME: \
+ return "DIFlag" #NAME;
+#include "llvm/IR/DebugInfoFlags.def"
+ }
+}
+
+unsigned DINode::splitFlags(unsigned Flags,
+ SmallVectorImpl<unsigned> &SplitFlags) {
+ // Accessibility flags need to be specially handled, since they're packed
+ // together.
+ if (unsigned A = Flags & FlagAccessibility) {
+ if (A == FlagPrivate)
+ SplitFlags.push_back(FlagPrivate);
+ else if (A == FlagProtected)
+ SplitFlags.push_back(FlagProtected);
+ else
+ SplitFlags.push_back(FlagPublic);
+ Flags &= ~A;
+ }
+
+#define HANDLE_DI_FLAG(ID, NAME) \
+ if (unsigned Bit = Flags & ID) { \
+ SplitFlags.push_back(Bit); \
+ Flags &= ~Bit; \
+ }
+#include "llvm/IR/DebugInfoFlags.def"
+
+ return Flags;
+}
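
A worked example: FlagPublic occupies the two-bit accessibility field, so it is peeled off as a unit before the single-bit flags are tested one at a time.

  SmallVector<unsigned, 8> Split;
  unsigned Rest =
      DINode::splitFlags(DINode::FlagPublic | DINode::FlagArtificial, Split);
  // Split now holds {FlagPublic, FlagArtificial}; Rest == 0.
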
+
+DIScopeRef DIScope::getScope() const {
+ if (auto *T = dyn_cast<DIType>(this))
+ return T->getScope();
+
+ if (auto *SP = dyn_cast<DISubprogram>(this))
+ return SP->getScope();
+
+ if (auto *LB = dyn_cast<DILexicalBlockBase>(this))
+ return DIScopeRef(LB->getScope());
+
+ if (auto *NS = dyn_cast<DINamespace>(this))
+ return DIScopeRef(NS->getScope());
+
+ assert((isa<DIFile>(this) || isa<DICompileUnit>(this)) &&
+ "Unhandled type of scope.");
+ return nullptr;
+}
+
+StringRef DIScope::getName() const {
+ if (auto *T = dyn_cast<DIType>(this))
+ return T->getName();
+ if (auto *SP = dyn_cast<DISubprogram>(this))
+ return SP->getName();
+ if (auto *NS = dyn_cast<DINamespace>(this))
+ return NS->getName();
+ assert((isa<DILexicalBlockBase>(this) || isa<DIFile>(this) ||
+ isa<DICompileUnit>(this)) &&
+ "Unhandled type of scope.");
+ return "";
+}
+
+static StringRef getString(const MDString *S) {
+ if (S)
+ return S->getString();
+ return StringRef();
+}
+
+#ifndef NDEBUG
+static bool isCanonical(const MDString *S) {
+ return !S || !S->getString().empty();
+}
+#endif
+
+GenericDINode *GenericDINode::getImpl(LLVMContext &Context, unsigned Tag,
+ MDString *Header,
+ ArrayRef<Metadata *> DwarfOps,
+ StorageType Storage, bool ShouldCreate) {
+ unsigned Hash = 0;
+ if (Storage == Uniqued) {
+ GenericDINodeInfo::KeyTy Key(Tag, getString(Header), DwarfOps);
+ if (auto *N = getUniqued(Context.pImpl->GenericDINodes, Key))
+ return N;
+ if (!ShouldCreate)
+ return nullptr;
+ Hash = Key.getHash();
+ } else {
+ assert(ShouldCreate && "Expected non-uniqued nodes to always be created");
+ }
+
+ // Use a nullptr for empty headers.
+ assert(isCanonical(Header) && "Expected canonical MDString");
+ Metadata *PreOps[] = {Header};
+ return storeImpl(new (DwarfOps.size() + 1) GenericDINode(
+ Context, Storage, Hash, Tag, PreOps, DwarfOps),
+ Storage, Context.pImpl->GenericDINodes);
+}
+
+void GenericDINode::recalculateHash() {
+ setHash(GenericDINodeInfo::KeyTy::calculateHash(this));
+}
+
+#define UNWRAP_ARGS_IMPL(...) __VA_ARGS__
+#define UNWRAP_ARGS(ARGS) UNWRAP_ARGS_IMPL ARGS
+#define DEFINE_GETIMPL_LOOKUP(CLASS, ARGS) \
+ do { \
+ if (Storage == Uniqued) { \
+ if (auto *N = getUniqued(Context.pImpl->CLASS##s, \
+ CLASS##Info::KeyTy(UNWRAP_ARGS(ARGS)))) \
+ return N; \
+ if (!ShouldCreate) \
+ return nullptr; \
+ } else { \
+ assert(ShouldCreate && \
+ "Expected non-uniqued nodes to always be created"); \
+ } \
+ } while (false)
+#define DEFINE_GETIMPL_STORE(CLASS, ARGS, OPS) \
+ return storeImpl(new (ArrayRef<Metadata *>(OPS).size()) \
+ CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \
+ Storage, Context.pImpl->CLASS##s)
+#define DEFINE_GETIMPL_STORE_NO_OPS(CLASS, ARGS) \
+ return storeImpl(new (0u) CLASS(Context, Storage, UNWRAP_ARGS(ARGS)), \
+ Storage, Context.pImpl->CLASS##s)
+#define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS) \
+ return storeImpl(new (ArrayRef<Metadata *>(OPS).size()) \
+ CLASS(Context, Storage, OPS), \
+ Storage, Context.pImpl->CLASS##s)
+
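
For orientation, DEFINE_GETIMPL_LOOKUP(DISubrange, (Count, Lo)) in the getter just below hand-expands to roughly:

  if (Storage == Uniqued) {
    if (auto *N = getUniqued(Context.pImpl->DISubranges,
                             DISubrangeInfo::KeyTy(Count, Lo)))
      return N;
    if (!ShouldCreate)
      return nullptr;
  } else {
    assert(ShouldCreate && "Expected non-uniqued nodes to always be created");
  }
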
+DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo,
+ StorageType Storage, bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DISubrange, (Count, Lo));
+ DEFINE_GETIMPL_STORE_NO_OPS(DISubrange, (Count, Lo));
+}
+
+DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, int64_t Value,
+ MDString *Name, StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIEnumerator, (Value, getString(Name)));
+ Metadata *Ops[] = {Name};
+ DEFINE_GETIMPL_STORE(DIEnumerator, (Value), Ops);
+}
+
+DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
+ MDString *Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(
+ DIBasicType, (Tag, getString(Name), SizeInBits, AlignInBits, Encoding));
+ Metadata *Ops[] = {nullptr, nullptr, Name};
+ DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding),
+ Ops);
+}
+
+DIDerivedType *DIDerivedType::getImpl(
+ LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ Metadata *ExtraData, StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIDerivedType, (Tag, getString(Name), File, Line, Scope,
+ BaseType, SizeInBits, AlignInBits,
+ OffsetInBits, Flags, ExtraData));
+ Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData};
+ DEFINE_GETIMPL_STORE(
+ DIDerivedType, (Tag, Line, SizeInBits, AlignInBits, OffsetInBits, Flags),
+ Ops);
+}
+
+DICompositeType *DICompositeType::getImpl(
+ LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
+ Metadata *TemplateParams, MDString *Identifier, StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DICompositeType,
+ (Tag, getString(Name), File, Line, Scope, BaseType,
+ SizeInBits, AlignInBits, OffsetInBits, Flags, Elements,
+ RuntimeLang, VTableHolder, TemplateParams,
+ getString(Identifier)));
+ Metadata *Ops[] = {File, Scope, Name, BaseType,
+ Elements, VTableHolder, TemplateParams, Identifier};
+ DEFINE_GETIMPL_STORE(DICompositeType, (Tag, Line, RuntimeLang, SizeInBits,
+ AlignInBits, OffsetInBits, Flags),
+ Ops);
+}
+
+DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context,
+ unsigned Flags, Metadata *TypeArray,
+ StorageType Storage,
+ bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, TypeArray));
+ Metadata *Ops[] = {nullptr, nullptr, nullptr, nullptr,
+ TypeArray, nullptr, nullptr, nullptr};
+ DEFINE_GETIMPL_STORE(DISubroutineType, (Flags), Ops);
+}
+
+DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename,
+ MDString *Directory, StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Filename) && "Expected canonical MDString");
+ assert(isCanonical(Directory) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIFile, (getString(Filename), getString(Directory)));
+ Metadata *Ops[] = {Filename, Directory};
+ DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIFile, Ops);
+}
+
+DICompileUnit *DICompileUnit::getImpl(
+ LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
+ MDString *Producer, bool IsOptimized, MDString *Flags,
+ unsigned RuntimeVersion, MDString *SplitDebugFilename,
+ unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
+ Metadata *Subprograms, Metadata *GlobalVariables,
+ Metadata *ImportedEntities, uint64_t DWOId,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Producer) && "Expected canonical MDString");
+ assert(isCanonical(Flags) && "Expected canonical MDString");
+ assert(isCanonical(SplitDebugFilename) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(
+ DICompileUnit,
+ (SourceLanguage, File, getString(Producer), IsOptimized, getString(Flags),
+ RuntimeVersion, getString(SplitDebugFilename), EmissionKind, EnumTypes,
+ RetainedTypes, Subprograms, GlobalVariables, ImportedEntities, DWOId));
+ Metadata *Ops[] = {File, Producer, Flags, SplitDebugFilename, EnumTypes,
+ RetainedTypes, Subprograms, GlobalVariables,
+ ImportedEntities};
+ DEFINE_GETIMPL_STORE(
+ DICompileUnit,
+ (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId), Ops);
+}
+
+DISubprogram *DILocalScope::getSubprogram() const {
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(this))
+ return Block->getScope()->getSubprogram();
+ return const_cast<DISubprogram *>(cast<DISubprogram>(this));
+}
+
+DISubprogram *DISubprogram::getImpl(
+ LLVMContext &Context, Metadata *Scope, MDString *Name,
+ MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
+ bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
+ Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
+ unsigned Flags, bool IsOptimized, Metadata *Function,
+ Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ assert(isCanonical(LinkageName) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DISubprogram,
+ (Scope, getString(Name), getString(LinkageName), File,
+ Line, Type, IsLocalToUnit, IsDefinition, ScopeLine,
+ ContainingType, Virtuality, VirtualIndex, Flags,
+ IsOptimized, Function, TemplateParams, Declaration,
+ Variables));
+ Metadata *Ops[] = {File, Scope, Name, Name,
+ LinkageName, Type, ContainingType, Function,
+ TemplateParams, Declaration, Variables};
+ DEFINE_GETIMPL_STORE(DISubprogram,
+ (Line, ScopeLine, Virtuality, VirtualIndex, Flags,
+ IsLocalToUnit, IsDefinition, IsOptimized),
+ Ops);
+}
+
+Function *DISubprogram::getFunction() const {
+ // FIXME: Should this be looking through bitcasts?
+ return dyn_cast_or_null<Function>(getFunctionConstant());
+}
+
+bool DISubprogram::describes(const Function *F) const {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ return F->getName() == Name;
+}
+
+void DISubprogram::replaceFunction(Function *F) {
+ replaceFunction(F ? ConstantAsMetadata::get(F)
+ : static_cast<ConstantAsMetadata *>(nullptr));
+}
+
+DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope,
+ Metadata *File, unsigned Line,
+ unsigned Column, StorageType Storage,
+ bool ShouldCreate) {
+ assert(Scope && "Expected scope");
+ DEFINE_GETIMPL_LOOKUP(DILexicalBlock, (Scope, File, Line, Column));
+ Metadata *Ops[] = {File, Scope};
+ DEFINE_GETIMPL_STORE(DILexicalBlock, (Line, Column), Ops);
+}
+
+DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
+ Metadata *Scope, Metadata *File,
+ unsigned Discriminator,
+ StorageType Storage,
+ bool ShouldCreate) {
+ assert(Scope && "Expected scope");
+ DEFINE_GETIMPL_LOOKUP(DILexicalBlockFile, (Scope, File, Discriminator));
+ Metadata *Ops[] = {File, Scope};
+ DEFINE_GETIMPL_STORE(DILexicalBlockFile, (Discriminator), Ops);
+}
+
+DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
+ Metadata *File, MDString *Name, unsigned Line,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, getString(Name), Line));
+ Metadata *Ops[] = {File, Scope, Name};
+ DEFINE_GETIMPL_STORE(DINamespace, (Line), Ops);
+}
+
+DITemplateTypeParameter *DITemplateTypeParameter::getImpl(LLVMContext &Context,
+ MDString *Name,
+ Metadata *Type,
+ StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DITemplateTypeParameter, (getString(Name), Type));
+ Metadata *Ops[] = {Name, Type};
+ DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DITemplateTypeParameter, Ops);
+}
+
+DITemplateValueParameter *DITemplateValueParameter::getImpl(
+ LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *Type,
+ Metadata *Value, StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DITemplateValueParameter,
+ (Tag, getString(Name), Type, Value));
+ Metadata *Ops[] = {Name, Type, Value};
+ DEFINE_GETIMPL_STORE(DITemplateValueParameter, (Tag), Ops);
+}
+
+DIGlobalVariable *
+DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ MDString *LinkageName, Metadata *File, unsigned Line,
+ Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
+ Metadata *Variable,
+ Metadata *StaticDataMemberDeclaration,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ assert(isCanonical(LinkageName) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIGlobalVariable,
+ (Scope, getString(Name), getString(LinkageName), File,
+ Line, Type, IsLocalToUnit, IsDefinition, Variable,
+ StaticDataMemberDeclaration));
+ Metadata *Ops[] = {Scope, Name, File, Type,
+ Name, LinkageName, Variable, StaticDataMemberDeclaration};
+ DEFINE_GETIMPL_STORE(DIGlobalVariable, (Line, IsLocalToUnit, IsDefinition),
+ Ops);
+}
+
+DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, unsigned Tag,
+ Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line,
+ Metadata *Type, unsigned Arg,
+ unsigned Flags, StorageType Storage,
+ bool ShouldCreate) {
+ // 64K ought to be enough for any frontend.
+ assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits");
+
+ assert(Scope && "Expected scope");
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Tag, Scope, getString(Name), File,
+ Line, Type, Arg, Flags));
+ Metadata *Ops[] = {Scope, Name, File, Type};
+ DEFINE_GETIMPL_STORE(DILocalVariable, (Tag, Line, Arg, Flags), Ops);
+}
+
+DIExpression *DIExpression::getImpl(LLVMContext &Context,
+ ArrayRef<uint64_t> Elements,
+ StorageType Storage, bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DIExpression, (Elements));
+ DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements));
+}
+
+unsigned DIExpression::ExprOperand::getSize() const {
+ switch (getOp()) {
+ case dwarf::DW_OP_bit_piece:
+ return 3;
+ case dwarf::DW_OP_plus:
+ return 2;
+ default:
+ return 1;
+ }
+}
+
+bool DIExpression::isValid() const {
+ for (auto I = expr_op_begin(), E = expr_op_end(); I != E; ++I) {
+ // Check that there's space for the operand.
+ if (I->get() + I->getSize() > E->get())
+ return false;
+
+ // Check that the operand is valid.
+ switch (I->getOp()) {
+ default:
+ return false;
+ case dwarf::DW_OP_bit_piece:
+ // Piece expressions must be at the end.
+ return I->get() + I->getSize() == E->get();
+ case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_deref:
+ break;
+ }
+ }
+ return true;
+}
+
+bool DIExpression::isBitPiece() const {
+ assert(isValid() && "Expected valid expression");
+ if (unsigned N = getNumElements())
+ if (N >= 3)
+ return getElement(N - 3) == dwarf::DW_OP_bit_piece;
+ return false;
+}
+
+uint64_t DIExpression::getBitPieceOffset() const {
+ assert(isBitPiece() && "Expected bit piece");
+ return getElement(getNumElements() - 2);
+}
+
+uint64_t DIExpression::getBitPieceSize() const {
+ assert(isBitPiece() && "Expected bit piece");
+ return getElement(getNumElements() - 1);
+}
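
The three accessors above agree on a single trailer layout; as an illustration:

  // Elements: ..., DW_OP_bit_piece, Offset, Size  (always the last three)
  // For {DW_OP_plus, 4, DW_OP_bit_piece, 16, 8}:
  //   isBitPiece()        -> true  (element N-3 is DW_OP_bit_piece)
  //   getBitPieceOffset() -> 16    (element N-2)
  //   getBitPieceSize()   -> 8     (element N-1)
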
+
+DIObjCProperty *DIObjCProperty::getImpl(
+ LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line,
+ MDString *GetterName, MDString *SetterName, unsigned Attributes,
+ Metadata *Type, StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ assert(isCanonical(GetterName) && "Expected canonical MDString");
+ assert(isCanonical(SetterName) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIObjCProperty,
+ (getString(Name), File, Line, getString(GetterName),
+ getString(SetterName), Attributes, Type));
+ Metadata *Ops[] = {Name, File, GetterName, SetterName, Type};
+ DEFINE_GETIMPL_STORE(DIObjCProperty, (Line, Attributes), Ops);
+}
+
+DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag,
+ Metadata *Scope, Metadata *Entity,
+ unsigned Line, MDString *Name,
+ StorageType Storage,
+ bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIImportedEntity,
+ (Tag, Scope, Entity, Line, getString(Name)));
+ Metadata *Ops[] = {Scope, Entity, Name};
+ DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops);
+}
diff --git a/contrib/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm/lib/IR/DebugLoc.cpp
index 075f295..72d5c0e 100644
--- a/contrib/llvm/lib/IR/DebugLoc.cpp
+++ b/contrib/llvm/lib/IR/DebugLoc.cpp
@@ -16,106 +16,87 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// DebugLoc Implementation
//===----------------------------------------------------------------------===//
+DebugLoc::DebugLoc(const DILocation *L) : Loc(const_cast<DILocation *>(L)) {}
+DebugLoc::DebugLoc(const MDNode *L) : Loc(const_cast<MDNode *>(L)) {}
-unsigned DebugLoc::getLine() const { return DILocation(Loc).getLineNumber(); }
-unsigned DebugLoc::getCol() const { return DILocation(Loc).getColumnNumber(); }
+DILocation *DebugLoc::get() const {
+ return cast_or_null<DILocation>(Loc.get());
+}
+
+unsigned DebugLoc::getLine() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getLine();
+}
-MDNode *DebugLoc::getScope() const { return DILocation(Loc).getScope(); }
+unsigned DebugLoc::getCol() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getColumn();
+}
-MDNode *DebugLoc::getInlinedAt() const {
- return DILocation(Loc).getOrigLocation();
+MDNode *DebugLoc::getScope() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getScope();
}
-/// Return both the Scope and the InlinedAt values.
-void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA) const {
- Scope = getScope();
- IA = getInlinedAt();
+DILocation *DebugLoc::getInlinedAt() const {
+ assert(get() && "Expected valid DebugLoc");
+ return get()->getInlinedAt();
}
-MDNode *DebugLoc::getScopeNode() const {
- if (MDNode *InlinedAt = getInlinedAt())
- return DebugLoc::getFromDILocation(InlinedAt).getScopeNode();
- return getScope();
+MDNode *DebugLoc::getInlinedAtScope() const {
+ return cast<DILocation>(Loc)->getInlinedAtScope();
}
DebugLoc DebugLoc::getFnDebugLoc() const {
- const MDNode *Scope = getScopeNode();
- DISubprogram SP = getDISubprogram(Scope);
- if (SP.isSubprogram())
- return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ // FIXME: Add a method on \a DILocation that does this work.
+ const MDNode *Scope = getInlinedAtScope();
+ if (auto *SP = getDISubprogram(Scope))
+ return DebugLoc::get(SP->getScopeLine(), 0, SP);
return DebugLoc();
}
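
Behavior sketch, with an assumed instruction Inst somewhere inside (or inlined into) a function F:

  DebugLoc DL = Inst->getDebugLoc();  // e.g. line 42, possibly inlined
  DebugLoc Fn = DL.getFnDebugLoc();   // F's subprogram scope line, column 0
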
-DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
- MDNode *Scope, MDNode *InlinedAt) {
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
+ const MDNode *InlinedAt) {
// If no scope is available, this is an unknown location.
if (!Scope)
return DebugLoc();
- // Saturate line and col to "unknown".
- // FIXME: Allow 16-bits for columns.
- if (Col > 255) Col = 0;
- if (Line >= (1 << 24)) Line = 0;
-
- return getFromDILocation(
- MDLocation::get(Scope->getContext(), Line, Col, Scope, InlinedAt));
-}
-
-/// getAsMDNode - This method converts the compressed DebugLoc node into a
-/// DILocation-compatible MDNode.
-MDNode *DebugLoc::getAsMDNode() const { return Loc; }
-
-/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
-DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
- DebugLoc Loc;
- Loc.Loc.reset(N);
- return Loc;
-}
-
-/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
-DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
- DILexicalBlock LexBlock(N);
- MDNode *Scope = LexBlock.getContext();
- if (!Scope) return DebugLoc();
- return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope,
- nullptr);
+ return DILocation::get(Scope->getContext(), Line, Col,
+ const_cast<MDNode *>(Scope),
+ const_cast<MDNode *>(InlinedAt));
}
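
A typical (assumed) use when synthesizing a location for a newly created instruction, where SP is the enclosing DISubprogram:

  NewInst->setDebugLoc(DebugLoc::get(Line, Col, SP, /*InlinedAt=*/nullptr));
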
void DebugLoc::dump() const {
#ifndef NDEBUG
- if (!isUnknown()) {
- dbgs() << getLine();
- if (getCol() != 0)
- dbgs() << ',' << getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt());
- if (!InlinedAtDL.isUnknown()) {
- dbgs() << " @ ";
- InlinedAtDL.dump();
- } else
- dbgs() << "\n";
- }
+ if (!Loc)
+ return;
+
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ if (DebugLoc InlinedAtDL = DebugLoc(getInlinedAt())) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump();
+ } else
+ dbgs() << "\n";
#endif
}
void DebugLoc::print(raw_ostream &OS) const {
- if (!isUnknown()) {
- // Print source line info.
- DIScope Scope(getScope());
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << getLine();
- if (getCol() != 0)
- OS << ':' << getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt());
- if (!InlinedAtDL.isUnknown()) {
- OS << " @[ ";
- InlinedAtDL.print(OS);
- OS << " ]";
- }
+ if (!Loc)
+ return;
+
+ // Print source line info.
+ auto *Scope = cast<DIScope>(getScope());
+ OS << Scope->getFilename();
+ OS << ':' << getLine();
+ if (getCol() != 0)
+ OS << ':' << getCol();
+
+ if (DebugLoc InlinedAtDL = getInlinedAt()) {
+ OS << " @[ ";
+ InlinedAtDL.print(OS);
+ OS << " ]";
}
}
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index cfb699a..45be100 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -129,16 +129,17 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
}
bool DiagnosticInfoOptimizationBase::isLocationAvailable() const {
- return getDebugLoc().isUnknown() == false;
+ return getDebugLoc();
}
void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename,
unsigned *Line,
unsigned *Column) const {
- DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext()));
- *Filename = DIL.getFilename();
- *Line = DIL.getLineNumber();
- *Column = DIL.getColumnNumber();
+ DILocation *L = getDebugLoc();
+ assert(L != nullptr && "debug location is invalid");
+ *Filename = L->getFilename();
+ *Line = L->getLine();
+ *Column = L->getColumn();
}
const std::string DiagnosticInfoOptimizationBase::getLocationStr() const {
@@ -147,7 +148,7 @@ const std::string DiagnosticInfoOptimizationBase::getLocationStr() const {
unsigned Column = 0;
if (isLocationAvailable())
getLocation(&Filename, &Line, &Column);
- return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
+ return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const {
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
index 9b6ff1e..e325889 100644
--- a/contrib/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -282,9 +282,6 @@ bool DominatorTree::isReachableFromEntry(const Use &U) const {
}
void DominatorTree::verifyDomTree() const {
- if (!VerifyDomInfo)
- return;
-
Function &F = *getRoot()->getParent();
DominatorTree OtherDT;
@@ -350,7 +347,10 @@ bool DominatorTreeWrapperPass::runOnFunction(Function &F) {
return false;
}
-void DominatorTreeWrapperPass::verifyAnalysis() const { DT.verifyDomTree(); }
+void DominatorTreeWrapperPass::verifyAnalysis() const {
+ if (VerifyDomInfo)
+ DT.verifyDomTree();
+}
void DominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const {
DT.print(OS);
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index bfd0ca6..cf8e3ed 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -19,10 +19,13 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/RWMutex.h"
@@ -117,6 +120,12 @@ uint64_t Argument::getDereferenceableBytes() const {
return getParent()->getDereferenceableBytes(getArgNo()+1);
}
+uint64_t Argument::getDereferenceableOrNullBytes() const {
+ assert(getType()->isPointerTy() &&
+ "Only pointers have dereferenceable bytes");
+ return getParent()->getDereferenceableOrNullBytes(getArgNo()+1);
+}
+
/// hasNestAttr - Return true if this argument has the nest attribute on
/// it in its containing function.
bool Argument::hasNestAttr() const {
@@ -206,18 +215,18 @@ void Argument::removeAttr(AttributeSet AS) {
//===----------------------------------------------------------------------===//
bool Function::isMaterializable() const {
- return getGlobalObjectSubClassData();
+ return getGlobalObjectSubClassData() & IsMaterializableBit;
}
-void Function::setIsMaterializable(bool V) { setGlobalObjectSubClassData(V); }
+void Function::setIsMaterializable(bool V) {
+ setGlobalObjectBit(IsMaterializableBit, V);
+}
LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-FunctionType *Function::getFunctionType() const {
- return cast<FunctionType>(getType()->getElementType());
-}
+FunctionType *Function::getFunctionType() const { return Ty; }
bool Function::isVarArg() const {
return getFunctionType()->isVarArg();
@@ -242,10 +251,11 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
Module *ParentModule)
: GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0,
- Linkage, name) {
+ Linkage, name),
+ Ty(Ty) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
- setIsMaterializable(false);
+ setGlobalObjectSubClassData(0);
SymTab = new ValueSymbolTable();
// If the function has arguments, mark them as lazily built.
@@ -256,9 +266,10 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
ParentModule->getFunctionList().push_back(this);
// Ensure intrinsics have the right parameter attributes.
- if (unsigned IID = getIntrinsicID())
- setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
-
+ // Note, the IntID field will have been set in Value::setName if this function
+ // name is a valid intrinsic ID.
+ if (IntID)
+ setAttributes(Intrinsic::getAttributes(getContext(), IntID));
}
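
An illustration of that flow, using a real intrinsic name but otherwise assumed boilerplate (FT would be double(double) here):

  Function *Sqrt = Function::Create(FT, Function::ExternalLinkage,
                                    "llvm.sqrt.f64", &M);
  // Value::setName recognized the name, so IntID is already Intrinsic::sqrt
  // and the canonical attributes were attached by the constructor above.
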
Function::~Function() {
@@ -270,10 +281,6 @@ Function::~Function() {
// Remove the function from the on-the-side GC table.
clearGC();
-
- // Remove the intrinsicID from the Cache.
- if (getValueName() && isIntrinsic())
- getContext().pImpl->IntrinsicIDCache.erase(this);
}
void Function::BuildLazyArguments() const {
@@ -323,6 +330,9 @@ void Function::dropAllReferences() {
// Prefix and prologue data are stored in a side table.
setPrefixData(nullptr);
setPrologueData(nullptr);
+
+ // Metadata is stored in a side-table.
+ clearMetadata();
}
void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -343,6 +353,18 @@ void Function::removeAttributes(unsigned i, AttributeSet attrs) {
setAttributes(PAL);
}
+void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
+void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
// Maintain the GC name for each function in an on-the-side table. This saves
// allocating an additional word in Function for programs which do not use GC
// (i.e., most programs) at the cost of increased overhead for clients which do
@@ -408,33 +430,9 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
setPrologueData(nullptr);
}
-/// getIntrinsicID - This method returns the ID number of the specified
-/// function, or Intrinsic::not_intrinsic if the function is not an
-/// intrinsic, or if the pointer is null. This value is always defined to be
-/// zero to allow easy checking for whether a function is intrinsic or not. The
-/// particular intrinsic functions which correspond to this value are defined in
-/// llvm/Intrinsics.h. Results are cached in the LLVM context, subsequent
-/// requests for the same ID return results much faster from the cache.
-///
-unsigned Function::getIntrinsicID() const {
- const ValueName *ValName = this->getValueName();
- if (!ValName || !isIntrinsic())
- return 0;
-
- LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
- getContext().pImpl->IntrinsicIDCache;
- if (!IntrinsicIDCache.count(this)) {
- unsigned Id = lookupIntrinsicID();
- IntrinsicIDCache[this]=Id;
- return Id;
- }
- return IntrinsicIDCache[this];
-}
-
-/// This private method does the actual lookup of an intrinsic ID when the query
-/// could not be answered from the cache.
-unsigned Function::lookupIntrinsicID() const {
- const ValueName *ValName = this->getValueName();
+/// \brief This does the actual lookup of an intrinsic ID which
+/// matches the given function name.
+static Intrinsic::ID lookupIntrinsicID(const ValueName *ValName) {
unsigned Len = ValName->getKeyLength();
const char *Name = ValName->getKeyData();
@@ -442,7 +440,16 @@ unsigned Function::lookupIntrinsicID() const {
#include "llvm/IR/Intrinsics.gen"
#undef GET_FUNCTION_RECOGNIZER
- return 0;
+ return Intrinsic::not_intrinsic;
+}
+
+void Function::recalculateIntrinsicID() {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName || !isIntrinsic()) {
+ IntID = Intrinsic::not_intrinsic;
+ return;
+ }
+ IntID = lookupIntrinsicID(ValName);
}
/// Returns a stable mangling for the type specified for use in the name
@@ -455,6 +462,10 @@ unsigned Function::lookupIntrinsicID() const {
 /// which can't be confused with its prefix. This ensures we don't have
/// collisions between two unrelated function types. Otherwise, you might
/// parse ffXX as f(fXX) or f(fX)X. (X is a placeholder for any other type.)
+/// Manglings of integers, floats, and vectors ('i', 'f', and 'v' prefix in most
+/// cases) fall back to the MVT codepath, where they could be mangled to
+/// 'x86mmx', for example; matching on derived types is not sufficient to mangle
+/// everything.
static std::string getMangledTypeStr(Type* Ty) {
std::string Result;
if (PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
@@ -464,10 +475,8 @@ static std::string getMangledTypeStr(Type* Ty) {
Result += "a" + llvm::utostr(ATyp->getNumElements()) +
getMangledTypeStr(ATyp->getElementType());
} else if (StructType* STyp = dyn_cast<StructType>(Ty)) {
- if (!STyp->isLiteral())
- Result += STyp->getName();
- else
- llvm_unreachable("TODO: implement literal types");
+ assert(!STyp->isLiteral() && "TODO: implement literal types");
+ Result += STyp->getName();
} else if (FunctionType* FT = dyn_cast<FunctionType>(Ty)) {
Result += "f_" + getMangledTypeStr(FT->getReturnType());
for (size_t i = 0; i < FT->getNumParams(); i++)
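
Worked examples of the scheme (the array and function branches are shown above; the pointer form "p<addrspace><pointee>" is assumed from the elided branch):

  //   [4 x i32]  -> "a4i32"
  //   i8*        -> "p0i8"   (address space 0, assumed)
  //   i32 (i8*)  -> "f_i32p0i8"
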
@@ -538,7 +547,9 @@ enum IIT_Info {
IIT_VARARG = 28,
IIT_HALF_VEC_ARG = 29,
IIT_SAME_VEC_WIDTH_ARG = 30,
- IIT_PTR_TO_ARG = 31
+ IIT_PTR_TO_ARG = 31,
+ IIT_VEC_OF_PTRS_TO_ELT = 32,
+ IIT_I128 = 33
};
@@ -585,6 +596,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I64:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
return;
+ case IIT_I128:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 128));
+ return;
case IIT_V1:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
DecodeIITType(NextElt, Infos, OutputTable);
@@ -658,6 +672,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
ArgInfo));
return;
}
+ case IIT_VEC_OF_PTRS_TO_ELT: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
+ ArgInfo));
+ return;
+ }
case IIT_EMPTYSTRUCT:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
return;
@@ -777,6 +797,15 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
Type *Ty = Tys[D.getArgumentNumber()];
return PointerType::getUnqual(Ty);
}
+ case IITDescriptor::VecOfPtrsToElt: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy)
+ llvm_unreachable("Expected an argument of Vector Type");
+ Type *EltTy = VTy->getVectorElementType();
+ return VectorType::get(PointerType::getUnqual(EltTy),
+ VTy->getNumElements());
+ }
}
llvm_unreachable("unhandled");
}
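
Concrete effect of the new VecOfPtrsToElt case: if the matched argument is <8 x float>, the decoded type is <8 x float*>, i.e.

  VectorType::get(PointerType::getUnqual(Type::getFloatTy(Ctx)), 8)
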
@@ -933,3 +962,19 @@ void Function::setPrologueData(Constant *PrologueData) {
}
setValueSubclassData(PDData);
}
+
+void Function::setEntryCount(uint64_t Count) {
+ MDBuilder MDB(getContext());
+ setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count));
+}
+
+Optional<uint64_t> Function::getEntryCount() const {
+ MDNode *MD = getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getOperand(0))
+ if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0)))
+ if (MDS->getString().equals("function_entry_count")) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ return CI->getValue().getZExtValue();
+ }
+ return None;
+}
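
The metadata shape read back here is exactly what MDBuilder::createFunctionEntryCount emits; a small usage sketch:

  F.setEntryCount(1000); // attaches !prof metadata to F
  if (Optional<uint64_t> Count = F.getEntryCount())
    errs() << F.getName() << ": " << *Count << " entries\n";
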
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
index 88e0cd0..6ed58913 100644
--- a/contrib/llvm/lib/IR/GCOV.cpp
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <system_error>
using namespace llvm;
@@ -28,12 +29,16 @@ using namespace llvm;
/// readGCNO - Read GCNO buffer.
bool GCOVFile::readGCNO(GCOVBuffer &Buffer) {
- if (!Buffer.readGCNOFormat()) return false;
- if (!Buffer.readGCOVVersion(Version)) return false;
+ if (!Buffer.readGCNOFormat())
+ return false;
+ if (!Buffer.readGCOVVersion(Version))
+ return false;
- if (!Buffer.readInt(Checksum)) return false;
+ if (!Buffer.readInt(Checksum))
+ return false;
while (true) {
- if (!Buffer.readFunctionTag()) break;
+ if (!Buffer.readFunctionTag())
+ break;
auto GFun = make_unique<GCOVFunction>(*this);
if (!GFun->readGCNO(Buffer, Version))
return false;
@@ -48,19 +53,22 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) {
/// called after readGCNO().
bool GCOVFile::readGCDA(GCOVBuffer &Buffer) {
assert(GCNOInitialized && "readGCDA() can only be called after readGCNO()");
- if (!Buffer.readGCDAFormat()) return false;
+ if (!Buffer.readGCDAFormat())
+ return false;
GCOV::GCOVVersion GCDAVersion;
- if (!Buffer.readGCOVVersion(GCDAVersion)) return false;
+ if (!Buffer.readGCOVVersion(GCDAVersion))
+ return false;
if (Version != GCDAVersion) {
errs() << "GCOV versions do not match.\n";
return false;
}
uint32_t GCDAChecksum;
- if (!Buffer.readInt(GCDAChecksum)) return false;
+ if (!Buffer.readInt(GCDAChecksum))
+ return false;
if (Checksum != GCDAChecksum) {
- errs() << "File checksums do not match: " << Checksum << " != "
- << GCDAChecksum << ".\n";
+ errs() << "File checksums do not match: " << Checksum
+ << " != " << GCDAChecksum << ".\n";
return false;
}
for (size_t i = 0, e = Functions.size(); i < e; ++i) {
@@ -74,15 +82,20 @@ bool GCOVFile::readGCDA(GCOVBuffer &Buffer) {
if (Buffer.readObjectTag()) {
uint32_t Length;
uint32_t Dummy;
- if (!Buffer.readInt(Length)) return false;
- if (!Buffer.readInt(Dummy)) return false; // checksum
- if (!Buffer.readInt(Dummy)) return false; // num
- if (!Buffer.readInt(RunCount)) return false;
- Buffer.advanceCursor(Length-3);
+ if (!Buffer.readInt(Length))
+ return false;
+ if (!Buffer.readInt(Dummy))
+ return false; // checksum
+ if (!Buffer.readInt(Dummy))
+ return false; // num
+ if (!Buffer.readInt(RunCount))
+ return false;
+ Buffer.advanceCursor(Length - 3);
}
while (Buffer.readProgramTag()) {
uint32_t Length;
- if (!Buffer.readInt(Length)) return false;
+ if (!Buffer.readInt(Length))
+ return false;
Buffer.advanceCursor(Length);
++ProgramCount;
}
@@ -112,21 +125,28 @@ void GCOVFile::collectLineCounts(FileInfo &FI) {
/// occurs.
bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
uint32_t Dummy;
- if (!Buff.readInt(Dummy)) return false; // Function header length
- if (!Buff.readInt(Ident)) return false;
- if (!Buff.readInt(Checksum)) return false;
+ if (!Buff.readInt(Dummy))
+ return false; // Function header length
+ if (!Buff.readInt(Ident))
+ return false;
+ if (!Buff.readInt(Checksum))
+ return false;
if (Version != GCOV::V402) {
uint32_t CfgChecksum;
- if (!Buff.readInt(CfgChecksum)) return false;
+ if (!Buff.readInt(CfgChecksum))
+ return false;
if (Parent.getChecksum() != CfgChecksum) {
errs() << "File checksums do not match: " << Parent.getChecksum()
<< " != " << CfgChecksum << " in (" << Name << ").\n";
return false;
}
}
- if (!Buff.readString(Name)) return false;
- if (!Buff.readString(Filename)) return false;
- if (!Buff.readInt(LineNumber)) return false;
+ if (!Buff.readString(Name))
+ return false;
+ if (!Buff.readString(Filename))
+ return false;
+ if (!Buff.readInt(LineNumber))
+ return false;
// read blocks.
if (!Buff.readBlockTag()) {
@@ -134,19 +154,23 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
return false;
}
uint32_t BlockCount;
- if (!Buff.readInt(BlockCount)) return false;
+ if (!Buff.readInt(BlockCount))
+ return false;
for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
- if (!Buff.readInt(Dummy)) return false; // Block flags;
+ if (!Buff.readInt(Dummy))
+ return false; // Block flags;
Blocks.push_back(make_unique<GCOVBlock>(*this, i));
}
// read edges.
while (Buff.readEdgeTag()) {
uint32_t EdgeCount;
- if (!Buff.readInt(EdgeCount)) return false;
+ if (!Buff.readInt(EdgeCount))
+ return false;
EdgeCount = (EdgeCount - 1) / 2;
uint32_t BlockNo;
- if (!Buff.readInt(BlockNo)) return false;
+ if (!Buff.readInt(BlockNo))
+ return false;
if (BlockNo >= BlockCount) {
errs() << "Unexpected block number: " << BlockNo << " (in " << Name
<< ").\n";
@@ -154,12 +178,14 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
}
for (uint32_t i = 0, e = EdgeCount; i != e; ++i) {
uint32_t Dst;
- if (!Buff.readInt(Dst)) return false;
+ if (!Buff.readInt(Dst))
+ return false;
Edges.push_back(make_unique<GCOVEdge>(*Blocks[BlockNo], *Blocks[Dst]));
GCOVEdge *Edge = Edges.back().get();
Blocks[BlockNo]->addDstEdge(Edge);
Blocks[Dst]->addSrcEdge(Edge);
- if (!Buff.readInt(Dummy)) return false; // Edge flag
+ if (!Buff.readInt(Dummy))
+ return false; // Edge flag
}
}
@@ -167,11 +193,13 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
while (Buff.readLineTag()) {
uint32_t LineTableLength;
// Read the length of this line table.
- if (!Buff.readInt(LineTableLength)) return false;
- uint32_t EndPos = Buff.getCursor() + LineTableLength*4;
+ if (!Buff.readInt(LineTableLength))
+ return false;
+ uint32_t EndPos = Buff.getCursor() + LineTableLength * 4;
uint32_t BlockNo;
// Read the block number this table is associated with.
- if (!Buff.readInt(BlockNo)) return false;
+ if (!Buff.readInt(BlockNo))
+ return false;
if (BlockNo >= BlockCount) {
errs() << "Unexpected block number: " << BlockNo << " (in " << Name
<< ").\n";
@@ -180,13 +208,15 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
GCOVBlock &Block = *Blocks[BlockNo];
// Read the word that pads the beginning of the line table. This may be a
// flag of some sort, but seems to always be zero.
- if (!Buff.readInt(Dummy)) return false;
+ if (!Buff.readInt(Dummy))
+ return false;
// Line information starts here and continues up until the last word.
if (Buff.getCursor() != (EndPos - sizeof(uint32_t))) {
StringRef F;
// Read the source file name.
- if (!Buff.readString(F)) return false;
+ if (!Buff.readString(F))
+ return false;
if (Filename != F) {
errs() << "Multiple sources for a single basic block: " << Filename
<< " != " << F << " (in " << Name << ").\n";
@@ -195,17 +225,21 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// Read lines up to, but not including, the null terminator.
while (Buff.getCursor() < (EndPos - 2 * sizeof(uint32_t))) {
uint32_t Line;
- if (!Buff.readInt(Line)) return false;
+ if (!Buff.readInt(Line))
+ return false;
// Line 0 means this instruction was injected by the compiler. Skip it.
- if (!Line) continue;
+ if (!Line)
+ continue;
Block.addLine(Line);
}
// Read the null terminator.
- if (!Buff.readInt(Dummy)) return false;
+ if (!Buff.readInt(Dummy))
+ return false;
}
    // The last word is either a flag or padding; it isn't clear which. Skip
// over it.
- if (!Buff.readInt(Dummy)) return false;
+ if (!Buff.readInt(Dummy))
+ return false;
}
return true;
}
@@ -214,27 +248,31 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
/// occurs.
bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
uint32_t Dummy;
- if (!Buff.readInt(Dummy)) return false; // Function header length
+ if (!Buff.readInt(Dummy))
+ return false; // Function header length
uint32_t GCDAIdent;
- if (!Buff.readInt(GCDAIdent)) return false;
+ if (!Buff.readInt(GCDAIdent))
+ return false;
if (Ident != GCDAIdent) {
- errs() << "Function identifiers do not match: " << Ident << " != "
- << GCDAIdent << " (in " << Name << ").\n";
+ errs() << "Function identifiers do not match: " << Ident
+ << " != " << GCDAIdent << " (in " << Name << ").\n";
return false;
}
uint32_t GCDAChecksum;
- if (!Buff.readInt(GCDAChecksum)) return false;
+ if (!Buff.readInt(GCDAChecksum))
+ return false;
if (Checksum != GCDAChecksum) {
- errs() << "Function checksums do not match: " << Checksum << " != "
- << GCDAChecksum << " (in " << Name << ").\n";
+ errs() << "Function checksums do not match: " << Checksum
+ << " != " << GCDAChecksum << " (in " << Name << ").\n";
return false;
}
uint32_t CfgChecksum;
if (Version != GCOV::V402) {
- if (!Buff.readInt(CfgChecksum)) return false;
+ if (!Buff.readInt(CfgChecksum))
+ return false;
if (Parent.getChecksum() != CfgChecksum) {
errs() << "File checksums do not match: " << Parent.getChecksum()
<< " != " << CfgChecksum << " (in " << Name << ").\n";
@@ -243,7 +281,8 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
}
StringRef GCDAName;
- if (!Buff.readString(GCDAName)) return false;
+ if (!Buff.readString(GCDAName))
+ return false;
if (Name != GCDAName) {
errs() << "Function names do not match: " << Name << " != " << GCDAName
<< ".\n";
@@ -256,7 +295,8 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
}
uint32_t Count;
- if (!Buff.readInt(Count)) return false;
+ if (!Buff.readInt(Count))
+ return false;
Count /= 2;
// This for loop adds the counts for each block. A second nested loop is
@@ -271,13 +311,14 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
- ++EdgeNo) {
+ ++EdgeNo) {
if (Count == 0) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
uint64_t ArcCount;
- if (!Buff.readInt64(ArcCount)) return false;
+ if (!Buff.readInt64(ArcCount))
+ return false;
Block.addCount(EdgeNo, ArcCount);
--Count;
}
@@ -351,9 +392,8 @@ void GCOVBlock::sortDstEdges() {
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
void GCOVBlock::collectLineCounts(FileInfo &FI) {
- for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
- E = Lines.end(); I != E; ++I)
- FI.addBlockLine(Parent.getFilename(), *I, this);
+ for (uint32_t N : Lines)
+ FI.addBlockLine(Parent.getFilename(), N, this);
}
/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
@@ -361,25 +401,20 @@ void GCOVBlock::dump() const {
dbgs() << "Block : " << Number << " Counter : " << Counter << "\n";
if (!SrcEdges.empty()) {
dbgs() << "\tSource Edges : ";
- for (EdgeIterator I = SrcEdges.begin(), E = SrcEdges.end(); I != E; ++I) {
- const GCOVEdge *Edge = *I;
+ for (const GCOVEdge *Edge : SrcEdges)
dbgs() << Edge->Src.Number << " (" << Edge->Count << "), ";
- }
dbgs() << "\n";
}
if (!DstEdges.empty()) {
dbgs() << "\tDestination Edges : ";
- for (EdgeIterator I = DstEdges.begin(), E = DstEdges.end(); I != E; ++I) {
- const GCOVEdge *Edge = *I;
+ for (const GCOVEdge *Edge : DstEdges)
dbgs() << Edge->Dst.Number << " (" << Edge->Count << "), ";
- }
dbgs() << "\n";
}
if (!Lines.empty()) {
dbgs() << "\tLines : ";
- for (SmallVectorImpl<uint32_t>::const_iterator I = Lines.begin(),
- E = Lines.end(); I != E; ++I)
- dbgs() << (*I) << ",";
+ for (uint32_t N : Lines)
+ dbgs() << (N) << ",";
dbgs() << "\n";
}
}
@@ -391,7 +426,7 @@ void GCOVBlock::dump() const {
static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) {
if (!Numerator)
return 0;
- return Numerator/Divisor;
+ return Numerator / Divisor;
}
// This custom division function mimics gcov's branch outputs:
@@ -403,7 +438,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) {
if (Numerator == Divisor)
return 100;
- uint8_t Res = (Numerator*100+Divisor/2) / Divisor;
+ uint8_t Res = (Numerator * 100 + Divisor / 2) / Divisor;
if (Res == 0)
return 1;
if (Res == 100)
@@ -411,10 +446,10 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) {
return Res;
}
+namespace {
struct formatBranchInfo {
- formatBranchInfo(const GCOVOptions &Options, uint64_t Count,
- uint64_t Total) :
- Options(Options), Count(Count), Total(Total) {}
+ formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total)
+ : Options(Options), Count(Count), Total(Total) {}
void print(raw_ostream &OS) const {
if (!Total)
@@ -435,10 +470,10 @@ static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) {
return OS;
}
-namespace {
class LineConsumer {
std::unique_ptr<MemoryBuffer> Buffer;
StringRef Remaining;
+
public:
LineConsumer(StringRef Filename) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
@@ -510,8 +545,7 @@ std::string FileInfo::getCoveragePath(StringRef Filename,
if (Options.LongFileNames && !Filename.equals(MainFilename))
CoveragePath =
mangleCoveragePath(MainFilename, Options.PreservePaths) + "##";
- CoveragePath +=
- mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov";
+ CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov";
return CoveragePath;
}
@@ -521,7 +555,7 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
return llvm::make_unique<raw_null_ostream>();
std::error_code EC;
- auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str(), EC,
+ auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC,
sys::fs::F_Text);
if (EC) {
errs() << EC.message() << "\n";
@@ -531,47 +565,44 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
}
/// print - Print source files with collected line count information.
-void FileInfo::print(StringRef MainFilename, StringRef GCNOFile,
- StringRef GCDAFile) {
- for (StringMap<LineData>::const_iterator I = LineInfo.begin(),
- E = LineInfo.end(); I != E; ++I) {
- StringRef Filename = I->first();
+void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
+ StringRef GCNOFile, StringRef GCDAFile) {
+ for (const auto &LI : LineInfo) {
+ StringRef Filename = LI.first();
auto AllLines = LineConsumer(Filename);
std::string CoveragePath = getCoveragePath(Filename, MainFilename);
- std::unique_ptr<raw_ostream> S = openCoveragePath(CoveragePath);
- raw_ostream &OS = *S;
+ std::unique_ptr<raw_ostream> CovStream = openCoveragePath(CoveragePath);
+ raw_ostream &CovOS = *CovStream;
- OS << " -: 0:Source:" << Filename << "\n";
- OS << " -: 0:Graph:" << GCNOFile << "\n";
- OS << " -: 0:Data:" << GCDAFile << "\n";
- OS << " -: 0:Runs:" << RunCount << "\n";
- OS << " -: 0:Programs:" << ProgramCount << "\n";
+ CovOS << " -: 0:Source:" << Filename << "\n";
+ CovOS << " -: 0:Graph:" << GCNOFile << "\n";
+ CovOS << " -: 0:Data:" << GCDAFile << "\n";
+ CovOS << " -: 0:Runs:" << RunCount << "\n";
+ CovOS << " -: 0:Programs:" << ProgramCount << "\n";
- const LineData &Line = I->second;
+ const LineData &Line = LI.second;
GCOVCoverage FileCoverage(Filename);
- for (uint32_t LineIndex = 0;
- LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) {
+ for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty();
+ ++LineIndex) {
if (Options.BranchInfo) {
FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex);
if (FuncsIt != Line.Functions.end())
- printFunctionSummary(OS, FuncsIt->second);
+ printFunctionSummary(CovOS, FuncsIt->second);
}
BlockLines::const_iterator BlocksIt = Line.Blocks.find(LineIndex);
if (BlocksIt == Line.Blocks.end()) {
// No basic blocks are on this line. Not an executable line of code.
- OS << " -:";
- AllLines.printNext(OS, LineIndex + 1);
+ CovOS << " -:";
+ AllLines.printNext(CovOS, LineIndex + 1);
} else {
const BlockVector &Blocks = BlocksIt->second;
// Add up the block counts to form line counts.
DenseMap<const GCOVFunction *, bool> LineExecs;
uint64_t LineCount = 0;
- for (BlockVector::const_iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I) {
- const GCOVBlock *Block = *I;
+ for (const GCOVBlock *Block : Blocks) {
if (Options.AllBlocks) {
// Only take the highest block count for that line.
uint64_t BlockCount = Block->getCount();
@@ -595,8 +626,8 @@ void FileInfo::print(StringRef MainFilename, StringRef GCNOFile,
// one of the blocks are executed.
const GCOVFunction *Function = &Block->getParent();
if (FuncCoverages.find(Function) == FuncCoverages.end()) {
- std::pair<const GCOVFunction *, GCOVCoverage>
- KeyValue(Function, GCOVCoverage(Function->getName()));
+ std::pair<const GCOVFunction *, GCOVCoverage> KeyValue(
+ Function, GCOVCoverage(Function->getName()));
FuncCoverages.insert(KeyValue);
}
GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second;
@@ -617,32 +648,30 @@ void FileInfo::print(StringRef MainFilename, StringRef GCNOFile,
}
if (LineCount == 0)
- OS << " #####:";
+ CovOS << " #####:";
else {
- OS << format("%9" PRIu64 ":", LineCount);
+ CovOS << format("%9" PRIu64 ":", LineCount);
++FileCoverage.LinesExec;
}
++FileCoverage.LogicalLines;
- AllLines.printNext(OS, LineIndex + 1);
+ AllLines.printNext(CovOS, LineIndex + 1);
uint32_t BlockNo = 0;
uint32_t EdgeNo = 0;
- for (BlockVector::const_iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I) {
- const GCOVBlock *Block = *I;
-
+ for (const GCOVBlock *Block : Blocks) {
// Only print block and branch information at the end of the block.
- if (Block->getLastLine() != LineIndex+1)
+ if (Block->getLastLine() != LineIndex + 1)
continue;
if (Options.AllBlocks)
- printBlockInfo(OS, *Block, LineIndex, BlockNo);
+ printBlockInfo(CovOS, *Block, LineIndex, BlockNo);
if (Options.BranchInfo) {
size_t NumEdges = Block->getNumDstEdges();
if (NumEdges > 1)
- printBranchInfo(OS, *Block, FileCoverage, EdgeNo);
+ printBranchInfo(CovOS, *Block, FileCoverage, EdgeNo);
else if (Options.UncondBranch && NumEdges == 1)
- printUncondBranchInfo(OS, EdgeNo, (*Block->dst_begin())->Count);
+ printUncondBranchInfo(CovOS, EdgeNo,
+ (*Block->dst_begin())->Count);
}
}
}
@@ -652,30 +681,25 @@ void FileInfo::print(StringRef MainFilename, StringRef GCNOFile,
// FIXME: There is no way to detect calls given current instrumentation.
if (Options.FuncCoverage)
- printFuncCoverage();
- printFileCoverage();
+ printFuncCoverage(InfoOS);
+ printFileCoverage(InfoOS);
return;
}
/// printFunctionSummary - Print function and block summary.
void FileInfo::printFunctionSummary(raw_ostream &OS,
const FunctionVector &Funcs) const {
- for (FunctionVector::const_iterator I = Funcs.begin(), E = Funcs.end();
- I != E; ++I) {
- const GCOVFunction *Func = *I;
+ for (const GCOVFunction *Func : Funcs) {
uint64_t EntryCount = Func->getEntryCount();
uint32_t BlocksExec = 0;
- for (GCOVFunction::BlockIterator I = Func->block_begin(),
- E = Func->block_end(); I != E; ++I) {
- const GCOVBlock &Block = **I;
+ for (const GCOVBlock &Block : Func->blocks())
if (Block.getNumDstEdges() && Block.getCount())
- ++BlocksExec;
- }
+ ++BlocksExec;
OS << "function " << Func->getName() << " called " << EntryCount
- << " returned " << safeDiv(Func->getExitCount()*100, EntryCount)
+ << " returned " << safeDiv(Func->getExitCount() * 100, EntryCount)
<< "% blocks executed "
- << safeDiv(BlocksExec*100, Func->getNumBlocks()-1) << "%\n";
+ << safeDiv(BlocksExec * 100, Func->getNumBlocks() - 1) << "%\n";
}
}
@@ -686,7 +710,7 @@ void FileInfo::printBlockInfo(raw_ostream &OS, const GCOVBlock &Block,
OS << " $$$$$:";
else
OS << format("%9" PRIu64 ":", Block.getCount());
- OS << format("%5u-block %2u\n", LineIndex+1, BlockNo++);
+ OS << format("%5u-block %2u\n", LineIndex + 1, BlockNo++);
}
/// printBranchInfo - Print conditional branch probabilities.
@@ -694,29 +718,29 @@ void FileInfo::printBranchInfo(raw_ostream &OS, const GCOVBlock &Block,
GCOVCoverage &Coverage, uint32_t &EdgeNo) {
SmallVector<uint64_t, 16> BranchCounts;
uint64_t TotalCounts = 0;
- for (GCOVBlock::EdgeIterator I = Block.dst_begin(), E = Block.dst_end();
- I != E; ++I) {
- const GCOVEdge *Edge = *I;
+ for (const GCOVEdge *Edge : Block.dsts()) {
BranchCounts.push_back(Edge->Count);
TotalCounts += Edge->Count;
- if (Block.getCount()) ++Coverage.BranchesExec;
- if (Edge->Count) ++Coverage.BranchesTaken;
+ if (Block.getCount())
+ ++Coverage.BranchesExec;
+ if (Edge->Count)
+ ++Coverage.BranchesTaken;
++Coverage.Branches;
if (Options.FuncCoverage) {
const GCOVFunction *Function = &Block.getParent();
GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second;
- if (Block.getCount()) ++FuncCoverage.BranchesExec;
- if (Edge->Count) ++FuncCoverage.BranchesTaken;
+ if (Block.getCount())
+ ++FuncCoverage.BranchesExec;
+ if (Edge->Count)
+ ++FuncCoverage.BranchesTaken;
++FuncCoverage.Branches;
}
}
- for (SmallVectorImpl<uint64_t>::const_iterator I = BranchCounts.begin(),
- E = BranchCounts.end(); I != E; ++I) {
+ for (uint64_t N : BranchCounts)
OS << format("branch %2u ", EdgeNo++)
- << formatBranchInfo(Options, *I, TotalCounts) << "\n";
- }
+ << formatBranchInfo(Options, N, TotalCounts) << "\n";
}
/// printUncondBranchInfo - Print unconditional branch probabilities.
@@ -728,46 +752,45 @@ void FileInfo::printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo,
// printCoverage - Print generic coverage info used by both printFuncCoverage
// and printFileCoverage.
-void FileInfo::printCoverage(const GCOVCoverage &Coverage) const {
- outs() << format("Lines executed:%.2f%% of %u\n",
- double(Coverage.LinesExec)*100/Coverage.LogicalLines,
- Coverage.LogicalLines);
+void FileInfo::printCoverage(raw_ostream &OS,
+ const GCOVCoverage &Coverage) const {
+ OS << format("Lines executed:%.2f%% of %u\n",
+ double(Coverage.LinesExec) * 100 / Coverage.LogicalLines,
+ Coverage.LogicalLines);
if (Options.BranchInfo) {
if (Coverage.Branches) {
- outs() << format("Branches executed:%.2f%% of %u\n",
- double(Coverage.BranchesExec)*100/Coverage.Branches,
- Coverage.Branches);
- outs() << format("Taken at least once:%.2f%% of %u\n",
- double(Coverage.BranchesTaken)*100/Coverage.Branches,
- Coverage.Branches);
+ OS << format("Branches executed:%.2f%% of %u\n",
+ double(Coverage.BranchesExec) * 100 / Coverage.Branches,
+ Coverage.Branches);
+ OS << format("Taken at least once:%.2f%% of %u\n",
+ double(Coverage.BranchesTaken) * 100 / Coverage.Branches,
+ Coverage.Branches);
} else {
- outs() << "No branches\n";
+ OS << "No branches\n";
}
- outs() << "No calls\n"; // to be consistent with gcov
+ OS << "No calls\n"; // to be consistent with gcov
}
}
// printFuncCoverage - Print per-function coverage info.
-void FileInfo::printFuncCoverage() const {
- for (FuncCoverageMap::const_iterator I = FuncCoverages.begin(),
- E = FuncCoverages.end(); I != E; ++I) {
- const GCOVCoverage &Coverage = I->second;
- outs() << "Function '" << Coverage.Name << "'\n";
- printCoverage(Coverage);
- outs() << "\n";
+void FileInfo::printFuncCoverage(raw_ostream &OS) const {
+ for (const auto &FC : FuncCoverages) {
+ const GCOVCoverage &Coverage = FC.second;
+ OS << "Function '" << Coverage.Name << "'\n";
+ printCoverage(OS, Coverage);
+ OS << "\n";
}
}
// printFileCoverage - Print per-file coverage info.
-void FileInfo::printFileCoverage() const {
- for (FileCoverageList::const_iterator I = FileCoverages.begin(),
- E = FileCoverages.end(); I != E; ++I) {
- const std::string &Filename = I->first;
- const GCOVCoverage &Coverage = I->second;
- outs() << "File '" << Coverage.Name << "'\n";
- printCoverage(Coverage);
+void FileInfo::printFileCoverage(raw_ostream &OS) const {
+ for (const auto &FC : FileCoverages) {
+ const std::string &Filename = FC.first;
+ const GCOVCoverage &Coverage = FC.second;
+ OS << "File '" << Coverage.Name << "'\n";
+ printCoverage(OS, Coverage);
if (!Options.NoOutput)
- outs() << Coverage.Name << ":creating '" << Filename << "'\n";
- outs() << "\n";
+ OS << Coverage.Name << ":creating '" << Filename << "'\n";
+ OS << "\n";
}
}
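
Editorial note: with the raw_ostream threaded through print/printFuncCoverage/printFileCoverage, callers now decide where the summary text goes instead of it being hard-wired to outs(). A minimal sketch, assuming an already-populated FileInfo (the file names are illustrative):

    #include "llvm/Support/GCOV.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void emitSummaries(FileInfo &FI) {
      // Any stream works now, e.g. errs() or a raw_string_ostream in tests.
      FI.print(errs(), "main.c", "main.gcno", "main.gcda");
    }
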
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 54197d9..1028e33 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -38,12 +38,8 @@ bool GlobalValue::isDematerializable() const {
std::error_code GlobalValue::materialize() {
return getParent()->materialize(this);
}
-void GlobalValue::Dematerialize() {
- getParent()->Dematerialize(this);
-}
-
-const DataLayout *GlobalValue::getDataLayout() const {
- return getParent()->getDataLayout();
+void GlobalValue::dematerialize() {
+ getParent()->dematerialize(this);
}
/// Override destroyConstant to make sure it doesn't get called on
@@ -245,40 +241,35 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
-GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
- const Twine &Name, Constant *Aliasee,
- Module *ParentModule)
- : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalAliasVal,
- &Op<0>(), 1, Link, Name) {
+GlobalAlias::GlobalAlias(PointerType *Ty, LinkageTypes Link, const Twine &Name,
+ Constant *Aliasee, Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
Op<0>() = Aliasee;
if (ParentModule)
ParentModule->getAliasList().push_back(this);
}
-GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
- LinkageTypes Link, const Twine &Name,
- Constant *Aliasee, Module *ParentModule) {
- return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule);
+GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Link,
+ const Twine &Name, Constant *Aliasee,
+ Module *ParentModule) {
+ return new GlobalAlias(Ty, Link, Name, Aliasee, ParentModule);
}
-GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
- LinkageTypes Linkage, const Twine &Name,
- Module *Parent) {
- return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent);
+GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage,
+ const Twine &Name, Module *Parent) {
+ return create(Ty, Linkage, Name, nullptr, Parent);
}
-GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
- LinkageTypes Linkage, const Twine &Name,
- GlobalValue *Aliasee) {
- return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent());
+GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage,
+ const Twine &Name, GlobalValue *Aliasee) {
+ return create(Ty, Linkage, Name, Aliasee, Aliasee->getParent());
}
GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name,
GlobalValue *Aliasee) {
PointerType *PTy = Aliasee->getType();
- return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name,
- Aliasee);
+ return create(PTy, Link, Name, Aliasee);
}
GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) {
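
Editorial note: under the reworked signature the alias is created straight from the aliasee's pointer type, with no element-type/address-space splitting. A minimal sketch, not part of the patch (the alias name is illustrative):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    GlobalAlias *makeAlias(Function *F) {
      // GlobalValue::getType() already returns a PointerType*, so it can
      // be passed through directly.
      return GlobalAlias::create(F->getType(), GlobalValue::ExternalLinkage,
                                 "f_alias", F, F->getParent());
    }
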
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index ef1f226..335cf36 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -17,13 +17,15 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Statepoint.h"
using namespace llvm;
/// CreateGlobalString - Make a new global variable with an initializer that
/// has an array of i8 type filled in with the nul-terminated string value
/// specified. If Name is specified, it is the name of the global variable
/// created.
-Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
+ const Twine &Name) {
Constant *StrConstant = ConstantDataArray::getString(Context, Str);
Module &M = *BB->getParent()->getParent();
GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
@@ -61,6 +63,19 @@ static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
return CI;
}
+static InvokeInst *createInvokeHelper(Value *Invokee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest,
+ ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder,
+ const Twine &Name = "") {
+ InvokeInst *II =
+ InvokeInst::Create(Invokee, NormalDest, UnwindDest, Ops, Name);
+ Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),
+ II);
+ Builder->SetInstDebugLocation(II);
+ return II;
+}
+
CallInst *IRBuilderBase::
CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -220,59 +235,104 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
/// Create a call to a Masked intrinsic, with given intrinsic Id,
/// an array of operands - Ops, and one overloaded type - DataTy
-CallInst *IRBuilderBase::CreateMaskedIntrinsic(unsigned Id,
+CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
ArrayRef<Value *> Ops,
Type *DataTy,
const Twine &Name) {
Module *M = BB->getParent()->getParent();
Type *OverloadedTypes[] = { DataTy };
- Value *TheFn = Intrinsic::getDeclaration(M, (Intrinsic::ID)Id, OverloadedTypes);
+ Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
return createCallHelper(TheFn, Ops, this, Name);
}
-CallInst *IRBuilderBase::CreateGCStatepoint(Value *ActualCallee,
- ArrayRef<Value*> CallArgs,
- ArrayRef<Value*> DeoptArgs,
- ArrayRef<Value*> GCArgs,
- const Twine& Name) {
- // Extract out the type of the callee.
- PointerType *FuncPtrType = cast<PointerType>(ActualCallee->getType());
- assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
- "actual callee must be a callable value");
+static std::vector<Value *>
+getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,
+ Value *ActualCallee, ArrayRef<Value *> CallArgs,
+ ArrayRef<Value *> DeoptArgs, ArrayRef<Value *> GCArgs) {
+ std::vector<Value *> Args;
+ Args.push_back(B.getInt64(ID));
+ Args.push_back(B.getInt32(NumPatchBytes));
+ Args.push_back(ActualCallee);
+ Args.push_back(B.getInt32(CallArgs.size()));
+ Args.push_back(B.getInt32((unsigned)StatepointFlags::None));
+ Args.insert(Args.end(), CallArgs.begin(), CallArgs.end());
+ Args.push_back(B.getInt32(0 /* no transition args */));
+ Args.push_back(B.getInt32(DeoptArgs.size()));
+ Args.insert(Args.end(), DeoptArgs.begin(), DeoptArgs.end());
+ Args.insert(Args.end(), GCArgs.begin(), GCArgs.end());
-
- Module *M = BB->getParent()->getParent();
- // Fill in the one generic type'd argument (the function is also vararg)
- Type *ArgTypes[] = { FuncPtrType };
- Function *FnStatepoint =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
- ArgTypes);
-
- std::vector<llvm::Value *> args;
- args.push_back(ActualCallee);
- args.push_back(getInt32(CallArgs.size()));
- args.push_back(getInt32(0 /*unused*/));
- args.insert(args.end(), CallArgs.begin(), CallArgs.end());
- args.push_back(getInt32(DeoptArgs.size()));
- args.insert(args.end(), DeoptArgs.begin(), DeoptArgs.end());
- args.insert(args.end(), GCArgs.begin(), GCArgs.end());
-
- return createCallHelper(FnStatepoint, args, this, Name);
+ return Args;
+}
+
+CallInst *IRBuilderBase::CreateGCStatepointCall(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
+ ArrayRef<Value *> CallArgs, ArrayRef<Value *> DeoptArgs,
+ ArrayRef<Value *> GCArgs, const Twine &Name) {
+ // Extract out the type of the callee.
+ PointerType *FuncPtrType = cast<PointerType>(ActualCallee->getType());
+ assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
+ "actual callee must be a callable value");
+
+ Module *M = BB->getParent()->getParent();
+ // Fill in the one generic type'd argument (the function is also vararg)
+ Type *ArgTypes[] = { FuncPtrType };
+ Function *FnStatepoint =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
+ ArgTypes);
+
+ std::vector<llvm::Value *> Args = getStatepointArgs(
+ *this, ID, NumPatchBytes, ActualCallee, CallArgs, DeoptArgs, GCArgs);
+ return createCallHelper(FnStatepoint, Args, this, Name);
+}
+
+CallInst *IRBuilderBase::CreateGCStatepointCall(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
+ ArrayRef<Use> CallArgs, ArrayRef<Value *> DeoptArgs,
+ ArrayRef<Value *> GCArgs, const Twine &Name) {
+ std::vector<Value *> VCallArgs;
+ for (auto &U : CallArgs)
+ VCallArgs.push_back(U.get());
+ return CreateGCStatepointCall(ID, NumPatchBytes, ActualCallee, VCallArgs,
+ DeoptArgs, GCArgs, Name);
+}
+
+InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
+ ArrayRef<Value *> InvokeArgs, ArrayRef<Value *> DeoptArgs,
+ ArrayRef<Value *> GCArgs, const Twine &Name) {
+ // Extract out the type of the callee.
+ PointerType *FuncPtrType = cast<PointerType>(ActualInvokee->getType());
+ assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
+ "actual callee must be a callable value");
+
+ Module *M = BB->getParent()->getParent();
+ // Fill in the one generic type'd argument (the function is also vararg)
+ Function *FnStatepoint = Intrinsic::getDeclaration(
+ M, Intrinsic::experimental_gc_statepoint, {FuncPtrType});
+
+ std::vector<llvm::Value *> Args = getStatepointArgs(
+ *this, ID, NumPatchBytes, ActualInvokee, InvokeArgs, DeoptArgs, GCArgs);
+ return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, this,
+ Name);
+}
+
+InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
+ uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef<Use> InvokeArgs,
+ ArrayRef<Value *> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
+ std::vector<Value *> VCallArgs;
+ for (auto &U : InvokeArgs)
+ VCallArgs.push_back(U.get());
+ return CreateGCStatepointInvoke(ID, NumPatchBytes, ActualInvokee, NormalDest,
+ UnwindDest, VCallArgs, DeoptArgs, GCArgs,
+ Name);
}
CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
Type *ResultType,
const Twine &Name) {
- Intrinsic::ID ID;
- if (ResultType->isIntegerTy()) {
- ID = Intrinsic::experimental_gc_result_int;
- } else if (ResultType->isFloatingPointTy()) {
- ID = Intrinsic::experimental_gc_result_float;
- } else if (ResultType->isPointerTy()) {
- ID = Intrinsic::experimental_gc_result_ptr;
- } else {
- llvm_unreachable("unimplemented result type for gc.result");
- }
+ Intrinsic::ID ID = Intrinsic::experimental_gc_result;
Module *M = BB->getParent()->getParent();
Type *Types[] = {ResultType};
Value *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
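
Editorial note: putting the reworked statepoint builders together, callers now supply a statepoint ID and a patchable-byte count up front, and gc.result is a single overloaded intrinsic rather than three typed variants. A minimal sketch, assuming a positioned builder (the ID and NumPatchBytes values are illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    CallInst *callViaStatepoint(IRBuilder<> &B, Value *Callee,
                                ArrayRef<Value *> Args, Type *RetTy) {
      CallInst *SP = B.CreateGCStatepointCall(/*ID=*/1, /*NumPatchBytes=*/0,
                                              Callee, Args, /*DeoptArgs=*/{},
                                              /*GCArgs=*/{}, "safepoint");
      // One gc.result overload now covers integer, FP and pointer results.
      return B.CreateGCResult(SP, RetTy, "ret");
    }
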
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index 91ccfbb..c1ac336 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -21,11 +21,14 @@
using namespace llvm;
PrintModulePass::PrintModulePass() : OS(dbgs()) {}
-PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner)
- : OS(OS), Banner(Banner) {}
+PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
+ bool ShouldPreserveUseListOrder)
+ : OS(OS), Banner(Banner),
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
PreservedAnalyses PrintModulePass::run(Module &M) {
- OS << Banner << M;
+ OS << Banner;
+ M.print(OS, nullptr, ShouldPreserveUseListOrder);
return PreservedAnalyses::all();
}
@@ -46,8 +49,9 @@ class PrintModulePassWrapper : public ModulePass {
public:
static char ID;
PrintModulePassWrapper() : ModulePass(ID) {}
- PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner)
- : ModulePass(ID), P(OS, Banner) {}
+ PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner,
+ bool ShouldPreserveUseListOrder)
+ : ModulePass(ID), P(OS, Banner, ShouldPreserveUseListOrder) {}
bool runOnModule(Module &M) override {
P.run(M);
@@ -112,8 +116,9 @@ INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", "Print BB to stderr", false,
false)
ModulePass *llvm::createPrintModulePass(llvm::raw_ostream &OS,
- const std::string &Banner) {
- return new PrintModulePassWrapper(OS, Banner);
+ const std::string &Banner,
+ bool ShouldPreserveUseListOrder) {
+ return new PrintModulePassWrapper(OS, Banner, ShouldPreserveUseListOrder);
}
FunctionPass *llvm::createPrintFunctionPass(llvm::raw_ostream &OS,
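
Editorial note: the new flag plumbs use-list-order preservation through to Module::print. A minimal sketch of requesting it when setting up a legacy pass pipeline; not part of the patch:

    #include "llvm/IR/IRPrintingPasses.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void addPrinter(legacy::PassManager &PM) {
      // true => emit use-list order information so the printed module
      // round-trips with identical use-list ordering.
      PM.add(createPrintModulePass(outs(), "; module dump\n",
                                   /*ShouldPreserveUseListOrder=*/true));
    }
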
diff --git a/contrib/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm/lib/IR/InlineAsm.cpp
index 16d874f3..aa9e027 100644
--- a/contrib/llvm/lib/IR/InlineAsm.cpp
+++ b/contrib/llvm/lib/IR/InlineAsm.cpp
@@ -73,9 +73,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
unsigned multipleAlternativeCount = Str.count('|') + 1;
unsigned multipleAlternativeIndex = 0;
ConstraintCodeVector *pCodes = &Codes;
-
+
// Initialize
- isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
+ isMultipleAlternative = multipleAlternativeCount > 1;
if (isMultipleAlternative) {
multipleAlternatives.resize(multipleAlternativeCount);
pCodes = &multipleAlternatives[0].Codes;
@@ -99,12 +99,12 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
++I;
Type = isOutput;
}
-
+
if (*I == '*') {
isIndirect = true;
++I;
}
-
+
if (I == E) return true; // Just a prefix, like "==" or "~".
// Parse the modifiers.
@@ -167,7 +167,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// Note that operand #n has a matching input.
scInfo.MatchingInput = ConstraintsSoFar.size();
} else {
- if (ConstraintsSoFar[N].hasMatchingInput())
+ if (ConstraintsSoFar[N].hasMatchingInput() &&
+ (size_t)ConstraintsSoFar[N].MatchingInput !=
+ ConstraintsSoFar.size())
return true;
// Note that operand #n has a matching input.
ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
@@ -228,7 +230,10 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
I = ConstraintEnd;
if (I != E) {
++I;
- if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ if (I == E) {
+ Result.clear();
+ break;
+ } // don't allow "xyz,"
}
}
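
Editorial note: the stricter check above makes ConstraintInfo::Parse fail when a constraint ties to an operand that already has a different matching input, and ParseConstraints reports failure by returning an empty vector. A minimal validation sketch, not part of the patch (the constraint string is illustrative):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    bool constraintsOK(StringRef Constraints) {
      // For a non-empty string, an empty result means the parse failed
      // (see the Result.clear() paths above).
      return !InlineAsm::ParseConstraints(Constraints).empty();
    }
    // constraintsOK("=r,0") accepts a simple tied operand; a duplicate tie
    // to a different operand is now rejected.
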
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index 92c6e9f..45bb296 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -32,10 +32,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
}
}
-const DataLayout *Instruction::getDataLayout() const {
- return getParent()->getDataLayout();
-}
-
Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd)
: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {
@@ -58,12 +54,21 @@ void Instruction::setParent(BasicBlock *P) {
Parent = P;
}
+const Module *Instruction::getModule() const {
+ return getParent()->getModule();
+}
+
+Module *Instruction::getModule() {
+ return getParent()->getModule();
+}
+
+
void Instruction::removeFromParent() {
getParent()->getInstList().remove(this);
}
-void Instruction::eraseFromParent() {
- getParent()->getInstList().erase(this);
+iplist<Instruction>::iterator Instruction::eraseFromParent() {
+ return getParent()->getInstList().erase(this);
}
/// insertBefore - Insert an unlinked instruction into a basic block
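
Editorial note: returning the successor iterator makes erase-while-iterating loops direct. A minimal sketch, not part of the patch (isInstructionTriviallyDead, from Transforms/Utils/Local.h, is just an example predicate):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    void dropDeadInsts(BasicBlock &BB) {
      for (auto It = BB.getInstList().begin();
           It != BB.getInstList().end();) {
        Instruction *I = &*It;
        if (isInstructionTriviallyDead(I))
          It = I->eraseFromParent(); // now returns the next valid iterator
        else
          ++It;
      }
    }
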
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 132800e..1e9d9a5 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -227,7 +227,7 @@ void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
ReservedSpace = NumReservedValues;
NumOperands = 1;
OperandList = allocHungoffUses(ReservedSpace);
- OperandList[0] = PersFn;
+ Op<0>() = PersFn;
setName(NameStr);
setCleanup(false);
}
@@ -263,14 +263,13 @@ void LandingPadInst::addClause(Constant *Val) {
CallInst::~CallInst() {
}
-void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr) {
+ this->FTy = FTy;
assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
Op<-1>() = Func;
#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-
assert((Args.size() == FTy->getNumParams() ||
(FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
"Calling a function with bad signature!");
@@ -286,15 +285,12 @@ void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
}
void CallInst::init(Value *Func, const Twine &NameStr) {
+ FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
assert(NumOperands == 1 && "NumOperands not set up?");
Op<-1>() = Func;
-#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
-#endif
setName(NameStr);
}
@@ -320,10 +316,10 @@ CallInst::CallInst(Value *Func, const Twine &Name,
}
CallInst::CallInst(const CallInst &CI)
- : Instruction(CI.getType(), Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
- CI.getNumOperands()) {
- setAttributes(CI.getAttributes());
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()),
+ AttributeList(CI.AttributeList), FTy(CI.FTy) {
setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
@@ -346,6 +342,18 @@ void CallInst::removeAttribute(unsigned i, Attribute attr) {
setAttributes(PAL);
}
+void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
+void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
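
Editorial note: these mirror the dereferenceable helpers Function already had, so front ends can annotate call sites directly. A minimal sketch, not part of the patch (the byte count is illustrative):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    void markResultDereferenceable(CallInst *CI, uint64_t Bytes) {
      // AttributeSet::ReturnIndex (0) targets the return value; use 1..N
      // for parameters.
      CI->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
    }
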
@@ -529,17 +537,17 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
// InvokeInst Implementation
//===----------------------------------------------------------------------===//
-void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr) {
+void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr) {
+ this->FTy = FTy;
+
assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
Op<-3>() = Fn;
Op<-2>() = IfNormal;
Op<-1>() = IfException;
#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
-
assert(((Args.size() == FTy->getNumParams()) ||
(FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
"Invoking a function with bad signature");
@@ -555,11 +563,11 @@ void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
}
InvokeInst::InvokeInst(const InvokeInst &II)
- : TerminatorInst(II.getType(), Instruction::Invoke,
- OperandTraits<InvokeInst>::op_end(this)
- - II.getNumOperands(),
- II.getNumOperands()) {
- setAttributes(II.getAttributes());
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this) -
+ II.getNumOperands(),
+ II.getNumOperands()),
+ AttributeList(II.AttributeList), FTy(II.FTy) {
setCallingConv(II.getCallingConv());
std::copy(II.op_begin(), II.op_end(), op_begin());
SubclassOptionalData = II.SubclassOptionalData;
@@ -605,6 +613,18 @@ void InvokeInst::removeAttribute(unsigned i, Attribute attr) {
setAttributes(PAL);
}
+void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
+void InvokeInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+}
+
LandingPadInst *InvokeInst::getLandingPadInst() const {
return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
}
@@ -829,46 +849,25 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
return Amt;
}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore)
+ : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertBefore) {}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
+ : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertAtEnd) {}
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), nullptr), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+ : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertBefore) {}
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), nullptr), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+ : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertAtEnd) {}
AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore),
+ AllocatedType(Ty) {
setAlignment(Align);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
@@ -876,8 +875,9 @@ AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
+ AllocatedType(Ty) {
setAlignment(Align);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
@@ -902,10 +902,6 @@ bool AllocaInst::isArrayAllocation() const {
return true;
}
-Type *AllocaInst::getAllocatedType() const {
- return getType()->getElementType();
-}
-
/// isStaticAlloca - Return true if this alloca is in the entry block of the
/// function and is a constant size. If so, the code generator will fold it
/// into the prolog/epilog code, so it is basically free.
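
Editorial note: after this consolidation every shorter AllocaInst form delegates to the alignment-taking constructor, so the two spellings below are equivalent. A sketch, not part of the patch (names illustrative):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    AllocaInst *makeSlot(Type *Ty, Instruction *InsertBefore) {
      // Equivalent to:
      //   new AllocaInst(Ty, /*ArraySize=*/nullptr, /*Align=*/0, "slot",
      //                  InsertBefore);
      return new AllocaInst(Ty, "slot", InsertBefore);
    }
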
@@ -930,75 +926,34 @@ void LoadInst::AssertOK() {
}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertBef) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertAE) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ty, Ptr, Name, isVolatile, Align, NotAtomic, CrossThread,
+ InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
+ : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) {
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
+ SynchronizationScope SynchScope, Instruction *InsertBef)
+ : UnaryInstruction(Ty, Load, Ptr, InsertBef) {
+ assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SynchScope);
@@ -1039,10 +994,10 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
if (Name && Name[0]) setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const char *Name, bool isVolatile,
Instruction *InsertBef)
-: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
+ : UnaryInstruction(Ty, Load, Ptr, InsertBef) {
+ assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
setAlignment(0);
setAtomic(NotAtomic);
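
Editorial note: the explicit result type on LoadInst is groundwork for typeless pointers; for now it must still equal the pointee type, as the new assert enforces. A minimal sketch of the new form, not part of the patch (names illustrative):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    LoadInst *loadTyped(Value *Ptr, Instruction *InsertBefore) {
      Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
      // The loaded type is now spelled out instead of being re-derived
      // inside the constructor.
      return new LoadInst(Ty, Ptr, "val", /*isVolatile=*/false,
                          InsertBefore);
    }
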
@@ -1085,60 +1040,29 @@ void StoreInst::AssertOK() {
"Alignment required for atomic store");
}
-
StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, /*isVolatile=*/false, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, /*isVolatile=*/false, InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ BasicBlock *InsertAtEnd)
+ : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+ Instruction *InsertBefore)
+ : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread,
+ InsertBefore) {}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+ BasicBlock *InsertAtEnd)
+ : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread,
+ InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
@@ -1157,34 +1081,6 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd)
@@ -1343,16 +1239,17 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name) {
assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
- OperandList[0] = Ptr;
+ Op<0>() = Ptr;
std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
setName(Name);
}
GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
- : Instruction(GEPI.getType(), GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this)
- - GEPI.getNumOperands(),
- GEPI.getNumOperands()) {
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) -
+ GEPI.getNumOperands(),
+ GEPI.getNumOperands()),
+ SourceElementType(GEPI.SourceElementType) {
std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
SubclassOptionalData = GEPI.SubclassOptionalData;
}
@@ -1367,11 +1264,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
/// pointer type.
///
template <typename IndexTy>
-static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
- PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
- if (!PTy) return nullptr; // Type isn't a pointer type!
- Type *Agg = PTy->getElementType();
-
+static Type *getIndexedTypeInternal(Type *Agg, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
@@ -1392,17 +1285,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
return CurIdx == IdxList.size() ? Agg : nullptr;
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Value *> IdxList) {
+ return getIndexedTypeInternal(Ty, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr,
+Type *GetElementPtrInst::getIndexedType(Type *Ty,
ArrayRef<Constant *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+ return getIndexedTypeInternal(Ty, IdxList);
}
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
+Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList) {
+ return getIndexedTypeInternal(Ty, IdxList);
}
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
@@ -2157,21 +2050,15 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const {
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
-bool CastInst::isNoopCast(const DataLayout *DL) const {
- if (!DL) {
- // Assume maximum pointer size.
- return isNoopCast(Type::getInt64Ty(getContext()));
- }
-
+bool CastInst::isNoopCast(const DataLayout &DL) const {
Type *PtrOpTy = nullptr;
if (getOpcode() == Instruction::PtrToInt)
PtrOpTy = getOperand(0)->getType();
else if (getOpcode() == Instruction::IntToPtr)
PtrOpTy = getType();
- Type *IntPtrTy = PtrOpTy
- ? DL->getIntPtrType(PtrOpTy)
- : DL->getIntPtrType(getContext(), 0);
+ Type *IntPtrTy =
+ PtrOpTy ? DL.getIntPtrType(PtrOpTy) : DL.getIntPtrType(getContext(), 0);
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
@@ -2227,7 +2114,7 @@ unsigned CastInst::isEliminableCastPair(
// N X X U S F F N X N 2 V V |
// C T T I I P P C T T P T T -+
{ 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc -+
- { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3, 0}, // ZExt |
+ { 8, 1, 9,99,99, 2,17,99,99,99, 2, 3, 0}, // ZExt |
{ 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt |
{ 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI |
{ 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI |
@@ -2394,6 +2281,9 @@ unsigned CastInst::isEliminableCastPair(
"Illegal bitcast, ptrtoint sequence!");
// Allowed, use second cast's opcode
return secondOp;
+ case 17:
+ // (sitofp (zext x)) -> (uitofp x)
+ return Instruction::UIToFP;
case 99:
// Cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
@@ -2644,44 +2534,38 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
// Run through the possibilities ...
if (DestTy->isIntegerTy()) { // Casting to integral
- if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcTy->isIntegerTy()) // Casting from integral
return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (SrcTy->isFloatingPointTy()) // Casting from floating pt
return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
+ if (SrcTy->isVectorTy()) // Casting from vector
return DestBits == SrcBits;
- } else { // Casting from something else
- return SrcTy->isPointerTy();
- }
- } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
- if (SrcTy->isIntegerTy()) { // Casting from integral
+ // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) // Casting from integral
return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (SrcTy->isFloatingPointTy()) // Casting from floating pt
return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
+ if (SrcTy->isVectorTy()) // Casting from vector
return DestBits == SrcBits;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isVectorTy()) { // Casting to vector
+ // Casting from something else
+ return false;
+ }
+ if (DestTy->isVectorTy()) // Casting to vector
return DestBits == SrcBits;
- } else if (DestTy->isPointerTy()) { // Casting to pointer
- if (SrcTy->isPointerTy()) { // Casting from pointer
- return true;
- } else if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) // Casting from pointer
return true;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isX86_MMXTy()) {
- if (SrcTy->isVectorTy()) {
+ return SrcTy->isIntegerTy(); // Casting from integral
+ }
+ if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy())
return DestBits == SrcBits; // 64-bit vector to MMX
- } else {
- return false;
- }
- } else { // Casting to something else
return false;
- }
+ } // Casting to something else
+ return false;
}
bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
@@ -2725,13 +2609,13 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
}
bool CastInst::isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy,
- const DataLayout *DL) {
+ const DataLayout &DL) {
if (auto *PtrTy = dyn_cast<PointerType>(SrcTy))
if (auto *IntTy = dyn_cast<IntegerType>(DestTy))
- return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy);
+ return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy);
if (auto *PtrTy = dyn_cast<PointerType>(DestTy))
if (auto *IntTy = dyn_cast<IntegerType>(SrcTy))
- return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy);
+ return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy);
return isBitCastable(SrcTy, DestTy);
}
@@ -2852,10 +2736,6 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
// Check for type sanity on the arguments
Type *SrcTy = S->getType();
- // If this is a cast to the same type then it's trivially true.
- if (SrcTy == DstTy)
- return true;
-
if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
SrcTy->isAggregateType() || DstTy->isAggregateType())
return false;
@@ -3418,8 +3298,8 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
NumOperands = 2;
OperandList = allocHungoffUses(ReservedSpace);
- OperandList[0] = Value;
- OperandList[1] = Default;
+ Op<0>() = Value;
+ Op<1>() = Default;
}
/// SwitchInst ctor - Create a new switch instruction, specifying a value to
@@ -3537,7 +3417,7 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) {
NumOperands = 1;
OperandList = allocHungoffUses(ReservedSpace);
- OperandList[0] = Address;
+ Op<0>() = Address;
}
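
The new entry 17 in the cast-pair table encodes one extra fold: a zext feeding a sitofp can become a single uitofp, because zero-extension guarantees the intermediate value is non-negative. A minimal sketch of querying the table, assuming the eight-argument isEliminableCastPair signature in use at this revision:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Expects Instruction::UIToFP back: (zext i16 -> i32) + (sitofp i32 ->
    // float) folds to (uitofp i16 -> float). The trailing IntPtrTy arguments
    // are only consulted for pointer/integer pairs, so nullptr is fine here.
    unsigned foldZExtSIToFP(LLVMContext &Ctx) {
      return CastInst::isEliminableCastPair(
          Instruction::ZExt, Instruction::SIToFP, Type::getInt16Ty(Ctx),
          Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx), nullptr, nullptr,
          nullptr);
    }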
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index b6d95c4..44c4532 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -88,11 +88,22 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
"mem_parallel_loop_access kind id drifted");
(void)MemParallelLoopAccessID;
-
// Create the 'nonnull' metadata kind.
unsigned NonNullID = getMDKindID("nonnull");
assert(NonNullID == MD_nonnull && "nonnull kind id drifted");
(void)NonNullID;
+
+ // Create the 'dereferenceable' metadata kind.
+ unsigned DereferenceableID = getMDKindID("dereferenceable");
+ assert(DereferenceableID == MD_dereferenceable &&
+ "dereferenceable kind id drifted");
+ (void)DereferenceableID;
+
+ // Create the 'dereferenceable_or_null' metadata kind.
+ unsigned DereferenceableOrNullID = getMDKindID("dereferenceable_or_null");
+ assert(DereferenceableOrNullID == MD_dereferenceable_or_null &&
+ "dereferenceable_or_null kind id drifted");
+ (void)DereferenceableOrNullID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
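
Once the kinds are registered, they attach like any other instruction metadata. A minimal sketch (the helper name and byte count are hypothetical):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Promise that LI's pointer is dereferenceable for Bytes bytes by
    // attaching the newly registered !dereferenceable kind.
    void tagDereferenceable(LoadInst *LI, uint64_t Bytes) {
      LLVMContext &Ctx = LI->getContext();
      Metadata *Ops[] = {ConstantAsMetadata::get(
          ConstantInt::get(Type::getInt64Ty(Ctx), Bytes))};
      LI->setMetadata(LLVMContext::MD_dereferenceable, MDNode::get(Ctx, Ops));
    }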
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
index 01a0e6c..1e20807 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
@@ -35,7 +35,8 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
Int8Ty(C, 8),
Int16Ty(C, 16),
Int32Ty(C, 32),
- Int64Ty(C, 64) {
+ Int64Ty(C, 64),
+ Int128Ty(C, 128) {
InlineAsmDiagHandler = nullptr;
InlineAsmDiagContext = nullptr;
DiagnosticHandler = nullptr;
@@ -77,10 +78,10 @@ LLVMContextImpl::~LLVMContextImpl() {
// unnecessary RAUW when nodes are still unresolved.
for (auto *I : DistinctMDNodes)
I->dropAllReferences();
- for (auto *I : MDTuples)
- I->dropAllReferences();
- for (auto *I : MDLocations)
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ for (auto *I : CLASS##s) \
I->dropAllReferences();
+#include "llvm/IR/Metadata.def"
// Also drop references that come from the Value bridges.
for (auto &Pair : ValuesAsMetadata)
@@ -89,15 +90,14 @@ LLVMContextImpl::~LLVMContextImpl() {
Pair.second->dropUse();
// Destroy MDNodes.
- for (UniquableMDNode *I : DistinctMDNodes)
+ for (MDNode *I : DistinctMDNodes)
I->deleteAsSubclass();
- for (MDTuple *I : MDTuples)
- delete I;
- for (MDLocation *I : MDLocations)
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ for (CLASS *I : CLASS##s) \
delete I;
+#include "llvm/IR/Metadata.def"
- // Free the constants. This is important to do here to ensure that they are
- // freed before the LeakDetector is torn down.
+ // Free the constants.
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
DropFirst());
std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
@@ -162,6 +162,62 @@ LLVMContextImpl::~LLVMContextImpl() {
MDStringCache.clear();
}
+void LLVMContextImpl::dropTriviallyDeadConstantArrays() {
+ bool Changed;
+ do {
+ Changed = false;
+
+ for (auto I = ArrayConstants.map_begin(), E = ArrayConstants.map_end();
+ I != E; ) {
+ auto *C = I->first;
+ I++;
+ if (C->use_empty()) {
+ Changed = true;
+ C->destroyConstant();
+ }
+ }
+
+ } while (Changed);
+}
+
+void Module::dropTriviallyDeadConstantArrays() {
+ Context.pImpl->dropTriviallyDeadConstantArrays();
+}
+
+namespace llvm {
+/// \brief Make MDOperand transparent for hashing.
+///
+/// This overload of an implementation detail of the hashing library makes
+/// MDOperand hash to the same value as a \a Metadata pointer.
+///
+/// Note that overloading \a hash_value() as follows:
+///
+/// \code
+/// size_t hash_value(const MDOperand &X) { return hash_value(X.get()); }
+/// \endcode
+///
+/// does not cause MDOperand to be transparent. In particular, a bare pointer
+/// doesn't get hashed before it's combined, whereas \a MDOperand would.
+static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
+}
+
+unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
+ unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
+#ifndef NDEBUG
+ {
+ SmallVector<Metadata *, 8> MDs(N->op_begin() + Offset, N->op_end());
+ unsigned RawHash = calculateHash(MDs);
+ assert(Hash == RawHash &&
+ "Expected hash of MDOperand to equal hash of Metadata*");
+ }
+#endif
+ return Hash;
+}
+
+unsigned MDNodeOpsKey::calculateHash(ArrayRef<Metadata *> Ops) {
+ return hash_combine_range(Ops.begin(), Ops.end());
+}
+
// ConstantsContext anchors
void UnaryConstantExpr::anchor() { }
@@ -182,3 +238,4 @@ void InsertValueConstantExpr::anchor() { }
void GetElementPtrConstantExpr::anchor() { }
void CompareConstantExpr::anchor() { }
+
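
The sweep above runs to a fixed point because destroying one dead array can drop the last use of another, newly exposing it on the next pass. A hedged sketch of the intended call site (the helper is hypothetical):

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Erasing a dead global leaves its (possibly huge) ConstantArray
    // initializer cached in the context; the new hook reclaims it.
    void dropDeadGlobal(Module &M, GlobalVariable *DeadGV) {
      DeadGV->eraseFromParent();
      M.dropTriviallyDeadConstantArrays();
    }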
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index 6ebc567..f81db60 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -17,7 +17,6 @@
#include "AttributeImpl.h"
#include "ConstantsContext.h"
-#include "LeaksContext.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -28,6 +27,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -41,6 +41,7 @@ class ConstantFP;
class DiagnosticInfoOptimizationRemark;
class DiagnosticInfoOptimizationRemarkMissed;
class DiagnosticInfoOptimizationRemarkAnalysis;
+class GCStrategy;
class LLVMContext;
class Type;
class Value;
@@ -166,97 +167,731 @@ struct FunctionTypeKeyInfo {
}
};
+/// \brief Structure for hashing arbitrary MDNode operands.
+class MDNodeOpsKey {
+ ArrayRef<Metadata *> RawOps;
+ ArrayRef<MDOperand> Ops;
+
+ unsigned Hash;
+
+protected:
+ MDNodeOpsKey(ArrayRef<Metadata *> Ops)
+ : RawOps(Ops), Hash(calculateHash(Ops)) {}
+
+ template <class NodeTy>
+ MDNodeOpsKey(const NodeTy *N, unsigned Offset = 0)
+ : Ops(N->op_begin() + Offset, N->op_end()), Hash(N->getHash()) {}
+
+ template <class NodeTy>
+ bool compareOps(const NodeTy *RHS, unsigned Offset = 0) const {
+ if (getHash() != RHS->getHash())
+ return false;
+
+ assert((RawOps.empty() || Ops.empty()) && "Two sets of operands?");
+ return RawOps.empty() ? compareOps(Ops, RHS, Offset)
+ : compareOps(RawOps, RHS, Offset);
+ }
+
+ static unsigned calculateHash(MDNode *N, unsigned Offset = 0);
+
+private:
+ template <class T>
+ static bool compareOps(ArrayRef<T> Ops, const MDNode *RHS, unsigned Offset) {
+ if (Ops.size() != RHS->getNumOperands() - Offset)
+ return false;
+ return std::equal(Ops.begin(), Ops.end(), RHS->op_begin() + Offset);
+ }
+
+ static unsigned calculateHash(ArrayRef<Metadata *> Ops);
+
+public:
+ unsigned getHash() const { return Hash; }
+};
+
+template <class NodeTy> struct MDNodeKeyImpl;
+template <class NodeTy> struct MDNodeInfo;
+
/// \brief DenseMapInfo for MDTuple.
///
/// Note that we don't need the is-function-local bit, since that's implicit in
/// the operands.
-struct MDTupleInfo {
- struct KeyTy {
- ArrayRef<Metadata *> RawOps;
- ArrayRef<MDOperand> Ops;
- unsigned Hash;
+template <> struct MDNodeKeyImpl<MDTuple> : MDNodeOpsKey {
+ MDNodeKeyImpl(ArrayRef<Metadata *> Ops) : MDNodeOpsKey(Ops) {}
+ MDNodeKeyImpl(const MDTuple *N) : MDNodeOpsKey(N) {}
- KeyTy(ArrayRef<Metadata *> Ops)
- : RawOps(Ops), Hash(hash_combine_range(Ops.begin(), Ops.end())) {}
+ bool isKeyOf(const MDTuple *RHS) const { return compareOps(RHS); }
- KeyTy(MDTuple *N)
- : Ops(N->op_begin(), N->op_end()), Hash(N->getHash()) {}
+ unsigned getHashValue() const { return getHash(); }
- bool operator==(const MDTuple *RHS) const {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- if (Hash != RHS->getHash())
- return false;
- assert((RawOps.empty() || Ops.empty()) && "Two sets of operands?");
- return RawOps.empty() ? compareOps(Ops, RHS) : compareOps(RawOps, RHS);
- }
- template <class T>
- static bool compareOps(ArrayRef<T> Ops, const MDTuple *RHS) {
- if (Ops.size() != RHS->getNumOperands())
- return false;
- return std::equal(Ops.begin(), Ops.end(), RHS->op_begin());
- }
- };
- static inline MDTuple *getEmptyKey() {
- return DenseMapInfo<MDTuple *>::getEmptyKey();
+ static unsigned calculateHash(MDTuple *N) {
+ return MDNodeOpsKey::calculateHash(N);
}
- static inline MDTuple *getTombstoneKey() {
- return DenseMapInfo<MDTuple *>::getTombstoneKey();
+};
+
+/// \brief DenseMapInfo for DILocation.
+template <> struct MDNodeKeyImpl<DILocation> {
+ unsigned Line;
+ unsigned Column;
+ Metadata *Scope;
+ Metadata *InlinedAt;
+
+ MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope,
+ Metadata *InlinedAt)
+ : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
+
+ MDNodeKeyImpl(const DILocation *L)
+ : Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
+ InlinedAt(L->getRawInlinedAt()) {}
+
+ bool isKeyOf(const DILocation *RHS) const {
+ return Line == RHS->getLine() && Column == RHS->getColumn() &&
+ Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
}
- static unsigned getHashValue(const KeyTy &Key) { return Key.Hash; }
- static unsigned getHashValue(const MDTuple *U) {
- return U->getHash();
+ unsigned getHashValue() const {
+ return hash_combine(Line, Column, Scope, InlinedAt);
}
- static bool isEqual(const KeyTy &LHS, const MDTuple *RHS) {
- return LHS == RHS;
+};
+
+/// \brief DenseMapInfo for GenericDINode.
+template <> struct MDNodeKeyImpl<GenericDINode> : MDNodeOpsKey {
+ unsigned Tag;
+ StringRef Header;
+ MDNodeKeyImpl(unsigned Tag, StringRef Header, ArrayRef<Metadata *> DwarfOps)
+ : MDNodeOpsKey(DwarfOps), Tag(Tag), Header(Header) {}
+ MDNodeKeyImpl(const GenericDINode *N)
+ : MDNodeOpsKey(N, 1), Tag(N->getTag()), Header(N->getHeader()) {}
+
+ bool isKeyOf(const GenericDINode *RHS) const {
+ return Tag == RHS->getTag() && Header == RHS->getHeader() &&
+ compareOps(RHS, 1);
}
- static bool isEqual(const MDTuple *LHS, const MDTuple *RHS) {
- return LHS == RHS;
+
+ unsigned getHashValue() const { return hash_combine(getHash(), Tag, Header); }
+
+ static unsigned calculateHash(GenericDINode *N) {
+ return MDNodeOpsKey::calculateHash(N, 1);
}
};
-/// \brief DenseMapInfo for MDLocation.
-struct MDLocationInfo {
- struct KeyTy {
- unsigned Line;
- unsigned Column;
- Metadata *Scope;
- Metadata *InlinedAt;
+template <> struct MDNodeKeyImpl<DISubrange> {
+ int64_t Count;
+ int64_t LowerBound;
- KeyTy(unsigned Line, unsigned Column, Metadata *Scope, Metadata *InlinedAt)
- : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
+ MDNodeKeyImpl(int64_t Count, int64_t LowerBound)
+ : Count(Count), LowerBound(LowerBound) {}
+ MDNodeKeyImpl(const DISubrange *N)
+ : Count(N->getCount()), LowerBound(N->getLowerBound()) {}
- KeyTy(const MDLocation *L)
- : Line(L->getLine()), Column(L->getColumn()), Scope(L->getScope()),
- InlinedAt(L->getInlinedAt()) {}
+ bool isKeyOf(const DISubrange *RHS) const {
+ return Count == RHS->getCount() && LowerBound == RHS->getLowerBound();
+ }
+ unsigned getHashValue() const { return hash_combine(Count, LowerBound); }
+};
- bool operator==(const MDLocation *RHS) const {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- return Line == RHS->getLine() && Column == RHS->getColumn() &&
- Scope == RHS->getScope() && InlinedAt == RHS->getInlinedAt();
- }
- };
- static inline MDLocation *getEmptyKey() {
- return DenseMapInfo<MDLocation *>::getEmptyKey();
+template <> struct MDNodeKeyImpl<DIEnumerator> {
+ int64_t Value;
+ StringRef Name;
+
+ MDNodeKeyImpl(int64_t Value, StringRef Name) : Value(Value), Name(Name) {}
+ MDNodeKeyImpl(const DIEnumerator *N)
+ : Value(N->getValue()), Name(N->getName()) {}
+
+ bool isKeyOf(const DIEnumerator *RHS) const {
+ return Value == RHS->getValue() && Name == RHS->getName();
}
- static inline MDLocation *getTombstoneKey() {
- return DenseMapInfo<MDLocation *>::getTombstoneKey();
+ unsigned getHashValue() const { return hash_combine(Value, Name); }
+};
+
+template <> struct MDNodeKeyImpl<DIBasicType> {
+ unsigned Tag;
+ StringRef Name;
+ uint64_t SizeInBits;
+ uint64_t AlignInBits;
+ unsigned Encoding;
+
+ MDNodeKeyImpl(unsigned Tag, StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding)
+ : Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
+ Encoding(Encoding) {}
+ MDNodeKeyImpl(const DIBasicType *N)
+ : Tag(N->getTag()), Name(N->getName()), SizeInBits(N->getSizeInBits()),
+ AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()) {}
+
+ bool isKeyOf(const DIBasicType *RHS) const {
+ return Tag == RHS->getTag() && Name == RHS->getName() &&
+ SizeInBits == RHS->getSizeInBits() &&
+ AlignInBits == RHS->getAlignInBits() &&
+ Encoding == RHS->getEncoding();
}
- static unsigned getHashValue(const KeyTy &Key) {
- return hash_combine(Key.Line, Key.Column, Key.Scope, Key.InlinedAt);
+ unsigned getHashValue() const {
+ return hash_combine(Tag, Name, SizeInBits, AlignInBits, Encoding);
}
- static unsigned getHashValue(const MDLocation *U) {
- return getHashValue(KeyTy(U));
+};
+
+template <> struct MDNodeKeyImpl<DIDerivedType> {
+ unsigned Tag;
+ StringRef Name;
+ Metadata *File;
+ unsigned Line;
+ Metadata *Scope;
+ Metadata *BaseType;
+ uint64_t SizeInBits;
+ uint64_t AlignInBits;
+ uint64_t OffsetInBits;
+ unsigned Flags;
+ Metadata *ExtraData;
+
+ MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *File, unsigned Line,
+ Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ Metadata *ExtraData)
+ : Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
+ BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
+ OffsetInBits(OffsetInBits), Flags(Flags), ExtraData(ExtraData) {}
+ MDNodeKeyImpl(const DIDerivedType *N)
+ : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()),
+ Line(N->getLine()), Scope(N->getRawScope()),
+ BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
+ AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {}
+
+ bool isKeyOf(const DIDerivedType *RHS) const {
+ return Tag == RHS->getTag() && Name == RHS->getName() &&
+ File == RHS->getRawFile() && Line == RHS->getLine() &&
+ Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() &&
+ SizeInBits == RHS->getSizeInBits() &&
+ AlignInBits == RHS->getAlignInBits() &&
+ OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() &&
+ ExtraData == RHS->getRawExtraData();
}
- static bool isEqual(const KeyTy &LHS, const MDLocation *RHS) {
- return LHS == RHS;
+ unsigned getHashValue() const {
+ return hash_combine(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, ExtraData);
}
- static bool isEqual(const MDLocation *LHS, const MDLocation *RHS) {
+};
+
+template <> struct MDNodeKeyImpl<DICompositeType> {
+ unsigned Tag;
+ StringRef Name;
+ Metadata *File;
+ unsigned Line;
+ Metadata *Scope;
+ Metadata *BaseType;
+ uint64_t SizeInBits;
+ uint64_t AlignInBits;
+ uint64_t OffsetInBits;
+ unsigned Flags;
+ Metadata *Elements;
+ unsigned RuntimeLang;
+ Metadata *VTableHolder;
+ Metadata *TemplateParams;
+ StringRef Identifier;
+
+ MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *File, unsigned Line,
+ Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ Metadata *Elements, unsigned RuntimeLang,
+ Metadata *VTableHolder, Metadata *TemplateParams,
+ StringRef Identifier)
+ : Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
+ BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
+ OffsetInBits(OffsetInBits), Flags(Flags), Elements(Elements),
+ RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
+ TemplateParams(TemplateParams), Identifier(Identifier) {}
+ MDNodeKeyImpl(const DICompositeType *N)
+ : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()),
+ Line(N->getLine()), Scope(N->getRawScope()),
+ BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
+ AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ Flags(N->getFlags()), Elements(N->getRawElements()),
+ RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()),
+ TemplateParams(N->getRawTemplateParams()),
+ Identifier(N->getIdentifier()) {}
+
+ bool isKeyOf(const DICompositeType *RHS) const {
+ return Tag == RHS->getTag() && Name == RHS->getName() &&
+ File == RHS->getRawFile() && Line == RHS->getLine() &&
+ Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() &&
+ SizeInBits == RHS->getSizeInBits() &&
+ AlignInBits == RHS->getAlignInBits() &&
+ OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() &&
+ Elements == RHS->getRawElements() &&
+ RuntimeLang == RHS->getRuntimeLang() &&
+ VTableHolder == RHS->getRawVTableHolder() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
+ Identifier == RHS->getIdentifier();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
+ VTableHolder, TemplateParams, Identifier);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DISubroutineType> {
+ unsigned Flags;
+ Metadata *TypeArray;
+
+ MDNodeKeyImpl(int64_t Flags, Metadata *TypeArray)
+ : Flags(Flags), TypeArray(TypeArray) {}
+ MDNodeKeyImpl(const DISubroutineType *N)
+ : Flags(N->getFlags()), TypeArray(N->getRawTypeArray()) {}
+
+ bool isKeyOf(const DISubroutineType *RHS) const {
+ return Flags == RHS->getFlags() && TypeArray == RHS->getRawTypeArray();
+ }
+ unsigned getHashValue() const { return hash_combine(Flags, TypeArray); }
+};
+
+template <> struct MDNodeKeyImpl<DIFile> {
+ StringRef Filename;
+ StringRef Directory;
+
+ MDNodeKeyImpl(StringRef Filename, StringRef Directory)
+ : Filename(Filename), Directory(Directory) {}
+ MDNodeKeyImpl(const DIFile *N)
+ : Filename(N->getFilename()), Directory(N->getDirectory()) {}
+
+ bool isKeyOf(const DIFile *RHS) const {
+ return Filename == RHS->getFilename() && Directory == RHS->getDirectory();
+ }
+ unsigned getHashValue() const { return hash_combine(Filename, Directory); }
+};
+
+template <> struct MDNodeKeyImpl<DICompileUnit> {
+ unsigned SourceLanguage;
+ Metadata *File;
+ StringRef Producer;
+ bool IsOptimized;
+ StringRef Flags;
+ unsigned RuntimeVersion;
+ StringRef SplitDebugFilename;
+ unsigned EmissionKind;
+ Metadata *EnumTypes;
+ Metadata *RetainedTypes;
+ Metadata *Subprograms;
+ Metadata *GlobalVariables;
+ Metadata *ImportedEntities;
+ uint64_t DWOId;
+
+ MDNodeKeyImpl(unsigned SourceLanguage, Metadata *File, StringRef Producer,
+ bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
+ StringRef SplitDebugFilename, unsigned EmissionKind,
+ Metadata *EnumTypes, Metadata *RetainedTypes,
+ Metadata *Subprograms, Metadata *GlobalVariables,
+ Metadata *ImportedEntities, uint64_t DWOId)
+ : SourceLanguage(SourceLanguage), File(File), Producer(Producer),
+ IsOptimized(IsOptimized), Flags(Flags), RuntimeVersion(RuntimeVersion),
+ SplitDebugFilename(SplitDebugFilename), EmissionKind(EmissionKind),
+ EnumTypes(EnumTypes), RetainedTypes(RetainedTypes),
+ Subprograms(Subprograms), GlobalVariables(GlobalVariables),
+ ImportedEntities(ImportedEntities), DWOId(DWOId) {}
+ MDNodeKeyImpl(const DICompileUnit *N)
+ : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()),
+ Producer(N->getProducer()), IsOptimized(N->isOptimized()),
+ Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()),
+ SplitDebugFilename(N->getSplitDebugFilename()),
+ EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()),
+ RetainedTypes(N->getRawRetainedTypes()),
+ Subprograms(N->getRawSubprograms()),
+ GlobalVariables(N->getRawGlobalVariables()),
+ ImportedEntities(N->getRawImportedEntities()), DWOId(N->getDWOId()) {}
+
+ bool isKeyOf(const DICompileUnit *RHS) const {
+ return SourceLanguage == RHS->getSourceLanguage() &&
+ File == RHS->getRawFile() && Producer == RHS->getProducer() &&
+ IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() &&
+ RuntimeVersion == RHS->getRuntimeVersion() &&
+ SplitDebugFilename == RHS->getSplitDebugFilename() &&
+ EmissionKind == RHS->getEmissionKind() &&
+ EnumTypes == RHS->getRawEnumTypes() &&
+ RetainedTypes == RHS->getRawRetainedTypes() &&
+ Subprograms == RHS->getRawSubprograms() &&
+ GlobalVariables == RHS->getRawGlobalVariables() &&
+ ImportedEntities == RHS->getRawImportedEntities() &&
+ DWOId == RHS->getDWOId();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags,
+ RuntimeVersion, SplitDebugFilename, EmissionKind,
+ EnumTypes, RetainedTypes, Subprograms, GlobalVariables,
+ ImportedEntities, DWOId);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DISubprogram> {
+ Metadata *Scope;
+ StringRef Name;
+ StringRef LinkageName;
+ Metadata *File;
+ unsigned Line;
+ Metadata *Type;
+ bool IsLocalToUnit;
+ bool IsDefinition;
+ unsigned ScopeLine;
+ Metadata *ContainingType;
+ unsigned Virtuality;
+ unsigned VirtualIndex;
+ unsigned Flags;
+ bool IsOptimized;
+ Metadata *Function;
+ Metadata *TemplateParams;
+ Metadata *Declaration;
+ Metadata *Variables;
+
+ MDNodeKeyImpl(Metadata *Scope, StringRef Name, StringRef LinkageName,
+ Metadata *File, unsigned Line, Metadata *Type,
+ bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
+ Metadata *ContainingType, unsigned Virtuality,
+ unsigned VirtualIndex, unsigned Flags, bool IsOptimized,
+ Metadata *Function, Metadata *TemplateParams,
+ Metadata *Declaration, Metadata *Variables)
+ : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
+ Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
+ IsDefinition(IsDefinition), ScopeLine(ScopeLine),
+ ContainingType(ContainingType), Virtuality(Virtuality),
+ VirtualIndex(VirtualIndex), Flags(Flags), IsOptimized(IsOptimized),
+ Function(Function), TemplateParams(TemplateParams),
+ Declaration(Declaration), Variables(Variables) {}
+ MDNodeKeyImpl(const DISubprogram *N)
+ : Scope(N->getRawScope()), Name(N->getName()),
+ LinkageName(N->getLinkageName()), File(N->getRawFile()),
+ Line(N->getLine()), Type(N->getRawType()),
+ IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
+ ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()),
+ Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()),
+ Flags(N->getFlags()), IsOptimized(N->isOptimized()),
+ Function(N->getRawFunction()),
+ TemplateParams(N->getRawTemplateParams()),
+ Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {}
+
+ bool isKeyOf(const DISubprogram *RHS) const {
+ return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+ LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
+ IsLocalToUnit == RHS->isLocalToUnit() &&
+ IsDefinition == RHS->isDefinition() &&
+ ScopeLine == RHS->getScopeLine() &&
+ ContainingType == RHS->getRawContainingType() &&
+ Virtuality == RHS->getVirtuality() &&
+ VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() &&
+ IsOptimized == RHS->isOptimized() &&
+ Function == RHS->getRawFunction() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
+ Declaration == RHS->getRawDeclaration() &&
+ Variables == RHS->getRawVariables();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Scope, Name, LinkageName, File, Line, Type,
+ IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+ Virtuality, VirtualIndex, Flags, IsOptimized, Function,
+ TemplateParams, Declaration, Variables);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DILexicalBlock> {
+ Metadata *Scope;
+ Metadata *File;
+ unsigned Line;
+ unsigned Column;
+
+ MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Line, unsigned Column)
+ : Scope(Scope), File(File), Line(Line), Column(Column) {}
+ MDNodeKeyImpl(const DILexicalBlock *N)
+ : Scope(N->getRawScope()), File(N->getRawFile()), Line(N->getLine()),
+ Column(N->getColumn()) {}
+
+ bool isKeyOf(const DILexicalBlock *RHS) const {
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Column == RHS->getColumn();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Scope, File, Line, Column);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DILexicalBlockFile> {
+ Metadata *Scope;
+ Metadata *File;
+ unsigned Discriminator;
+
+ MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Discriminator)
+ : Scope(Scope), File(File), Discriminator(Discriminator) {}
+ MDNodeKeyImpl(const DILexicalBlockFile *N)
+ : Scope(N->getRawScope()), File(N->getRawFile()),
+ Discriminator(N->getDiscriminator()) {}
+
+ bool isKeyOf(const DILexicalBlockFile *RHS) const {
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
+ Discriminator == RHS->getDiscriminator();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Scope, File, Discriminator);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DINamespace> {
+ Metadata *Scope;
+ Metadata *File;
+ StringRef Name;
+ unsigned Line;
+
+ MDNodeKeyImpl(Metadata *Scope, Metadata *File, StringRef Name, unsigned Line)
+ : Scope(Scope), File(File), Name(Name), Line(Line) {}
+ MDNodeKeyImpl(const DINamespace *N)
+ : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getName()),
+ Line(N->getLine()) {}
+
+ bool isKeyOf(const DINamespace *RHS) const {
+ return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
+ Name == RHS->getName() && Line == RHS->getLine();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Scope, File, Name, Line);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DITemplateTypeParameter> {
+ StringRef Name;
+ Metadata *Type;
+
+ MDNodeKeyImpl(StringRef Name, Metadata *Type) : Name(Name), Type(Type) {}
+ MDNodeKeyImpl(const DITemplateTypeParameter *N)
+ : Name(N->getName()), Type(N->getRawType()) {}
+
+ bool isKeyOf(const DITemplateTypeParameter *RHS) const {
+ return Name == RHS->getName() && Type == RHS->getRawType();
+ }
+ unsigned getHashValue() const { return hash_combine(Name, Type); }
+};
+
+template <> struct MDNodeKeyImpl<DITemplateValueParameter> {
+ unsigned Tag;
+ StringRef Name;
+ Metadata *Type;
+ Metadata *Value;
+
+ MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *Type, Metadata *Value)
+ : Tag(Tag), Name(Name), Type(Type), Value(Value) {}
+ MDNodeKeyImpl(const DITemplateValueParameter *N)
+ : Tag(N->getTag()), Name(N->getName()), Type(N->getRawType()),
+ Value(N->getValue()) {}
+
+ bool isKeyOf(const DITemplateValueParameter *RHS) const {
+ return Tag == RHS->getTag() && Name == RHS->getName() &&
+ Type == RHS->getRawType() && Value == RHS->getValue();
+ }
+ unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); }
+};
+
+template <> struct MDNodeKeyImpl<DIGlobalVariable> {
+ Metadata *Scope;
+ StringRef Name;
+ StringRef LinkageName;
+ Metadata *File;
+ unsigned Line;
+ Metadata *Type;
+ bool IsLocalToUnit;
+ bool IsDefinition;
+ Metadata *Variable;
+ Metadata *StaticDataMemberDeclaration;
+
+ MDNodeKeyImpl(Metadata *Scope, StringRef Name, StringRef LinkageName,
+ Metadata *File, unsigned Line, Metadata *Type,
+ bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
+ Metadata *StaticDataMemberDeclaration)
+ : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
+ Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
+ IsDefinition(IsDefinition), Variable(Variable),
+ StaticDataMemberDeclaration(StaticDataMemberDeclaration) {}
+ MDNodeKeyImpl(const DIGlobalVariable *N)
+ : Scope(N->getRawScope()), Name(N->getName()),
+ LinkageName(N->getLinkageName()), File(N->getRawFile()),
+ Line(N->getLine()), Type(N->getRawType()),
+ IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
+ Variable(N->getRawVariable()),
+ StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()) {}
+
+ bool isKeyOf(const DIGlobalVariable *RHS) const {
+ return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+ LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
+ IsLocalToUnit == RHS->isLocalToUnit() &&
+ IsDefinition == RHS->isDefinition() &&
+ Variable == RHS->getRawVariable() &&
+ StaticDataMemberDeclaration ==
+ RHS->getRawStaticDataMemberDeclaration();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Scope, Name, LinkageName, File, Line, Type,
+ IsLocalToUnit, IsDefinition, Variable,
+ StaticDataMemberDeclaration);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DILocalVariable> {
+ unsigned Tag;
+ Metadata *Scope;
+ StringRef Name;
+ Metadata *File;
+ unsigned Line;
+ Metadata *Type;
+ unsigned Arg;
+ unsigned Flags;
+
+ MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File,
+ unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags)
+ : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type),
+ Arg(Arg), Flags(Flags) {}
+ MDNodeKeyImpl(const DILocalVariable *N)
+ : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()),
+ File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()),
+ Arg(N->getArg()), Flags(N->getFlags()) {}
+
+ bool isKeyOf(const DILocalVariable *RHS) const {
+ return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
+ Name == RHS->getName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && Type == RHS->getRawType() &&
+ Arg == RHS->getArg() && Flags == RHS->getFlags();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DIExpression> {
+ ArrayRef<uint64_t> Elements;
+
+ MDNodeKeyImpl(ArrayRef<uint64_t> Elements) : Elements(Elements) {}
+ MDNodeKeyImpl(const DIExpression *N) : Elements(N->getElements()) {}
+
+ bool isKeyOf(const DIExpression *RHS) const {
+ return Elements == RHS->getElements();
+ }
+ unsigned getHashValue() const {
+ return hash_combine_range(Elements.begin(), Elements.end());
+ }
+};
+
+template <> struct MDNodeKeyImpl<DIObjCProperty> {
+ StringRef Name;
+ Metadata *File;
+ unsigned Line;
+ StringRef GetterName;
+ StringRef SetterName;
+ unsigned Attributes;
+ Metadata *Type;
+
+ MDNodeKeyImpl(StringRef Name, Metadata *File, unsigned Line,
+ StringRef GetterName, StringRef SetterName, unsigned Attributes,
+ Metadata *Type)
+ : Name(Name), File(File), Line(Line), GetterName(GetterName),
+ SetterName(SetterName), Attributes(Attributes), Type(Type) {}
+ MDNodeKeyImpl(const DIObjCProperty *N)
+ : Name(N->getName()), File(N->getRawFile()), Line(N->getLine()),
+ GetterName(N->getGetterName()), SetterName(N->getSetterName()),
+ Attributes(N->getAttributes()), Type(N->getRawType()) {}
+
+ bool isKeyOf(const DIObjCProperty *RHS) const {
+ return Name == RHS->getName() && File == RHS->getRawFile() &&
+ Line == RHS->getLine() && GetterName == RHS->getGetterName() &&
+ SetterName == RHS->getSetterName() &&
+ Attributes == RHS->getAttributes() && Type == RHS->getRawType();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Name, File, Line, GetterName, SetterName, Attributes,
+ Type);
+ }
+};
+
+template <> struct MDNodeKeyImpl<DIImportedEntity> {
+ unsigned Tag;
+ Metadata *Scope;
+ Metadata *Entity;
+ unsigned Line;
+ StringRef Name;
+
+ MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, unsigned Line,
+ StringRef Name)
+ : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {}
+ MDNodeKeyImpl(const DIImportedEntity *N)
+ : Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()),
+ Line(N->getLine()), Name(N->getName()) {}
+
+ bool isKeyOf(const DIImportedEntity *RHS) const {
+ return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
+ Entity == RHS->getRawEntity() && Line == RHS->getLine() &&
+ Name == RHS->getName();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Tag, Scope, Entity, Line, Name);
+ }
+};
+
+/// \brief DenseMapInfo for MDNode subclasses.
+template <class NodeTy> struct MDNodeInfo {
+ typedef MDNodeKeyImpl<NodeTy> KeyTy;
+ static inline NodeTy *getEmptyKey() {
+ return DenseMapInfo<NodeTy *>::getEmptyKey();
+ }
+ static inline NodeTy *getTombstoneKey() {
+ return DenseMapInfo<NodeTy *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy &Key) { return Key.getHashValue(); }
+ static unsigned getHashValue(const NodeTy *N) {
+ return KeyTy(N).getHashValue();
+ }
+ static bool isEqual(const KeyTy &LHS, const NodeTy *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS.isKeyOf(RHS);
+ }
+ static bool isEqual(const NodeTy *LHS, const NodeTy *RHS) {
return LHS == RHS;
}
};
+#define HANDLE_MDNODE_LEAF(CLASS) typedef MDNodeInfo<CLASS> CLASS##Info;
+#include "llvm/IR/Metadata.def"
+
+/// \brief Map-like storage for metadata attachments.
+class MDAttachmentMap {
+ SmallVector<std::pair<unsigned, TrackingMDNodeRef>, 2> Attachments;
+
+public:
+ bool empty() const { return Attachments.empty(); }
+ size_t size() const { return Attachments.size(); }
+
+ /// \brief Get a particular attachment (if any).
+ MDNode *lookup(unsigned ID) const;
+
+ /// \brief Set an attachment to a particular node.
+ ///
+ /// Set the \c ID attachment to \c MD, replacing the current attachment at \c
+  /// ID (if any).
+ void set(unsigned ID, MDNode &MD);
+
+ /// \brief Remove an attachment.
+ ///
+ /// Remove the attachment at \c ID, if any.
+ void erase(unsigned ID);
+
+ /// \brief Copy out all the attachments.
+ ///
+ /// Copies all the current attachments into \c Result, sorting by attachment
+ /// ID. This function does \em not clear \c Result.
+ void getAll(SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const;
+
+ /// \brief Erase matching attachments.
+ ///
+ /// Erases all attachments matching the \c shouldRemove predicate.
+ template <class PredTy> void remove_if(PredTy shouldRemove) {
+ Attachments.erase(
+ std::remove_if(Attachments.begin(), Attachments.end(), shouldRemove),
+ Attachments.end());
+ }
+};
+
class LLVMContextImpl {
public:
/// OwnedModules - The set of modules instantiated in this context, and which
@@ -287,14 +922,14 @@ public:
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
- DenseSet<MDTuple *, MDTupleInfo> MDTuples;
- DenseSet<MDLocation *, MDLocationInfo> MDLocations;
+#define HANDLE_MDNODE_LEAF(CLASS) DenseSet<CLASS *, CLASS##Info> CLASS##s;
+#include "llvm/IR/Metadata.def"
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
// aren't in the MDNodeSet, but they're still shared between objects, so no
// one object can destroy them. This set allows us to at least destroy them
// on Context destruction.
- SmallPtrSet<UniquableMDNode *, 1> DistinctMDNodes;
+ SmallPtrSet<MDNode *, 1> DistinctMDNodes;
DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
@@ -321,14 +956,11 @@ public:
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
-
- LeakDetectorImpl<Value> LLVMObjects;
- LeakDetectorImpl<Metadata> LLVMMDObjects;
// Basic type instances.
Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
- IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
/// TypeAllocator - All dynamically allocated types are allocated from this.
@@ -359,23 +991,17 @@ public:
/// CustomMDKindNames - Map to hold the metadata string to ID mapping.
StringMap<unsigned> CustomMDKindNames;
- typedef std::pair<unsigned, TrackingMDNodeRef> MDPairTy;
- typedef SmallVector<MDPairTy, 2> MDMapTy;
+ /// Collection of per-instruction metadata used in this context.
+ DenseMap<const Instruction *, MDAttachmentMap> InstructionMetadata;
+
+ /// Collection of per-function metadata used in this context.
+ DenseMap<const Function *, MDAttachmentMap> FunctionMetadata;
- /// MetadataStore - Collection of per-instruction metadata used in this
- /// context.
- DenseMap<const Instruction *, MDMapTy> MetadataStore;
-
/// DiscriminatorTable - This table maps file:line locations to an
/// integer representing the next DWARF path discriminator to assign to
/// instructions in different blocks at the same location.
DenseMap<std::pair<const char *, unsigned>, unsigned> DiscriminatorTable;
- /// IntrinsicIDCache - Cache of intrinsic name (string) to numeric ID mappings
- /// requested in this context
- typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy;
- IntrinsicIDCacheTy IntrinsicIDCache;
-
/// \brief Mapping from a function to its prefix data, which is stored as the
/// operand of an unparented ReturnInst so that the prefix data has a Use.
typedef DenseMap<const Function *, ReturnInst *> PrefixDataMapTy;
@@ -389,9 +1015,12 @@ public:
int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
-
+
LLVMContextImpl(LLVMContext &C);
~LLVMContextImpl();
+
+ /// Destroy the ConstantArrays if they are not used.
+ void dropTriviallyDeadConstantArrays();
};
}
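
Each leaf class now gets a DenseSet whose MDNodeInfo accepts either a node pointer or a lightweight MDNodeKeyImpl key, so callers can probe for an existing node without allocating one first. A sketch of that lookup pattern, wrapped in a hypothetical helper around DenseSet::find_as:

    #include "llvm/ADT/DenseSet.h"
    using namespace llvm;

    // Build a key from the desired fields, then probe the uniquing store;
    // MDNodeInfo::getHashValue/isEqual handle the key/pointer mismatch.
    template <class NodeTy>
    NodeTy *lookupUniqued(DenseSet<NodeTy *, MDNodeInfo<NodeTy>> &Store,
                          const MDNodeKeyImpl<NodeTy> &Key) {
      auto I = Store.find_as(Key);
      return I == Store.end() ? nullptr : *I;
    }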
diff --git a/contrib/llvm/lib/IR/LeaksContext.h b/contrib/llvm/lib/IR/LeaksContext.h
deleted file mode 100644
index 47704fa..0000000
--- a/contrib/llvm/lib/IR/LeaksContext.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- LeaksContext.h - LeadDetector Implementation ------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various helper methods and classes used by
-// LLVMContextImpl for leaks detectors.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_IR_LEAKSCONTEXT_H
-#define LLVM_LIB_IR_LEAKSCONTEXT_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-template <class T>
-struct PrinterTrait {
- static void print(const T* P) { errs() << P; }
-};
-
-template<>
-struct PrinterTrait<Value> {
- static void print(const Value* P) { errs() << *P; }
-};
-
-template <> struct PrinterTrait<Metadata> {
- static void print(const Metadata *P) { P->print(errs()); }
-};
-
-template <typename T>
-struct LeakDetectorImpl {
- explicit LeakDetectorImpl(const char* const name = "") :
- Cache(nullptr), Name(name) { }
-
- void clear() {
- Cache = nullptr;
- Ts.clear();
- }
-
- void setName(const char* n) {
- Name = n;
- }
-
- // Because the most common usage pattern, by far, is to add a
- // garbage object, then remove it immediately, we optimize this
- // case. When an object is added, it is not added to the set
- // immediately, it is added to the CachedValue Value. If it is
- // immediately removed, no set search need be performed.
- void addGarbage(const T* o) {
- assert(Ts.count(o) == 0 && "Object already in set!");
- if (Cache) {
- assert(Cache != o && "Object already in set!");
- Ts.insert(Cache);
- }
- Cache = o;
- }
-
- void removeGarbage(const T* o) {
- if (o == Cache)
- Cache = nullptr; // Cache hit
- else
- Ts.erase(o);
- }
-
- bool hasGarbage(const std::string& Message) {
- addGarbage(nullptr); // Flush the Cache
-
- assert(!Cache && "No value should be cached anymore!");
-
- if (!Ts.empty()) {
- errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
- for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
- E = Ts.end(); I != E; ++I) {
- errs() << '\t';
- PrinterTrait<T>::print(*I);
- errs() << '\n';
- }
- errs() << '\n';
-
- return true;
- }
-
- return false;
- }
-
-private:
- SmallPtrSet<const T*, 8> Ts;
- const T* Cache;
- const char* Name;
-};
-
-}
-
-#endif
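
The removed detector's one-element cache served the add-then-remove pattern its comment describes; a sketch of the sequence it kept off the hash set (illustration only, since the class no longer exists after this change):

    // Would only compile against the deleted header; shown to record the
    // retired fast path.
    void addThenRemove(llvm::LeakDetectorImpl<llvm::Value> &LD,
                       const llvm::Value *V) {
      LD.addGarbage(V);    // parked in the one-element cache; set untouched
      LD.removeGarbage(V); // cache hit, so no hash-set erase is needed
    }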
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index fa8d50e..bbec642 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -88,8 +88,7 @@ PrintAfterAll("print-after-all",
static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
PassOptionList &PassesToPrint) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
+ for (auto *PassInf : PassesToPrint) {
if (PassInf)
if (PassInf->getPassArgument() == PI->getPassArgument()) {
return true;
@@ -293,7 +292,7 @@ public:
Pass(PT_PassManager, ID), PMDataManager() { }
// Delete on the fly managers.
- virtual ~MPPassManager() {
+ ~MPPassManager() override {
for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
I != E; ++I) {
@@ -652,7 +651,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// are already checked are still available.
checkAnalysis = true;
} else
- // Do not schedule this analysis. Lower level analsyis
+ // Do not schedule this analysis. Lower level analysis
// passes are run on the fly.
delete AnalysisPass;
}
diff --git a/contrib/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm/lib/IR/MDBuilder.cpp
index c7fcf7a..354592d 100644
--- a/contrib/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm/lib/IR/MDBuilder.cpp
@@ -53,24 +53,38 @@ MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
return MDNode::get(Context, Vals);
}
+MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
+ SmallVector<Metadata *, 2> Vals(2);
+ Vals[0] = createString("function_entry_count");
+
+ Type *Int64Ty = Type::getInt64Ty(Context);
+ Vals[1] = createConstant(ConstantInt::get(Int64Ty, Count));
+
+ return MDNode::get(Context, Vals);
+}
+
MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!");
+
+ Type *Ty = IntegerType::get(Context, Lo.getBitWidth());
+ return createRange(ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi));
+}
+
+MDNode *MDBuilder::createRange(Constant *Lo, Constant *Hi) {
// If the range is everything then it is useless.
if (Hi == Lo)
return nullptr;
// Return the range [Lo, Hi).
- Type *Ty = IntegerType::get(Context, Lo.getBitWidth());
- Metadata *Range[2] = {createConstant(ConstantInt::get(Ty, Lo)),
- createConstant(ConstantInt::get(Ty, Hi))};
+ Metadata *Range[2] = {createConstant(Lo), createConstant(Hi)};
return MDNode::get(Context, Range);
}
MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
// To ensure uniqueness the root node is self-referential.
- MDNode *Dummy = MDNode::getTemporary(Context, None);
+ auto Dummy = MDNode::getTemporary(Context, None);
- SmallVector<Metadata *, 3> Args(1, Dummy);
+ SmallVector<Metadata *, 3> Args(1, Dummy.get());
if (Extra)
Args.push_back(Extra);
if (!Name.empty())
@@ -82,7 +96,7 @@ MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
// !1 = metadata !{metadata !0} <- root
// Replace the dummy operand with the root node itself and delete the dummy.
Root->replaceOperandWith(0, Root);
- MDNode::deleteTemporary(Dummy);
+
// We now have
// !1 = metadata !{metadata !1} <- self-referential root
return Root;
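
A short usage sketch combining the new builders (the annotator and its constants are hypothetical; the function-level attachment relies on the FunctionMetadata map introduced elsewhere in this merge):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Attach a profiled entry count to F and a known value range [0, 256)
    // to a 32-bit load. Note createRange returns nullptr when Lo == Hi
    // (the range would cover everything and carries no information).
    void annotate(Function &F, LoadInst &LI) {
      MDBuilder MDB(F.getContext());
      F.setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(1000));
      LI.setMetadata(LLVMContext::MD_range,
                     MDB.createRange(APInt(32, 0), APInt(32, 256)));
    }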
diff --git a/contrib/llvm/lib/IR/Mangler.cpp b/contrib/llvm/lib/IR/Mangler.cpp
index 5eeb797..a0e1b25 100644
--- a/contrib/llvm/lib/IR/Mangler.cpp
+++ b/contrib/llvm/lib/IR/Mangler.cpp
@@ -73,7 +73,7 @@ static bool hasByteCountSuffix(CallingConv::ID CC) {
/// Microsoft fastcall and stdcall functions require a suffix on their name
/// indicating the number of words of arguments they take.
static void addByteCountSuffix(raw_ostream &OS, const Function *F,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// Calculate arguments size total.
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
@@ -83,8 +83,8 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
// Size should be aligned to pointer size.
- unsigned PtrSize = TD.getPointerSize();
- ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
+ unsigned PtrSize = DL.getPointerSize();
+ ArgWords += RoundUpToAlignment(DL.getTypeAllocSize(Ty), PtrSize);
}
OS << '@' << ArgWords;
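
Concretely, with 4-byte pointers a __fastcall function taking three i32 arguments carries 12 argument bytes and mangles to "@f@12". A standalone sketch of the computation above (the helper is hypothetical):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/MathExtras.h"

    // Sum the argument sizes, each rounded up to the pointer size, to get
    // the byte count for the "@N" suffix.
    unsigned byteCountSuffix(llvm::ArrayRef<uint64_t> ArgSizes,
                             unsigned PtrSize) {
      unsigned ArgWords = 0;
      for (uint64_t Size : ArgSizes)
        ArgWords += llvm::RoundUpToAlignment(Size, PtrSize); // {4,4,4} -> 12
      return ArgWords;
    }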
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 63e5730..23a17a5 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -1,4 +1,4 @@
-//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//===- Metadata.cpp - Implement Metadata classes --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
+#include "MetadataImpl.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -20,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -82,8 +84,7 @@ MetadataAsValue *MetadataAsValue::getIfExists(LLVMContext &Context,
Metadata *MD) {
MD = canonicalizeMetadataForValue(Context, MD);
auto &Store = Context.pImpl->MetadataAsValues;
- auto I = Store.find(MD);
- return I == Store.end() ? nullptr : I->second;
+ return Store.lookup(MD);
}
void MetadataAsValue::handleChangedMetadata(Metadata *MD) {
@@ -155,7 +156,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
}
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
- assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Expected non-temp node");
+ assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) &&
+ "Expected non-temp node");
if (UseMap.empty())
return;
@@ -227,8 +229,8 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
if (Owner.is<MetadataAsValue *>())
continue;
- // Resolve UniquableMDNodes that point at this.
- auto *OwnerMD = dyn_cast<UniquableMDNode>(Owner.get<Metadata *>());
+ // Resolve MDNodes that point at this.
+ auto *OwnerMD = dyn_cast<MDNode>(Owner.get<Metadata *>());
if (!OwnerMD)
continue;
if (OwnerMD->isResolved())
@@ -395,17 +397,37 @@ void MDNode::operator delete(void *Mem) {
::operator delete(O);
}
-MDNode::MDNode(LLVMContext &Context, unsigned ID, ArrayRef<Metadata *> MDs)
- : Metadata(ID), Context(Context), NumOperands(MDs.size()),
- MDNodeSubclassData(0) {
- for (unsigned I = 0, E = MDs.size(); I != E; ++I)
- setOperand(I, MDs[I]);
+MDNode::MDNode(LLVMContext &Context, unsigned ID, StorageType Storage,
+ ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2)
+ : Metadata(ID, Storage), NumOperands(Ops1.size() + Ops2.size()),
+ NumUnresolved(0), Context(Context) {
+ unsigned Op = 0;
+ for (Metadata *MD : Ops1)
+ setOperand(Op++, MD);
+ for (Metadata *MD : Ops2)
+ setOperand(Op++, MD);
+
+ if (isDistinct())
+ return;
+
+ if (isUniqued())
+ // Check whether any operands are unresolved, requiring re-uniquing. If
+ // not, don't support RAUW.
+ if (!countUnresolvedOperands())
+ return;
+
+ this->Context.makeReplaceable(make_unique<ReplaceableMetadataImpl>(Context));
}
-bool MDNode::isResolved() const {
- if (isa<MDNodeFwdDecl>(this))
- return false;
- return cast<UniquableMDNode>(this)->isResolved();
+TempMDNode MDNode::clone() const {
+ switch (getMetadataID()) {
+ default:
+ llvm_unreachable("Invalid MDNode subclass");
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ case CLASS##Kind: \
+ return cast<CLASS>(this)->cloneImpl();
+#include "llvm/IR/Metadata.def"
+ }
}
static bool isOperandUnresolved(Metadata *Op) {
@@ -414,55 +436,73 @@ static bool isOperandUnresolved(Metadata *Op) {
return false;
}
-UniquableMDNode::UniquableMDNode(LLVMContext &C, unsigned ID,
- ArrayRef<Metadata *> Vals, bool AllowRAUW)
- : MDNode(C, ID, Vals) {
- if (!AllowRAUW)
- return;
+unsigned MDNode::countUnresolvedOperands() {
+ assert(NumUnresolved == 0 && "Expected unresolved ops to be uncounted");
+ NumUnresolved = std::count_if(op_begin(), op_end(), isOperandUnresolved);
+ return NumUnresolved;
+}
- // Check whether any operands are unresolved, requiring re-uniquing.
- unsigned NumUnresolved = 0;
- for (const auto &Op : operands())
- NumUnresolved += unsigned(isOperandUnresolved(Op));
+void MDNode::makeUniqued() {
+ assert(isTemporary() && "Expected this to be temporary");
+ assert(!isResolved() && "Expected this to be unresolved");
- if (!NumUnresolved)
- return;
+ // Enable uniquing callbacks.
+ for (auto &Op : mutable_operands())
+ Op.reset(Op.get(), this);
- ReplaceableUses.reset(new ReplaceableMetadataImpl);
- SubclassData32 = NumUnresolved;
+ // Make this 'uniqued'.
+ Storage = Uniqued;
+ if (!countUnresolvedOperands())
+ resolve();
+
+ assert(isUniqued() && "Expected this to be uniqued");
}
-void UniquableMDNode::resolve() {
+void MDNode::makeDistinct() {
+ assert(isTemporary() && "Expected this to be temporary");
+ assert(!isResolved() && "Expected this to be unresolved");
+
+ // Pretend to be uniqued, resolve the node, and then store in distinct table.
+ Storage = Uniqued;
+ resolve();
+ storeDistinctInContext();
+
+ assert(isDistinct() && "Expected this to be distinct");
+ assert(isResolved() && "Expected this to be resolved");
+}
+
+void MDNode::resolve() {
+ assert(isUniqued() && "Expected this to be uniqued");
assert(!isResolved() && "Expected this to be unresolved");
// Move the map, so that this immediately looks resolved.
- auto Uses = std::move(ReplaceableUses);
- SubclassData32 = 0;
+ auto Uses = Context.takeReplaceableUses();
+ NumUnresolved = 0;
assert(isResolved() && "Expected this to be resolved");
// Drop RAUW support.
Uses->resolveAllUses();
}
-void UniquableMDNode::resolveAfterOperandChange(Metadata *Old, Metadata *New) {
- assert(SubclassData32 != 0 && "Expected unresolved operands");
+void MDNode::resolveAfterOperandChange(Metadata *Old, Metadata *New) {
+ assert(NumUnresolved != 0 && "Expected unresolved operands");
// Check if an operand was resolved.
if (!isOperandUnresolved(Old)) {
if (isOperandUnresolved(New))
// An operand was un-resolved!
- ++SubclassData32;
+ ++NumUnresolved;
} else if (!isOperandUnresolved(New))
decrementUnresolvedOperandCount();
}
-void UniquableMDNode::decrementUnresolvedOperandCount() {
- if (!--SubclassData32)
+void MDNode::decrementUnresolvedOperandCount() {
+ if (!--NumUnresolved)
// Last unresolved operand has just been resolved.
resolve();
}
-void UniquableMDNode::resolveCycles() {
+void MDNode::resolveCycles() {
if (isResolved())
return;
@@ -471,62 +511,68 @@ void UniquableMDNode::resolveCycles() {
// Resolve all operands.
for (const auto &Op : operands()) {
- if (!Op)
+ auto *N = dyn_cast_or_null<MDNode>(Op);
+ if (!N)
continue;
- assert(!isa<MDNodeFwdDecl>(Op) &&
+
+ assert(!N->isTemporary() &&
"Expected all forward declarations to be resolved");
- if (auto *N = dyn_cast<UniquableMDNode>(Op))
- if (!N->isResolved())
- N->resolveCycles();
+ if (!N->isResolved())
+ N->resolveCycles();
}
}
-void MDTuple::recalculateHash() {
- setHash(hash_combine_range(op_begin(), op_end()));
-#ifndef NDEBUG
- {
- SmallVector<Metadata *, 8> MDs(op_begin(), op_end());
- unsigned RawHash = hash_combine_range(MDs.begin(), MDs.end());
- assert(getHash() == RawHash &&
- "Expected hash of MDOperand to equal hash of Metadata*");
+static bool hasSelfReference(MDNode *N) {
+ for (Metadata *MD : N->operands())
+ if (MD == N)
+ return true;
+ return false;
+}
+
+MDNode *MDNode::replaceWithPermanentImpl() {
+ if (hasSelfReference(this))
+ return replaceWithDistinctImpl();
+ return replaceWithUniquedImpl();
+}
+
+MDNode *MDNode::replaceWithUniquedImpl() {
+ // Try to uniquify in place.
+ MDNode *UniquedNode = uniquify();
+
+ if (UniquedNode == this) {
+ makeUniqued();
+ return this;
}
-#endif
+
+ // Collision, so RAUW instead.
+ replaceAllUsesWith(UniquedNode);
+ deleteAsSubclass();
+ return UniquedNode;
+}
+
+MDNode *MDNode::replaceWithDistinctImpl() {
+ makeDistinct();
+ return this;
+}
+
+void MDTuple::recalculateHash() {
+ setHash(MDTupleInfo::KeyTy::calculateHash(this));
}
void MDNode::dropAllReferences() {
for (unsigned I = 0, E = NumOperands; I != E; ++I)
setOperand(I, nullptr);
- if (auto *N = dyn_cast<UniquableMDNode>(this))
- if (!N->isResolved()) {
- N->ReplaceableUses->resolveAllUses(/* ResolveUsers */ false);
- N->ReplaceableUses.reset();
- }
-}
-
-namespace llvm {
-/// \brief Make MDOperand transparent for hashing.
-///
-/// This overload of an implementation detail of the hashing library makes
-/// MDOperand hash to the same value as a \a Metadata pointer.
-///
-/// Note that overloading \a hash_value() as follows:
-///
-/// \code
-/// size_t hash_value(const MDOperand &X) { return hash_value(X.get()); }
-/// \endcode
-///
-/// does not cause MDOperand to be transparent. In particular, a bare pointer
-/// doesn't get hashed before it's combined, whereas \a MDOperand would.
-static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
+ if (!isResolved()) {
+ Context.getReplaceableUses()->resolveAllUses(/* ResolveUsers */ false);
+ (void)Context.takeReplaceableUses();
+ }
}
-void UniquableMDNode::handleChangedOperand(void *Ref, Metadata *New) {
+void MDNode::handleChangedOperand(void *Ref, Metadata *New) {
unsigned Op = static_cast<MDOperand *>(Ref) - op_begin();
assert(Op < getNumOperands() && "Expected valid operand");
- if (isStoredDistinctInContext()) {
- assert(isResolved() && "Expected distinct node to be resolved");
-
+ if (!isUniqued()) {
// This node is not uniqued. Just set the operand and be done with it.
setOperand(Op, New);
return;
@@ -540,9 +586,9 @@ void UniquableMDNode::handleChangedOperand(void *Ref, Metadata *New) {
// Drop uniquing for self-reference cycles.
if (New == this) {
- storeDistinctInContext();
if (!isResolved())
resolve();
+ storeDistinctInContext();
return;
}
@@ -562,7 +608,7 @@ void UniquableMDNode::handleChangedOperand(void *Ref, Metadata *New) {
// dropAllReferences(), but we still need the use-list).
for (unsigned O = 0, E = getNumOperands(); O != E; ++O)
setOperand(O, nullptr);
- ReplaceableUses->replaceAllUsesWith(Uniqued);
+ Context.getReplaceableUses()->replaceAllUsesWith(Uniqued);
deleteAsSubclass();
return;
}
@@ -571,11 +617,11 @@ void UniquableMDNode::handleChangedOperand(void *Ref, Metadata *New) {
storeDistinctInContext();
}
-void UniquableMDNode::deleteAsSubclass() {
+void MDNode::deleteAsSubclass() {
switch (getMetadataID()) {
default:
- llvm_unreachable("Invalid subclass of UniquableMDNode");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
+ llvm_unreachable("Invalid subclass of MDNode");
+#define HANDLE_MDNODE_LEAF(CLASS) \
case CLASS##Kind: \
delete cast<CLASS>(this); \
break;
@@ -583,167 +629,99 @@ void UniquableMDNode::deleteAsSubclass() {
}
}
-UniquableMDNode *UniquableMDNode::uniquify() {
+template <class T, class InfoT>
+static T *uniquifyImpl(T *N, DenseSet<T *, InfoT> &Store) {
+ if (T *U = getUniqued(Store, N))
+ return U;
+
+ Store.insert(N);
+ return N;
+}
+
+template <class NodeTy> struct MDNode::HasCachedHash {
+ typedef char Yes[1];
+ typedef char No[2];
+ template <class U, U Val> struct SFINAE {};
+
+ template <class U>
+ static Yes &check(SFINAE<void (U::*)(unsigned), &U::setHash> *);
+ template <class U> static No &check(...);
+
+ static const bool value = sizeof(check<NodeTy>(nullptr)) == sizeof(Yes);
+};
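
HasCachedHash is the classic C++03/C++11 member-detection idiom: the first check() overload is only viable when &U::setHash has type void (U::*)(unsigned), and sizeof over the selected return type turns overload resolution into a compile-time boolean. The same trick in isolation, with hypothetical test types:

    #include <iostream>

    struct Hashed   { void setHash(unsigned) {} };
    struct Unhashed {};

    template <class NodeTy> struct HasSetHash {
      typedef char Yes[1];
      typedef char No[2];
      template <class U, U Val> struct SFINAE {};

      // Viable only when &U::setHash exists with type void (U::*)(unsigned).
      template <class U>
      static Yes &check(SFINAE<void (U::*)(unsigned), &U::setHash> *);
      template <class U> static No &check(...);

      static const bool value = sizeof(check<NodeTy>(nullptr)) == sizeof(Yes);
    };

    int main() {
      std::cout << HasSetHash<Hashed>::value;   // 1
      std::cout << HasSetHash<Unhashed>::value; // 0
    }
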
+
+MDNode *MDNode::uniquify() {
+ assert(!hasSelfReference(this) && "Cannot uniquify a self-referencing node");
+
+ // Try to insert into uniquing store.
switch (getMetadataID()) {
default:
- llvm_unreachable("Invalid subclass of UniquableMDNode");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
- case CLASS##Kind: \
- return cast<CLASS>(this)->uniquifyImpl();
+ llvm_unreachable("Invalid subclass of MDNode");
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ case CLASS##Kind: { \
+ CLASS *SubclassThis = cast<CLASS>(this); \
+ std::integral_constant<bool, HasCachedHash<CLASS>::value> \
+ ShouldRecalculateHash; \
+ dispatchRecalculateHash(SubclassThis, ShouldRecalculateHash); \
+ return uniquifyImpl(SubclassThis, getContext().pImpl->CLASS##s); \
+ }
#include "llvm/IR/Metadata.def"
}
}
-void UniquableMDNode::eraseFromStore() {
+void MDNode::eraseFromStore() {
switch (getMetadataID()) {
default:
- llvm_unreachable("Invalid subclass of UniquableMDNode");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
+ llvm_unreachable("Invalid subclass of MDNode");
+#define HANDLE_MDNODE_LEAF(CLASS) \
case CLASS##Kind: \
- cast<CLASS>(this)->eraseFromStoreImpl(); \
+ getContext().pImpl->CLASS##s.erase(cast<CLASS>(this)); \
break;
#include "llvm/IR/Metadata.def"
}
}
MDTuple *MDTuple::getImpl(LLVMContext &Context, ArrayRef<Metadata *> MDs,
- bool ShouldCreate) {
- MDTupleInfo::KeyTy Key(MDs);
-
- auto &Store = Context.pImpl->MDTuples;
- auto I = Store.find_as(Key);
- if (I != Store.end())
- return *I;
- if (!ShouldCreate)
- return nullptr;
-
- // Coallocate space for the node and Operands together, then placement new.
- auto *N = new (MDs.size()) MDTuple(Context, MDs, /* AllowRAUW */ true);
- N->setHash(Key.Hash);
- Store.insert(N);
- return N;
-}
-
-MDTuple *MDTuple::getDistinct(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
- auto *N = new (MDs.size()) MDTuple(Context, MDs, /* AllowRAUW */ false);
- N->storeDistinctInContext();
- return N;
-}
-
-MDTuple *MDTuple::uniquifyImpl() {
- recalculateHash();
- MDTupleInfo::KeyTy Key(this);
-
- auto &Store = getContext().pImpl->MDTuples;
- auto I = Store.find_as(Key);
- if (I == Store.end()) {
- Store.insert(this);
- return this;
+ StorageType Storage, bool ShouldCreate) {
+ unsigned Hash = 0;
+ if (Storage == Uniqued) {
+ MDTupleInfo::KeyTy Key(MDs);
+ if (auto *N = getUniqued(Context.pImpl->MDTuples, Key))
+ return N;
+ if (!ShouldCreate)
+ return nullptr;
+ Hash = Key.getHash();
+ } else {
+ assert(ShouldCreate && "Expected non-uniqued nodes to always be created");
}
- return *I;
-}
-
-void MDTuple::eraseFromStoreImpl() { getContext().pImpl->MDTuples.erase(this); }
-MDLocation::MDLocation(LLVMContext &C, unsigned Line, unsigned Column,
- ArrayRef<Metadata *> MDs, bool AllowRAUW)
- : UniquableMDNode(C, MDLocationKind, MDs, AllowRAUW) {
- assert((MDs.size() == 1 || MDs.size() == 2) &&
- "Expected a scope and optional inlined-at");
-
- // Set line and column.
- assert(Line < (1u << 24) && "Expected 24-bit line");
- assert(Column < (1u << 8) && "Expected 8-bit column");
-
- MDNodeSubclassData = Line;
- SubclassData16 = Column;
-}
-
-MDLocation *MDLocation::constructHelper(LLVMContext &Context, unsigned Line,
- unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, bool AllowRAUW) {
- SmallVector<Metadata *, 2> Ops;
- Ops.push_back(Scope);
- if (InlinedAt)
- Ops.push_back(InlinedAt);
- return new (Ops.size()) MDLocation(Context, Line, Column, Ops, AllowRAUW);
-}
-
-static void adjustLine(unsigned &Line) {
- // Set to unknown on overflow. Still use 24 bits for now.
- if (Line >= (1u << 24))
- Line = 0;
-}
-
-static void adjustColumn(unsigned &Column) {
- // Set to unknown on overflow. Still use 8 bits for now.
- if (Column >= (1u << 8))
- Column = 0;
-}
-
-MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line,
- unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, bool ShouldCreate) {
- // Fixup line/column.
- adjustLine(Line);
- adjustColumn(Column);
-
- MDLocationInfo::KeyTy Key(Line, Column, Scope, InlinedAt);
-
- auto &Store = Context.pImpl->MDLocations;
- auto I = Store.find_as(Key);
- if (I != Store.end())
- return *I;
- if (!ShouldCreate)
- return nullptr;
-
- auto *N = constructHelper(Context, Line, Column, Scope, InlinedAt,
- /* AllowRAUW */ true);
- Store.insert(N);
- return N;
+ return storeImpl(new (MDs.size()) MDTuple(Context, Storage, Hash, MDs),
+ Storage, Context.pImpl->MDTuples);
}
-MDLocation *MDLocation::getDistinct(LLVMContext &Context, unsigned Line,
- unsigned Column, Metadata *Scope,
- Metadata *InlinedAt) {
- // Fixup line/column.
- adjustLine(Line);
- adjustColumn(Column);
-
- auto *N = constructHelper(Context, Line, Column, Scope, InlinedAt,
- /* AllowRAUW */ false);
- N->storeDistinctInContext();
- return N;
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->isTemporary() && "Expected temporary node");
+ N->replaceAllUsesWith(nullptr);
+ N->deleteAsSubclass();
}
-MDLocation *MDLocation::uniquifyImpl() {
- MDLocationInfo::KeyTy Key(this);
+void MDNode::storeDistinctInContext() {
+ assert(isResolved() && "Expected resolved nodes");
+ Storage = Distinct;
- auto &Store = getContext().pImpl->MDLocations;
- auto I = Store.find_as(Key);
- if (I == Store.end()) {
- Store.insert(this);
- return this;
+ // Reset the hash.
+ switch (getMetadataID()) {
+ default:
+ llvm_unreachable("Invalid subclass of MDNode");
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ case CLASS##Kind: { \
+ std::integral_constant<bool, HasCachedHash<CLASS>::value> ShouldResetHash; \
+ dispatchResetHash(cast<CLASS>(this), ShouldResetHash); \
+ break; \
+ }
+#include "llvm/IR/Metadata.def"
}
- return *I;
-}
-
-void MDLocation::eraseFromStoreImpl() {
- getContext().pImpl->MDLocations.erase(this);
-}
-
-MDNodeFwdDecl *MDNode::getTemporary(LLVMContext &Context,
- ArrayRef<Metadata *> MDs) {
- return MDNodeFwdDecl::get(Context, MDs);
-}
-
-void MDNode::deleteTemporary(MDNode *N) { delete cast<MDNodeFwdDecl>(N); }
-void UniquableMDNode::storeDistinctInContext() {
- assert(!IsDistinctInContext && "Expected newly distinct metadata");
- IsDistinctInContext = true;
- if (auto *T = dyn_cast<MDTuple>(this))
- T->setHash(0);
getContext().pImpl->DistinctMDNodes.insert(this);
}
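
dispatchResetHash pairs that trait with tag dispatch: an std::integral_constant<bool, ...> argument selects one of two overloads at compile time, so setHash(0) is only ever instantiated for subclasses that actually cache a hash. A standalone sketch of the dispatch half (Cached/Uncached are made-up stand-ins):

    #include <type_traits>

    struct Cached   { unsigned Hash = 1; void setHash(unsigned H) { Hash = H; } };
    struct Uncached {};

    // The integral_constant argument selects an overload at compile time, so
    // the setHash(0) call is never instantiated for types without the member.
    template <class T> void resetHash(T *N, std::true_type) { N->setHash(0); }
    template <class T> void resetHash(T *, std::false_type) {}

    template <bool HasHash, class T> void reset(T *N) {
      resetHash(N, std::integral_constant<bool, HasHash>{});
    }

    int main() {
      Cached C;
      Uncached U;
      reset<true>(&C);  // zeroes the cached hash
      reset<false>(&U); // resolves to the no-op overload
    }
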
@@ -751,21 +729,17 @@ void MDNode::replaceOperandWith(unsigned I, Metadata *New) {
if (getOperand(I) == New)
return;
- if (isDistinct()) {
+ if (!isUniqued()) {
setOperand(I, New);
return;
}
- cast<UniquableMDNode>(this)->handleChangedOperand(mutable_begin() + I, New);
+ handleChangedOperand(mutable_begin() + I, New);
}
void MDNode::setOperand(unsigned I, Metadata *New) {
assert(I < NumOperands);
- if (isStoredDistinctInContext() || isa<MDNodeFwdDecl>(this))
- // No need for a callback, this isn't uniqued.
- mutable_begin()[I].reset(New, nullptr);
- else
- mutable_begin()[I].reset(New, this);
+ mutable_begin()[I].reset(New, isUniqued() ? this : nullptr);
}
/// \brief Get a node, or a self-reference that looks like it.
@@ -794,13 +768,10 @@ MDNode *MDNode::concatenate(MDNode *A, MDNode *B) {
if (!B)
return A;
- SmallVector<Metadata *, 4> MDs(A->getNumOperands() + B->getNumOperands());
-
- unsigned j = 0;
- for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i)
- MDs[j++] = A->getOperand(i);
- for (unsigned i = 0, ie = B->getNumOperands(); i != ie; ++i)
- MDs[j++] = B->getOperand(i);
+ SmallVector<Metadata *, 4> MDs;
+ MDs.reserve(A->getNumOperands() + B->getNumOperands());
+ MDs.append(A->op_begin(), A->op_end());
+ MDs.append(B->op_begin(), B->op_end());
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
@@ -812,14 +783,9 @@ MDNode *MDNode::intersect(MDNode *A, MDNode *B) {
return nullptr;
SmallVector<Metadata *, 4> MDs;
- for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) {
- Metadata *MD = A->getOperand(i);
- for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j)
- if (MD == B->getOperand(j)) {
- MDs.push_back(MD);
- break;
- }
- }
+ for (Metadata *MD : A->operands())
+ if (std::find(B->op_begin(), B->op_end(), MD) != B->op_end())
+ MDs.push_back(MD);
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
@@ -831,17 +797,9 @@ MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
return nullptr;
SmallVector<Metadata *, 4> MDs(B->op_begin(), B->op_end());
- for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) {
- Metadata *MD = A->getOperand(i);
- bool insert = true;
- for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j)
- if (MD == B->getOperand(j)) {
- insert = false;
- break;
- }
- if (insert)
- MDs.push_back(MD);
- }
+ for (Metadata *MD : A->operands())
+ if (std::find(B->op_begin(), B->op_end(), MD) == B->op_end())
+ MDs.push_back(MD);
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
@@ -1017,6 +975,50 @@ StringRef NamedMDNode::getName() const {
//===----------------------------------------------------------------------===//
// Instruction Metadata method implementations.
//
+void MDAttachmentMap::set(unsigned ID, MDNode &MD) {
+ for (auto &I : Attachments)
+ if (I.first == ID) {
+ I.second.reset(&MD);
+ return;
+ }
+ Attachments.emplace_back(std::piecewise_construct, std::make_tuple(ID),
+ std::make_tuple(&MD));
+}
+
+void MDAttachmentMap::erase(unsigned ID) {
+ if (empty())
+ return;
+
+ // Common case is one/last value.
+ if (Attachments.back().first == ID) {
+ Attachments.pop_back();
+ return;
+ }
+
+ for (auto I = Attachments.begin(), E = std::prev(Attachments.end()); I != E;
+ ++I)
+ if (I->first == ID) {
+ *I = std::move(Attachments.back());
+ Attachments.pop_back();
+ return;
+ }
+}
+
+MDNode *MDAttachmentMap::lookup(unsigned ID) const {
+ for (const auto &I : Attachments)
+ if (I.first == ID)
+ return I.second;
+ return nullptr;
+}
+
+void MDAttachmentMap::getAll(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
+ Result.append(Attachments.begin(), Attachments.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
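
MDAttachmentMap keeps attachments in a flat vector and erases with the swap-with-last trick: O(1) removal once the entry is found, at the price of element order, which getAll() restores by sorting. The same idiom over a plain std::vector, as a sketch:

    #include <iterator>
    #include <utility>
    #include <vector>

    // Erase the first pair matching Key in O(1) once found, by moving the
    // last element into its slot. Element order is not preserved.
    template <class K, class V>
    bool swapErase(std::vector<std::pair<K, V>> &Vec, const K &Key) {
      if (Vec.empty())
        return false;
      if (Vec.back().first == Key) { // common case: one/last entry
        Vec.pop_back();
        return true;
      }
      for (auto I = Vec.begin(), E = std::prev(Vec.end()); I != E; ++I)
        if (I->first == Key) {
          *I = std::move(Vec.back());
          Vec.pop_back();
          return true;
        }
      return false;
    }
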
void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
if (!Node && !hasMetadata())
@@ -1039,35 +1041,23 @@ void Instruction::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
if (!hasMetadataHashEntry())
return; // Nothing to remove!
- DenseMap<const Instruction *, LLVMContextImpl::MDMapTy> &MetadataStore =
- getContext().pImpl->MetadataStore;
+ auto &InstructionMetadata = getContext().pImpl->InstructionMetadata;
if (KnownSet.empty()) {
// Just drop our entry at the store.
- MetadataStore.erase(this);
+ InstructionMetadata.erase(this);
setHasMetadataHashEntry(false);
return;
}
- LLVMContextImpl::MDMapTy &Info = MetadataStore[this];
- unsigned I;
- unsigned E;
- // Walk the array and drop any metadata we don't know.
- for (I = 0, E = Info.size(); I != E;) {
- if (KnownSet.count(Info[I].first)) {
- ++I;
- continue;
- }
-
- Info[I] = std::move(Info.back());
- Info.pop_back();
- --E;
- }
- assert(E == Info.size());
+ auto &Info = InstructionMetadata[this];
+ Info.remove_if([&KnownSet](const std::pair<unsigned, TrackingMDNodeRef> &I) {
+ return !KnownSet.count(I.first);
+ });
- if (E == 0) {
+ if (Info.empty()) {
// Drop our entry at the store.
- MetadataStore.erase(this);
+ InstructionMetadata.erase(this);
setHasMetadataHashEntry(false);
}
}
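
The rewrite replaces a hand-rolled compaction loop with a single remove_if over the attachment list. On a standard container the equivalent is the erase-remove idiom; a sketch, using int as a stand-in for the tracked node reference:

    #include <algorithm>
    #include <set>
    #include <vector>

    // Keep only attachments whose kind ID appears in KnownIDs.
    void dropUnknown(std::vector<std::pair<unsigned, int>> &Attachments,
                     const std::set<unsigned> &KnownIDs) {
      Attachments.erase(
          std::remove_if(Attachments.begin(), Attachments.end(),
                         [&](const std::pair<unsigned, int> &A) {
                           return !KnownIDs.count(A.first);
                         }),
          Attachments.end());
    }
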
@@ -1081,56 +1071,37 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg) {
- DbgLoc = DebugLoc::getFromDILocation(Node);
+ DbgLoc = DebugLoc(Node);
return;
}
// Handle the case when we're adding/updating metadata on an instruction.
if (Node) {
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ auto &Info = getContext().pImpl->InstructionMetadata[this];
assert(!Info.empty() == hasMetadataHashEntry() &&
"HasMetadata bit is wonked");
- if (Info.empty()) {
+ if (Info.empty())
setHasMetadataHashEntry(true);
- } else {
- // Handle replacement of an existing value.
- for (auto &P : Info)
- if (P.first == KindID) {
- P.second.reset(Node);
- return;
- }
- }
-
- // No replacement, just add it to the list.
- Info.emplace_back(std::piecewise_construct, std::make_tuple(KindID),
- std::make_tuple(Node));
+ Info.set(KindID, *Node);
return;
}
// Otherwise, we're removing metadata from an instruction.
assert((hasMetadataHashEntry() ==
- (getContext().pImpl->MetadataStore.count(this) > 0)) &&
+ (getContext().pImpl->InstructionMetadata.count(this) > 0)) &&
"HasMetadata bit out of date!");
if (!hasMetadataHashEntry())
return; // Nothing to remove!
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ auto &Info = getContext().pImpl->InstructionMetadata[this];
- // Common case is removing the only entry.
- if (Info.size() == 1 && Info[0].first == KindID) {
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadataHashEntry(false);
+ // Handle removal of an existing value.
+ Info.erase(KindID);
+
+ if (!Info.empty())
return;
- }
- // Handle removal of an existing value.
- for (unsigned i = 0, e = Info.size(); i != e; ++i)
- if (Info[i].first == KindID) {
- Info[i] = std::move(Info.back());
- Info.pop_back();
- assert(!Info.empty() && "Removing last entry should be handled above");
- return;
- }
- // Otherwise, removing an entry that doesn't exist on the instruction.
+ getContext().pImpl->InstructionMetadata.erase(this);
+ setHasMetadataHashEntry(false);
}
void Instruction::setAAMetadata(const AAMDNodes &N) {
@@ -1144,15 +1115,12 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
if (KindID == LLVMContext::MD_dbg)
return DbgLoc.getAsMDNode();
- if (!hasMetadataHashEntry()) return nullptr;
-
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ if (!hasMetadataHashEntry())
+ return nullptr;
+ auto &Info = getContext().pImpl->InstructionMetadata[this];
assert(!Info.empty() && "bit out of sync with hash table");
- for (const auto &I : Info)
- if (I.first == KindID)
- return I.second;
- return nullptr;
+ return Info.lookup(KindID);
}
void Instruction::getAllMetadataImpl(
@@ -1160,50 +1128,111 @@ void Instruction::getAllMetadataImpl(
Result.clear();
// Handle 'dbg' as a special case since it is not stored in the hash table.
- if (!DbgLoc.isUnknown()) {
+ if (DbgLoc) {
Result.push_back(
std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode()));
if (!hasMetadataHashEntry()) return;
}
-
+
assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
+ getContext().pImpl->InstructionMetadata.count(this) &&
"Shouldn't have called this");
- const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
+ const auto &Info = getContext().pImpl->InstructionMetadata.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
-
- Result.reserve(Result.size() + Info.size());
- for (auto &I : Info)
- Result.push_back(std::make_pair(I.first, cast<MDNode>(I.second.get())));
-
- // Sort the resulting array so it is stable.
- if (Result.size() > 1)
- array_pod_sort(Result.begin(), Result.end());
+ Info.getAll(Result);
}
void Instruction::getAllMetadataOtherThanDebugLocImpl(
SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
Result.clear();
assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
+ getContext().pImpl->InstructionMetadata.count(this) &&
"Shouldn't have called this");
- const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
+ const auto &Info = getContext().pImpl->InstructionMetadata.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
- Result.reserve(Result.size() + Info.size());
- for (auto &I : Info)
- Result.push_back(std::make_pair(I.first, cast<MDNode>(I.second.get())));
-
- // Sort the resulting array so it is stable.
- if (Result.size() > 1)
- array_pod_sort(Result.begin(), Result.end());
+ Info.getAll(Result);
}
/// clearMetadataHashEntries - Clear all hashtable-based metadata from
/// this instruction.
void Instruction::clearMetadataHashEntries() {
assert(hasMetadataHashEntry() && "Caller should check");
- getContext().pImpl->MetadataStore.erase(this);
+ getContext().pImpl->InstructionMetadata.erase(this);
+ setHasMetadataHashEntry(false);
+}
+
+MDNode *Function::getMetadata(unsigned KindID) const {
+ if (!hasMetadata())
+ return nullptr;
+ return getContext().pImpl->FunctionMetadata[this].lookup(KindID);
+}
+
+MDNode *Function::getMetadata(StringRef Kind) const {
+ if (!hasMetadata())
+ return nullptr;
+ return getMetadata(getContext().getMDKindID(Kind));
+}
+
+void Function::setMetadata(unsigned KindID, MDNode *MD) {
+ if (MD) {
+ if (!hasMetadata())
+ setHasMetadataHashEntry(true);
+
+ getContext().pImpl->FunctionMetadata[this].set(KindID, *MD);
+ return;
+ }
+
+ // Nothing to unset.
+ if (!hasMetadata())
+ return;
+
+ auto &Store = getContext().pImpl->FunctionMetadata[this];
+ Store.erase(KindID);
+ if (Store.empty())
+ clearMetadata();
+}
+
+void Function::setMetadata(StringRef Kind, MDNode *MD) {
+ if (!MD && !hasMetadata())
+ return;
+ setMetadata(getContext().getMDKindID(Kind), MD);
+}
+
+void Function::getAllMetadata(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const {
+ MDs.clear();
+
+ if (!hasMetadata())
+ return;
+
+ getContext().pImpl->FunctionMetadata[this].getAll(MDs);
+}
+
+void Function::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
+ if (!hasMetadata())
+ return;
+ if (KnownIDs.empty()) {
+ clearMetadata();
+ return;
+ }
+
+ SmallSet<unsigned, 5> KnownSet;
+ KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
+
+ auto &Store = getContext().pImpl->FunctionMetadata[this];
+ assert(!Store.empty());
+
+ Store.remove_if([&KnownSet](const std::pair<unsigned, TrackingMDNodeRef> &I) {
+ return !KnownSet.count(I.first);
+ });
+
+ if (Store.empty())
+ clearMetadata();
+}
+
+void Function::clearMetadata() {
+ if (!hasMetadata())
+ return;
+ getContext().pImpl->FunctionMetadata.erase(this);
setHasMetadataHashEntry(false);
}
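
These hunks give Function the same side-table metadata interface that instructions already had, with storage keyed by the Function in LLVMContextImpl. A hedged usage sketch, relying only on the accessors added above; the "myanalysis" kind name is made up:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"

    // Sketch only: exercises the Function metadata accessors added above.
    void tagFunction(llvm::Function &F, llvm::MDNode *Info) {
      llvm::LLVMContext &Ctx = F.getContext();
      unsigned KindID = Ctx.getMDKindID("myanalysis"); // hypothetical kind
      F.setMetadata(KindID, Info);                       // attach
      llvm::MDNode *Found = F.getMetadata("myanalysis"); // string overload
      (void)Found;
      F.setMetadata(KindID, nullptr); // detach; clears the entry if last
    }
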
diff --git a/contrib/llvm/lib/IR/MetadataImpl.h b/contrib/llvm/lib/IR/MetadataImpl.h
new file mode 100644
index 0000000..662a50e
--- /dev/null
+++ b/contrib/llvm/lib/IR/MetadataImpl.h
@@ -0,0 +1,46 @@
+//===- MetadataImpl.h - Helpers for implementing metadata -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file has private helpers for implementing metadata types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_METADATAIMPL_H
+#define LLVM_IR_METADATAIMPL_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Metadata.h"
+
+namespace llvm {
+
+template <class T, class InfoT>
+static T *getUniqued(DenseSet<T *, InfoT> &Store,
+ const typename InfoT::KeyTy &Key) {
+ auto I = Store.find_as(Key);
+ return I == Store.end() ? nullptr : *I;
+}
+
+template <class T, class StoreT>
+T *MDNode::storeImpl(T *N, StorageType Storage, StoreT &Store) {
+ switch (Storage) {
+ case Uniqued:
+ Store.insert(N);
+ break;
+ case Distinct:
+ N->storeDistinctInContext();
+ break;
+ case Temporary:
+ break;
+ }
+ return N;
+}
+
+} // end namespace llvm
+
+#endif
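
getUniqued leans on DenseSet::find_as, a heterogeneous lookup: the set of nodes is probed with a lightweight key type, so no throwaway node has to be constructed just to search. The standard-library analogue is a transparent comparator (C++14); a sketch:

    #include <cassert>
    #include <set>
    #include <string>

    struct Node { std::string Key; };

    // is_transparent enables find() overloads taking any type the comparator
    // accepts -- probing a set of Node* directly with a string key, the way
    // DenseSet::find_as probes with InfoT::KeyTy.
    struct ByKey {
      using is_transparent = void;
      bool operator()(const Node *A, const Node *B) const { return A->Key < B->Key; }
      bool operator()(const Node *A, const std::string &B) const { return A->Key < B; }
      bool operator()(const std::string &A, const Node *B) const { return A < B->Key; }
    };

    int main() {
      Node N{"answer"};
      std::set<Node *, ByKey> Store{&N};
      assert(Store.find(std::string("answer")) != Store.end()); // no Node built
    }
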
diff --git a/contrib/llvm/lib/IR/MetadataTracking.cpp b/contrib/llvm/lib/IR/MetadataTracking.cpp
index ba97ca0..47f0b93 100644
--- a/contrib/llvm/lib/IR/MetadataTracking.cpp
+++ b/contrib/llvm/lib/IR/MetadataTracking.cpp
@@ -17,11 +17,8 @@
using namespace llvm;
ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) {
- if (auto *N = dyn_cast<MDNode>(&MD)) {
- if (auto *U = dyn_cast<UniquableMDNode>(N))
- return U->ReplaceableUses.get();
- return cast<MDNodeFwdDecl>(N);
- }
+ if (auto *N = dyn_cast<MDNode>(&MD))
+ return N->Context.getReplaceableUses();
return dyn_cast<ValueAsMetadata>(&MD);
}
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index d32ffcd..043f74e 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -278,7 +278,7 @@ void Module::eraseNamedMetadata(NamedMDNode *NMD) {
}
bool Module::isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB) {
- if (ConstantInt *Behavior = mdconst::dyn_extract<ConstantInt>(MD)) {
+ if (ConstantInt *Behavior = mdconst::dyn_extract_or_null<ConstantInt>(MD)) {
uint64_t Val = Behavior->getLimitedValue();
if (Val >= ModFlagBehaviorFirstVal && Val <= ModFlagBehaviorLastVal) {
MFB = static_cast<ModFlagBehavior>(Val);
@@ -298,7 +298,7 @@ getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
ModFlagBehavior MFB;
if (Flag->getNumOperands() >= 3 &&
isValidModFlagBehavior(Flag->getOperand(0), MFB) &&
- isa<MDString>(Flag->getOperand(1))) {
+ dyn_cast_or_null<MDString>(Flag->getOperand(1))) {
// Check the operands of the MDNode before accessing the operands.
// The verifier will actually catch these failures.
MDString *Key = cast<MDString>(Flag->getOperand(1));
@@ -365,31 +365,11 @@ void Module::addModuleFlag(MDNode *Node) {
void Module::setDataLayout(StringRef Desc) {
DL.reset(Desc);
-
- if (Desc.empty()) {
- DataLayoutStr = "";
- } else {
- DataLayoutStr = DL.getStringRepresentation();
- // DataLayoutStr is now equivalent to Desc, but since the representation
- // is not unique, they may not be identical.
- }
}
-void Module::setDataLayout(const DataLayout *Other) {
- if (!Other) {
- DataLayoutStr = "";
- DL.reset("");
- } else {
- DL = *Other;
- DataLayoutStr = DL.getStringRepresentation();
- }
-}
+void Module::setDataLayout(const DataLayout &Other) { DL = Other; }
-const DataLayout *Module::getDataLayout() const {
- if (DataLayoutStr.empty())
- return nullptr;
- return &DL;
-}
+const DataLayout &Module::getDataLayout() const { return DL; }
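
The DataLayout accessors flip from nullable pointer to reference: a Module now always carries a layout (possibly the default-constructed one), so callers drop their null checks. A caller migrates roughly like this (sketch against the new signature):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Module.h"
    #include <cstdint>

    uint64_t allocSizeOf(const llvm::Module &M, llvm::Type *Ty) {
      // Previously: if (const DataLayout *DL = M.getDataLayout()) ...
      // Now the layout is always present and taken by reference.
      const llvm::DataLayout &DL = M.getDataLayout();
      return DL.getTypeAllocSize(Ty);
    }
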
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
@@ -414,15 +394,15 @@ std::error_code Module::materialize(GlobalValue *GV) {
return Materializer->materialize(GV);
}
-void Module::Dematerialize(GlobalValue *GV) {
+void Module::dematerialize(GlobalValue *GV) {
if (Materializer)
- return Materializer->Dematerialize(GV);
+ return Materializer->dematerialize(GV);
}
std::error_code Module::materializeAll() {
if (!Materializer)
return std::error_code();
- return Materializer->MaterializeModule(this);
+ return Materializer->materializeModule(this);
}
std::error_code Module::materializeAllPermanently() {
@@ -433,6 +413,12 @@ std::error_code Module::materializeAllPermanently() {
return std::error_code();
}
+std::error_code Module::materializeMetadata() {
+ if (!Materializer)
+ return std::error_code();
+ return Materializer->materializeMetadata();
+}
+
//===----------------------------------------------------------------------===//
// Other module related stuff.
//
diff --git a/contrib/llvm/lib/IR/Operator.cpp b/contrib/llvm/lib/IR/Operator.cpp
new file mode 100644
index 0000000..77dc680
--- /dev/null
+++ b/contrib/llvm/lib/IR/Operator.cpp
@@ -0,0 +1,44 @@
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+
+#include "ConstantsContext.h"
+
+namespace llvm {
+Type *GEPOperator::getSourceElementType() const {
+ if (auto *I = dyn_cast<GetElementPtrInst>(this))
+ return I->getSourceElementType();
+ return cast<GetElementPtrConstantExpr>(this)->getSourceElementType();
+}
+
+bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
+ APInt &Offset) const {
+ assert(Offset.getBitWidth() ==
+ DL.getPointerSizeInBits(getPointerAddressSpace()) &&
+ "The offset must have exactly as many bits as our pointer.");
+
+ for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ return false;
+ if (OpC->isZero())
+ continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ Offset += APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ // For array or vector indices, scale the index by the size of the type.
+ APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+ Offset += Index * APInt(Offset.getBitWidth(),
+ DL.getTypeAllocSize(GTI.getIndexedType()));
+ }
+ return true;
+}
+}
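
accumulateConstantOffset folds a GEP's all-constant indices into a byte offset: a struct index adds its field offset from the StructLayout, while array/vector indices add index times element allocation size. The same walk by hand for a hypothetical %S = { i32, i64 } (assuming an 8-byte-aligned i64, so sizeof(%S) is 16 with field offsets {0, 8}):

    #include <cstdint>
    #include <iostream>

    int main() {
      // getelementptr %S, %S* %p, i64 2, i32 1 under the assumed layout.
      const uint64_t StructSize = 16;        // sizeof({ i32, i64 }), padded
      const uint64_t FieldOffset[] = {0, 8};

      uint64_t Offset = 0;
      Offset += 2 * StructSize;    // pointer-style index: 2 * 16 = 32
      Offset += FieldOffset[1];    // struct index: field 1 starts at byte 8
      std::cout << Offset << '\n'; // 40
    }
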
diff --git a/contrib/llvm/lib/IR/Pass.cpp b/contrib/llvm/lib/IR/Pass.cpp
index 91d86ae..df45460 100644
--- a/contrib/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm/lib/IR/Pass.cpp
@@ -223,8 +223,8 @@ void PassRegistrationListener::enumeratePasses() {
PassRegistry::getPassRegistry()->enumerateWith(this);
}
-PassNameParser::PassNameParser()
- : Opt(nullptr) {
+PassNameParser::PassNameParser(cl::Option &O)
+ : cl::parser<const PassInfo *>(O) {
PassRegistry::getPassRegistry()->addRegistrationListener(this);
}
diff --git a/contrib/llvm/lib/IR/Statepoint.cpp b/contrib/llvm/lib/IR/Statepoint.cpp
index 270c016..83ee611 100644
--- a/contrib/llvm/lib/IR/Statepoint.cpp
+++ b/contrib/llvm/lib/IR/Statepoint.cpp
@@ -20,24 +20,34 @@ using namespace std;
using namespace llvm;
bool llvm::isStatepoint(const ImmutableCallSite &CS) {
+ if (!CS.getInstruction()) {
+ // This is not a call site
+ return false;
+ }
+
const Function *F = CS.getCalledFunction();
return (F && F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint);
}
-bool llvm::isStatepoint(const Instruction *inst) {
+bool llvm::isStatepoint(const Value *inst) {
if (isa<InvokeInst>(inst) || isa<CallInst>(inst)) {
ImmutableCallSite CS(inst);
return isStatepoint(CS);
}
return false;
}
-bool llvm::isStatepoint(const Instruction &inst) {
+bool llvm::isStatepoint(const Value &inst) {
return isStatepoint(&inst);
}
bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
+ if (!CS.getInstruction()) {
+ // This is not a call site
+ return false;
+ }
+
return isGCRelocate(CS.getInstruction());
}
-bool llvm::isGCRelocate(const Instruction *inst) {
+bool llvm::isGCRelocate(const Value *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (const Function *F = call->getCalledFunction()) {
return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
@@ -47,14 +57,20 @@ bool llvm::isGCRelocate(const Instruction *inst) {
}
bool llvm::isGCResult(const ImmutableCallSite &CS) {
+ if (!CS.getInstruction()) {
+ // This is not a call site
+ return false;
+ }
+
return isGCResult(CS.getInstruction());
}
-bool llvm::isGCResult(const Instruction *inst) {
+bool llvm::isGCResult(const Value *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (Function *F = call->getCalledFunction()) {
return (F->getIntrinsicID() == Intrinsic::experimental_gc_result_int ||
F->getIntrinsicID() == Intrinsic::experimental_gc_result_float ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr);
+ F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr ||
+ F->getIntrinsicID() == Intrinsic::experimental_gc_result);
}
}
return false;
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 65060dc..1c40516 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -238,6 +238,7 @@ IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+IntegerType *Type::getInt128Ty(LLVMContext &C) { return &C.pImpl->Int128Ty; }
IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
@@ -764,3 +765,7 @@ bool PointerType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy();
}
+
+bool PointerType::isLoadableOrStorableType(Type *ElemTy) {
+ return isValidElementType(ElemTy) && !ElemTy->isFunctionTy();
+}
diff --git a/contrib/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm/lib/IR/TypeFinder.cpp
index e2fb8f8..1d2b808 100644
--- a/contrib/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm/lib/IR/TypeFinder.cpp
@@ -68,7 +68,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
// instructions with this loop.)
for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
OI != OE; ++OI)
- if (!isa<Instruction>(OI))
+ if (*OI && !isa<Instruction>(OI))
incorporateValue(*OI);
// Incorporate types hiding in metadata.
diff --git a/contrib/llvm/lib/IR/UseListOrder.cpp b/contrib/llvm/lib/IR/UseListOrder.cpp
deleted file mode 100644
index d064e67..0000000
--- a/contrib/llvm/lib/IR/UseListOrder.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- UseListOrder.cpp - Implement Use List Order ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implement structures and command-line options for preserving use-list order.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/UseListOrder.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-static cl::opt<bool> PreserveBitcodeUseListOrder(
- "preserve-bc-use-list-order",
- cl::desc("Experimental support to preserve bitcode use-list order."),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool> PreserveAssemblyUseListOrder(
- "preserve-ll-use-list-order",
- cl::desc("Experimental support to preserve assembly use-list order."),
- cl::init(false), cl::Hidden);
-
-bool llvm::shouldPreserveBitcodeUseListOrder() {
- return PreserveBitcodeUseListOrder;
-}
-
-bool llvm::shouldPreserveAssemblyUseListOrder() {
- return PreserveAssemblyUseListOrder;
-}
-
-void llvm::setPreserveBitcodeUseListOrder(bool ShouldPreserve) {
- PreserveBitcodeUseListOrder = ShouldPreserve;
-}
-
-void llvm::setPreserveAssemblyUseListOrder(bool ShouldPreserve) {
- PreserveAssemblyUseListOrder = ShouldPreserve;
-}
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index 5f7e258..fd0ed31 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -23,13 +23,16 @@
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -67,15 +70,13 @@ Value::~Value() {
#ifndef NDEBUG // Only in -g mode...
// Check to make sure that there are no uses of this value that are still
// around when the value is destroyed. If there are, then we have a dangling
- // reference and something is wrong. This code is here to print out what is
- // still being referenced. The value in question should be printed as
- // a <badref>
+ // reference and something is wrong. This code is here to print out where
+ // the value is still being referenced.
//
if (!use_empty()) {
dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
- for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- dbgs() << "Use still stuck around after Def is destroyed:"
- << **I << "\n";
+ for (auto *U : users())
+ dbgs() << "Use still stuck around after Def is destroyed:" << *U << "\n";
}
#endif
assert(use_empty() && "Uses remain when a value is destroyed!");
@@ -165,7 +166,7 @@ StringRef Value::getName() const {
return getValueName()->getKey();
}
-void Value::setName(const Twine &NewName) {
+void Value::setNameImpl(const Twine &NewName) {
// Fast path for common IRBuilder case of setName("") when there is no name.
if (NewName.isTriviallyEmpty() && !hasName())
return;
@@ -186,9 +187,6 @@ void Value::setName(const Twine &NewName) {
if (getSymTab(this, ST))
return; // Cannot set a name on this value (e.g. constant).
- if (Function *F = dyn_cast<Function>(this))
- getContext().pImpl->IntrinsicIDCache.erase(F);
-
if (!ST) { // No symbol table to update? Just do the change.
if (NameRef.empty()) {
// Free the name for this value.
@@ -221,6 +219,12 @@ void Value::setName(const Twine &NewName) {
setValueName(ST->createValueName(NameRef, this));
}
+void Value::setName(const Twine &NewName) {
+ setNameImpl(NewName);
+ if (Function *F = dyn_cast<Function>(this))
+ F->recalculateIntrinsicID();
+}
+
void Value::takeName(Value *V) {
ValueSymbolTable *ST = nullptr;
// If this value has a name, drop it.
@@ -476,133 +480,6 @@ Value *Value::stripInBoundsOffsets() {
return stripPointerCastsAndOffsets<PSK_InBounds>(this);
}
-/// \brief Check if Value is always a dereferenceable pointer.
-///
-/// Test if V is always a pointer to allocated and suitably aligned memory for
-/// a simple load or store.
-static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
- SmallPtrSetImpl<const Value *> &Visited) {
- // Note that it is not safe to speculate into a malloc'd region because
- // malloc may return null.
-
- // These are obviously ok.
- if (isa<AllocaInst>(V)) return true;
-
- // It's not always safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca. However,
- // if we're casting from a pointer from a type of larger size
- // to a type of smaller size (or the same size), and the alignment
- // is at least as large as for the resulting pointer type, then
- // we can look through the bitcast.
- if (DL)
- if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) {
- Type *STy = BC->getSrcTy()->getPointerElementType(),
- *DTy = BC->getDestTy()->getPointerElementType();
- if (STy->isSized() && DTy->isSized() &&
- (DL->getTypeStoreSize(STy) >=
- DL->getTypeStoreSize(DTy)) &&
- (DL->getABITypeAlignment(STy) >=
- DL->getABITypeAlignment(DTy)))
- return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
- }
-
- // Global variables which can't collapse to null are ok.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->hasExternalWeakLinkage();
-
- // byval arguments are okay. Arguments specifically marked as
- // dereferenceable are okay too.
- if (const Argument *A = dyn_cast<Argument>(V)) {
- if (A->hasByValAttr())
- return true;
- else if (uint64_t Bytes = A->getDereferenceableBytes()) {
- Type *Ty = V->getType()->getPointerElementType();
- if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
- return true;
- }
-
- return false;
- }
-
- // Return values from call sites specifically marked as dereferenceable are
- // also okay.
- if (ImmutableCallSite CS = V) {
- if (uint64_t Bytes = CS.getDereferenceableBytes(0)) {
- Type *Ty = V->getType()->getPointerElementType();
- if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
- return true;
- }
- }
-
- // For GEPs, determine if the indexing lands within the allocated object.
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- // Conservatively require that the base pointer be fully dereferenceable.
- if (!Visited.insert(GEP->getOperand(0)).second)
- return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
- return false;
- // Check the indices.
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::const_op_iterator I = GEP->op_begin()+1,
- E = GEP->op_end(); I != E; ++I) {
- Value *Index = *I;
- Type *Ty = *GTI++;
- // Struct indices can't be out of bounds.
- if (isa<StructType>(Ty))
- continue;
- ConstantInt *CI = dyn_cast<ConstantInt>(Index);
- if (!CI)
- return false;
- // Zero is always ok.
- if (CI->isZero())
- continue;
- // Check to see that it's within the bounds of an array.
- ArrayType *ATy = dyn_cast<ArrayType>(Ty);
- if (!ATy)
- return false;
- if (CI->getValue().getActiveBits() > 64)
- return false;
- if (CI->getZExtValue() >= ATy->getNumElements())
- return false;
- }
- // Indices check out; this is dereferenceable.
- return true;
- }
-
- if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
- return isDereferenceablePointer(ASC->getOperand(0), DL, Visited);
-
- // If we don't know, assume the worst.
- return false;
-}
-
-bool Value::isDereferenceablePointer(const DataLayout *DL) const {
- // When dereferenceability information is provided by a dereferenceable
- // attribute, we know exactly how many bytes are dereferenceable. If we can
- // determine the exact offset to the attributed variable, we can use that
- // information here.
- Type *Ty = getType()->getPointerElementType();
- if (Ty->isSized() && DL) {
- APInt Offset(DL->getTypeStoreSizeInBits(getType()), 0);
- const Value *BV = stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
-
- APInt DerefBytes(Offset.getBitWidth(), 0);
- if (const Argument *A = dyn_cast<Argument>(BV))
- DerefBytes = A->getDereferenceableBytes();
- else if (ImmutableCallSite CS = BV)
- DerefBytes = CS.getDereferenceableBytes(0);
-
- if (DerefBytes.getBoolValue() && Offset.isNonNegative()) {
- if (DerefBytes.uge(Offset + DL->getTypeStoreSize(Ty)))
- return true;
- }
- }
-
- SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(this, DL, Visited);
-}
-
Value *Value::DoPHITranslation(const BasicBlock *CurBB,
const BasicBlock *PredBB) {
PHINode *PN = dyn_cast<PHINode>(this);
diff --git a/contrib/llvm/lib/IR/ValueSymbolTable.cpp b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
index 4f078f0..e10142d 100644
--- a/contrib/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
@@ -53,7 +53,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
while (1) {
// Trim any suffix off and append the next number.
UniqueName.resize(BaseSize);
- raw_svector_ostream(UniqueName) << ++LastUnique;
+ raw_svector_ostream(UniqueName) << "." << ++LastUnique;
// Try insert the vmap entry with this suffix.
auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
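
Inserting the "." makes collision renames produce %x.1 instead of %x1, so an auto-renamed value can no longer be confused with a distinct source-level name such as x1. The uniquing loop in miniature:

    #include <iostream>
    #include <string>
    #include <unordered_set>

    std::string makeUnique(std::unordered_set<std::string> &Taken,
                           const std::string &Base) {
      if (Taken.insert(Base).second)
        return Base;
      // Append ".N" until the name is free, as reinsertValue() does above.
      for (unsigned N = 1;; ++N) {
        std::string Candidate = Base + "." + std::to_string(N);
        if (Taken.insert(Candidate).second)
          return Candidate;
      }
    }

    int main() {
      std::unordered_set<std::string> Taken;
      std::cout << makeUnique(Taken, "x") << '\n'; // x
      std::cout << makeUnique(Taken, "x") << '\n'; // x.1
      std::cout << makeUnique(Taken, "x") << '\n'; // x.2
    }
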
diff --git a/contrib/llvm/lib/IR/ValueTypes.cpp b/contrib/llvm/lib/IR/ValueTypes.cpp
index 2d4da95..d95de39 100644
--- a/contrib/llvm/lib/IR/ValueTypes.cpp
+++ b/contrib/llvm/lib/IR/ValueTypes.cpp
@@ -157,6 +157,7 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v16i64: return "v16i64";
+ case MVT::v1i128: return "v1i128";
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
@@ -225,6 +226,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 4bf2d1a..5ed137a 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -78,7 +78,7 @@
#include <cstdarg>
using namespace llvm;
-static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(false));
+static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(true));
namespace {
struct VerifierSupport {
@@ -91,7 +91,8 @@ struct VerifierSupport {
explicit VerifierSupport(raw_ostream &OS)
: OS(OS), M(nullptr), Broken(false) {}
- void WriteValue(const Value *V) {
+private:
+ void Write(const Value *V) {
if (!V)
return;
if (isa<Instruction>(V)) {
@@ -102,81 +103,65 @@ struct VerifierSupport {
}
}
- void WriteMetadata(const Metadata *MD) {
+ void Write(const Metadata *MD) {
if (!MD)
return;
- MD->printAsOperand(OS, true, M);
+ MD->print(OS, M);
OS << '\n';
}
- void WriteType(Type *T) {
+ template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) {
+ Write(MD.get());
+ }
+
+ void Write(const NamedMDNode *NMD) {
+ if (!NMD)
+ return;
+ NMD->print(OS);
+ OS << '\n';
+ }
+
+ void Write(Type *T) {
if (!T)
return;
OS << ' ' << *T;
}
- void WriteComdat(const Comdat *C) {
+ void Write(const Comdat *C) {
if (!C)
return;
OS << *C;
}
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message, const Value *V1 = nullptr,
- const Value *V2 = nullptr, const Value *V3 = nullptr,
- const Value *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Metadata *V1, const Metadata *V2,
- const Metadata *V3 = nullptr, const Metadata *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteMetadata(V2);
- WriteMetadata(V3);
- WriteMetadata(V4);
- Broken = true;
+ template <typename T1, typename... Ts>
+ void WriteTs(const T1 &V1, const Ts &... Vs) {
+ Write(V1);
+ WriteTs(Vs...);
}
- void CheckFailed(const Twine &Message, const Metadata *V1,
- const Value *V2 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteValue(V2);
- Broken = true;
- }
+ template <typename... Ts> void WriteTs() {}
- void CheckFailed(const Twine &Message, const Value *V1, Type *T2,
- const Value *V3 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, Type *T1, Type *T2 = nullptr,
- Type *T3 = nullptr) {
- OS << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
+public:
+ /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) {
+ OS << Message << '\n';
Broken = true;
}
- void CheckFailed(const Twine &Message, const Comdat *C) {
- OS << Message.str() << "\n";
- WriteComdat(C);
- Broken = true;
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set a
+ /// breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
+ CheckFailed(Message);
+ WriteTs(V1, Vs...);
}
};
+
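
The family of fixed-arity CheckFailed overloads collapses into one variadic template: Write() is overloaded per printable type, and WriteTs() peels off one argument per recursive instantiation until the empty base case. The recursion in miniature:

    #include <iostream>

    // Base case: nothing left to print.
    static void writeAll() {}

    // Peel one argument per recursive instantiation, like WriteTs() above.
    template <typename T, typename... Ts>
    static void writeAll(const T &V, const Ts &... Vs) {
      std::cout << V << '\n';
      writeAll(Vs...);
    }

    int main() { writeAll("check failed", 42, 3.5); }
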
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
@@ -191,21 +176,28 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
SmallPtrSet<Instruction *, 16> InstsInThisBlock;
/// \brief Keep track of the metadata nodes that have been checked already.
- SmallPtrSet<Metadata *, 32> MDNodes;
+ SmallPtrSet<const Metadata *, 32> MDNodes;
+
+ /// \brief Track unresolved string-based type references.
+ SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs;
/// \brief The personality function referenced by the LandingPadInsts.
/// All LandingPadInsts within the same function must use the same
/// personality function.
const Value *PersonalityFn;
- /// \brief Whether we've seen a call to @llvm.frameallocate in this function
+ /// \brief Whether we've seen a call to @llvm.frameescape in this function
/// already.
- bool SawFrameAllocate;
+ bool SawFrameEscape;
+
+ /// Stores the count of how many objects were passed to llvm.frameescape for a
+ /// given function and the largest index passed to llvm.framerecover.
+ DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
public:
- explicit Verifier(raw_ostream &OS = dbgs())
+ explicit Verifier(raw_ostream &OS)
: VerifierSupport(OS), Context(nullptr), PersonalityFn(nullptr),
- SawFrameAllocate(false) {}
+ SawFrameEscape(false) {}
bool verify(const Function &F) {
M = F.getParent();
@@ -240,7 +232,7 @@ public:
visit(const_cast<Function &>(F));
InstsInThisBlock.clear();
PersonalityFn = nullptr;
- SawFrameAllocate = false;
+ SawFrameEscape = false;
return !Broken;
}
@@ -259,6 +251,10 @@ public:
visitFunction(*I);
}
+ // Now that we've visited every function, verify that we never asked to
+ // recover a frame index that wasn't escaped.
+ verifyFrameRecoverIndices();
+
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
visitGlobalVariable(*I);
@@ -278,6 +274,9 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
+ // Verify type references last.
+ verifyTypeRefs();
+
return !Broken;
}
@@ -290,9 +289,9 @@ private:
void visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias *> &Visited,
const GlobalAlias &A, const Constant &C);
void visitNamedMDNode(const NamedMDNode &NMD);
- void visitMDNode(MDNode &MD);
- void visitMetadataAsValue(MetadataAsValue &MD, Function *F);
- void visitValueAsMetadata(ValueAsMetadata &MD, Function *F);
+ void visitMDNode(const MDNode &MD);
+ void visitMetadataAsValue(const MetadataAsValue &MD, Function *F);
+ void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
void visitComdat(const Comdat &C);
void visitModuleIdents(const Module &M);
void visitModuleFlags(const Module &M);
@@ -303,6 +302,37 @@ private:
void visitBasicBlock(BasicBlock &BB);
void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty);
+ template <class Ty> bool isValidMetadataArray(const MDTuple &N);
+#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
+#include "llvm/IR/Metadata.def"
+ void visitDIScope(const DIScope &N);
+ void visitDIDerivedTypeBase(const DIDerivedTypeBase &N);
+ void visitDIVariable(const DIVariable &N);
+ void visitDILexicalBlockBase(const DILexicalBlockBase &N);
+ void visitDITemplateParameter(const DITemplateParameter &N);
+
+ void visitTemplateParams(const MDNode &N, const Metadata &RawParams);
+
+ /// \brief Check for a valid string-based type reference.
+ ///
+ /// Checks if \c MD is a string-based type reference. If it is, keeps track
+ /// of it (and its user, \c N) for error messages later.
+ bool isValidUUID(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid type reference.
+ ///
+ /// Checks for subclasses of \a DIType, or \a isValidUUID().
+ bool isTypeRef(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid scope reference.
+ ///
+ /// Checks for subclasses of \a DIScope, or \a isValidUUID().
+ bool isScopeRef(const MDNode &N, const Metadata *MD);
+
+ /// \brief Check for a valid debug info reference.
+ ///
+ /// Checks for subclasses of \a DINode, or \a isValidUUID().
+ bool isDIRef(const MDNode &N, const Metadata *MD);
// InstVisitor overrides...
using InstVisitor<Verifier>::visit;
@@ -345,6 +375,8 @@ private:
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ template <class DbgIntrinsicTy>
+ void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
void visitAtomicRMWInst(AtomicRMWInst &RMWI);
void visitFenceInst(FenceInst &FI);
@@ -368,136 +400,124 @@ private:
bool isReturnValue, const Value *V);
void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
const Value *V);
+ void VerifyFunctionMetadata(
+ const SmallVector<std::pair<unsigned, MDNode *>, 4> MDs);
void VerifyConstantExprBitcastType(const ConstantExpr *CE);
-};
-class DebugInfoVerifier : public VerifierSupport {
-public:
- explicit DebugInfoVerifier(raw_ostream &OS = dbgs()) : VerifierSupport(OS) {}
-
- bool verify(const Module &M) {
- this->M = &M;
- verifyDebugInfo();
- return !Broken;
- }
-
-private:
- void verifyDebugInfo();
- void processInstructions(DebugInfoFinder &Finder);
- void processCallInst(DebugInfoFinder &Finder, const CallInst &CI);
+ void VerifyStatepoint(ImmutableCallSite CS);
+ void verifyFrameRecoverIndices();
+
+ // Module-level debug info verification...
+ void verifyTypeRefs();
+ template <class MapTy>
+ void verifyBitPieceExpression(const DbgInfoIntrinsic &I,
+ const MapTy &TypeRefs);
+ void visitUnresolvedTypeRef(const MDString *S, const MDNode *N);
};
} // End anonymous namespace
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
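
Likewise the old Assert/Assert1..Assert4 macros become a single variadic macro: __VA_ARGS__ forwards whatever trailing values the call site supplies into the variadic CheckFailed above. A tiny standalone analogue:

    #include <cstdio>

    // One variadic macro instead of five fixed-arity ones; __VA_ARGS__
    // forwards the trailing values unchanged.
    #define CHECK(C, ...)                                                      \
      do {                                                                     \
        if (!(C)) {                                                            \
          std::fprintf(stderr, __VA_ARGS__);                                   \
          return;                                                              \
        }                                                                      \
      } while (0)

    void divide(int A, int B) {
      CHECK(B != 0, "division by zero: %d / %d\n", A, B);
      std::printf("%d\n", A / B);
    }
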
void Verifier::visit(Instruction &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Assert1(I.getOperand(i) != nullptr, "Operand is null", &I);
+ Assert(I.getOperand(i) != nullptr, "Operand is null", &I);
InstVisitor<Verifier>::visit(I);
}
void Verifier::visitGlobalValue(const GlobalValue &GV) {
- Assert1(!GV.isDeclaration() || GV.hasExternalLinkage() ||
- GV.hasExternalWeakLinkage(),
- "Global is external, but doesn't have external or weak linkage!",
- &GV);
+ Assert(!GV.isDeclaration() || GV.hasExternalLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "Global is external, but doesn't have external or weak linkage!", &GV);
- Assert1(GV.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &GV);
- Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
- "Only global variables can have appending linkage!", &GV);
+ Assert(GV.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &GV);
+ Assert(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
if (GV.hasAppendingLinkage()) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
- "Only global arrays can have appending linkage!", GVar);
+ Assert(GVar && GVar->getValueType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
}
}
void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (GV.hasInitializer()) {
- Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
- "Global variable initializer type does not match global "
- "variable type!", &GV);
+ Assert(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!",
+ &GV);
// If the global has common linkage, it must have a zero initializer and
// cannot be constant.
if (GV.hasCommonLinkage()) {
- Assert1(GV.getInitializer()->isNullValue(),
- "'common' global must have a zero initializer!", &GV);
- Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
- &GV);
- Assert1(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
+ Assert(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ Assert(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
}
} else {
- Assert1(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
- "invalid linkage type for global declaration", &GV);
+ Assert(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
}
if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
GV.getName() == "llvm.global_dtors")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
// Don't worry about emitting an error for it not being an array,
// visitGlobalValue will complain on appending non-array.
- if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
// FIXME: Reject the 2-field form in LLVM 4.0.
- Assert1(STy && (STy->getNumElements() == 2 ||
- STy->getNumElements() == 3) &&
- STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
- STy->getTypeAtIndex(1) == FuncPtrTy,
- "wrong type for intrinsic global variable", &GV);
+ Assert(STy &&
+ (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
if (STy->getNumElements() == 3) {
Type *ETy = STy->getTypeAtIndex(2);
- Assert1(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
- "wrong type for intrinsic global variable", &GV);
+ Assert(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+ "wrong type for intrinsic global variable", &GV);
}
}
}
if (GV.hasName() && (GV.getName() == "llvm.used" ||
GV.getName() == "llvm.compiler.used")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
- Type *GVType = GV.getType()->getElementType();
+ Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ Type *GVType = GV.getValueType();
if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
- Assert1(PTy, "wrong type for intrinsic global variable", &GV);
+ Assert(PTy, "wrong type for intrinsic global variable", &GV);
if (GV.hasInitializer()) {
const Constant *Init = GV.getInitializer();
const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
- Assert1(InitArray, "wrong initalizer for intrinsic global variable",
- Init);
+      Assert(InitArray, "wrong initializer for intrinsic global variable",
+ Init);
for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) {
Value *V = Init->getOperand(i)->stripPointerCastsNoFollowAliases();
- Assert1(
- isa<GlobalVariable>(V) || isa<Function>(V) || isa<GlobalAlias>(V),
- "invalid llvm.used member", V);
- Assert1(V->hasName(), "members of llvm.used must be named", V);
+ Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
+ isa<GlobalAlias>(V),
+ "invalid llvm.used member", V);
+ Assert(V->hasName(), "members of llvm.used must be named", V);
}
}
}
}
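; Editor's sketch, not part of this patch: an llvm.used entry; members must be
; named global variables, functions, or aliases (@f is hypothetical).
@f = global i32 0
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @f to i8*)], section "llvm.metadata"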
- Assert1(!GV.hasDLLImportStorageClass() ||
- (GV.isDeclaration() && GV.hasExternalLinkage()) ||
- GV.hasAvailableExternallyLinkage(),
- "Global is marked as dllimport, but not external", &GV);
+ Assert(!GV.hasDLLImportStorageClass() ||
+ (GV.isDeclaration() && GV.hasExternalLinkage()) ||
+ GV.hasAvailableExternallyLinkage(),
+ "Global is marked as dllimport, but not external", &GV);
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
@@ -515,8 +535,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
continue;
if (const User *U = dyn_cast<User>(V)) {
- for (unsigned I = 0, N = U->getNumOperands(); I != N; ++I)
- WorkStack.push_back(U->getOperand(I));
+ WorkStack.append(U->op_begin(), U->op_end());
}
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -538,13 +557,13 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
const GlobalAlias &GA, const Constant &C) {
if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
- Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
+ Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA);
if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
- Assert1(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
+ Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
- Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
- &GA);
+ Assert(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
+ &GA);
} else {
// Only continue verifying subexpressions of GlobalAliases.
// Do not recurse into global initializers.
@@ -565,19 +584,18 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
}
void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
- Assert1(!GA.getName().empty(),
- "Alias name cannot be empty!", &GA);
- Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()),
- "Alias should have private, internal, linkonce, weak, linkonce_odr, "
- "weak_odr, or external linkage!",
- &GA);
+ Assert(!GA.getName().empty(), "Alias name cannot be empty!", &GA);
+ Assert(GlobalAlias::isValidLinkage(GA.getLinkage()),
+ "Alias should have private, internal, linkonce, weak, linkonce_odr, "
+ "weak_odr, or external linkage!",
+ &GA);
const Constant *Aliasee = GA.getAliasee();
- Assert1(Aliasee, "Aliasee cannot be NULL!", &GA);
- Assert1(GA.getType() == Aliasee->getType(),
- "Alias and aliasee types should match!", &GA);
+ Assert(Aliasee, "Aliasee cannot be NULL!", &GA);
+ Assert(GA.getType() == Aliasee->getType(),
+ "Alias and aliasee types should match!", &GA);
- Assert1(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
- "Aliasee should be either GlobalValue or ConstantExpr", &GA);
+ Assert(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
+ "Aliasee should be either GlobalValue or ConstantExpr", &GA);
visitAliaseeSubExpr(GA, *Aliasee);
@@ -587,6 +605,11 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
MDNode *MD = NMD.getOperand(i);
+
+ if (NMD.getName() == "llvm.dbg.cu") {
+ Assert(MD && isa<DICompileUnit>(MD), "invalid compile unit", &NMD, MD);
+ }
+
if (!MD)
continue;
@@ -594,18 +617,30 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
}
}
-void Verifier::visitMDNode(MDNode &MD) {
+void Verifier::visitMDNode(const MDNode &MD) {
// Only visit each node once. Metadata can be mutually recursive, so this
// avoids infinite recursion here, as well as being an optimization.
if (!MDNodes.insert(&MD).second)
return;
+ switch (MD.getMetadataID()) {
+ default:
+ llvm_unreachable("Invalid MDNode subclass");
+ case Metadata::MDTupleKind:
+ break;
+#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \
+ case Metadata::CLASS##Kind: \
+ visit##CLASS(cast<CLASS>(MD)); \
+ break;
+#include "llvm/IR/Metadata.def"
+ }
+
for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
Metadata *Op = MD.getOperand(i);
if (!Op)
continue;
- Assert2(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
- &MD, Op);
+ Assert(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
+ &MD, Op);
if (auto *N = dyn_cast<MDNode>(Op)) {
visitMDNode(*N);
continue;
@@ -617,26 +652,26 @@ void Verifier::visitMDNode(MDNode &MD) {
}
// Check these last, so we diagnose problems in operands first.
- Assert1(!isa<MDNodeFwdDecl>(MD), "Expected no forward declarations!", &MD);
- Assert1(MD.isResolved(), "All nodes should be resolved!", &MD);
+ Assert(!MD.isTemporary(), "Expected no forward declarations!", &MD);
+ Assert(MD.isResolved(), "All nodes should be resolved!", &MD);
}
-void Verifier::visitValueAsMetadata(ValueAsMetadata &MD, Function *F) {
- Assert1(MD.getValue(), "Expected valid value", &MD);
- Assert2(!MD.getValue()->getType()->isMetadataTy(),
- "Unexpected metadata round-trip through values", &MD, MD.getValue());
+void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
+ Assert(MD.getValue(), "Expected valid value", &MD);
+ Assert(!MD.getValue()->getType()->isMetadataTy(),
+ "Unexpected metadata round-trip through values", &MD, MD.getValue());
auto *L = dyn_cast<LocalAsMetadata>(&MD);
if (!L)
return;
- Assert1(F, "function-local metadata used outside a function", L);
+ Assert(F, "function-local metadata used outside a function", L);
// If this was an instruction, bb, or argument, verify that it is in the
// function that we expect.
Function *ActualF = nullptr;
if (Instruction *I = dyn_cast<Instruction>(L->getValue())) {
- Assert2(I->getParent(), "function-local metadata not in basic block", L, I);
+ Assert(I->getParent(), "function-local metadata not in basic block", L, I);
ActualF = I->getParent()->getParent();
} else if (BasicBlock *BB = dyn_cast<BasicBlock>(L->getValue()))
ActualF = BB->getParent();
@@ -644,10 +679,10 @@ void Verifier::visitValueAsMetadata(ValueAsMetadata &MD, Function *F) {
ActualF = A->getParent();
assert(ActualF && "Unimplemented function local metadata case!");
- Assert1(ActualF == F, "function-local metadata used in wrong function", L);
+ Assert(ActualF == F, "function-local metadata used in wrong function", L);
}
-void Verifier::visitMetadataAsValue(MetadataAsValue &MDV, Function *F) {
+void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
Metadata *MD = MDV.getMetadata();
if (auto *N = dyn_cast<MDNode>(MD)) {
visitMDNode(*N);
@@ -663,19 +698,415 @@ void Verifier::visitMetadataAsValue(MetadataAsValue &MDV, Function *F) {
visitValueAsMetadata(*V, F);
}
+bool Verifier::isValidUUID(const MDNode &N, const Metadata *MD) {
+ auto *S = dyn_cast<MDString>(MD);
+ if (!S)
+ return false;
+ if (S->getString().empty())
+ return false;
+
+ // Keep track of names of types referenced via UUID so we can check that they
+ // actually exist.
+ UnresolvedTypeRefs.insert(std::make_pair(S, &N));
+ return true;
+}
+
+/// \brief Check if a value can be a reference to a type.
+bool Verifier::isTypeRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<DIType>(MD);
+}
+
+/// \brief Check if a value can be a ScopeRef.
+bool Verifier::isScopeRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<DIScope>(MD);
+}
+
+/// \brief Check if a value can be a debug info ref.
+bool Verifier::isDIRef(const MDNode &N, const Metadata *MD) {
+ return !MD || isValidUUID(N, MD) || isa<DINode>(MD);
+}
+
+template <class Ty>
+bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
+ for (Metadata *MD : N.operands()) {
+ if (MD) {
+ if (!isa<Ty>(MD))
+ return false;
+ } else {
+ if (!AllowNull)
+ return false;
+ }
+ }
+ return true;
+}
+
+template <class Ty>
+bool isValidMetadataArray(const MDTuple &N) {
+ return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false);
+}
+
+template <class Ty>
+bool isValidMetadataNullArray(const MDTuple &N) {
+ return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true);
+}
+
+void Verifier::visitDILocation(const DILocation &N) {
+ Assert(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "location requires a valid scope", &N, N.getRawScope());
+ if (auto *IA = N.getRawInlinedAt())
+ Assert(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
+}
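; Editor's sketch, not part of this patch: a location with the required
; DILocalScope and an optional inlined-at location (!4 is assumed to be a
; DISubprogram defined elsewhere).
!5 = !DILocation(line: 7, column: 3, scope: !4)
!6 = !DILocation(line: 12, scope: !4, inlinedAt: !5)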
+
+void Verifier::visitGenericDINode(const GenericDINode &N) {
+ Assert(N.getTag(), "invalid tag", &N);
+}
+
+void Verifier::visitDIScope(const DIScope &N) {
+ if (auto *F = N.getRawFile())
+ Assert(isa<DIFile>(F), "invalid file", &N, F);
+}
+
+void Verifier::visitDISubrange(const DISubrange &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
+ Assert(N.getCount() >= -1, "invalid subrange count", &N);
+}
+
+void Verifier::visitDIEnumerator(const DIEnumerator &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
+}
+
+void Verifier::visitDIBasicType(const DIBasicType &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_base_type ||
+ N.getTag() == dwarf::DW_TAG_unspecified_type,
+ "invalid tag", &N);
+}
+
+void Verifier::visitDIDerivedTypeBase(const DIDerivedTypeBase &N) {
+ // Common scope checks.
+ visitDIScope(N);
+
+ Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope());
+ Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N,
+ N.getBaseType());
+
+ // FIXME: Sink this into the subclass verifies.
+ if (!N.getFile() || N.getFile()->getFilename().empty()) {
+ // Check whether the filename is allowed to be empty.
+ uint16_t Tag = N.getTag();
+ Assert(
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_ptr_to_member_type ||
+ Tag == dwarf::DW_TAG_reference_type ||
+ Tag == dwarf::DW_TAG_rvalue_reference_type ||
+ Tag == dwarf::DW_TAG_restrict_type ||
+ Tag == dwarf::DW_TAG_array_type ||
+ Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_subroutine_type ||
+ Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef,
+ "derived/composite type requires a filename", &N, N.getFile());
+ }
+}
+
+void Verifier::visitDIDerivedType(const DIDerivedType &N) {
+ // Common derived type checks.
+ visitDIDerivedTypeBase(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_typedef ||
+ N.getTag() == dwarf::DW_TAG_pointer_type ||
+ N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
+ N.getTag() == dwarf::DW_TAG_reference_type ||
+ N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
+ N.getTag() == dwarf::DW_TAG_const_type ||
+ N.getTag() == dwarf::DW_TAG_volatile_type ||
+ N.getTag() == dwarf::DW_TAG_restrict_type ||
+ N.getTag() == dwarf::DW_TAG_member ||
+ N.getTag() == dwarf::DW_TAG_inheritance ||
+ N.getTag() == dwarf::DW_TAG_friend,
+ "invalid tag", &N);
+ if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) {
+ Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N,
+ N.getExtraData());
+ }
+}
+
+static bool hasConflictingReferenceFlags(unsigned Flags) {
+ return (Flags & DINode::FlagLValueReference) &&
+ (Flags & DINode::FlagRValueReference);
+}
+
+void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) {
+ auto *Params = dyn_cast<MDTuple>(&RawParams);
+ Assert(Params, "invalid template params", &N, &RawParams);
+ for (Metadata *Op : Params->operands()) {
+ Assert(Op && isa<DITemplateParameter>(Op), "invalid template parameter", &N,
+ Params, Op);
+ }
+}
+
+void Verifier::visitDICompositeType(const DICompositeType &N) {
+ // Common derived type checks.
+ visitDIDerivedTypeBase(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_array_type ||
+ N.getTag() == dwarf::DW_TAG_structure_type ||
+ N.getTag() == dwarf::DW_TAG_union_type ||
+ N.getTag() == dwarf::DW_TAG_enumeration_type ||
+ N.getTag() == dwarf::DW_TAG_subroutine_type ||
+ N.getTag() == dwarf::DW_TAG_class_type,
+ "invalid tag", &N);
+
+ Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
+ "invalid composite elements", &N, N.getRawElements());
+ Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N,
+ N.getRawVTableHolder());
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
+ if (auto *Params = N.getRawTemplateParams())
+ visitTemplateParams(N, *Params);
+}
+
+void Verifier::visitDISubroutineType(const DISubroutineType &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
+ if (auto *Types = N.getRawTypeArray()) {
+ Assert(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
+ for (Metadata *Ty : N.getTypeArray()->operands()) {
+ Assert(isTypeRef(N, Ty), "invalid subroutine type ref", &N, Types, Ty);
+ }
+ }
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
+}
+
+void Verifier::visitDIFile(const DIFile &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
+}
+
+void Verifier::visitDICompileUnit(const DICompileUnit &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
+
+ // Don't bother verifying the compilation directory or producer string
+ // as those could be empty.
+ Assert(N.getRawFile() && isa<DIFile>(N.getRawFile()), "invalid file", &N,
+ N.getRawFile());
+ Assert(!N.getFile()->getFilename().empty(), "invalid filename", &N,
+ N.getFile());
+
+ if (auto *Array = N.getRawEnumTypes()) {
+ Assert(isa<MDTuple>(Array), "invalid enum list", &N, Array);
+ for (Metadata *Op : N.getEnumTypes()->operands()) {
+ auto *Enum = dyn_cast_or_null<DICompositeType>(Op);
+ Assert(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
+ "invalid enum type", &N, N.getEnumTypes(), Op);
+ }
+ }
+ if (auto *Array = N.getRawRetainedTypes()) {
+ Assert(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
+ for (Metadata *Op : N.getRetainedTypes()->operands()) {
+ Assert(Op && isa<DIType>(Op), "invalid retained type", &N, Op);
+ }
+ }
+ if (auto *Array = N.getRawSubprograms()) {
+ Assert(isa<MDTuple>(Array), "invalid subprogram list", &N, Array);
+ for (Metadata *Op : N.getSubprograms()->operands()) {
+ Assert(Op && isa<DISubprogram>(Op), "invalid subprogram ref", &N, Op);
+ }
+ }
+ if (auto *Array = N.getRawGlobalVariables()) {
+ Assert(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
+ for (Metadata *Op : N.getGlobalVariables()->operands()) {
+ Assert(Op && isa<DIGlobalVariable>(Op), "invalid global variable ref", &N,
+ Op);
+ }
+ }
+ if (auto *Array = N.getRawImportedEntities()) {
+ Assert(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
+ for (Metadata *Op : N.getImportedEntities()->operands()) {
+ Assert(Op && isa<DIImportedEntity>(Op), "invalid imported entity ref", &N,
+ Op);
+ }
+ }
+}
+
+void Verifier::visitDISubprogram(const DISubprogram &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
+ Assert(isScopeRef(N, N.getRawScope()), "invalid scope", &N, N.getRawScope());
+ if (auto *T = N.getRawType())
+ Assert(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
+ Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N,
+ N.getRawContainingType());
+ if (auto *RawF = N.getRawFunction()) {
+ auto *FMD = dyn_cast<ConstantAsMetadata>(RawF);
+ auto *F = FMD ? FMD->getValue() : nullptr;
+ auto *FT = F ? dyn_cast<PointerType>(F->getType()) : nullptr;
+ Assert(F && FT && isa<FunctionType>(FT->getElementType()),
+ "invalid function", &N, F, FT);
+ }
+ if (auto *Params = N.getRawTemplateParams())
+ visitTemplateParams(N, *Params);
+ if (auto *S = N.getRawDeclaration()) {
+ Assert(isa<DISubprogram>(S) && !cast<DISubprogram>(S)->isDefinition(),
+ "invalid subprogram declaration", &N, S);
+ }
+ if (auto *RawVars = N.getRawVariables()) {
+ auto *Vars = dyn_cast<MDTuple>(RawVars);
+ Assert(Vars, "invalid variable list", &N, RawVars);
+ for (Metadata *Op : Vars->operands()) {
+ Assert(Op && isa<DILocalVariable>(Op), "invalid local variable", &N, Vars,
+ Op);
+ }
+ }
+ Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags",
+ &N);
+
+ auto *F = N.getFunction();
+ if (!F)
+ return;
+
+  // Check that all !dbg attachments lead back to N (or, at least, another
+ // subprogram that describes the same function).
+ //
+ // FIXME: Check this incrementally while visiting !dbg attachments.
+ // FIXME: Only check when N is the canonical subprogram for F.
+ SmallPtrSet<const MDNode *, 32> Seen;
+ for (auto &BB : *F)
+ for (auto &I : BB) {
+ // Be careful about using DILocation here since we might be dealing with
+ // broken code (this is the Verifier after all).
+ DILocation *DL =
+ dyn_cast_or_null<DILocation>(I.getDebugLoc().getAsMDNode());
+ if (!DL)
+ continue;
+ if (!Seen.insert(DL).second)
+ continue;
+
+ DILocalScope *Scope = DL->getInlinedAtScope();
+ if (Scope && !Seen.insert(Scope).second)
+ continue;
+
+ DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
+ if (SP && !Seen.insert(SP).second)
+ continue;
+
+ // FIXME: Once N is canonical, check "SP == &N".
+      Assert(SP && SP->describes(F),
+ "!dbg attachment points at wrong subprogram for function", &N, F,
+ &I, DL, Scope, SP);
+ }
+}
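; Editor's sketch, not part of this patch: a !dbg attachment as checked above;
; the scope chain of !9 must reach a subprogram that describes the enclosing
; function (%p and !9 are hypothetical).
store i32 0, i32* %p, !dbg !9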
+
+void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ Assert(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "invalid local scope", &N, N.getRawScope());
+}
+
+void Verifier::visitDILexicalBlock(const DILexicalBlock &N) {
+ visitDILexicalBlockBase(N);
+
+ Assert(N.getLine() || !N.getColumn(),
+ "cannot have column info without line info", &N);
+}
+
+void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
+ visitDILexicalBlockBase(N);
+}
+
+void Verifier::visitDINamespace(const DINamespace &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
+ if (auto *S = N.getRawScope())
+ Assert(isa<DIScope>(S), "invalid scope ref", &N, S);
+}
+
+void Verifier::visitDITemplateParameter(const DITemplateParameter &N) {
+ Assert(isTypeRef(N, N.getType()), "invalid type ref", &N, N.getType());
+}
+
+void Verifier::visitDITemplateTypeParameter(const DITemplateTypeParameter &N) {
+ visitDITemplateParameter(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
+ &N);
+}
+
+void Verifier::visitDITemplateValueParameter(
+ const DITemplateValueParameter &N) {
+ visitDITemplateParameter(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
+ "invalid tag", &N);
+}
+
+void Verifier::visitDIVariable(const DIVariable &N) {
+ if (auto *S = N.getRawScope())
+ Assert(isa<DIScope>(S), "invalid scope", &N, S);
+ Assert(isTypeRef(N, N.getRawType()), "invalid type ref", &N, N.getRawType());
+ if (auto *F = N.getRawFile())
+ Assert(isa<DIFile>(F), "invalid file", &N, F);
+}
+
+void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
+ // Checks common to all variables.
+ visitDIVariable(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+ Assert(!N.getName().empty(), "missing global variable name", &N);
+ if (auto *V = N.getRawVariable()) {
+ Assert(isa<ConstantAsMetadata>(V) &&
+ !isa<Function>(cast<ConstantAsMetadata>(V)->getValue()),
+           "invalid global variable ref", &N, V);
+ }
+ if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
+ Assert(isa<DIDerivedType>(Member), "invalid static data member declaration",
+ &N, Member);
+ }
+}
+
+void Verifier::visitDILocalVariable(const DILocalVariable &N) {
+ // Checks common to all variables.
+ visitDIVariable(N);
+
+ Assert(N.getTag() == dwarf::DW_TAG_auto_variable ||
+ N.getTag() == dwarf::DW_TAG_arg_variable,
+ "invalid tag", &N);
+ Assert(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "local variable requires a valid scope", &N, N.getRawScope());
+}
+
+void Verifier::visitDIExpression(const DIExpression &N) {
+ Assert(N.isValid(), "invalid expression", &N);
+}
+
+void Verifier::visitDIObjCProperty(const DIObjCProperty &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
+ if (auto *T = N.getRawType())
+ Assert(isa<DIType>(T), "invalid type ref", &N, T);
+ if (auto *F = N.getRawFile())
+ Assert(isa<DIFile>(F), "invalid file", &N, F);
+}
+
+void Verifier::visitDIImportedEntity(const DIImportedEntity &N) {
+ Assert(N.getTag() == dwarf::DW_TAG_imported_module ||
+ N.getTag() == dwarf::DW_TAG_imported_declaration,
+ "invalid tag", &N);
+ if (auto *S = N.getRawScope())
+ Assert(isa<DIScope>(S), "invalid scope for imported entity", &N, S);
+ Assert(isDIRef(N, N.getEntity()), "invalid imported entity", &N,
+ N.getEntity());
+}
+
void Verifier::visitComdat(const Comdat &C) {
- // All Comdat::SelectionKind values other than Comdat::Any require a
- // GlobalValue with the same name as the Comdat.
- const GlobalValue *GV = M->getNamedValue(C.getName());
- if (C.getSelectionKind() != Comdat::Any)
- Assert1(GV,
- "comdat selection kind requires a global value with the same name",
- &C);
// The Module is invalid if the GlobalValue has private linkage. Entities
// with private linkage don't have entries in the symbol table.
- if (GV)
- Assert1(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
- GV);
+ if (const GlobalValue *GV = M->getNamedValue(C.getName()))
+ Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
+ GV);
}
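; Editor's sketch, not part of this patch: a comdat and its same-named global;
; per the check above, the global may not have private linkage ($c is
; hypothetical).
$c = comdat any
@c = global i32 0, comdat($c)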
void Verifier::visitModuleIdents(const Module &M) {
@@ -687,12 +1118,12 @@ void Verifier::visitModuleIdents(const Module &M) {
// Scan each llvm.ident entry and make sure that this requirement is met.
for (unsigned i = 0, e = Idents->getNumOperands(); i != e; ++i) {
const MDNode *N = Idents->getOperand(i);
- Assert1(N->getNumOperands() == 1,
- "incorrect number of operands in llvm.ident metadata", N);
- Assert1(isa<MDString>(N->getOperand(0)),
- ("invalid value for llvm.ident metadata entry operand"
- "(the operand should be a string)"),
- N->getOperand(0));
+ Assert(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.ident metadata", N);
+ Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
+           ("invalid value for llvm.ident metadata entry operand "
+ "(the operand should be a string)"),
+ N->getOperand(0));
}
}
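; Editor's sketch, not part of this patch: a well-formed llvm.ident entry --
; one operand per node, and that operand a string (version text made up).
!llvm.ident = !{!0}
!0 = !{!"example compiler 1.0"}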
@@ -735,22 +1166,21 @@ Verifier::visitModuleFlag(const MDNode *Op,
SmallVectorImpl<const MDNode *> &Requirements) {
// Each module flag should have three arguments, the merge behavior (a
// constant int), the flag ID (an MDString), and the value.
- Assert1(Op->getNumOperands() == 3,
- "incorrect number of operands in module flag", Op);
+ Assert(Op->getNumOperands() == 3,
+ "incorrect number of operands in module flag", Op);
Module::ModFlagBehavior MFB;
if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) {
- Assert1(
- mdconst::dyn_extract<ConstantInt>(Op->getOperand(0)),
+ Assert(
+ mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
"invalid behavior operand in module flag (expected constant integer)",
Op->getOperand(0));
- Assert1(false,
- "invalid behavior operand in module flag (unexpected constant)",
- Op->getOperand(0));
+ Assert(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
}
- MDString *ID = dyn_cast<MDString>(Op->getOperand(1));
- Assert1(ID,
- "invalid ID operand in module flag (expected metadata string)",
- Op->getOperand(1));
+ MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
+ Assert(ID, "invalid ID operand in module flag (expected metadata string)",
+ Op->getOperand(1));
// Sanity check the values for behaviors with additional requirements.
switch (MFB) {
@@ -764,13 +1194,13 @@ Verifier::visitModuleFlag(const MDNode *Op,
// The value should itself be an MDNode with two operands, a flag ID (an
// MDString), and a value.
MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
- Assert1(Value && Value->getNumOperands() == 2,
- "invalid value for 'require' module flag (expected metadata pair)",
- Op->getOperand(2));
- Assert1(isa<MDString>(Value->getOperand(0)),
- ("invalid value for 'require' module flag "
- "(first value operand should be a string)"),
- Value->getOperand(0));
+ Assert(Value && Value->getNumOperands() == 2,
+ "invalid value for 'require' module flag (expected metadata pair)",
+ Op->getOperand(2));
+ Assert(isa<MDString>(Value->getOperand(0)),
+ ("invalid value for 'require' module flag "
+ "(first value operand should be a string)"),
+ Value->getOperand(0));
// Append it to the list of requirements, to check once all module flags are
// scanned.
@@ -781,9 +1211,10 @@ Verifier::visitModuleFlag(const MDNode *Op,
case Module::Append:
case Module::AppendUnique: {
// These behavior types require the operand be an MDNode.
- Assert1(isa<MDNode>(Op->getOperand(2)),
- "invalid value for 'append'-type module flag "
- "(expected a metadata node)", Op->getOperand(2));
+ Assert(isa<MDNode>(Op->getOperand(2)),
+ "invalid value for 'append'-type module flag "
+ "(expected a metadata node)",
+ Op->getOperand(2));
break;
}
}
@@ -791,9 +1222,8 @@ Verifier::visitModuleFlag(const MDNode *Op,
// Unless this is a "requires" flag, check the ID is unique.
if (MFB != Module::Require) {
bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
- Assert1(Inserted,
- "module flag identifiers must be unique (or of 'require' type)",
- ID);
+ Assert(Inserted,
+ "module flag identifiers must be unique (or of 'require' type)", ID);
}
}
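; Editor's sketch, not part of this patch: module flags are
; (behavior, MDString ID, value) triples; a 'require' flag (behavior 3) wraps
; an (ID, value) pair checked once all flags are scanned. IDs are hypothetical.
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 3, !"req", !2}
!2 = !{!"wchar_size", i32 4}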
@@ -838,7 +1268,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::NoBuiltin ||
I->getKindAsEnum() == Attribute::Cold ||
I->getKindAsEnum() == Attribute::OptimizeNone ||
- I->getKindAsEnum() == Attribute::JumpTable) {
+ I->getKindAsEnum() == Attribute::JumpTable ||
+ I->getKindAsEnum() == Attribute::Convergent) {
if (!isFunction) {
CheckFailed("Attribute '" + I->getAsString() +
"' only applies to functions!", V);
@@ -869,14 +1300,15 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
VerifyAttributeTypes(Attrs, Idx, false, V);
if (isReturnValue)
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::Nest) &&
- !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
- !Attrs.hasAttribute(Idx, Attribute::Returned) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca),
- "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and "
- "'returned' do not apply to return values!", V);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
+ !Attrs.hasAttribute(Idx, Attribute::Returned) &&
+ !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+ "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and "
+ "'returned' do not apply to return values!",
+ V);
// Check for mutually incompatible attributes. Only inreg is compatible with
// sret.
@@ -886,45 +1318,59 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) ||
Attrs.hasAttribute(Idx, Attribute::InReg);
AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest);
- Assert1(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
- "and 'sret' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
- "'inalloca and readonly' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes "
- "'sret and returned' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
- Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
- "'zeroext and signext' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
- "'readnone and readonly' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
- Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes "
- "'noinline and alwaysinline' are incompatible!", V);
-
- Assert1(!AttrBuilder(Attrs, Idx).
- hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx),
- "Wrong types for attribute: " +
- AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V);
+ Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
+ "and 'sret' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ "Attributes "
+ "'inalloca and readonly' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ Attrs.hasAttribute(Idx, Attribute::Returned)),
+ "Attributes "
+ "'sret and returned' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
+ Attrs.hasAttribute(Idx, Attribute::SExt)),
+ "Attributes "
+ "'zeroext and signext' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ "Attributes "
+ "'readnone and readonly' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ Attrs.hasAttribute(Idx, Attribute::AlwaysInline)),
+ "Attributes "
+ "'noinline and alwaysinline' are incompatible!",
+ V);
+
+ Assert(!AttrBuilder(Attrs, Idx)
+ .overlaps(AttributeFuncs::typeIncompatible(Ty)),
+ "Wrong types for attribute: " +
+ AttributeSet::get(*Context, Idx,
+ AttributeFuncs::typeIncompatible(Ty)).getAsString(Idx),
+ V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
- if (!PTy->getElementType()->isSized()) {
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca),
- "Attributes 'byval' and 'inalloca' do not support unsized types!",
- V);
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!PTy->getElementType()->isSized(&Visited)) {
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+ "Attributes 'byval' and 'inalloca' do not support unsized types!",
+ V);
}
} else {
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal),
- "Attribute 'byval' only applies to parameters with pointer type!",
- V);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal),
+ "Attribute 'byval' only applies to parameters with pointer type!",
+ V);
}
}
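; Editor's sketch, not part of this patch: 'byval' needs a pointer to a sized
; type, and byval/inalloca/inreg/nest/sret are mutually exclusive on a
; parameter (%struct.S and @takes_byval are hypothetical).
%struct.S = type { i32 }
declare void @takes_byval(%struct.S* byval)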
@@ -956,28 +1402,30 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
continue;
if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
- Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ Assert(!SawNest, "More than one parameter has attribute nest!", V);
SawNest = true;
}
if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
- Assert1(!SawReturned, "More than one parameter has attribute returned!",
- V);
- Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible "
- "argument and return types for 'returned' attribute", V);
+ Assert(!SawReturned, "More than one parameter has attribute returned!",
+ V);
+ Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
+ "Incompatible "
+ "argument and return types for 'returned' attribute",
+ V);
SawReturned = true;
}
if (Attrs.hasAttribute(Idx, Attribute::StructRet)) {
- Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
- Assert1(Idx == 1 || Idx == 2,
- "Attribute 'sret' is not on first or second parameter!", V);
+ Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+ Assert(Idx == 1 || Idx == 2,
+ "Attribute 'sret' is not on first or second parameter!", V);
SawSRet = true;
}
if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) {
- Assert1(Idx == FT->getNumParams(),
- "inalloca isn't on the last parameter!", V);
+ Assert(Idx == FT->getNumParams(), "inalloca isn't on the last parameter!",
+ V);
}
}
@@ -986,39 +1434,65 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
- Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReadOnly)),
- "Attributes 'readnone and readonly' are incompatible!", V);
+ Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
- Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)),
- "Attributes 'noinline and alwaysinline' are incompatible!", V);
+ Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeNone)) {
- Assert1(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline),
- "Attribute 'optnone' requires 'noinline'!", V);
+ Assert(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline),
+ "Attribute 'optnone' requires 'noinline'!", V);
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize),
- "Attributes 'optsize and optnone' are incompatible!", V);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize),
+ "Attributes 'optsize and optnone' are incompatible!", V);
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize),
- "Attributes 'minsize and optnone' are incompatible!", V);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize),
+ "Attributes 'minsize and optnone' are incompatible!", V);
}
if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
- Assert1(GV->hasUnnamedAddr(),
- "Attribute 'jumptable' requires 'unnamed_addr'", V);
+ Assert(GV->hasUnnamedAddr(),
+ "Attribute 'jumptable' requires 'unnamed_addr'", V);
+ }
+}
+void Verifier::VerifyFunctionMetadata(
+ const SmallVector<std::pair<unsigned, MDNode *>, 4> MDs) {
+ if (MDs.empty())
+ return;
+
+ for (unsigned i = 0; i < MDs.size(); i++) {
+ if (MDs[i].first == LLVMContext::MD_prof) {
+ MDNode *MD = MDs[i].second;
+ Assert(MD->getNumOperands() == 2,
+ "!prof annotations should have exactly 2 operands", MD);
+
+ // Check first operand.
+ Assert(MD->getOperand(0) != nullptr, "first operand should not be null",
+ MD);
+ Assert(isa<MDString>(MD->getOperand(0)),
+ "expected string with name of the !prof annotation", MD);
+ MDString *MDS = cast<MDString>(MD->getOperand(0));
+ StringRef ProfName = MDS->getString();
+ Assert(ProfName.equals("function_entry_count"),
+ "first operand should be 'function_entry_count'", MD);
+
+ // Check second operand.
+ Assert(MD->getOperand(1) != nullptr, "second operand should not be null",
+ MD);
+ Assert(isa<ConstantAsMetadata>(MD->getOperand(1)),
+ "expected integer argument to function_entry_count", MD);
+ }
}
}
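; Editor's sketch, not part of this patch: the !prof form checked above
; (function and count are hypothetical; the attachment syntax assumes the
; function-level metadata support added in this LLVM cycle).
define void @f() !prof !0 {
  ret void
}
!0 = !{!"function_entry_count", i64 1024}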
@@ -1026,9 +1500,9 @@ void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) {
if (CE->getOpcode() != Instruction::BitCast)
return;
- Assert1(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
- CE->getType()),
- "Invalid bitcast", CE);
+ Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
+ CE->getType()),
+ "Invalid bitcast", CE);
}
bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
@@ -1045,6 +1519,159 @@ bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
return false;
}
+/// \brief Verify that statepoint intrinsic is well formed.
+void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
+ assert(CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint);
+
+ const Instruction &CI = *CS.getInstruction();
+
+ Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(),
+ "gc.statepoint must read and write memory to preserve "
+ "reordering restrictions required by safepoint semantics",
+ &CI);
+
+ const Value *IDV = CS.getArgument(0);
+ Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
+ &CI);
+
+ const Value *NumPatchBytesV = CS.getArgument(1);
+ Assert(isa<ConstantInt>(NumPatchBytesV),
+ "gc.statepoint number of patchable bytes must be a constant integer",
+ &CI);
+ const int64_t NumPatchBytes =
+ cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
+ assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
+ Assert(NumPatchBytes >= 0, "gc.statepoint number of patchable bytes must be "
+                             "non-negative",
+ &CI);
+
+ const Value *Target = CS.getArgument(2);
+ const PointerType *PT = dyn_cast<PointerType>(Target->getType());
+ Assert(PT && PT->getElementType()->isFunctionTy(),
+ "gc.statepoint callee must be of function pointer type", &CI, Target);
+ FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
+
+ if (NumPatchBytes)
+ Assert(isa<ConstantPointerNull>(Target->stripPointerCasts()),
+ "gc.statepoint must have null as call target if number of patchable "
+ "bytes is non zero",
+ &CI);
+
+ const Value *NumCallArgsV = CS.getArgument(3);
+ Assert(isa<ConstantInt>(NumCallArgsV),
+ "gc.statepoint number of arguments to underlying call "
+ "must be constant integer",
+ &CI);
+ const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
+ Assert(NumCallArgs >= 0,
+ "gc.statepoint number of arguments to underlying call "
+         "must be non-negative",
+ &CI);
+ const int NumParams = (int)TargetFuncType->getNumParams();
+ if (TargetFuncType->isVarArg()) {
+ Assert(NumCallArgs >= NumParams,
+ "gc.statepoint mismatch in number of vararg call args", &CI);
+
+ // TODO: Remove this limitation
+ Assert(TargetFuncType->getReturnType()->isVoidTy(),
+ "gc.statepoint doesn't support wrapping non-void "
+ "vararg functions yet",
+ &CI);
+ } else
+ Assert(NumCallArgs == NumParams,
+ "gc.statepoint mismatch in number of call args", &CI);
+
+ const Value *FlagsV = CS.getArgument(4);
+ Assert(isa<ConstantInt>(FlagsV),
+ "gc.statepoint flags must be constant integer", &CI);
+ const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
+ Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
+ "unknown flag used in gc.statepoint flags argument", &CI);
+
+ // Verify that the types of the call parameter arguments match
+ // the type of the wrapped callee.
+ for (int i = 0; i < NumParams; i++) {
+ Type *ParamType = TargetFuncType->getParamType(i);
+ Type *ArgType = CS.getArgument(5 + i)->getType();
+ Assert(ArgType == ParamType,
+ "gc.statepoint call argument does not match wrapped "
+ "function type",
+ &CI);
+ }
+
+ const int EndCallArgsInx = 4 + NumCallArgs;
+
+ const Value *NumTransitionArgsV = CS.getArgument(EndCallArgsInx+1);
+ Assert(isa<ConstantInt>(NumTransitionArgsV),
+ "gc.statepoint number of transition arguments "
+ "must be constant integer",
+ &CI);
+ const int NumTransitionArgs =
+ cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
+ Assert(NumTransitionArgs >= 0,
+         "gc.statepoint number of transition arguments must be non-negative", &CI);
+ const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
+
+ const Value *NumDeoptArgsV = CS.getArgument(EndTransitionArgsInx+1);
+ Assert(isa<ConstantInt>(NumDeoptArgsV),
+ "gc.statepoint number of deoptimization arguments "
+ "must be constant integer",
+ &CI);
+ const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
+ Assert(NumDeoptArgs >= 0, "gc.statepoint number of deoptimization arguments "
+                            "must be non-negative",
+ &CI);
+
+ const int ExpectedNumArgs =
+ 7 + NumCallArgs + NumTransitionArgs + NumDeoptArgs;
+ Assert(ExpectedNumArgs <= (int)CS.arg_size(),
+ "gc.statepoint too few arguments according to length fields", &CI);
+
+ // Check that the only uses of this gc.statepoint are gc.result or
+ // gc.relocate calls which are tied to this statepoint and thus part
+ // of the same statepoint sequence
+ for (const User *U : CI.users()) {
+ const CallInst *Call = dyn_cast<const CallInst>(U);
+ Assert(Call, "illegal use of statepoint token", &CI, U);
+ if (!Call) continue;
+ Assert(isGCRelocate(Call) || isGCResult(Call),
+           "gc.result or gc.relocate are the only value uses "
+ "of a gc.statepoint",
+ &CI, U);
+ if (isGCResult(Call)) {
+ Assert(Call->getArgOperand(0) == &CI,
+ "gc.result connected to wrong gc.statepoint", &CI, Call);
+ } else if (isGCRelocate(Call)) {
+ Assert(Call->getArgOperand(0) == &CI,
+ "gc.relocate connected to wrong gc.statepoint", &CI, Call);
+ }
+ }
+
+ // Note: It is legal for a single derived pointer to be listed multiple
+ // times. It's non-optimal, but it is legal. It can also happen after
+ // insertion if we strip a bitcast away.
+ // Note: It is really tempting to check that each base is relocated and
+ // that a derived pointer is never reused as a base pointer. This turns
+ // out to be problematic since optimizations run after safepoint insertion
+ // can recognize equality properties that the insertion logic doesn't know
+ // about. See example statepoint.ll in the verifier subdirectory
+}
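; Editor's sketch, not part of this patch: a minimal statepoint -- id 0, zero
; patchable bytes, a void() callee, and 0 call/transition/deopt arguments,
; i.e. the 7-argument lower bound computed above (@foo is hypothetical; call
; syntax per the typed-pointer IR of this period).
%tok = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)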
+
+void Verifier::verifyFrameRecoverIndices() {
+ for (auto &Counts : FrameEscapeInfo) {
+ Function *F = Counts.first;
+ unsigned EscapedObjectCount = Counts.second.first;
+ unsigned MaxRecoveredIndex = Counts.second.second;
+ Assert(MaxRecoveredIndex <= EscapedObjectCount,
+ "all indices passed to llvm.framerecover must be less than the "
+           "number of arguments passed to llvm.frameescape in the parent "
+ "function",
+ F);
+ }
+}
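; Editor's sketch, not part of this patch: llvm.framerecover indices must stay
; below the number of allocas the parent passes to llvm.frameescape
; (@parent is hypothetical; intrinsic signatures as of this LLVM cycle).
define void @parent() {
  %a = alloca i32
  call void (...) @llvm.frameescape(i32* %a)
  ret void
}
; recovering index 0 elsewhere:
; %p = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*), i8* %fp, i32 0)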
+
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
@@ -1052,25 +1679,24 @@ void Verifier::visitFunction(const Function &F) {
FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
- Assert1(Context == &F.getContext(),
- "Function context does not match Module context!", &F);
+ Assert(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
- Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
- Assert2(FT->getNumParams() == NumArgs,
- "# formal arguments must match # of arguments for function type!",
- &F, FT);
- Assert1(F.getReturnType()->isFirstClassType() ||
- F.getReturnType()->isVoidTy() ||
- F.getReturnType()->isStructTy(),
- "Functions cannot return aggregate values!", &F);
+ Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!", &F,
+ FT);
+ Assert(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
- Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
- "Invalid struct return type!", &F);
+ Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
AttributeSet Attrs = F.getAttributes();
- Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
- "Attribute after last parameter!", &F);
+ Assert(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attribute after last parameter!", &F);
// Check function attributes.
VerifyFunctionAttrs(FT, Attrs, &F);
@@ -1078,9 +1704,8 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Builtin),
- "Attribute 'builtin' can only be applied to a callsite.", &F);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::Builtin),
+ "Attribute 'builtin' can only be applied to a callsite.", &F);
// Check that this function meets the restrictions on this calling convention.
// Sometimes varargs is used for perfectly forwarding thunks, so some of these
@@ -1094,8 +1719,9 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::Intel_OCL_BI:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
- Assert1(!F.isVarArg(), "Calling convention does not support varargs or "
- "perfect forwarding!", &F);
+ Assert(!F.isVarArg(), "Calling convention does not support varargs or "
+ "perfect forwarding!",
+ &F);
break;
}
@@ -1106,36 +1732,50 @@ void Verifier::visitFunction(const Function &F) {
unsigned i = 0;
for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++i) {
- Assert2(I->getType() == FT->getParamType(i),
- "Argument value does not match function argument type!",
- I, FT->getParamType(i));
- Assert1(I->getType()->isFirstClassType(),
- "Function arguments must have first-class types!", I);
+ Assert(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!", I,
+ FT->getParamType(i));
+ Assert(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
if (!isLLVMdotName)
- Assert2(!I->getType()->isMetadataTy(),
- "Function takes metadata but isn't an intrinsic", I, &F);
+ Assert(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
}
+ // Get the function metadata attachments.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ F.getAllMetadata(MDs);
+ assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync");
+ VerifyFunctionMetadata(MDs);
+
if (F.isMaterializable()) {
// Function has a body somewhere we can't see.
+ Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F,
+ MDs.empty() ? nullptr : MDs.front().second);
} else if (F.isDeclaration()) {
- Assert1(F.hasExternalLinkage() || F.hasExternalWeakLinkage(),
- "invalid linkage type for function declaration", &F);
+ Assert(F.hasExternalLinkage() || F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
+ Assert(MDs.empty(), "function without a body cannot have metadata", &F,
+ MDs.empty() ? nullptr : MDs.front().second);
} else {
// Verify that this function (which has a body) is not named "llvm.*". It
// is not legal to define intrinsics.
- Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+ Assert(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
// Check the entry node
const BasicBlock *Entry = &F.getEntryBlock();
- Assert1(pred_empty(Entry),
- "Entry block to function must not have predecessors!", Entry);
+ Assert(pred_empty(Entry),
+ "Entry block to function must not have predecessors!", Entry);
// The address of the entry block cannot be taken, unless it is dead.
if (Entry->hasAddressTaken()) {
- Assert1(!BlockAddress::lookup(Entry)->isConstantUsed(),
- "blockaddress may not be used with the entry block!", Entry);
+ Assert(!BlockAddress::lookup(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
}
+
+ // Visit metadata attachments.
+ for (const auto &I : MDs)
+ visitMDNode(*I.second);
}
// If this function is actually an intrinsic, verify that it is only used in
@@ -1143,13 +1783,13 @@ void Verifier::visitFunction(const Function &F) {
if (F.getIntrinsicID()) {
const User *U;
if (F.hasAddressTaken(&U))
- Assert1(0, "Invalid user of intrinsic instruction!", U);
+ Assert(0, "Invalid user of intrinsic instruction!", U);
}
- Assert1(!F.hasDLLImportStorageClass() ||
- (F.isDeclaration() && F.hasExternalLinkage()) ||
- F.hasAvailableExternallyLinkage(),
- "Function is marked as dllimport, but not external.", &F);
+ Assert(!F.hasDLLImportStorageClass() ||
+ (F.isDeclaration() && F.hasExternalLinkage()) ||
+ F.hasAvailableExternallyLinkage(),
+ "Function is marked as dllimport, but not external.", &F);
}
// verifyBasicBlock - Verify that a basic block is well formed...
@@ -1158,7 +1798,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
InstsInThisBlock.clear();
// Ensure that basic blocks have terminators!
- Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+ Assert(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
// Check constraints that this basic block imposes on all of the PHI nodes in
// it.
@@ -1169,12 +1809,14 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
PHINode *PN;
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
// Ensure that PHI nodes have at least one entry!
- Assert1(PN->getNumIncomingValues() != 0,
- "PHI nodes must have at least one entry. If the block is dead, "
- "the PHI should be removed!", PN);
- Assert1(PN->getNumIncomingValues() == Preds.size(),
- "PHINode should have one entry for each predecessor of its "
- "parent basic block!", PN);
+ Assert(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!",
+ PN);
+ Assert(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!",
+ PN);
// Get and sort all incoming values in the PHI node...
Values.clear();
@@ -1189,17 +1831,17 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
// particular basic block in this PHI node, that the incoming values are
// all identical.
//
- Assert4(i == 0 || Values[i].first != Values[i-1].first ||
- Values[i].second == Values[i-1].second,
- "PHI node has multiple entries for the same basic block with "
- "different incoming values!", PN, Values[i].first,
- Values[i].second, Values[i-1].second);
+ Assert(i == 0 || Values[i].first != Values[i - 1].first ||
+ Values[i].second == Values[i - 1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!",
+ PN, Values[i].first, Values[i].second, Values[i - 1].second);
// Check to make sure that the predecessors and PHI node entries are
// matched up.
- Assert3(Values[i].first == Preds[i],
- "PHI node entries do not match predecessors!", PN,
- Values[i].first, Preds[i]);
+ Assert(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
}
}
}
@@ -1213,15 +1855,15 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
void Verifier::visitTerminatorInst(TerminatorInst &I) {
// Ensure that terminators only exist at the end of the basic block.
- Assert1(&I == I.getParent()->getTerminator(),
- "Terminator found in the middle of a basic block!", I.getParent());
+ Assert(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
visitInstruction(I);
}
void Verifier::visitBranchInst(BranchInst &BI) {
if (BI.isConditional()) {
- Assert2(BI.getCondition()->getType()->isIntegerTy(1),
- "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ Assert(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
}
visitTerminatorInst(BI);
}
@@ -1230,13 +1872,15 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
Function *F = RI.getParent()->getParent();
unsigned N = RI.getNumOperands();
if (F->getReturnType()->isVoidTy())
- Assert2(N == 0,
- "Found return instr that returns non-void in Function of void "
- "return type!", &RI, F->getReturnType());
+ Assert(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!",
+ &RI, F->getReturnType());
else
- Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
- "Function return type does not match operand "
- "type of return inst!", &RI, F->getReturnType());
+ Assert(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!",
+ &RI, F->getReturnType());
// Check to make sure that the return value has necessary properties for
// terminators...
@@ -1249,32 +1893,32 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
- Assert1(i.getCaseValue()->getType() == SwitchTy,
- "Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(i.getCaseValue()).second,
- "Duplicate integer as switch case", &SI, i.getCaseValue());
+ Assert(i.getCaseValue()->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert(Constants.insert(i.getCaseValue()).second,
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
}
void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
- Assert1(BI.getAddress()->getType()->isPointerTy(),
- "Indirectbr operand must have pointer type!", &BI);
+ Assert(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
- Assert1(BI.getDestination(i)->getType()->isLabelTy(),
- "Indirectbr destinations must all have pointer type!", &BI);
+ Assert(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
visitTerminatorInst(BI);
}
void Verifier::visitSelectInst(SelectInst &SI) {
- Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
- SI.getOperand(2)),
- "Invalid operands for select instruction!", &SI);
+ Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
- Assert1(SI.getTrueValue()->getType() == SI.getType(),
- "Select values must have same type as select instruction!", &SI);
+ Assert(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
visitInstruction(SI);
}
@@ -1282,7 +1926,7 @@ void Verifier::visitSelectInst(SelectInst &SI) {
/// a pass, if any exist, it's an error.
///
void Verifier::visitUserOp1(Instruction &I) {
- Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+ Assert(0, "User-defined operators should not live outside of a pass!", &I);
}
void Verifier::visitTruncInst(TruncInst &I) {
@@ -1294,11 +1938,11 @@ void Verifier::visitTruncInst(TruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "trunc source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
visitInstruction(I);
}
@@ -1309,14 +1953,14 @@ void Verifier::visitZExtInst(ZExtInst &I) {
Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "zext source and destination must both be a vector or neither", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+ Assert(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
visitInstruction(I);
}
@@ -1330,11 +1974,11 @@ void Verifier::visitSExtInst(SExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "sext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
visitInstruction(I);
}
@@ -1347,11 +1991,11 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fptrunc source and destination must both be a vector or neither",&I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
visitInstruction(I);
}
@@ -1365,11 +2009,11 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fpext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
visitInstruction(I);
}
@@ -1382,17 +2026,17 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "UIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "UIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "UIToFP result must be FP or FP vector", &I);
+ Assert(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "UIToFP source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1405,17 +2049,17 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "SIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "SIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "SIToFP result must be FP or FP vector", &I);
+ Assert(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "SIToFP source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1428,17 +2072,17 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "FPToUI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
- &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToUI result must be integer or integer vector", &I);
+ Assert(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToUI source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1451,17 +2095,17 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "FPToSI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(),
- "FPToSI source must be FP or FP vector", &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToSI result must be integer or integer vector", &I);
+ Assert(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector",
+ &I);
+ Assert(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToSI source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1471,18 +2115,18 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->getScalarType()->isPointerTy(),
- "PtrToInt source must be pointer", &I);
- Assert1(DestTy->getScalarType()->isIntegerTy(),
- "PtrToInt result must be integral", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "PtrToInt type mismatch", &I);
+ Assert(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "PtrToInt Vector width mismatch", &I);
+ Assert(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
}
visitInstruction(I);
@@ -1493,23 +2137,23 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->getScalarType()->isIntegerTy(),
- "IntToPtr source must be an integral", &I);
- Assert1(DestTy->getScalarType()->isPointerTy(),
- "IntToPtr result must be a pointer",&I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "IntToPtr type mismatch", &I);
+ Assert(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "IntToPtr Vector width mismatch", &I);
+ Assert(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
}
visitInstruction(I);
}
void Verifier::visitBitCastInst(BitCastInst &I) {
- Assert1(
+ Assert(
CastInst::castIsValid(Instruction::BitCast, I.getOperand(0), I.getType()),
"Invalid bitcast", &I);
visitInstruction(I);
@@ -1519,15 +2163,15 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPtrOrPtrVectorTy(),
- "AddrSpaceCast source must be a pointer", &I);
- Assert1(DestTy->isPtrOrPtrVectorTy(),
- "AddrSpaceCast result must be a pointer", &I);
- Assert1(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
- "AddrSpaceCast must be between different address spaces", &I);
+ Assert(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
+ &I);
+ Assert(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
+ &I);
+ Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
+ "AddrSpaceCast must be between different address spaces", &I);
if (SrcTy->isVectorTy())
- Assert1(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(),
- "AddrSpaceCast vector pointer number of elements mismatch", &I);
+ Assert(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(),
+ "AddrSpaceCast vector pointer number of elements mismatch", &I);
visitInstruction(I);
}
@@ -1538,16 +2182,15 @@ void Verifier::visitPHINode(PHINode &PN) {
// This can be tested by checking whether the instruction before this is
// either nonexistent (because this is begin()) or is a PHI node. If not,
// then there is some other instruction before a PHI.
- Assert2(&PN == &PN.getParent()->front() ||
- isa<PHINode>(--BasicBlock::iterator(&PN)),
- "PHI nodes not grouped at top of basic block!",
- &PN, PN.getParent());
+ Assert(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
// Check that all of the values of the PHI node have the same type as the
// result, and that the incoming blocks are really basic blocks.
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
- "PHI node operands are not the same type as the result!", &PN);
+ for (Value *IncValue : PN.incoming_values()) {
+ Assert(PN.getType() == IncValue->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
}
// All other PHI node constraints are checked in the visitBasicBlock method.
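
// A hedged sketch of the grouping rule above: IRBuilder will happily place a
// PHI after a non-PHI instruction, so the verifier is the only safety net.
// Assumes the includes and Module scaffolding of the switch example earlier;
// the function and value names are illustrative.
void buildBadPhi(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), false),
      Function::ExternalLinkage, "bad_phi", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  B.CreateAlloca(B.getInt32Ty());         // a non-PHI instruction first
  B.CreatePHI(B.getInt32Ty(), 0, "late"); // then a PHI, which is ill-formed
  B.CreateRetVoid();
  // verifyModule now reports "PHI nodes not grouped at top of basic block!"
}
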
@@ -1558,32 +2201,36 @@ void Verifier::visitPHINode(PHINode &PN) {
void Verifier::VerifyCallSite(CallSite CS) {
Instruction *I = CS.getInstruction();
- Assert1(CS.getCalledValue()->getType()->isPointerTy(),
- "Called function must be a pointer!", I);
+ Assert(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
- Assert1(FPTy->getElementType()->isFunctionTy(),
- "Called function is not pointer to function type!", I);
- FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+ Assert(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
+
+ Assert(FPTy->getElementType() == CS.getFunctionType(),
+ "Called function is not the same type as the call!", I);
+
+ FunctionType *FTy = CS.getFunctionType();
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
- Assert1(CS.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!",I);
+ Assert(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!", I);
else
- Assert1(CS.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", I);
+ Assert(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
// Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
- "Call parameter type does not match function signature!",
- CS.getArgument(i), FTy->getParamType(i), I);
+ Assert(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
AttributeSet Attrs = CS.getAttributes();
- Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
- "Attribute after last parameter!", I);
+ Assert(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attribute after last parameter!", I);
// Verify call attributes.
VerifyFunctionAttrs(FTy, Attrs, I);
@@ -1594,8 +2241,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
if (CS.hasInAllocaArgument()) {
Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1);
if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
- Assert2(AI->isUsedWithInAlloca(),
- "inalloca argument for call has mismatched alloca", AI, I);
+ Assert(AI->isUsedWithInAlloca(),
+ "inalloca argument for call has mismatched alloca", AI, I);
}
if (FTy->isVarArg()) {
@@ -1616,25 +2263,25 @@ void Verifier::VerifyCallSite(CallSite CS) {
VerifyParameterAttrs(Attrs, Idx, Ty, false, I);
if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
- Assert1(!SawNest, "More than one parameter has attribute nest!", I);
+ Assert(!SawNest, "More than one parameter has attribute nest!", I);
SawNest = true;
}
if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
- Assert1(!SawReturned, "More than one parameter has attribute returned!",
- I);
- Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
- "Incompatible argument and return types for 'returned' "
- "attribute", I);
+ Assert(!SawReturned, "More than one parameter has attribute returned!",
+ I);
+ Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+ "Incompatible argument and return types for 'returned' "
+ "attribute",
+ I);
SawReturned = true;
}
- Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!", I);
if (Attrs.hasAttribute(Idx, Attribute::InAlloca))
- Assert1(Idx == CS.arg_size(), "inalloca isn't on the last argument!",
- I);
+ Assert(Idx == CS.arg_size(), "inalloca isn't on the last argument!", I);
}
}
@@ -1643,8 +2290,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
!CS.getCalledFunction()->getName().startswith("llvm.")) {
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
- Assert1(!(*PI)->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", I);
+ Assert(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
}
visitInstruction(*I);
@@ -1677,33 +2324,29 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) {
}
void Verifier::verifyMustTailCall(CallInst &CI) {
- Assert1(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
+ Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
// - The caller and callee prototypes must match. Pointer types of
// parameters or return types may differ in pointee type, but not
// address space.
Function *F = CI.getParent()->getParent();
- auto GetFnTy = [](Value *V) {
- return cast<FunctionType>(
- cast<PointerType>(V->getType())->getElementType());
- };
- FunctionType *CallerTy = GetFnTy(F);
- FunctionType *CalleeTy = GetFnTy(CI.getCalledValue());
- Assert1(CallerTy->getNumParams() == CalleeTy->getNumParams(),
- "cannot guarantee tail call due to mismatched parameter counts", &CI);
- Assert1(CallerTy->isVarArg() == CalleeTy->isVarArg(),
- "cannot guarantee tail call due to mismatched varargs", &CI);
- Assert1(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
- "cannot guarantee tail call due to mismatched return types", &CI);
+ FunctionType *CallerTy = F->getFunctionType();
+ FunctionType *CalleeTy = CI.getFunctionType();
+ Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+ "cannot guarantee tail call due to mismatched parameter counts", &CI);
+ Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(),
+ "cannot guarantee tail call due to mismatched varargs", &CI);
+ Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
+ "cannot guarantee tail call due to mismatched return types", &CI);
for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
- Assert1(
+ Assert(
isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
"cannot guarantee tail call due to mismatched parameter types", &CI);
}
// - The calling conventions of the caller and callee must match.
- Assert1(F->getCallingConv() == CI.getCallingConv(),
- "cannot guarantee tail call due to mismatched calling conv", &CI);
+ Assert(F->getCallingConv() == CI.getCallingConv(),
+ "cannot guarantee tail call due to mismatched calling conv", &CI);
// - All ABI-impacting function attributes, such as sret, byval, inreg,
// returned, and inalloca, must match.
@@ -1712,9 +2355,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
- Assert2(CallerABIAttrs == CalleeABIAttrs,
- "cannot guarantee tail call due to mismatched ABI impacting "
- "function attributes", &CI, CI.getOperand(I));
+ Assert(CallerABIAttrs == CalleeABIAttrs,
+ "cannot guarantee tail call due to mismatched ABI impacting "
+ "function attributes",
+ &CI, CI.getOperand(I));
}
// - The call must immediately precede a :ref:`ret <i_ret>` instruction,
@@ -1726,18 +2370,18 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// Handle the optional bitcast.
if (BitCastInst *BI = dyn_cast_or_null<BitCastInst>(Next)) {
- Assert1(BI->getOperand(0) == RetVal,
- "bitcast following musttail call must use the call", BI);
+ Assert(BI->getOperand(0) == RetVal,
+ "bitcast following musttail call must use the call", BI);
RetVal = BI;
Next = BI->getNextNode();
}
// Check the return.
ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
- Assert1(Ret, "musttail call must be precede a ret with an optional bitcast",
- &CI);
- Assert1(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
- "musttail call result must be returned", Ret);
+ Assert(Ret, "musttail call must be precede a ret with an optional bitcast",
+ &CI);
+ Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
+ "musttail call result must be returned", Ret);
}
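
// A hedged sketch of the parameter-count rule: a musttail call from a
// one-argument caller to a two-argument callee builds without complaint
// through IRBuilder and is only rejected by verifyMustTailCall. Scaffolding
// and names as in the earlier examples, purely illustrative.
void buildBadMustTail(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *VoidTy = Type::getVoidTy(Ctx);
  Function *Callee =
      Function::Create(FunctionType::get(VoidTy, {I32, I32}, false),
                       Function::ExternalLinkage, "callee", &M);
  Function *Caller =
      Function::Create(FunctionType::get(VoidTy, {I32}, false),
                       Function::ExternalLinkage, "caller", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", Caller));
  Value *Zero = ConstantInt::get(I32, 0);
  CallInst *CI = B.CreateCall(Callee, {Zero, Zero});
  CI->setTailCallKind(CallInst::TCK_MustTail); // marks the call musttail
  B.CreateRetVoid();
  // Expected diagnostic:
  // "cannot guarantee tail call due to mismatched parameter counts"
}
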
void Verifier::visitCallInst(CallInst &CI) {
@@ -1747,7 +2391,7 @@ void Verifier::visitCallInst(CallInst &CI) {
verifyMustTailCall(CI);
if (Function *F = CI.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (Intrinsic::ID ID = F->getIntrinsicID())
visitIntrinsicFunctionCall(ID, CI);
}
@@ -1756,8 +2400,15 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
// Verify that there is a landingpad instruction as the first non-PHI
// instruction of the 'unwind' destination.
- Assert1(II.getUnwindDest()->isLandingPad(),
- "The unwind destination does not have a landingpad instruction!",&II);
+ Assert(II.getUnwindDest()->isLandingPad(),
+ "The unwind destination does not have a landingpad instruction!", &II);
+
+ if (Function *F = II.getCalledFunction())
+    // TODO: Ideally we should use visitIntrinsicFunctionCall here. But it
+    // uses CallInst as an input parameter. It is not worth updating this
+    // whole function only to support statepoint verification.
+ if (F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint)
+ VerifyStatepoint(ImmutableCallSite(&II));
visitTerminatorInst(II);
}
@@ -1766,8 +2417,8 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
/// of the same type!
///
void Verifier::visitBinaryOperator(BinaryOperator &B) {
- Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
- "Both operands to a binary operator are not of the same type!", &B);
+ Assert(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
switch (B.getOpcode()) {
// Check that integer arithmetic operators are only used with
@@ -1779,11 +2430,12 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::UDiv:
case Instruction::SRem:
case Instruction::URem:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Integer arithmetic operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Integer arithmetic operators must have same type "
- "for operands and result!", &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that floating-point arithmetic operators are only used with
// floating-point operands.
@@ -1792,30 +2444,32 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- Assert1(B.getType()->isFPOrFPVectorTy(),
- "Floating-point arithmetic operators only work with "
- "floating-point types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Floating-point arithmetic operators must have same type "
- "for operands and result!", &B);
+ Assert(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!",
+ &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that logical operators are only used with integral operands.
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Logical operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Logical operators must have same type for operands and result!",
- &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Shifts only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Shift return type must be same as operands!", &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
break;
default:
llvm_unreachable("Unknown BinaryOperator opcode!");
@@ -1828,15 +2482,15 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
Type *Op0Ty = IC.getOperand(0)->getType();
Type *Op1Ty = IC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to ICmp instruction are not of the same type!", &IC);
+ Assert(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
- "Invalid operand types for ICmp instruction", &IC);
+ Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
- Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
- IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
- "Invalid predicate in ICmp instruction!", &IC);
+ Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
}
@@ -1845,72 +2499,70 @@ void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
Type *Op0Ty = FC.getOperand(0)->getType();
Type *Op1Ty = FC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to FCmp instruction are not of the same type!", &FC);
+ Assert(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
- Assert1(Op0Ty->isFPOrFPVectorTy(),
- "Invalid operand types for FCmp instruction", &FC);
+ Assert(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
// Check that the predicate is valid.
- Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
- FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
- "Invalid predicate in FCmp instruction!", &FC);
+ Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
visitInstruction(FC);
}
void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
- Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
- EI.getOperand(1)),
- "Invalid extractelement operands!", &EI);
+ Assert(
+ ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
visitInstruction(EI);
}
void Verifier::visitInsertElementInst(InsertElementInst &IE) {
- Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
- IE.getOperand(1),
- IE.getOperand(2)),
- "Invalid insertelement operands!", &IE);
+ Assert(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
visitInstruction(IE);
}
void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
- Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
- SV.getOperand(2)),
- "Invalid shufflevector operands!", &SV);
+ Assert(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
visitInstruction(SV);
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
- Assert1(isa<PointerType>(TargetTy),
- "GEP base pointer is not a vector or a vector of pointers", &GEP);
- Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
- "GEP into unsized type!", &GEP);
- Assert1(GEP.getPointerOperandType()->isVectorTy() ==
- GEP.getType()->isVectorTy(), "Vector GEP must return a vector value",
- &GEP);
+ Assert(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
+ Assert(GEP.getPointerOperandType()->isVectorTy() ==
+ GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
- Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
+ Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->getScalarType()->isPointerTy() &&
- cast<PointerType>(GEP.getType()->getScalarType())->getElementType()
- == ElTy, "GEP is not of right type for indices!", &GEP, ElTy);
+ Assert(GEP.getType()->getScalarType()->isPointerTy() &&
+ GEP.getResultElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
if (GEP.getPointerOperandType()->isVectorTy()) {
// Additional checks for vector GEPs.
unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
- Assert1(GepWidth == GEP.getType()->getVectorNumElements(),
- "Vector GEP result width doesn't match operand's", &GEP);
+ Assert(GepWidth == GEP.getType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
Type *IndexTy = Idxs[i]->getType();
- Assert1(IndexTy->isVectorTy(),
- "Vector GEP must have vector indices!", &GEP);
+ Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
+ &GEP);
unsigned IndexWidth = IndexTy->getVectorNumElements();
- Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
}
}
visitInstruction(GEP);
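
// A hedged sketch of the "GEP into unsized type!" check: an opaque struct
// has no layout, so address arithmetic over it cannot be computed, yet the
// C++ API accepts the GEP. Names and scaffolding are illustrative, as above.
void buildBadGEP(Module &M) {
  LLVMContext &Ctx = M.getContext();
  StructType *Opaque = StructType::create(Ctx, "opaque"); // no body set
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx),
                        {PointerType::getUnqual(Opaque)}, false),
      Function::ExternalLinkage, "bad_gep", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  Argument *P = &*F->arg_begin();
  B.CreateGEP(P, B.getInt32(1)); // pointer arithmetic over an unsized type
  B.CreateRetVoid();
  // verifyModule reports "GEP into unsized type!"
}
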
@@ -1927,34 +2579,33 @@ void Verifier::visitRangeMetadata(Instruction& I,
"precondition violation");
unsigned NumOperands = Range->getNumOperands();
- Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ Assert(NumOperands % 2 == 0, "Unfinished range!", Range);
unsigned NumRanges = NumOperands / 2;
- Assert1(NumRanges >= 1, "It should have at least one range!", Range);
-
+ Assert(NumRanges >= 1, "It should have at least one range!", Range);
+
ConstantRange LastRange(1); // Dummy initial value
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Low =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i));
- Assert1(Low, "The lower limit must be an integer!", Low);
+ Assert(Low, "The lower limit must be an integer!", Low);
ConstantInt *High =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i + 1));
- Assert1(High, "The upper limit must be an integer!", High);
- Assert1(High->getType() == Low->getType() &&
- High->getType() == Ty, "Range types must match instruction type!",
- &I);
-
+ Assert(High, "The upper limit must be an integer!", High);
+ Assert(High->getType() == Low->getType() && High->getType() == Ty,
+ "Range types must match instruction type!", &I);
+
APInt HighV = High->getValue();
APInt LowV = Low->getValue();
ConstantRange CurRange(LowV, HighV);
- Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
- "Range must not be empty!", Range);
+ Assert(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
if (i != 0) {
- Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
- Range);
- Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
- Range);
+ Assert(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Assert(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
}
LastRange = ConstantRange(LowV, HighV);
}
@@ -1964,38 +2615,35 @@ void Verifier::visitRangeMetadata(Instruction& I,
APInt FirstHigh =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(1))->getValue();
ConstantRange FirstRange(FirstLow, FirstHigh);
- Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
- Range);
+ Assert(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
}
}
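
// A hedged sketch of the interval rules above: the metadata API builds the
// node without complaint, and only visitRangeMetadata notices that [0,10)
// and [5,20) overlap. Assumes L loads an i32 (so the range types match) and
// the include of llvm/IR/Metadata.h on top of the earlier scaffolding.
void attachBadRange(LoadInst *L, LLVMContext &Ctx) {
  IntegerType *I32 = Type::getInt32Ty(Ctx);
  Metadata *Ops[] = {
      ConstantAsMetadata::get(ConstantInt::get(I32, 0)),
      ConstantAsMetadata::get(ConstantInt::get(I32, 10)),
      ConstantAsMetadata::get(ConstantInt::get(I32, 5)), // overlaps [0,10)
      ConstantAsMetadata::get(ConstantInt::get(I32, 20))};
  L->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, Ops));
  // verifyModule reports "Intervals are overlapping"
}
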
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
- Assert1(PTy, "Load operand must be a pointer.", &LI);
- Type *ElTy = PTy->getElementType();
- Assert2(ElTy == LI.getType(),
- "Load result type does not match pointer operand type!", &LI, ElTy);
- Assert1(LI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &LI);
+ Assert(PTy, "Load operand must be a pointer.", &LI);
+ Type *ElTy = LI.getType();
+ Assert(LI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &LI);
if (LI.isAtomic()) {
- Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
- "Load cannot have Release ordering", &LI);
- Assert1(LI.getAlignment() != 0,
- "Atomic load must specify explicit alignment", &LI);
+ Assert(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Assert(LI.getAlignment() != 0,
+ "Atomic load must specify explicit alignment", &LI);
if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic load operand must have integer type!",
- &LI, ElTy);
+ Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!",
+ &LI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic load operand must be power-of-two byte-sized integer",
- &LI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomic load operand must be power-of-two byte-sized integer", &LI,
+ ElTy);
}
} else {
- Assert1(LI.getSynchScope() == CrossThread,
- "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ Assert(LI.getSynchScope() == CrossThread,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
}
visitInstruction(LI);
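
// A hedged sketch of the atomic-load rules: an atomic load of a float is
// expressible through IRBuilder, and the verifier is what rejects the
// non-integer payload. Scaffolding and names as before, illustrative only.
void buildBadAtomicLoad(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), false),
      Function::ExternalLinkage, "bad_atomic_load", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  Value *Slot = B.CreateAlloca(B.getFloatTy());
  LoadInst *L = B.CreateLoad(Slot);
  L->setAtomic(SequentiallyConsistent); // atomic, but with an FP result type
  L->setAlignment(4);                   // atomic loads need explicit alignment
  B.CreateRetVoid();
  // verifyModule reports "atomic load operand must have integer type!"
}
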
@@ -2003,30 +2651,28 @@ void Verifier::visitLoadInst(LoadInst &LI) {
void Verifier::visitStoreInst(StoreInst &SI) {
PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
- Assert1(PTy, "Store operand must be a pointer.", &SI);
+ Assert(PTy, "Store operand must be a pointer.", &SI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy == SI.getOperand(0)->getType(),
- "Stored value type does not match pointer operand type!",
- &SI, ElTy);
- Assert1(SI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &SI);
+ Assert(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!", &SI, ElTy);
+ Assert(SI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &SI);
if (SI.isAtomic()) {
- Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
- "Store cannot have Acquire ordering", &SI);
- Assert1(SI.getAlignment() != 0,
- "Atomic store must specify explicit alignment", &SI);
+ Assert(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Assert(SI.getAlignment() != 0,
+ "Atomic store must specify explicit alignment", &SI);
if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!",
- &SI, ElTy);
+ Assert(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!", &SI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
- &SI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &SI, ElTy);
}
} else {
- Assert1(SI.getSynchScope() == CrossThread,
- "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ Assert(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
}
visitInstruction(SI);
}
@@ -2034,15 +2680,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
void Verifier::visitAllocaInst(AllocaInst &AI) {
SmallPtrSet<const Type*, 4> Visited;
PointerType *PTy = AI.getType();
- Assert1(PTy->getAddressSpace() == 0,
- "Allocation instruction pointer not in the generic address space!",
- &AI);
- Assert1(PTy->getElementType()->isSized(&Visited), "Cannot allocate unsized type",
- &AI);
- Assert1(AI.getArraySize()->getType()->isIntegerTy(),
- "Alloca array size must have integer type", &AI);
- Assert1(AI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &AI);
+ Assert(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert(AI.getAllocatedType()->isSized(&Visited),
+ "Cannot allocate unsized type", &AI);
+ Assert(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ Assert(AI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
visitInstruction(AI);
}
@@ -2050,87 +2696,83 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
// FIXME: more conditions???
- Assert1(CXI.getSuccessOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getFailureOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getSuccessOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getFailureOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
- "cmpxchg instructions be at least as constrained on success as fail",
- &CXI);
- Assert1(CXI.getFailureOrdering() != Release &&
- CXI.getFailureOrdering() != AcquireRelease,
- "cmpxchg failure ordering cannot include release semantics", &CXI);
+ Assert(CXI.getSuccessOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getFailureOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getSuccessOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getFailureOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
+ "cmpxchg instructions be at least as constrained on success as fail",
+ &CXI);
+ Assert(CXI.getFailureOrdering() != Release &&
+ CXI.getFailureOrdering() != AcquireRelease,
+ "cmpxchg failure ordering cannot include release semantics", &CXI);
PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
- Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Assert(PTy, "First cmpxchg operand must be a pointer.", &CXI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "cmpxchg operand must have integer type!",
- &CXI, ElTy);
+ Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI,
+ ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "cmpxchg operand must be power-of-two byte-sized integer",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(1)->getType(),
- "Expected value type does not match pointer operand type!",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(2)->getType(),
- "Stored value type does not match pointer operand type!",
- &CXI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy);
+ Assert(ElTy == CXI.getOperand(1)->getType(),
+ "Expected value type does not match pointer operand type!", &CXI,
+ ElTy);
+ Assert(ElTy == CXI.getOperand(2)->getType(),
+ "Stored value type does not match pointer operand type!", &CXI, ElTy);
visitInstruction(CXI);
}
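
// A hedged sketch of the failure-ordering rule: IRBuilder accepts a release
// failure ordering on cmpxchg (to the best of my reading it only asserts
// that both orderings are atomic), leaving the rejection to the verifier.
void buildBadCmpXchg(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), false),
      Function::ExternalLinkage, "bad_cmpxchg", &M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  Value *Slot = B.CreateAlloca(B.getInt32Ty());
  B.CreateAtomicCmpXchg(Slot, B.getInt32(0), B.getInt32(1),
                        SequentiallyConsistent, /*FailureOrdering=*/Release);
  B.CreateRetVoid();
  // verifyModule reports
  // "cmpxchg failure ordering cannot include release semantics"
}
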
void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
- Assert1(RMWI.getOrdering() != NotAtomic,
- "atomicrmw instructions must be atomic.", &RMWI);
- Assert1(RMWI.getOrdering() != Unordered,
- "atomicrmw instructions cannot be unordered.", &RMWI);
+ Assert(RMWI.getOrdering() != NotAtomic,
+ "atomicrmw instructions must be atomic.", &RMWI);
+ Assert(RMWI.getOrdering() != Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
- Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+ Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "atomicrmw operand must have integer type!",
- &RMWI, ElTy);
+ Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
+ &RMWI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomicrmw operand must be power-of-two byte-sized integer",
- &RMWI, ElTy);
- Assert2(ElTy == RMWI.getOperand(1)->getType(),
- "Argument value type does not match pointer operand type!",
- &RMWI, ElTy);
- Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
- RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
- "Invalid binary operation!", &RMWI);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomicrmw operand must be power-of-two byte-sized integer", &RMWI,
+ ElTy);
+ Assert(ElTy == RMWI.getOperand(1)->getType(),
+ "Argument value type does not match pointer operand type!", &RMWI,
+ ElTy);
+ Assert(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+ RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
visitInstruction(RMWI);
}
void Verifier::visitFenceInst(FenceInst &FI) {
const AtomicOrdering Ordering = FI.getOrdering();
- Assert1(Ordering == Acquire || Ordering == Release ||
- Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
- "fence instructions may only have "
- "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+ Assert(Ordering == Acquire || Ordering == Release ||
+ Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+ "fence instructions may only have "
+ "acquire, release, acq_rel, or seq_cst ordering.",
+ &FI);
visitInstruction(FI);
}
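
// A hedged one-liner for the fence rule: a monotonic fence is constructible
// through IRBuilder but carries no useful semantics, so the verifier
// rejects it with the "fence instructions may only have ..." message.
void buildBadFence(IRBuilder<> &B) {
  B.CreateFence(Monotonic); // only acquire/release/acq_rel/seq_cst allowed
}
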
void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
- Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
- EVI.getIndices()) ==
- EVI.getType(),
- "Invalid ExtractValueInst operands!", &EVI);
+ Assert(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) == EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
visitInstruction(EVI);
}
void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
- Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
- IVI.getIndices()) ==
- IVI.getOperand(1)->getType(),
- "Invalid InsertValueInst operands!", &IVI);
+ Assert(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
visitInstruction(IVI);
}
@@ -2140,43 +2782,44 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
// The landingpad instruction is ill-formed if it doesn't have any clauses and
// isn't a cleanup.
- Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
- "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+ Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
// The landingpad instruction defines its parent as a landing pad block. The
// landing pad block may be branched to only by the unwind edge of an invoke.
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
- Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "Block containing LandingPadInst must be jumped to "
- "only by the unwind edge of an invoke.", &LPI);
+ Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.",
+ &LPI);
}
// The landingpad instruction must be the first non-PHI instruction in the
// block.
- Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
- "LandingPadInst not the first non-PHI instruction in the block.",
- &LPI);
+ Assert(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.",
+ &LPI);
// The personality functions for all landingpad instructions within the same
// function should match.
if (PersonalityFn)
- Assert1(LPI.getPersonalityFn() == PersonalityFn,
- "Personality function doesn't match others in function", &LPI);
+ Assert(LPI.getPersonalityFn() == PersonalityFn,
+ "Personality function doesn't match others in function", &LPI);
PersonalityFn = LPI.getPersonalityFn();
// All operands must be constants.
- Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
- &LPI);
+ Assert(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+ &LPI);
for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
Constant *Clause = LPI.getClause(i);
if (LPI.isCatch(i)) {
- Assert1(isa<PointerType>(Clause->getType()),
- "Catch operand does not have pointer type!", &LPI);
+ Assert(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
} else {
- Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
- Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
- "Filter operand is not an array of constants!", &LPI);
+ Assert(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Assert(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
}
}
@@ -2194,46 +2837,46 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
}
const Use &U = I.getOperandUse(i);
- Assert2(InstsInThisBlock.count(Op) || DT.dominates(Op, U),
- "Instruction does not dominate all uses!", Op, &I);
+ Assert(InstsInThisBlock.count(Op) || DT.dominates(Op, U),
+ "Instruction does not dominate all uses!", Op, &I);
}
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
BasicBlock *BB = I.getParent();
- Assert1(BB, "Instruction not embedded in basic block!", &I);
+ Assert(BB, "Instruction not embedded in basic block!", &I);
if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
for (User *U : I.users()) {
- Assert1(U != (User*)&I || !DT.isReachableFromEntry(BB),
- "Only PHI nodes may reference their own value!", &I);
+ Assert(U != (User *)&I || !DT.isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
}
}
// Check that void typed values don't have names
- Assert1(!I.getType()->isVoidTy() || !I.hasName(),
- "Instruction has a name, but provides a void value!", &I);
+ Assert(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
// Check that the return value of the instruction is either void or a legal
// value type.
- Assert1(I.getType()->isVoidTy() ||
- I.getType()->isFirstClassType(),
- "Instruction returns a non-scalar type!", &I);
+ Assert(I.getType()->isVoidTy() || I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
// Check that the instruction doesn't produce metadata. Calls are already
// checked against the callee type.
- Assert1(!I.getType()->isMetadataTy() ||
- isa<CallInst>(I) || isa<InvokeInst>(I),
- "Invalid use of metadata!", &I);
+ Assert(!I.getType()->isMetadataTy() || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
// Check that all uses of the instruction, if they are instructions
// themselves, actually have parent basic blocks. If the use is not an
// instruction, it is an error!
for (Use &U : I.uses()) {
if (Instruction *Used = dyn_cast<Instruction>(U.getUser()))
- Assert2(Used->getParent() != nullptr, "Instruction referencing"
- " instruction not embedded in a basic block!", &I, Used);
+ Assert(Used->getParent() != nullptr,
+ "Instruction referencing"
+ " instruction not embedded in a basic block!",
+ &I, Used);
else {
CheckFailed("Use of instruction is not an instruction!", U);
return;
@@ -2241,43 +2884,46 @@ void Verifier::visitInstruction(Instruction &I) {
}
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Assert1(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
+ Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
// Check to make sure that only first-class-values are operands to
// instructions.
if (!I.getOperand(i)->getType()->isFirstClassType()) {
- Assert1(0, "Instruction operands must be first-class values!", &I);
+ Assert(0, "Instruction operands must be first-class values!", &I);
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 :
- isa<InvokeInst>(I) ? e-3 : 0),
- "Cannot take the address of an intrinsic!", &I);
- Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
+ Assert(
+ !F->isIntrinsic() ||
+ i == (isa<CallInst>(I) ? e - 1 : isa<InvokeInst>(I) ? e - 3 : 0),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert(
+ !F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
- F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64,
- "Cannot invoke an intrinsinc other than"
- " donothing or patchpoint", &I);
- Assert1(F->getParent() == M, "Referencing function in another module!",
- &I);
+ F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
+ F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
+          "Cannot invoke an intrinsic other than"
+          " donothing, patchpoint or statepoint",
+ &I);
+ Assert(F->getParent() == M, "Referencing function in another module!",
+ &I);
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
- Assert1(OpBB->getParent() == BB->getParent(),
- "Referring to a basic block in another function!", &I);
+ Assert(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
} else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
- Assert1(OpArg->getParent() == BB->getParent(),
- "Referring to an argument in another function!", &I);
+ Assert(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert1(GV->getParent() == M, "Referencing global in another module!",
- &I);
+ Assert(GV->getParent() == M, "Referencing global in another module!", &I);
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
- "Cannot take the address of an inline asm!", &I);
+ Assert((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
@@ -2303,31 +2949,37 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
- Assert1(I.getType()->isFPOrFPVectorTy(),
- "fpmath requires a floating point result!", &I);
- Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Assert(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Assert(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
if (ConstantFP *CFP0 =
mdconst::dyn_extract_or_null<ConstantFP>(MD->getOperand(0))) {
APFloat Accuracy = CFP0->getValueAPF();
- Assert1(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
- "fpmath accuracy not a positive number!", &I);
+ Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
} else {
- Assert1(false, "invalid fpmath accuracy!", &I);
+ Assert(false, "invalid fpmath accuracy!", &I);
}
}
if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) {
- Assert1(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Ranges are only for loads, calls and invokes!", &I);
+ Assert(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
visitRangeMetadata(I, Range, I.getType());
}
if (I.getMetadata(LLVMContext::MD_nonnull)) {
- Assert1(I.getType()->isPointerTy(),
- "nonnull applies only to pointer types", &I);
- Assert1(isa<LoadInst>(I),
- "nonnull applies only to load instructions, use attributes"
- " for calls or invokes", &I);
+ Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
+ &I);
+ Assert(isa<LoadInst>(I),
+ "nonnull applies only to load instructions, use attributes"
+ " for calls or invokes",
+ &I);
+ }
+
+ if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
+ Assert(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
+ visitMDNode(*N);
}
InstsInThisBlock.insert(&I);
@@ -2391,6 +3043,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
ArgTys.push_back(Ty);
switch (D.getArgumentKind()) {
+ case IITDescriptor::AK_Any: return false; // Success
case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
@@ -2454,6 +3107,23 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
}
+ case IITDescriptor::VecOfPtrsToElt: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return true;
+    VectorType *ReferenceType =
+        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
+ if (!ThisArgVecTy || !ReferenceType ||
+ (ReferenceType->getVectorNumElements() !=
+ ThisArgVecTy->getVectorNumElements()))
+ return true;
+ PointerType *ThisArgEltTy =
+ dyn_cast<PointerType>(ThisArgVecTy->getVectorElementType());
+ if (!ThisArgEltTy)
+ return true;
+ return ThisArgEltTy->getElementType() !=
+ ReferenceType->getVectorElementType();
+ }
}
llvm_unreachable("unhandled");
}
@@ -2470,7 +3140,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
// If there are no descriptors left, then it can't be a vararg.
if (Infos.empty())
- return isVarArg ? true : false;
+ return isVarArg;
// There should be only one descriptor remaining at this point.
if (Infos.size() != 1)
@@ -2480,7 +3150,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
IITDescriptor D = Infos.front();
Infos = Infos.slice(1);
if (D.Kind == IITDescriptor::VarArg)
- return isVarArg ? false : true;
+ return !isVarArg;
return true;
}
@@ -2489,8 +3159,8 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
///
void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Function *IF = CI.getCalledFunction();
- Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
- IF);
+ Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
// Verify that the intrinsic prototype lines up with what the .td files
// describe.
@@ -2502,31 +3172,33 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
SmallVector<Type *, 4> ArgTys;
- Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
- "Intrinsic has incorrect return type!", IF);
+ Assert(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
+ "Intrinsic has incorrect return type!", IF);
for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
- Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
- "Intrinsic has incorrect argument type!", IF);
+ Assert(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
+ "Intrinsic has incorrect argument type!", IF);
// Verify if the intrinsic call matches the vararg property.
if (IsVarArg)
- Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
- "Intrinsic was not defined with variable arguments!", IF);
+ Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Intrinsic was not defined with variable arguments!", IF);
else
- Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
- "Callsite was not defined with variable arguments!", IF);
+ Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Callsite was not defined with variable arguments!", IF);
// All descriptors should be absorbed by now.
- Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
+ Assert(TableRef.empty(), "Intrinsic has too few arguments!", IF);
// Now that we have the intrinsic ID and the actual argument types (and we
// know they are legal for the intrinsic!) get the intrinsic name through the
// usual means. This allows us to verify the mangling of argument types into
// the name.
const std::string ExpectedName = Intrinsic::getName(ID, ArgTys);
- Assert1(ExpectedName == IF->getName(),
- "Intrinsic name not mangled correctly for type arguments! "
- "Should be: " + ExpectedName, IF);
+ Assert(ExpectedName == IF->getName(),
+ "Intrinsic name not mangled correctly for type arguments! "
+ "Should be: " +
+ ExpectedName,
+ IF);
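// Illustrative only: overloaded intrinsics mangle their concrete types into
// the symbol name, so for memcpy with i8* dest/src and an i64 length the
// ExpectedName computed above would be:
//   llvm.memcpy.p0i8.p0i8.i64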
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
@@ -2539,309 +3211,432 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
break;
case Intrinsic::ctlz: // llvm.ctlz
case Intrinsic::cttz: // llvm.cttz
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "is_zero_undef argument of bit counting intrinsics must be a "
- "constant int", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int",
+ &CI);
+ break;
+ case Intrinsic::dbg_declare: // llvm.dbg.declare
+ Assert(isa<MetadataAsValue>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ visitDbgIntrinsic("declare", cast<DbgDeclareInst>(CI));
+ break;
+ case Intrinsic::dbg_value: // llvm.dbg.value
+ visitDbgIntrinsic("value", cast<DbgValueInst>(CI));
break;
- case Intrinsic::dbg_declare: { // llvm.dbg.declare
- Assert1(CI.getArgOperand(0) && isa<MetadataAsValue>(CI.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", &CI);
- } break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
- case Intrinsic::memset:
- Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
- "alignment argument of memory intrinsics must be a constant int",
- &CI);
- Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
- "isvolatile argument of memory intrinsics must be a constant int",
- &CI);
+ case Intrinsic::memset: {
+ ConstantInt *AlignCI = dyn_cast<ConstantInt>(CI.getArgOperand(3));
+ Assert(AlignCI,
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ const APInt &AlignVal = AlignCI->getValue();
+ Assert(AlignCI->isZero() || AlignVal.isPowerOf2(),
+ "alignment argument of memory intrinsics must be a power of 2", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
break;
+ }
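// Equivalent scalar form of the power-of-2 test above (a sketch assuming a
// plain uint64_t Align; APInt::isPowerOf2() handles arbitrary bit widths):
//   bool Ok = Align == 0 || (Align & (Align - 1)) == 0;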
case Intrinsic::gcroot:
case Intrinsic::gcwrite:
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
- Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
- Assert1(isa<Constant>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", &CI);
- if (!AI->getType()->getElementType()->isPointerTy()) {
- Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #1 must either be a pointer alloca, "
- "or argument #2 must be a non-null constant.", &CI);
+ Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getAllocatedType()->isPointerTy()) {
+ Assert(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.",
+ &CI);
}
}
- Assert1(CI.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", &CI);
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
break;
case Intrinsic::init_trampoline:
- Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.init_trampoline parameter #2 must resolve to a function.",
- &CI);
+ Assert(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
break;
case Intrinsic::prefetch:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
- isa<ConstantInt>(CI.getArgOperand(2)) &&
- cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch",
- &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch", &CI);
break;
case Intrinsic::stackprotector:
- Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.",
- &CI);
+ Assert(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.", &CI);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
- "size argument of memory use markers must be a constant integer",
- &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
break;
case Intrinsic::invariant_end:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
break;
- case Intrinsic::frameallocate: {
+ case Intrinsic::frameescape: {
BasicBlock *BB = CI.getParent();
- Assert1(BB == &BB->getParent()->front(),
- "llvm.frameallocate used outside of entry block", &CI);
- Assert1(!SawFrameAllocate,
- "multiple calls to llvm.frameallocate in one function", &CI);
- SawFrameAllocate = true;
- Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
- "llvm.frameallocate argument must be constant integer size", &CI);
+ Assert(BB == &BB->getParent()->front(),
+ "llvm.frameescape used outside of entry block", &CI);
+ Assert(!SawFrameEscape,
+ "multiple calls to llvm.frameescape in one function", &CI);
+ for (Value *Arg : CI.arg_operands()) {
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Null values are allowed as placeholders.
+ auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+ Assert(AI && AI->isStaticAlloca(),
+ "llvm.frameescape only accepts static allocas", &CI);
+ }
+ FrameEscapeInfo[BB->getParent()].first = CI.getNumArgOperands();
+ SawFrameEscape = true;
break;
}
case Intrinsic::framerecover: {
Value *FnArg = CI.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
- Assert1(Fn && !Fn->isDeclaration(), "llvm.framerecover first "
- "argument must be function defined in this module", &CI);
+ Assert(Fn && !Fn->isDeclaration(),
+ "llvm.framerecover first argument must be a function defined in this "
+ "module",
+ &CI);
+ auto *IdxArg = dyn_cast<ConstantInt>(CI.getArgOperand(2));
+ Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int",
+ &CI);
+ auto &Entry = FrameEscapeInfo[Fn];
+ Entry.second = unsigned(
+ std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
break;
}
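// Illustrative pairing (assumed names, not part of the patch) satisfying the
// frameescape/framerecover rules above:
//   define void @parent() {
//   entry:
//     %a = alloca i32                             ; static alloca
//     call void (...)* @llvm.frameescape(i32* %a) ; entry block, once only
//   ; in an outlined helper, with %fp holding the parent's frame pointer:
//   %p = call i8* @llvm.framerecover(
//            i8* bitcast (void ()* @parent to i8*), i8* %fp, i32 0)
// The idx operand (here i32 0) must be a constant, and FrameEscapeInfo
// cross-checks it against the number of escaped allocas in @parent.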
- case Intrinsic::experimental_gc_statepoint: {
- Assert1(!CI.doesNotAccessMemory() &&
- !CI.onlyReadsMemory(),
- "gc.statepoint must read and write memory to preserve "
- "reordering restrictions required by safepoint semantics", &CI);
- Assert1(!CI.isInlineAsm(),
- "gc.statepoint support for inline assembly unimplemented", &CI);
-
- const Value *Target = CI.getArgOperand(0);
- const PointerType *PT = dyn_cast<PointerType>(Target->getType());
- Assert2(PT && PT->getElementType()->isFunctionTy(),
- "gc.statepoint callee must be of function pointer type",
- &CI, Target);
- FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
- Assert1(!TargetFuncType->isVarArg(),
- "gc.statepoint support for var arg functions not implemented", &CI);
-
- const Value *NumCallArgsV = CI.getArgOperand(1);
- Assert1(isa<ConstantInt>(NumCallArgsV),
- "gc.statepoint number of arguments to underlying call "
- "must be constant integer", &CI);
- const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
- Assert1(NumCallArgs >= 0,
- "gc.statepoint number of arguments to underlying call "
- "must be positive", &CI);
- Assert1(NumCallArgs == (int)TargetFuncType->getNumParams(),
- "gc.statepoint mismatch in number of call args", &CI);
-
- const Value *Unused = CI.getArgOperand(2);
- Assert1(isa<ConstantInt>(Unused) &&
- cast<ConstantInt>(Unused)->isNullValue(),
- "gc.statepoint parameter #3 must be zero", &CI);
-
- // Verify that the types of the call parameter arguments match
- // the type of the wrapped callee.
- for (int i = 0; i < NumCallArgs; i++) {
- Type *ParamType = TargetFuncType->getParamType(i);
- Type *ArgType = CI.getArgOperand(3+i)->getType();
- Assert1(ArgType == ParamType,
- "gc.statepoint call argument does not match wrapped "
- "function type", &CI);
- }
- const int EndCallArgsInx = 2+NumCallArgs;
- const Value *NumDeoptArgsV = CI.getArgOperand(EndCallArgsInx+1);
- Assert1(isa<ConstantInt>(NumDeoptArgsV),
- "gc.statepoint number of deoptimization arguments "
- "must be constant integer", &CI);
- const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert1(NumDeoptArgs >= 0,
- "gc.statepoint number of deoptimization arguments "
- "must be positive", &CI);
-
- Assert1(4 + NumCallArgs + NumDeoptArgs <= (int)CI.getNumArgOperands(),
- "gc.statepoint too few arguments according to length fields", &CI);
-
- // Check that the only uses of this gc.statepoint are gc.result or
- // gc.relocate calls which are tied to this statepoint and thus part
- // of the same statepoint sequence
- for (User *U : CI.users()) {
- const CallInst *Call = dyn_cast<const CallInst>(U);
- Assert2(Call, "illegal use of statepoint token", &CI, U);
- if (!Call) continue;
- Assert2(isGCRelocate(Call) || isGCResult(Call),
- "gc.result or gc.relocate are the only value uses"
- "of a gc.statepoint", &CI, U);
- if (isGCResult(Call)) {
- Assert2(Call->getArgOperand(0) == &CI,
- "gc.result connected to wrong gc.statepoint",
- &CI, Call);
- } else if (isGCRelocate(Call)) {
- Assert2(Call->getArgOperand(0) == &CI,
- "gc.relocate connected to wrong gc.statepoint",
- &CI, Call);
- }
- }
+ case Intrinsic::experimental_gc_statepoint:
+ Assert(!CI.isInlineAsm(),
+ "gc.statepoint support for inline assembly unimplemented", &CI);
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
- // Note: It is legal for a single derived pointer to be listed multiple
- // times. It's non-optimal, but it is legal. It can also happen after
- // insertion if we strip a bitcast away.
- // Note: It is really tempting to check that each base is relocated and
- // that a derived pointer is never reused as a base pointer. This turns
- // out to be problematic since optimizations run after safepoint insertion
- // can recognize equality properties that the insertion logic doesn't know
- // about. See example statepoint.ll in the verifier subdirectory
+ VerifyStatepoint(ImmutableCallSite(&CI));
break;
- }
case Intrinsic::experimental_gc_result_int:
case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr: {
+ case Intrinsic::experimental_gc_result_ptr:
+ case Intrinsic::experimental_gc_result: {
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
// Are we tied to a statepoint properly?
CallSite StatepointCS(CI.getArgOperand(0));
const Function *StatepointFn =
StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
- Assert2(StatepointFn && StatepointFn->isDeclaration() &&
- StatepointFn->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "gc.result operand #1 must be from a statepoint",
- &CI, CI.getArgOperand(0));
+ Assert(StatepointFn && StatepointFn->isDeclaration() &&
+ StatepointFn->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint,
+ "gc.result operand #1 must be from a statepoint", &CI,
+ CI.getArgOperand(0));
// Assert that result type matches wrapped callee.
- const Value *Target = StatepointCS.getArgument(0);
+ const Value *Target = StatepointCS.getArgument(2);
const PointerType *PT = cast<PointerType>(Target->getType());
const FunctionType *TargetFuncType =
cast<FunctionType>(PT->getElementType());
- Assert1(CI.getType() == TargetFuncType->getReturnType(),
- "gc.result result type does not match wrapped callee",
- &CI);
+ Assert(CI.getType() == TargetFuncType->getReturnType(),
+ "gc.result result type does not match wrapped callee", &CI);
break;
}
case Intrinsic::experimental_gc_relocate: {
- // Are we tied to a statepoint properly?
- CallSite StatepointCS(CI.getArgOperand(0));
- const Function *StatepointFn =
- StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
- Assert2(StatepointFn && StatepointFn->isDeclaration() &&
- StatepointFn->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "gc.relocate operand #1 must be from a statepoint",
- &CI, CI.getArgOperand(0));
+ Assert(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI);
+
+ // Check that this relocate is correctly tied to the statepoint
+
+ // This is case for relocate on the unwinding path of an invoke statepoint
+ // This is the case for a relocate on the unwinding path of an invoke
+ // statepoint.
+ dyn_cast<ExtractValueInst>(CI.getArgOperand(0))) {
+ Assert(isa<LandingPadInst>(ExtractValue->getAggregateOperand()),
+ "gc relocate on unwind path incorrectly linked to the statepoint",
+ &CI);
+
+ const BasicBlock *InvokeBB =
+ ExtractValue->getParent()->getUniquePredecessor();
+
+ // Landingpad relocates should have only one predecessor, with an invoke
+ // statepoint terminator.
+ Assert(InvokeBB, "safepoints should have unique landingpads",
+ ExtractValue->getParent());
+ Assert(InvokeBB->getTerminator(), "safepoint block should be well formed",
+ InvokeBB);
+ Assert(isStatepoint(InvokeBB->getTerminator()),
+ "gc relocate should be linked to a statepoint", InvokeBB);
+ } else {
+ // In all other cases the relocate should be tied to the statepoint
+ // directly. This covers relocates on the normal return path of an invoke
+ // statepoint and relocates of a call statepoint.
+ auto Token = CI.getArgOperand(0);
+ Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
+ "gc relocate is incorrectly tied to the statepoint", &CI, Token);
+ }
+
+ // Verify rest of the relocate arguments
+
+ GCRelocateOperands Ops(&CI);
+ ImmutableCallSite StatepointCS(Ops.getStatepoint());
// Both the base and derived must be piped through the safepoint
Value* Base = CI.getArgOperand(1);
- Assert1(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", &CI);
-
+ Assert(isa<ConstantInt>(Base),
+ "gc.relocate operand #2 must be an integer offset", &CI);
+
Value* Derived = CI.getArgOperand(2);
- Assert1(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", &CI);
+ Assert(isa<ConstantInt>(Derived),
+ "gc.relocate operand #3 must be an integer offset", &CI);
const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- Assert1(0 <= BaseIndex &&
- BaseIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint base index out of bounds", &CI);
- Assert1(0 <= DerivedIndex &&
- DerivedIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", &CI);
+ Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint base index out of bounds", &CI);
+ Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint derived index out of bounds", &CI);
// Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
// section of the statepoint's argument list
- const int NumCallArgs =
- cast<ConstantInt>(StatepointCS.getArgument(1))->getZExtValue();
+ Assert(StatepointCS.arg_size() > 0,
+ "gc.statepoint: insufficient arguments");
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(3)),
+ "gc.statepoint: number of call arguments must be a constant integer");
+ const unsigned NumCallArgs =
+ cast<ConstantInt>(StatepointCS.getArgument(3))->getZExtValue();
+ Assert(StatepointCS.arg_size() > NumCallArgs + 5,
+ "gc.statepoint: mismatch in number of call arguments");
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5)),
+ "gc.statepoint: number of transition arguments must be "
+ "a constant integer");
+ const int NumTransitionArgs =
+ cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5))
+ ->getZExtValue();
+ const int DeoptArgsStart = 4 + NumCallArgs + 1 + NumTransitionArgs + 1;
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart)),
+ "gc.statepoint: number of deoptimization arguments must be "
+ "a constant integer");
const int NumDeoptArgs =
- cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 3))->getZExtValue();
- const int GCParamArgsStart = NumCallArgs + NumDeoptArgs + 4;
+ cast<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart))->getZExtValue();
+ const int GCParamArgsStart = DeoptArgsStart + 1 + NumDeoptArgs;
const int GCParamArgsEnd = StatepointCS.arg_size();
- Assert1(GCParamArgsStart <= BaseIndex &&
- BaseIndex < GCParamArgsEnd,
- "gc.relocate: statepoint base index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
- Assert1(GCParamArgsStart <= DerivedIndex &&
- DerivedIndex < GCParamArgsEnd,
- "gc.relocate: statepoint derived index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
-
-
- // Assert that the result type matches the type of the relocated pointer
+ Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint base index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
+ Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint derived index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
+
+ // The relocated value must be a pointer type, but gc_relocate does not need
+ // to return the same pointer type as the relocated pointer. It can be cast
+ // to the correct type later if desired. However, the two types must have
+ // the same address space.
GCRelocateOperands Operands(&CI);
- Assert1(Operands.derivedPtr()->getType() == CI.getType(),
- "gc.relocate: relocating a pointer shouldn't change its type",
- &CI);
+ Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
+ "gc.relocate: relocated value must be a gc pointer", &CI);
+
+ // gc_relocate return type must be a pointer type, and is verified earlier in
+ // VerifyIntrinsicType().
+ Assert(cast<PointerType>(CI.getType())->getAddressSpace() ==
+ cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
+ "gc.relocate: relocating a pointer shouldn't change its address space", &CI);
break;
}
};
}
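// For reference, the gc.statepoint operand layout implied by the index
// arithmetic above (call-site argument indices; a summary, not new code):
//   0: id                        5 .. 4+NumCallArgs: call arguments
//   1: number of patch bytes     5+NumCallArgs: number of transition args,
//   2: actual callee                followed by the transition args, the
//   3: number of call arguments     number of deopt args, the deopt args,
//   4: flags                        and finally the gc pointers
// gc.relocate's base/derived indices must land in that final gc section.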
-void DebugInfoVerifier::verifyDebugInfo() {
- if (!VerifyDebugInfo)
- return;
+/// \brief Carefully grab the subprogram from a local scope.
+///
+/// This carefully grabs the subprogram from a local scope, avoiding the
+/// built-in assertions that would typically fire.
+static DISubprogram *getSubprogram(Metadata *LocalScope) {
+ if (!LocalScope)
+ return nullptr;
+
+ if (auto *SP = dyn_cast<DISubprogram>(LocalScope))
+ return SP;
+
+ if (auto *LB = dyn_cast<DILexicalBlockBase>(LocalScope))
+ return getSubprogram(LB->getRawScope());
+
+ // Just return null; broken scope chains are checked elsewhere.
+ assert(!isa<DILocalScope>(LocalScope) && "Unknown type of local scope");
+ return nullptr;
+}
+
+template <class DbgIntrinsicTy>
+void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
+ auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
+ Assert(isa<ValueAsMetadata>(MD) ||
+ (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
+ "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD);
+ Assert(isa<DILocalVariable>(DII.getRawVariable()),
+ "invalid llvm.dbg." + Kind + " intrinsic variable", &DII,
+ DII.getRawVariable());
+ Assert(isa<DIExpression>(DII.getRawExpression()),
+ "invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
+ DII.getRawExpression());
+
+ // Ignore broken !dbg attachments; they're checked elsewhere.
+ if (MDNode *N = DII.getDebugLoc().getAsMDNode())
+ if (!isa<DILocation>(N))
+ return;
- DebugInfoFinder Finder;
- Finder.processModule(*M);
- processInstructions(Finder);
+ BasicBlock *BB = DII.getParent();
+ Function *F = BB ? BB->getParent() : nullptr;
+
+ // The scopes for variables and !dbg attachments must agree.
+ DILocalVariable *Var = DII.getVariable();
+ DILocation *Loc = DII.getDebugLoc();
+ Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment",
+ &DII, BB, F);
+
+ DISubprogram *VarSP = getSubprogram(Var->getRawScope());
+ DISubprogram *LocSP = getSubprogram(Loc->getRawScope());
+ if (!VarSP || !LocSP)
+ return; // Broken scope chains are checked elsewhere.
+
+ Assert(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
+ " variable and !dbg attachment",
+ &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
+ Loc->getScope()->getSubprogram());
+}
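// Shape accepted by the checks above (illustrative IR, assumed names):
//   call void @llvm.dbg.value(metadata i32 %x, i64 0,
//                             metadata !DILocalVariable(...),
//                             metadata !DIExpression()), !dbg !loc
// where !loc's scope and the variable's scope resolve to the same
// DISubprogram.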
+
+template <class MapTy>
+static uint64_t getVariableSize(const DILocalVariable &V, const MapTy &Map) {
+ // Be careful of broken types (checked elsewhere).
+ const Metadata *RawType = V.getRawType();
+ while (RawType) {
+ // Try to get the size directly.
+ if (auto *T = dyn_cast<DIType>(RawType))
+ if (uint64_t Size = T->getSizeInBits())
+ return Size;
+
+ if (auto *DT = dyn_cast<DIDerivedType>(RawType)) {
+ // Look at the base type.
+ RawType = DT->getRawBaseType();
+ continue;
+ }
- // Verify Debug Info.
- //
- // NOTE: The loud braces are necessary for MSVC compatibility.
- for (DICompileUnit CU : Finder.compile_units()) {
- Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU);
- }
- for (DISubprogram S : Finder.subprograms()) {
- Assert1(S.Verify(), "DISubprogram does not Verify!", S);
- }
- for (DIGlobalVariable GV : Finder.global_variables()) {
- Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
- }
- for (DIType T : Finder.types()) {
- Assert1(T.Verify(), "DIType does not Verify!", T);
+ if (auto *S = dyn_cast<MDString>(RawType)) {
+ // Don't error on missing types (checked elsewhere).
+ RawType = Map.lookup(S);
+ continue;
+ }
+
+ // Missing type or size.
+ break;
}
- for (DIScope S : Finder.scopes()) {
- Assert1(S.Verify(), "DIScope does not Verify!", S);
+
+ // Fail gracefully.
+ return 0;
+}
+
+template <class MapTy>
+void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I,
+ const MapTy &TypeRefs) {
+ DILocalVariable *V;
+ DIExpression *E;
+ if (auto *DVI = dyn_cast<DbgValueInst>(&I)) {
+ V = dyn_cast_or_null<DILocalVariable>(DVI->getRawVariable());
+ E = dyn_cast_or_null<DIExpression>(DVI->getRawExpression());
+ } else {
+ auto *DDI = cast<DbgDeclareInst>(&I);
+ V = dyn_cast_or_null<DILocalVariable>(DDI->getRawVariable());
+ E = dyn_cast_or_null<DIExpression>(DDI->getRawExpression());
}
+
+ // We don't know whether this intrinsic verified correctly.
+ if (!V || !E || !E->isValid())
+ return;
+
+ // Nothing to do if this isn't a bit piece expression.
+ if (!E->isBitPiece())
+ return;
+
+ // The frontend helps out GDB by emitting the members of local anonymous
+ // unions as artificial local variables with shared storage. When SROA splits
+ // the storage for artificial local variables that are smaller than the entire
+ // union, the overhang piece will be outside of the allotted space for the
+ // variable and this check fails.
+ // FIXME: Remove this check as soon as clang stops doing this; it hides bugs.
+ if (V->isArtificial())
+ return;
+
+ // If there's no size, the type is broken, but that should be checked
+ // elsewhere.
+ uint64_t VarSize = getVariableSize(*V, TypeRefs);
+ if (!VarSize)
+ return;
+
+ unsigned PieceSize = E->getBitPieceSize();
+ unsigned PieceOffset = E->getBitPieceOffset();
+ Assert(PieceSize + PieceOffset <= VarSize,
+ "piece is larger than or outside of variable", &I, V, E);
+ Assert(PieceSize != VarSize, "piece covers entire variable", &I, V, E);
}
-void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
- for (const Function &F : *M)
- for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg))
- Finder.processLocation(*M, DILocation(MD));
- if (const CallInst *CI = dyn_cast<CallInst>(&*I))
- processCallInst(Finder, *CI);
- }
+void Verifier::visitUnresolvedTypeRef(const MDString *S, const MDNode *N) {
+ // This is in its own function so we get an error for each bad type ref (not
+ // just the first).
+ Assert(false, "unresolved type ref", S, N);
}
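// Illustrative cases the piece checks above reject, for a 32-bit variable:
//   !DIExpression(DW_OP_bit_piece, 16, 24) ; offset 16 + size 24 = 40 > 32
//   !DIExpression(DW_OP_bit_piece, 0, 32)  ; piece covers the whole variable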
-void DebugInfoVerifier::processCallInst(DebugInfoFinder &Finder,
- const CallInst &CI) {
- if (Function *F = CI.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- switch (ID) {
- case Intrinsic::dbg_declare:
- Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
- break;
- case Intrinsic::dbg_value:
- Finder.processValue(*M, cast<DbgValueInst>(&CI));
- break;
- default:
- break;
- }
+void Verifier::verifyTypeRefs() {
+ auto *CUs = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CUs)
+ return;
+
+ // Visit all the compile units again to map the type references.
+ SmallDenseMap<const MDString *, const DIType *, 32> TypeRefs;
+ for (auto *CU : CUs->operands())
+ if (auto Ts = cast<DICompileUnit>(CU)->getRetainedTypes())
+ for (DIType *Op : Ts)
+ if (auto *T = dyn_cast<DICompositeType>(Op))
+ if (auto *S = T->getRawIdentifier()) {
+ UnresolvedTypeRefs.erase(S);
+ TypeRefs.insert(std::make_pair(S, T));
+ }
+
+ // Verify debug info intrinsic bit piece expressions. This needs a second
+ // pass through the instructions, since we haven't built TypeRefs yet when
+ // verifying functions, and simply queuing the DbgInfoIntrinsics to evaluate
+ // later would queue up some that could be deleted in the meantime.
+ for (const Function &F : *M)
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I))
+ verifyBitPieceExpression(*DII, TypeRefs);
+
+ // Return early if all typerefs were resolved.
+ if (UnresolvedTypeRefs.empty())
+ return;
+
+ // Sort the unresolved references by name so the output is deterministic.
+ typedef std::pair<const MDString *, const MDNode *> TypeRef;
+ SmallVector<TypeRef, 32> Unresolved(UnresolvedTypeRefs.begin(),
+ UnresolvedTypeRefs.end());
+ std::sort(Unresolved.begin(), Unresolved.end(),
+ [](const TypeRef &LHS, const TypeRef &RHS) {
+ return LHS.first->getString() < RHS.first->getString();
+ });
+
+ // Visit the unresolved refs (printing out the errors).
+ for (const TypeRef &TR : Unresolved)
+ visitUnresolvedTypeRef(TR.first, TR.second);
}
//===----------------------------------------------------------------------===//
@@ -2871,8 +3666,7 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) {
// Note that this function's return value is inverted from what you would
// expect of a function called "verify".
- DebugInfoVerifier DIV(OS ? *OS : NullStr);
- return !V.verify(M) || !DIV.verify(M) || Broken;
+ return !V.verify(M) || Broken;
}
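// Usage sketch (illustrative): note the inverted sense documented above,
// i.e. verifyModule() returns true when the module is broken.
//   if (verifyModule(M, &errs()))
//     report_fatal_error("broken module");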
namespace {
@@ -2882,7 +3676,7 @@ struct VerifierLegacyPass : public FunctionPass {
Verifier V;
bool FatalErrors;
- VerifierLegacyPass() : FunctionPass(ID), FatalErrors(true) {
+ VerifierLegacyPass() : FunctionPass(ID), V(dbgs()), FatalErrors(true) {
initializeVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
}
explicit VerifierLegacyPass(bool FatalErrors)
@@ -2908,48 +3702,15 @@ struct VerifierLegacyPass : public FunctionPass {
AU.setPreservesAll();
}
};
-struct DebugInfoVerifierLegacyPass : public ModulePass {
- static char ID;
-
- DebugInfoVerifier V;
- bool FatalErrors;
-
- DebugInfoVerifierLegacyPass() : ModulePass(ID), FatalErrors(true) {
- initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- explicit DebugInfoVerifierLegacyPass(bool FatalErrors)
- : ModulePass(ID), V(dbgs()), FatalErrors(FatalErrors) {
- initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (!V.verify(M) && FatalErrors)
- report_fatal_error("Broken debug info found, compilation aborted!");
-
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
}
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
-char DebugInfoVerifierLegacyPass::ID = 0;
-INITIALIZE_PASS(DebugInfoVerifierLegacyPass, "verify-di", "Debug Info Verifier",
- false, false)
-
FunctionPass *llvm::createVerifierPass(bool FatalErrors) {
return new VerifierLegacyPass(FatalErrors);
}
-ModulePass *llvm::createDebugInfoVerifierPass(bool FatalErrors) {
- return new DebugInfoVerifierLegacyPass(FatalErrors);
-}
-
PreservedAnalyses VerifierPass::run(Module &M) {
if (verifyModule(M, &dbgs()) && FatalErrors)
report_fatal_error("Broken module found, compilation aborted!");
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index c663d43..3cf13a0 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -15,6 +15,8 @@
#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/Config/config.h"
@@ -24,6 +26,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
@@ -33,10 +36,8 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -65,32 +65,31 @@ const char* LTOCodeGenerator::getVersionString() {
LTOCodeGenerator::LTOCodeGenerator()
: Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) {
- initialize();
+ initializeLTOPasses();
}
LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
: OwnedContext(std::move(Context)), Context(*OwnedContext),
IRLinker(new Module("ld-temp.o", *OwnedContext)) {
- initialize();
+ initializeLTOPasses();
}
-void LTOCodeGenerator::initialize() {
- TargetMach = nullptr;
- EmitDwarfDebugInfo = false;
- ScopeRestrictionsDone = false;
- CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
- DiagHandler = nullptr;
- DiagContext = nullptr;
-
- initializeLTOPasses();
+void LTOCodeGenerator::destroyMergedModule() {
+ if (OwnedModule) {
+ assert(IRLinker.getModule() == &OwnedModule->getModule() &&
+ "The linker's module should be the same as the owned module");
+ delete OwnedModule;
+ OwnedModule = nullptr;
+ } else if (IRLinker.getModule())
+ IRLinker.deleteModule();
}
LTOCodeGenerator::~LTOCodeGenerator() {
+ destroyMergedModule();
+
delete TargetMach;
TargetMach = nullptr;
- IRLinker.deleteModule();
-
for (std::vector<char *>::iterator I = CodegenOptions.begin(),
E = CodegenOptions.end();
I != E; ++I)
@@ -108,7 +107,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeGlobalOptPass(R);
initializeConstantMergePass(R);
initializeDAHPass(R);
- initializeInstCombinerPass(R);
+ initializeInstructionCombiningPassPass(R);
initializeSimpleInlinerPass(R);
initializePruneEHPass(R);
initializeGlobalDCEPass(R);
@@ -140,6 +139,22 @@ bool LTOCodeGenerator::addModule(LTOModule *mod) {
return !ret;
}
+void LTOCodeGenerator::setModule(LTOModule *Mod) {
+ assert(&Mod->getModule().getContext() == &Context &&
+ "Expected module in same context");
+
+ // Delete the old merged module.
+ destroyMergedModule();
+ AsmUndefinedRefs.clear();
+
+ OwnedModule = Mod;
+ IRLinker.setModule(&Mod->getModule());
+
+ const std::vector<const char*> &Undefs = Mod->getAsmUndefinedRefs();
+ for (int I = 0, E = Undefs.size(); I != E; ++I)
+ AsmUndefinedRefs[Undefs[I]] = 1;
+}
+
void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
Options = options;
}
@@ -187,7 +202,7 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
}
// write bitcode to it
- WriteBitcodeToFile(IRLinker.getModule(), Out.os());
+ WriteBitcodeToFile(IRLinker.getModule(), Out.os(), ShouldEmbedUselists);
Out.os().close();
if (Out.os().has_error()) {
@@ -201,12 +216,8 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
return true;
}
-bool LTOCodeGenerator::compile_to_file(const char** name,
- bool disableOpt,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string& errMsg) {
+bool LTOCodeGenerator::compileOptimizedToFile(const char **name,
+ std::string &errMsg) {
// make unique temp .o file to put generated object file
SmallString<128> Filename;
int FD;
@@ -220,9 +231,7 @@ bool LTOCodeGenerator::compile_to_file(const char** name,
// generate object file
tool_output_file objFile(Filename.c_str(), FD);
- bool genResult =
- generateObjectFile(objFile.os(), disableOpt, disableInline,
- disableGVNLoadPRE, disableVectorization, errMsg);
+ bool genResult = compileOptimized(objFile.os(), errMsg);
objFile.os().close();
if (objFile.os().has_error()) {
objFile.os().clear_error();
@@ -241,15 +250,10 @@ bool LTOCodeGenerator::compile_to_file(const char** name,
return true;
}
-const void* LTOCodeGenerator::compile(size_t* length,
- bool disableOpt,
- bool disableInline,
- bool disableGVNLoadPRE,
- bool disableVectorization,
- std::string& errMsg) {
+const void *LTOCodeGenerator::compileOptimized(size_t *length,
+ std::string &errMsg) {
const char *name;
- if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE,
- disableVectorization, errMsg))
+ if (!compileOptimizedToFile(&name, errMsg))
return nullptr;
// read .o file into memory buffer
@@ -272,6 +276,31 @@ const void* LTOCodeGenerator::compile(size_t* length,
return NativeObjectFile->getBufferStart();
}
+
+bool LTOCodeGenerator::compile_to_file(const char **name,
+ bool disableInline,
+ bool disableGVNLoadPRE,
+ bool disableVectorization,
+ std::string &errMsg) {
+ if (!optimize(disableInline, disableGVNLoadPRE,
+ disableVectorization, errMsg))
+ return false;
+
+ return compileOptimizedToFile(name, errMsg);
+}
+
+const void* LTOCodeGenerator::compile(size_t *length,
+ bool disableInline,
+ bool disableGVNLoadPRE,
+ bool disableVectorization,
+ std::string &errMsg) {
+ if (!optimize(disableInline, disableGVNLoadPRE,
+ disableVectorization, errMsg))
+ return nullptr;
+
+ return compileOptimized(length, errMsg);
+}
+
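// Usage sketch of the split API introduced here (illustrative, error
// handling elided): run the IPO pipeline once, then emit code.
//   std::string Err;
//   if (CG.optimize(/*disableInline=*/false, /*disableGVNLoadPRE=*/false,
//                   /*disableVectorization=*/false, Err)) {
//     size_t Len;
//     const void *Obj = CG.compileOptimized(&Len, Err);
//   }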
bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
if (TargetMach)
return true;
@@ -319,9 +348,25 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "cyclone";
}
+ CodeGenOpt::Level CGOptLevel;
+ switch (OptLevel) {
+ case 0:
+ CGOptLevel = CodeGenOpt::None;
+ break;
+ case 1:
+ CGOptLevel = CodeGenOpt::Less;
+ break;
+ case 2:
+ CGOptLevel = CodeGenOpt::Default;
+ break;
+ case 3:
+ CGOptLevel = CodeGenOpt::Aggressive;
+ break;
+ }
+
TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
RelocModel, CodeModel::Default,
- CodeGenOpt::Aggressive);
+ CGOptLevel);
return true;
}
@@ -368,10 +413,13 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
UsedValues.insert(GV);
}
+// Collect names of runtime library functions. User-defined functions with the
+// same names are added to llvm.compiler.used to prevent them from being
+// deleted by optimizations.
static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
const TargetLibraryInfo& TLI,
- const TargetLowering *Lowering)
-{
+ const Module &Mod,
+ const TargetMachine &TM) {
// TargetLibraryInfo has info on C runtime library calls on the current
// target.
for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs);
@@ -381,14 +429,21 @@ static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
Libcalls.push_back(TLI.getName(F));
}
- // TargetLowering has info on library calls that CodeGen expects to be
- // available, both from the C runtime and compiler-rt.
- if (Lowering)
- for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL);
- I != E; ++I)
- if (const char *Name
- = Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I)))
- Libcalls.push_back(Name);
+ SmallPtrSet<const TargetLowering *, 1> TLSet;
+
+ for (const Function &F : Mod) {
+ const TargetLowering *Lowering =
+ TM.getSubtargetImpl(F)->getTargetLowering();
+
+ if (Lowering && TLSet.insert(Lowering).second)
+ // TargetLowering has info on library calls that CodeGen expects to be
+ // available, both from the C runtime and compiler-rt.
+ for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL);
+ I != E; ++I)
+ if (const char *Name =
+ Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I)))
+ Libcalls.push_back(Name);
+ }
array_pod_sort(Libcalls.begin(), Libcalls.end());
Libcalls.erase(std::unique(Libcalls.begin(), Libcalls.end()),
@@ -396,23 +451,23 @@ static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
}
void LTOCodeGenerator::applyScopeRestrictions() {
- if (ScopeRestrictionsDone)
+ if (ScopeRestrictionsDone || !ShouldInternalize)
return;
Module *mergedModule = IRLinker.getModule();
// Start off with a verification pass.
- PassManager passes;
+ legacy::PassManager passes;
passes.add(createVerifierPass());
- passes.add(createDebugInfoVerifierPass());
// mark which symbols can not be internalized
- Mangler Mangler(TargetMach->getSubtargetImpl()->getDataLayout());
+ Mangler Mangler(TargetMach->getDataLayout());
std::vector<const char*> MustPreserveList;
SmallPtrSet<GlobalValue*, 8> AsmUsed;
std::vector<StringRef> Libcalls;
- TargetLibraryInfo TLI(Triple(TargetMach->getTargetTriple()));
- accumulateAndSortLibcalls(
- Libcalls, TLI, TargetMach->getSubtargetImpl()->getTargetLowering());
+ TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple()));
+ TargetLibraryInfo TLI(TLII);
+
+ accumulateAndSortLibcalls(Libcalls, TLI, *mergedModule, *TargetMach);
for (Module::iterator f = mergedModule->begin(),
e = mergedModule->end(); f != e; ++f)
@@ -457,12 +512,10 @@ void LTOCodeGenerator::applyScopeRestrictions() {
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
- bool DisableOpt,
- bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization,
- std::string &errMsg) {
+bool LTOCodeGenerator::optimize(bool DisableInline,
+ bool DisableGVNLoadPRE,
+ bool DisableVectorization,
+ std::string &errMsg) {
if (!this->determineTarget(errMsg))
return false;
@@ -472,10 +525,13 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
this->applyScopeRestrictions();
// Instantiate the pass manager to organize the passes.
- PassManager passes;
+ legacy::PassManager passes;
// Add an appropriate DataLayout instance for this module...
- mergedModule->setDataLayout(TargetMach->getSubtargetImpl()->getDataLayout());
+ mergedModule->setDataLayout(*TargetMach->getDataLayout());
+
+ passes.add(
+ createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis()));
Triple TargetTriple(TargetMach->getTargetTriple());
PassManagerBuilder PMB;
@@ -484,33 +540,38 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
PMB.SLPVectorize = !DisableVectorization;
if (!DisableInline)
PMB.Inliner = createFunctionInliningPass();
- PMB.LibraryInfo = new TargetLibraryInfo(TargetTriple);
- if (DisableOpt)
- PMB.OptLevel = 0;
+ PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
+ PMB.OptLevel = OptLevel;
PMB.VerifyInput = true;
PMB.VerifyOutput = true;
- PMB.populateLTOPassManager(passes, TargetMach);
+ PMB.populateLTOPassManager(passes);
+
+ // Run our queue of passes all at once now, efficiently.
+ passes.run(*mergedModule);
+
+ return true;
+}
- PassManager codeGenPasses;
+bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out,
+ std::string &errMsg) {
+ if (!this->determineTarget(errMsg))
+ return false;
- codeGenPasses.add(new DataLayoutPass());
+ Module *mergedModule = IRLinker.getModule();
- formatted_raw_ostream Out(out);
+ legacy::PassManager codeGenPasses;
// If the bitcode files contain ARC code and were compiled with optimization,
// the ObjCARCContractPass must be run, so do it unconditionally here.
codeGenPasses.add(createObjCARCContractPass());
- if (TargetMach->addPassesToEmitFile(codeGenPasses, Out,
+ if (TargetMach->addPassesToEmitFile(codeGenPasses, out,
TargetMachine::CGFT_ObjectFile)) {
errMsg = "target file type not supported";
return false;
}
- // Run our queue of passes all at once now, efficiently.
- passes.run(*mergedModule);
-
// Run the code generator, and write assembly file
codeGenPasses.run(*mergedModule);
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 8b4a2f4..5cdbca6 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -179,7 +179,8 @@ static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer,
std::unique_ptr<MemoryBuffer> LightweightBuf =
MemoryBuffer::getMemBuffer(*MBOrErr, false);
ErrorOr<Module *> M = getLazyBitcodeModule(std::move(LightweightBuf), Context,
- DiagnosticHandler);
+ DiagnosticHandler,
+ /*ShouldLazyLoadMetadata=*/true);
if (!M)
return nullptr;
return *M;
@@ -229,7 +230,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
options);
- M->setDataLayout(target->getSubtargetImpl()->getDataLayout());
+ M->setDataLayout(*target->getDataLayout());
std::unique_ptr<object::IRObjectFile> IRObj(
new object::IRObjectFile(Buffer, std::move(M)));
@@ -266,7 +267,7 @@ LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
Constant *cn = gvn->getInitializer();
if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
if (ca->isCString()) {
- name = ".objc_class_name_" + ca->getAsCString().str();
+ name = (".objc_class_name_" + ca->getAsCString()).str();
return true;
}
}
@@ -649,10 +650,8 @@ void LTOModule::parseMetadata() {
// here.
StringRef Op =
_linkeropt_strings.insert(MDOption->getString()).first->first();
- StringRef DepLibName = _target->getSubtargetImpl()
- ->getTargetLowering()
- ->getObjFileLowering()
- .getDepLibFromLinkerOpt(Op);
+ StringRef DepLibName =
+ _target->getObjFileLowering()->getDepLibFromLinkerOpt(Op);
if (!DepLibName.empty())
_deplibs.push_back(DepLibName.data());
else if (!Op.empty())
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index d5170ad..1b7a331 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -225,6 +226,7 @@ void TypeMapTy::linkDefinedTypeBodies() {
Elements[I] = get(SrcSTy->getElementType(I));
DstSTy->setBody(Elements, SrcSTy->isPacked());
+ DstStructTypesSet.switchToNonOpaque(DstSTy);
}
SrcDefinitionsToResolve.clear();
DstResolvedOpaqueTypes.clear();
@@ -422,12 +424,17 @@ class ModuleLinker {
DiagnosticHandlerFunction DiagnosticHandler;
+ /// For symbol clashes, prefer those from Src.
+ bool OverrideFromSrc;
+
public:
ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
- DiagnosticHandlerFunction DiagnosticHandler)
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool OverrideFromSrc)
: DstM(dstM), SrcM(srcM), TypeMap(Set),
ValMaterializer(TypeMap, DstM, LazilyLinkGlobalValues),
- DiagnosticHandler(DiagnosticHandler) {}
+ DiagnosticHandler(DiagnosticHandler), OverrideFromSrc(OverrideFromSrc) {
+ }
bool run();
@@ -575,8 +582,7 @@ static GlobalAlias *copyGlobalAliasProto(TypeMapTy &TypeMap, Module &DstM,
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
- return GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- SGA->getLinkage(), SGA->getName(), &DstM);
+ return GlobalAlias::create(PTy, SGA->getLinkage(), SGA->getName(), &DstM);
}
static GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, Module &DstM,
@@ -671,17 +677,12 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
getComdatLeader(SrcM, ComdatName, SrcGV))
return true;
- const DataLayout *DstDL = DstM->getDataLayout();
- const DataLayout *SrcDL = SrcM->getDataLayout();
- if (!DstDL || !SrcDL) {
- return emitError(
- "Linking COMDATs named '" + ComdatName +
- "': can't do size dependent selection without DataLayout!");
- }
+ const DataLayout &DstDL = DstM->getDataLayout();
+ const DataLayout &SrcDL = SrcM->getDataLayout();
uint64_t DstSize =
- DstDL->getTypeAllocSize(DstGV->getType()->getPointerElementType());
+ DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType());
uint64_t SrcSize =
- SrcDL->getTypeAllocSize(SrcGV->getType()->getPointerElementType());
+ SrcDL.getTypeAllocSize(SrcGV->getType()->getPointerElementType());
if (Result == Comdat::SelectionKind::ExactMatch) {
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
@@ -728,6 +729,12 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC,
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
const GlobalValue &Dest,
const GlobalValue &Src) {
+ // Should we unconditionally use the Src?
+ if (OverrideFromSrc) {
+ LinkFromSrc = true;
+ return false;
+ }
+
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage()) {
LinkFromSrc = true;
@@ -767,9 +774,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
return false;
}
- // FIXME: Make datalayout mandatory and just use getDataLayout().
- DataLayout DL(Dest.getParent());
-
+ const DataLayout &DL = Dest.getParent()->getDataLayout();
uint64_t DestSize = DL.getTypeAllocSize(Dest.getType()->getElementType());
uint64_t SrcSize = DL.getTypeAllocSize(Src.getType()->getElementType());
LinkFromSrc = SrcSize > DestSize;
@@ -1076,8 +1081,9 @@ bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) {
} else {
// If the GV is to be lazily linked, don't create it just yet.
// The ValueMaterializerTy will deal with creating it if it's used.
- if (!DGV && (SGV->hasLocalLinkage() || SGV->hasLinkOnceLinkage() ||
- SGV->hasAvailableExternallyLinkage())) {
+ if (!DGV && !OverrideFromSrc &&
+ (SGV->hasLocalLinkage() || SGV->hasLinkOnceLinkage() ||
+ SGV->hasAvailableExternallyLinkage())) {
DoNotLinkFromSource.insert(SGV);
return false;
}
@@ -1198,6 +1204,13 @@ bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) {
++DI;
}
+ // Copy over the metadata attachments.
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ Src.getAllMetadata(MDs);
+ for (const auto &I : MDs)
+ Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_None, &TypeMap,
+ &ValMaterializer));
+
// Splice the body of the source function into the dest function.
Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList());
@@ -1214,7 +1227,7 @@ bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) {
for (Argument &Arg : Src.args())
ValueMap.erase(&Arg);
- Src.Dematerialize();
+ Src.dematerialize();
return false;
}
@@ -1255,9 +1268,10 @@ void ModuleLinker::linkNamedMDNodes() {
/// Drop DISubprograms that have been superseded.
///
-/// FIXME: this creates an asymmetric result: we strip losing subprograms from
-/// DstM, but leave losing subprograms in SrcM. Instead we should also strip
-/// losers from SrcM, but this requires extra plumbing in MapMetadata.
+/// FIXME: this creates an asymmetric result: we strip functions from losing
+/// subprograms in DstM, but leave losing subprograms in SrcM.
+/// TODO: Remove this logic once the backend can correctly determine canonical
+/// subprograms.
void ModuleLinker::stripReplacedSubprograms() {
// Avoid quadratic runtime by returning early when there's nothing to do.
if (OverridingFunctions.empty())
@@ -1267,31 +1281,23 @@ void ModuleLinker::stripReplacedSubprograms() {
auto Functions = std::move(OverridingFunctions);
OverridingFunctions.clear();
- // Drop subprograms whose functions have been overridden by the new compile
- // unit.
+ // Drop functions from subprograms if they've been overridden by the new
+ // compile unit.
NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu");
if (!CompileUnits)
return;
for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
- DICompileUnit CU(CompileUnits->getOperand(I));
+ auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
assert(CU && "Expected valid compile unit");
- DITypedArray<DISubprogram> SPs(CU.getSubprograms());
- assert(SPs && "Expected valid subprogram array");
-
- SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs.getNumElements());
- for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) {
- DISubprogram SP = SPs.getElement(S);
- if (SP && SP.getFunction() && Functions.count(SP.getFunction()))
+ for (DISubprogram *SP : CU->getSubprograms()) {
+ if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction()))
continue;
- NewSPs.push_back(SP);
+ // Prevent DebugInfoFinder from tagging this as the canonical subprogram,
+ // since the canonical one is in the incoming module.
+ SP->replaceFunction(nullptr);
}
-
- // Redirect operand to the overriding subprogram.
- if (NewSPs.size() != SPs.getNumElements())
- CU.replaceSubprograms(DIArray(MDNode::get(DstM->getContext(), NewSPs)));
}
}
@@ -1416,10 +1422,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
SmallVector<Metadata *, 8> MDs;
MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
- for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i)
- MDs.push_back(DstValue->getOperand(i));
- for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i)
- MDs.push_back(SrcValue->getOperand(i));
+ MDs.append(DstValue->op_begin(), DstValue->op_end());
+ MDs.append(SrcValue->op_begin(), SrcValue->op_end());
replaceDstValue(MDNode::get(DstM->getContext(), MDs));
break;
@@ -1428,10 +1432,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
SmallSetVector<Metadata *, 16> Elts;
MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
- for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i)
- Elts.insert(DstValue->getOperand(i));
- for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i)
- Elts.insert(SrcValue->getOperand(i));
+ Elts.insert(DstValue->op_begin(), DstValue->op_end());
+ Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
replaceDstValue(MDNode::get(DstM->getContext(),
makeArrayRef(Elts.begin(), Elts.end())));
@@ -1457,35 +1459,59 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
return HasErr;
}
+// This function returns true if the triples match.
+static bool triplesMatch(const Triple &T0, const Triple &T1) {
+ // If vendor is apple, ignore the version number.
+ if (T0.getVendor() == Triple::Apple)
+ return T0.getArch() == T1.getArch() &&
+ T0.getSubArch() == T1.getSubArch() &&
+ T0.getVendor() == T1.getVendor() &&
+ T0.getOS() == T1.getOS();
+
+ return T0 == T1;
+}
+
+// This function returns the merged triple.
+static std::string mergeTriples(const Triple &SrcTriple,
+ const Triple &DstTriple) {
+ // If vendor is apple, pick the triple with the larger version number.
+ if (SrcTriple.getVendor() == Triple::Apple)
+ if (DstTriple.isOSVersionLT(SrcTriple))
+ return SrcTriple.str();
+
+ return DstTriple.str();
+}
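// Worked example (hypothetical triples) for the two helpers above:
//   Triple A("x86_64-apple-macosx10.9.0"), B("x86_64-apple-macosx10.10.0");
//   triplesMatch(A, B)  -> true: Apple version numbers are ignored
//   mergeTriples(A, B)  -> "x86_64-apple-macosx10.10.0": larger version wins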
+
bool ModuleLinker::run() {
assert(DstM && "Null destination module");
assert(SrcM && "Null source module");
// Inherit the target data from the source module if the destination module
// doesn't have one already.
- if (!DstM->getDataLayout() && SrcM->getDataLayout())
+ if (DstM->getDataLayout().isDefault())
DstM->setDataLayout(SrcM->getDataLayout());
- // Copy the target triple from the source to dest if the dest's is empty.
- if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
- DstM->setTargetTriple(SrcM->getTargetTriple());
-
- if (SrcM->getDataLayout() && DstM->getDataLayout() &&
- *SrcM->getDataLayout() != *DstM->getDataLayout()) {
+ if (SrcM->getDataLayout() != DstM->getDataLayout()) {
emitWarning("Linking two modules of different data layouts: '" +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getDataLayoutStr() + "' whereas '" +
DstM->getModuleIdentifier() + "' is '" +
DstM->getDataLayoutStr() + "'\n");
}
- if (!SrcM->getTargetTriple().empty() &&
- DstM->getTargetTriple() != SrcM->getTargetTriple()) {
+
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM->setTargetTriple(SrcM->getTargetTriple());
+
+ Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM->getTargetTriple());
+
+ if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
emitWarning("Linking two modules of different target triples: " +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getTargetTriple() + "' whereas '" +
DstM->getModuleIdentifier() + "' is '" +
DstM->getTargetTriple() + "'\n");
- }
+
+ DstM->setTargetTriple(mergeTriples(SrcTriple, DstTriple));
// Append the module inline asm string.
if (!SrcM->getModuleInlineAsm().empty()) {
@@ -1548,6 +1574,13 @@ bool ModuleLinker::run() {
MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer);
}
+ // Strip replaced subprograms before mapping any metadata -- so that we're
+ // not changing metadata from the source module (note that
+ // linkGlobalValueBody() eventually calls RemapInstruction() and therefore
+ // MapMetadata()) -- but after linking global value prototypes -- so that
+ // OverridingFunctions has been built.
+ stripReplacedSubprograms();
+
// Link in the function bodies that are defined in the source module into
// DstM.
for (Function &SF : *SrcM) {
@@ -1570,9 +1603,6 @@ bool ModuleLinker::run() {
linkGlobalValueBody(Src);
}
- // Strip replaced subprograms before linking together compile units.
- stripReplacedSubprograms();
-
// Remap all of the named MDNodes in Src into the DstM module. We do this
// after linking GlobalValues so that MDNodes that reference GlobalValues
// are properly remapped.
@@ -1662,6 +1692,14 @@ void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
NonOpaqueStructTypes.insert(Ty);
}
+void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
+ assert(!Ty->isOpaque());
+ NonOpaqueStructTypes.insert(Ty);
+ bool Removed = OpaqueStructTypes.erase(Ty);
+ (void)Removed;
+ assert(Removed);
+}
+
void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
assert(Ty->isOpaque());
OpaqueStructTypes.insert(Ty);
@@ -1718,10 +1756,16 @@ void Linker::deleteModule() {
Composite = nullptr;
}
-bool Linker::linkInModule(Module *Src) {
+bool Linker::linkInModule(Module *Src, bool OverrideSymbols) {
ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src,
- DiagnosticHandler);
- return TheLinker.run();
+ DiagnosticHandler, OverrideSymbols);
+ bool RetCode = TheLinker.run();
+ Composite->dropTriviallyDeadConstantArrays();
+ return RetCode;
+}
+
+void Linker::setModule(Module *Dst) {
+ init(Dst, DiagnosticHandler);
}
//===----------------------------------------------------------------------===//
@@ -1749,7 +1793,7 @@ bool Linker::LinkModules(Module *Dest, Module *Src) {
//===----------------------------------------------------------------------===//
LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
- LLVMLinkerMode Mode, char **OutMessages) {
+ LLVMLinkerMode Unused, char **OutMessages) {
Module *D = unwrap(Dest);
std::string Message;
raw_string_ostream Stream(Message);
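(Since the LLVMLinkerMode argument is now ignored, a hypothetical caller of this C API -- names and the diagnostic handling here are illustrative only -- could look like:

    #include <cstdio>
    #include "llvm-c/Core.h"
    #include "llvm-c/Linker.h"

    static LLVMBool linkPair(LLVMModuleRef Dest, LLVMModuleRef Src) {
      char *Msg = nullptr;
      // The mode argument is ignored now; any enumerator is accepted.
      LLVMBool Failed = LLVMLinkModules(Dest, Src, LLVMLinkerDestroySource, &Msg);
      if (Failed && Msg) {
        fprintf(stderr, "link error: %s\n", Msg);
        LLVMDisposeMessage(Msg); // the message is allocated by the C API
      }
      return Failed;
    })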
diff --git a/contrib/llvm/lib/MC/ConstantPools.cpp b/contrib/llvm/lib/MC/ConstantPools.cpp
index a239a8f..a723aa8 100644
--- a/contrib/llvm/lib/MC/ConstantPools.cpp
+++ b/contrib/llvm/lib/MC/ConstantPools.cpp
@@ -37,7 +37,7 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
unsigned Size) {
- MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
+ MCSymbol *CPEntryLabel = Context.createTempSymbol();
Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
return MCSymbolRefExpr::Create(CPEntryLabel, Context);
@@ -48,8 +48,7 @@ bool ConstantPool::empty() { return Entries.empty(); }
//
// AssemblerConstantPools implementation
//
-ConstantPool *
-AssemblerConstantPools::getConstantPool(const MCSection *Section) {
+ConstantPool *AssemblerConstantPools::getConstantPool(MCSection *Section) {
ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
if (CP == ConstantPools.end())
return nullptr;
@@ -58,11 +57,11 @@ AssemblerConstantPools::getConstantPool(const MCSection *Section) {
}
ConstantPool &
-AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
+AssemblerConstantPools::getOrCreateConstantPool(MCSection *Section) {
return ConstantPools[Section];
}
-static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
+static void emitConstantPool(MCStreamer &Streamer, MCSection *Section,
ConstantPool &CP) {
if (!CP.empty()) {
Streamer.SwitchSection(Section);
@@ -75,7 +74,7 @@ void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
CPE = ConstantPools.end();
CPI != CPE; ++CPI) {
- const MCSection *Section = CPI->first;
+ MCSection *Section = CPI->first;
ConstantPool &CP = CPI->second;
emitConstantPool(Streamer, Section, CP);
@@ -83,7 +82,7 @@ void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
}
void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
- const MCSection *Section = Streamer.getCurrentSection().first;
+ MCSection *Section = Streamer.getCurrentSection().first;
if (ConstantPool *CP = getConstantPool(Section)) {
emitConstantPool(Streamer, Section, *CP);
}
@@ -92,7 +91,7 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
const MCExpr *Expr,
unsigned Size) {
- const MCSection *Section = Streamer.getCurrentSection().first;
+ MCSection *Section = Streamer.getCurrentSection().first;
return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
Size);
}
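(The pattern above -- one lazily created pool per section, each entry carrying a fresh temporary label -- can be summarized with a small sketch independent of the MC classes; names here are illustrative:

    #include <cstdint>
    #include <vector>

    struct PoolEntry {
      unsigned Label;  // stands in for the MCSymbol created per entry
      int64_t Value;   // stands in for the MCExpr
      unsigned Size;
    };

    class SimplePool {
      std::vector<PoolEntry> Entries;
      unsigned NextLabel = 0;
    public:
      // Mirrors ConstantPool::addEntry: record the value and return a label
      // the referencing instruction can use in place of the literal.
      unsigned addEntry(int64_t Value, unsigned Size) {
        Entries.push_back({NextLabel, Value, Size});
        return NextLabel++;
      }
      bool empty() const { return Entries.empty(); }
    };)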
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index e2439ab..18746d1 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -41,94 +41,53 @@ using namespace llvm;
#define DEBUG_TYPE "reloc-info"
namespace {
-class FragmentWriter {
- bool IsLittleEndian;
-
-public:
- FragmentWriter(bool IsLittleEndian);
- template <typename T> void write(MCDataFragment &F, T Val);
-};
typedef DenseMap<const MCSectionELF *, uint32_t> SectionIndexMapTy;
+class ELFObjectWriter;
+
class SymbolTableWriter {
- MCAssembler &Asm;
- FragmentWriter &FWriter;
+ ELFObjectWriter &EWriter;
bool Is64Bit;
- SectionIndexMapTy &SectionIndexMap;
- // The symbol .symtab fragment we are writting to.
- MCDataFragment *SymtabF;
-
- // .symtab_shndx fragment we are writting to.
- MCDataFragment *ShndxF;
+  // Indexes we are going to write to .symtab_shndx.
+ std::vector<uint32_t> ShndxIndexes;
// The number of symbols written so far.
unsigned NumWritten;
void createSymtabShndx();
- template <typename T> void write(MCDataFragment &F, T Value);
+ template <typename T> void write(T Value);
public:
- SymbolTableWriter(MCAssembler &Asm, FragmentWriter &FWriter, bool Is64Bit,
- SectionIndexMapTy &SectionIndexMap,
- MCDataFragment *SymtabF);
+ SymbolTableWriter(ELFObjectWriter &EWriter, bool Is64Bit);
void writeSymbol(uint32_t name, uint8_t info, uint64_t value, uint64_t size,
uint8_t other, uint32_t shndx, bool Reserved);
-};
-
-struct ELFRelocationEntry {
- uint64_t Offset; // Where is the relocation.
- const MCSymbol *Symbol; // The symbol to relocate with.
- unsigned Type; // The type of the relocation.
- uint64_t Addend; // The addend to use.
- ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type,
- uint64_t Addend)
- : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
+ ArrayRef<uint32_t> getShndxIndexes() const { return ShndxIndexes; }
};
class ELFObjectWriter : public MCObjectWriter {
- FragmentWriter FWriter;
-
- protected:
-
static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
- static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
- static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolData &Data,
+ static uint64_t SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbol &Symbol,
bool Used, bool Renamed);
- static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc);
- static bool IsELFMetaDataSection(const MCSectionData &SD);
- static uint64_t DataSectionSize(const MCSectionData &SD);
- static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
- static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
-
- void WriteDataSectionData(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSectionELF &Section);
-
- /*static bool isFixupKindX86RIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
- }*/
-
- /// ELFSymbolData - Helper struct for containing some precomputed
- /// information on symbols.
+ static bool isLocal(const MCSymbol &Symbol, bool isUsedInReloc);
+
+ /// Helper struct for containing some precomputed information on symbols.
struct ELFSymbolData {
- MCSymbolData *SymbolData;
+ const MCSymbol *Symbol;
uint64_t StringIndex;
uint32_t SectionIndex;
StringRef Name;
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
- unsigned LHSType = MCELF::GetType(*SymbolData);
- unsigned RHSType = MCELF::GetType(*RHS.SymbolData);
+ unsigned LHSType = MCELF::GetType(Symbol->getData());
+ unsigned RHSType = MCELF::GetType(RHS.Symbol->getData());
if (LHSType == ELF::STT_SECTION && RHSType != ELF::STT_SECTION)
return false;
if (LHSType != ELF::STT_SECTION && RHSType == ELF::STT_SECTION)
@@ -146,9 +105,8 @@ class ELFObjectWriter : public MCObjectWriter {
SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
DenseMap<const MCSymbol *, const MCSymbol *> Renames;
- llvm::DenseMap<const MCSectionData *, std::vector<ELFRelocationEntry>>
- Relocations;
- StringTableBuilder ShStrTabBuilder;
+ llvm::DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>>
+ Relocations;
/// @}
/// @name Symbol Table Data
@@ -170,9 +128,12 @@ class ELFObjectWriter : public MCObjectWriter {
unsigned StringTableIndex;
// This holds the .symtab section index.
unsigned SymbolTableIndex;
+ // This holds the .symtab_shndx section index.
+ unsigned SymtabShndxSectionIndex = 0;
- unsigned ShstrtabIndex;
-
+ // Sections in the order they are to be output in the section table.
+ std::vector<const MCSectionELF *> SectionTable;
+ unsigned addToSectionTable(const MCSectionELF *Sec);
// TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
@@ -185,12 +146,27 @@ class ELFObjectWriter : public MCObjectWriter {
}
public:
- ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS,
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS,
bool IsLittleEndian)
- : MCObjectWriter(_OS, IsLittleEndian), FWriter(IsLittleEndian),
- TargetObjectWriter(MOTW), NeedsGOT(false) {}
+ : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW),
+ NeedsGOT(false) {}
+
+ void reset() override {
+ UsedInReloc.clear();
+ WeakrefUsedInReloc.clear();
+ Renames.clear();
+ Relocations.clear();
+ StrTabBuilder.clear();
+ FileSymbolData.clear();
+ LocalSymbolData.clear();
+ ExternalSymbolData.clear();
+ UndefinedSymbolData.clear();
+ NeedsGOT = false;
+ SectionTable.clear();
+ MCObjectWriter::reset();
+ }
- virtual ~ELFObjectWriter();
+ ~ELFObjectWriter() override;
void WriteWord(uint64_t W) {
if (is64Bit())
@@ -199,27 +175,31 @@ class ELFObjectWriter : public MCObjectWriter {
Write32(W);
}
- template <typename T> void write(MCDataFragment &F, T Value) {
- FWriter.write(F, Value);
+ template <typename T> void write(T Val) {
+ if (IsLittleEndian)
+ support::endian::Writer<support::little>(OS).write(Val);
+ else
+ support::endian::Writer<support::big>(OS).write(Val);
}
- void WriteHeader(const MCAssembler &Asm,
- uint64_t SectionDataSize,
- unsigned NumberOfSections);
+ void writeHeader(const MCAssembler &Asm);
void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
const MCAsmLayout &Layout);
- void WriteSymbolTable(MCDataFragment *SymtabF, MCAssembler &Asm,
- const MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap);
+ // Start and end offset of each section
+ typedef std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>
+ SectionOffsetsTy;
+
+ void writeSymbolTable(MCContext &Ctx, const MCAsmLayout &Layout,
+ SectionOffsetsTy &SectionOffsets);
bool shouldRelocateWithSymbol(const MCAssembler &Asm,
const MCSymbolRefExpr *RefA,
- const MCSymbolData *SD, uint64_t C,
+ const MCSymbol *Sym, uint64_t C,
unsigned Type) const;
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
@@ -227,129 +207,74 @@ class ELFObjectWriter : public MCObjectWriter {
uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
const MCSymbol *S);
- // Map from a group section to the signature symbol
- typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
- // Map from a signature symbol to the group section
- typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
- // Map from a section to the section with the relocations
- typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
- // Map from a section to its offset
- typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+ // Map from a signature symbol to the group section index
+ typedef DenseMap<const MCSymbol *, unsigned> RevGroupMapTy;
/// Compute the symbol table data
///
/// \param Asm - The assembler.
/// \param SectionIndexMap - Maps a section to its index.
/// \param RevGroupMap - Maps a signature symbol to the group section index.
- /// \param NumRegularSections - Number of non-relocation sections.
void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap,
- unsigned NumRegularSections);
+ const RevGroupMapTy &RevGroupMap);
- void ComputeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
+ MCSectionELF *createRelocationSection(MCContext &Ctx,
+ const MCSectionELF &Sec);
- void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
- RelMapTy &RelMap);
-
- void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout);
-
- void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap);
-
- void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- // Create the sections that show up in the symbol table. Currently
- // those are the .note.GNU-stack section and the group sections.
- void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
+ const MCSectionELF *createStringTable(MCContext &Ctx);
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
- void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
- const MCAsmLayout &Layout,
+ void writeSectionHeader(const MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- const SectionOffsetMapTy &SectionOffsetMap);
+ const SectionOffsetsTy &SectionOffsets);
- void ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections);
+ void writeSectionData(const MCAssembler &Asm, MCSection &Sec,
+ const MCAsmLayout &Layout);
void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
- uint64_t Address, uint64_t Offset,
- uint64_t Size, uint32_t Link, uint32_t Info,
- uint64_t Alignment, uint64_t EntrySize);
+ uint64_t Address, uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info, uint64_t Alignment,
+ uint64_t EntrySize);
- void WriteRelocationsFragment(const MCAssembler &Asm,
- MCDataFragment *F,
- const MCSectionData *SD);
+ void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec);
- bool
- IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const override;
+ bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbol &SymA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const override;
+
+ bool isWeak(const MCSymbol &Sym) const override;
void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
- void WriteSection(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- uint32_t GroupSymbolIndex,
- uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ void writeSection(const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
const MCSectionELF &Section);
};
}
-FragmentWriter::FragmentWriter(bool IsLittleEndian)
- : IsLittleEndian(IsLittleEndian) {}
-
-template <typename T> void FragmentWriter::write(MCDataFragment &F, T Val) {
- if (IsLittleEndian)
- Val = support::endian::byte_swap<T, support::little>(Val);
- else
- Val = support::endian::byte_swap<T, support::big>(Val);
- const char *Start = (const char *)&Val;
- F.getContents().append(Start, Start + sizeof(T));
+unsigned ELFObjectWriter::addToSectionTable(const MCSectionELF *Sec) {
+ SectionTable.push_back(Sec);
+ StrTabBuilder.add(Sec->getSectionName());
+ return SectionTable.size();
}
void SymbolTableWriter::createSymtabShndx() {
- if (ShndxF)
+ if (!ShndxIndexes.empty())
return;
- MCContext &Ctx = Asm.getContext();
- const MCSectionELF *SymtabShndxSection =
- Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0,
- SectionKind::getReadOnly(), 4, "");
- MCSectionData *SymtabShndxSD =
- &Asm.getOrCreateSectionData(*SymtabShndxSection);
- SymtabShndxSD->setAlignment(4);
- ShndxF = new MCDataFragment(SymtabShndxSD);
- unsigned Index = SectionIndexMap.size() + 1;
- SectionIndexMap[SymtabShndxSection] = Index;
-
- for (unsigned I = 0; I < NumWritten; ++I)
- write(*ShndxF, uint32_t(0));
+ ShndxIndexes.resize(NumWritten);
}
-template <typename T>
-void SymbolTableWriter::write(MCDataFragment &F, T Value) {
- FWriter.write(F, Value);
+template <typename T> void SymbolTableWriter::write(T Value) {
+ EWriter.write(Value);
}
-SymbolTableWriter::SymbolTableWriter(MCAssembler &Asm, FragmentWriter &FWriter,
- bool Is64Bit,
- SectionIndexMapTy &SectionIndexMap,
- MCDataFragment *SymtabF)
- : Asm(Asm), FWriter(FWriter), Is64Bit(Is64Bit),
- SectionIndexMap(SectionIndexMap), SymtabF(SymtabF), ShndxF(nullptr),
- NumWritten(0) {}
+SymbolTableWriter::SymbolTableWriter(ELFObjectWriter &EWriter, bool Is64Bit)
+ : EWriter(EWriter), Is64Bit(Is64Bit), NumWritten(0) {}
void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value,
uint64_t size, uint8_t other,
@@ -359,31 +284,29 @@ void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value,
if (LargeIndex)
createSymtabShndx();
- if (ShndxF) {
+ if (!ShndxIndexes.empty()) {
if (LargeIndex)
- write(*ShndxF, shndx);
+ ShndxIndexes.push_back(shndx);
else
- write(*ShndxF, uint32_t(0));
+ ShndxIndexes.push_back(0);
}
uint16_t Index = LargeIndex ? uint16_t(ELF::SHN_XINDEX) : shndx;
- raw_svector_ostream OS(SymtabF->getContents());
-
if (Is64Bit) {
- write(*SymtabF, name); // st_name
- write(*SymtabF, info); // st_info
- write(*SymtabF, other); // st_other
- write(*SymtabF, Index); // st_shndx
- write(*SymtabF, value); // st_value
- write(*SymtabF, size); // st_size
+ write(name); // st_name
+ write(info); // st_info
+ write(other); // st_other
+ write(Index); // st_shndx
+ write(value); // st_value
+ write(size); // st_size
} else {
- write(*SymtabF, name); // st_name
- write(*SymtabF, uint32_t(value)); // st_value
- write(*SymtabF, uint32_t(size)); // st_size
- write(*SymtabF, info); // st_info
- write(*SymtabF, other); // st_other
- write(*SymtabF, Index); // st_shndx
+ write(name); // st_name
+ write(uint32_t(value)); // st_value
+ write(uint32_t(size)); // st_size
+ write(info); // st_info
+ write(other); // st_other
+ write(Index); // st_shndx
}
++NumWritten;
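(The two branches above exist because the on-disk record layouts differ; for reference, the standard field order each branch reproduces, as a sketch with fixed-width types:

    #include <cstdint>

    struct Elf64SymLayout {   // 24 bytes
      uint32_t st_name;
      uint8_t  st_info;
      uint8_t  st_other;
      uint16_t st_shndx;
      uint64_t st_value;
      uint64_t st_size;
    };

    struct Elf32SymLayout {   // 16 bytes
      uint32_t st_name;
      uint32_t st_value;
      uint32_t st_size;
      uint8_t  st_info;
      uint8_t  st_other;
      uint16_t st_shndx;
    };)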
@@ -421,9 +344,7 @@ ELFObjectWriter::~ELFObjectWriter()
{}
// Emit the ELF header.
-void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
- uint64_t SectionDataSize,
- unsigned NumberOfSections) {
+void ELFObjectWriter::writeHeader(const MCAssembler &Asm) {
// ELF Header
// ----------
//
@@ -432,10 +353,7 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
// emitWord method behaves differently for ELF32 and ELF64, writing
// 4 bytes in the former and 8 in the latter.
- Write8(0x7f); // e_ident[EI_MAG0]
- Write8('E'); // e_ident[EI_MAG1]
- Write8('L'); // e_ident[EI_MAG2]
- Write8('F'); // e_ident[EI_MAG3]
+ WriteBytes(ELF::ElfMagic); // e_ident[EI_MAG0] to e_ident[EI_MAG3]
Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
@@ -456,8 +374,7 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
Write32(ELF::EV_CURRENT); // e_version
WriteWord(0); // e_entry, no entry point in .o file
WriteWord(0); // e_phoff, no program header for .o
- WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
- sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+ WriteWord(0); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
Write32(Asm.getELFHeaderEFlags());
@@ -472,28 +389,24 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
// e_shnum = # of section header ents
- if (NumberOfSections >= ELF::SHN_LORESERVE)
- Write16(ELF::SHN_UNDEF);
- else
- Write16(NumberOfSections);
+ Write16(0);
// e_shstrndx = Section # of '.shstrtab'
- if (ShstrtabIndex >= ELF::SHN_LORESERVE)
- Write16(ELF::SHN_XINDEX);
- else
- Write16(ShstrtabIndex);
+ assert(StringTableIndex < ELF::SHN_LORESERVE);
+ Write16(StringTableIndex);
}
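(Note that e_shoff and e_shnum are now emitted as zero up front. With the writer constructed over a raw_pwrite_stream -- see the constructor change above -- such fields can be patched in place once the section header table has been laid out. A minimal sketch of that pattern, with an illustrative field offset:

    // Assumes host and target byte order agree; a real implementation would
    // byte-swap as needed before patching.
    void patchWord(raw_pwrite_stream &OS, uint64_t Value, uint64_t FieldOffset) {
      OS.pwrite(reinterpret_cast<const char *>(&Value), sizeof(Value),
                FieldOffset);
    })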
-uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
+uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym,
const MCAsmLayout &Layout) {
+ MCSymbolData &Data = Sym.getData();
if (Data.isCommon() && Data.isExternal())
return Data.getCommonAlignment();
uint64_t Res;
- if (!Layout.getSymbolOffset(&Data, Res))
+ if (!Layout.getSymbolOffset(Sym, Res))
return 0;
- if (Layout.getAssembler().isThumbFunc(&Data.getSymbol()))
+ if (Layout.getAssembler().isThumbFunc(&Sym))
Res |= 1;
return Res;
@@ -504,8 +417,8 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// The presence of symbol versions causes undefined symbols and
// versions declared with @@@ to be renamed.
- for (MCSymbolData &OriginalData : Asm.symbols()) {
- const MCSymbol &Alias = OriginalData.getSymbol();
+ for (const MCSymbol &Alias : Asm.symbols()) {
+ MCSymbolData &OriginalData = Alias.getData();
// Not an alias.
if (!Alias.isVariable())
@@ -576,12 +489,11 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) {
void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
const MCAsmLayout &Layout) {
- MCSymbolData &OrigData = *MSD.SymbolData;
+ MCSymbolData &OrigData = MSD.Symbol->getData();
assert((!OrigData.getFragment() ||
- (&OrigData.getFragment()->getParent()->getSection() ==
- &OrigData.getSymbol().getSection())) &&
+ (OrigData.getFragment()->getParent() == &MSD.Symbol->getSection())) &&
"The symbol's section doesn't match the fragment's symbol");
- const MCSymbol *Base = Layout.getBaseSymbol(OrigData.getSymbol());
+ const MCSymbol *Base = Layout.getBaseSymbol(*MSD.Symbol);
// This has to be in sync with when computeSymbolTable uses SHN_ABS or
// SHN_COMMON.
@@ -603,7 +515,7 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
uint8_t Other = MCELF::getOther(OrigData) << (ELF_STO_Shift - ELF_STV_Shift);
Other |= Visibility;
- uint64_t Value = SymbolValue(OrigData, Layout);
+ uint64_t Value = SymbolValue(*MSD.Symbol, Layout);
uint64_t Size = 0;
const MCExpr *ESize = OrigData.getSize();
@@ -612,7 +524,7 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
if (ESize) {
int64_t Res;
- if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ if (!ESize->evaluateKnownAbsolute(Res, Layout))
report_fatal_error("Size expression must be absolute.");
Size = Res;
}
@@ -622,16 +534,21 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
MSD.SectionIndex, IsReserved);
}
-void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
- MCAssembler &Asm,
+void ELFObjectWriter::writeSymbolTable(MCContext &Ctx,
const MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap) {
+ SectionOffsetsTy &SectionOffsets) {
+ const MCSectionELF *SymtabSection = SectionTable[SymbolTableIndex - 1];
+
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
- // FIXME: Make sure the start of the symbol table is aligned.
+ SymbolTableWriter Writer(*this, is64Bit());
- SymbolTableWriter Writer(Asm, FWriter, is64Bit(), SectionIndexMap, SymtabF);
+ uint64_t Padding =
+ OffsetToAlignment(OS.tell(), SymtabSection->getAlignment());
+ WriteZeros(Padding);
+
+ uint64_t SecStart = OS.tell();
// The first entry is the undefined symbol entry.
Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
@@ -651,7 +568,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = ExternalSymbolData[i];
- MCSymbolData &Data = *MSD.SymbolData;
+ MCSymbolData &Data = MSD.Symbol->getData();
assert(((Data.getFlags() & ELF_STB_Global) ||
(Data.getFlags() & ELF_STB_Weak)) &&
"External symbol requires STB_GLOBAL or STB_WEAK flag");
@@ -662,11 +579,29 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = UndefinedSymbolData[i];
- MCSymbolData &Data = *MSD.SymbolData;
+ MCSymbolData &Data = MSD.Symbol->getData();
WriteSymbol(Writer, MSD, Layout);
if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
LastLocalSymbolIndex++;
}
+
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);
+
+ ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
+ if (ShndxIndexes.empty()) {
+ assert(SymtabShndxSectionIndex == 0);
+ return;
+ }
+ assert(SymtabShndxSectionIndex != 0);
+
+ SecStart = OS.tell();
+ const MCSectionELF *SymtabShndxSection =
+ SectionTable[SymtabShndxSectionIndex - 1];
+ for (uint32_t Index : ShndxIndexes)
+ write(Index);
+ SecEnd = OS.tell();
+ SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
}
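(The .symtab_shndx table written above is the standard ELF escape for section indexes that do not fit in the 16-bit st_shndx field. Condensed into a sketch that mirrors writeSymbol and the loop above; names are illustrative:

    // If the real index fits, st_shndx carries it and the parallel
    // .symtab_shndx entry (when that table exists at all) is 0; otherwise
    // st_shndx holds SHN_XINDEX and the real index goes to .symtab_shndx.
    uint16_t encodeShndx(uint32_t RealIndex, bool HaveShndxTable,
                         std::vector<uint32_t> &ShndxIndexes) {
      bool Large = RealIndex >= ELF::SHN_LORESERVE;
      if (HaveShndxTable)
        ShndxIndexes.push_back(Large ? RealIndex : 0);
      return Large ? uint16_t(ELF::SHN_XINDEX) : uint16_t(RealIndex);
    })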
// It is always valid to create a relocation with a symbol. It is preferable
@@ -674,9 +609,10 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
// allows us to omit some local symbols from the symbol table.
bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
const MCSymbolRefExpr *RefA,
- const MCSymbolData *SD,
- uint64_t C,
+ const MCSymbol *Sym, uint64_t C,
unsigned Type) const {
+ MCSymbolData *SD = Sym ? &Sym->getData() : nullptr;
+
// A PCRel relocation to an absolute value has no symbol (or section). We
// represent that with a relocation to a null section.
if (!RefA)
@@ -711,8 +647,8 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
// An undefined symbol is not in any section, so the relocation has to point
// to the symbol itself.
- const MCSymbol &Sym = SD->getSymbol();
- if (Sym.isUndefined())
+ assert(Sym && "Expected a symbol");
+ if (Sym->isUndefined())
return true;
unsigned Binding = MCELF::GetBinding(*SD);
@@ -739,7 +675,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
// If we change such a relocation to use the section, the linker would think
// that it pointed to another string and subtracting 42 at runtime will
// produce the wrong value.
- auto &Sec = cast<MCSectionELF>(Sym.getSection());
+ auto &Sec = cast<MCSectionELF>(Sym->getSection());
unsigned Flags = Sec.getFlags();
if (Flags & ELF::SHF_MERGE) {
if (C != 0)
@@ -762,7 +698,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
// bit. With a symbol that is done by just having the symbol have that bit
// set, so we would lose the bit if we relocated with the section.
// FIXME: We could use the section but add the bit to the relocation value.
- if (Asm.isThumbFunc(&Sym))
+ if (Asm.isThumbFunc(Sym))
return true;
if (TargetObjectWriter->needsRelocateWithSymbol(*SD, Type))
@@ -789,14 +725,32 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
return nullptr;
}
-void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+// True if the assembler knows nothing about the final value of the symbol.
+// This doesn't cover the comdat issues, since in those cases the assembler
+// can at least know that all symbols in the section will move together.
+static bool isWeak(const MCSymbolData &D) {
+ if (MCELF::GetType(D) == ELF::STT_GNU_IFUNC)
+ return true;
+
+ switch (MCELF::GetBinding(D)) {
+ default:
+ llvm_unreachable("Unknown binding");
+ case ELF::STB_LOCAL:
+ return false;
+ case ELF::STB_GLOBAL:
+ return false;
+ case ELF::STB_WEAK:
+ case ELF::STB_GNU_UNIQUE:
+ return true;
+ }
+}
+
+void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- bool &IsPCRel,
- uint64_t &FixedValue) {
- const MCSectionData *FixupSection = Fragment->getParent();
+ const MCFixup &Fixup, MCValue Target,
+ bool &IsPCRel, uint64_t &FixedValue) {
+ const MCSectionELF &FixupSection = cast<MCSectionELF>(*Fragment->getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
@@ -812,26 +766,29 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// or (A + C - R). If B = R + K and the relocation is not pcrel, we can
// replace B to implement it: (A - R - K + C)
if (IsPCRel)
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"No relocation available to represent this relative expression");
const MCSymbol &SymB = RefB->getSymbol();
if (SymB.isUndefined())
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
Twine("symbol '") + SymB.getName() +
"' can not be undefined in a subtraction expression");
assert(!SymB.isAbsolute() && "Should have been folded");
const MCSection &SecB = SymB.getSection();
- if (&SecB != &FixupSection->getSection())
- Asm.getContext().FatalError(
+ if (&SecB != &FixupSection)
+ Asm.getContext().reportFatalError(
Fixup.getLoc(), "Cannot represent a difference across sections");
- const MCSymbolData &SymBD = Asm.getSymbolData(SymB);
- uint64_t SymBOffset = Layout.getSymbolOffset(&SymBD);
+ if (::isWeak(SymB.getData()))
+ Asm.getContext().reportFatalError(
+ Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
+
+ uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
uint64_t K = SymBOffset - FixupOffset;
IsPCRel = true;
C -= K;
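(A worked instance of this folding: with the fixup at R = 0x40, SymB at offset 0x10 of the same section, and an original request of A - B + 0x8, the values below are illustrative:

    uint64_t FixupOffset = 0x40, SymBOffset = 0x10; // R, and B's offset
    uint64_t C = 0x8;
    uint64_t K = SymBOffset - FixupOffset; // -0x30 (mod 2^64), so B = R + K
    C -= K;                                // C becomes 0x38
    // The writer can now emit the representable PC-relative form
    // (A + 0x38) - R instead of the unrepresentable A - B + 0x8.)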
@@ -840,12 +797,11 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// We either rejected the fixup or folded B into C at this point.
const MCSymbolRefExpr *RefA = Target.getSymA();
const MCSymbol *SymA = RefA ? &RefA->getSymbol() : nullptr;
- const MCSymbolData *SymAD = SymA ? &Asm.getSymbolData(*SymA) : nullptr;
unsigned Type = GetRelocType(Target, Fixup, IsPCRel);
- bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymAD, C, Type);
+ bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type);
if (!RelocateWithSymbol && SymA && !SymA->isUndefined())
- C += Layout.getSymbolOffset(SymAD);
+ C += Layout.getSymbolOffset(*SymA);
uint64_t Addend = 0;
if (hasRelocationAddend()) {
@@ -869,7 +825,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
ELFSec ? Asm.getContext().getOrCreateSectionSymbol(*ELFSec)
: nullptr;
ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend);
- Relocations[FixupSection].push_back(Rec);
+ Relocations[&FixupSection].push_back(Rec);
return;
}
@@ -883,7 +839,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
UsedInReloc.insert(SymA);
}
ELFRelocationEntry Rec(FixupOffset, SymA, Type, Addend);
- Relocations[FixupSection].push_back(Rec);
+ Relocations[&FixupSection].push_back(Rec);
return;
}
@@ -891,14 +847,14 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
uint64_t
ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
const MCSymbol *S) {
- const MCSymbolData &SD = Asm.getSymbolData(*S);
- return SD.getIndex();
+ assert(S->hasData());
+ return S->getIndex();
}
bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout,
- const MCSymbolData &Data, bool Used,
+ const MCSymbol &Symbol, bool Used,
bool Renamed) {
- const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbolData &Data = Symbol.getData();
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr)) {
@@ -932,11 +888,11 @@ bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout,
return true;
}
-bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) {
+bool ELFObjectWriter::isLocal(const MCSymbol &Symbol, bool isUsedInReloc) {
+ const MCSymbolData &Data = Symbol.getData();
if (Data.isExternal())
return false;
- const MCSymbol &Symbol = Data.getSymbol();
if (Symbol.isDefined())
return true;
@@ -946,69 +902,48 @@ bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) {
return true;
}
-void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap) {
- unsigned Index = 1;
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() != ELF::SHT_GROUP)
- continue;
- SectionIndexMap[&Section] = Index++;
- }
-
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() == ELF::SHT_GROUP ||
- Section.getType() == ELF::SHT_REL ||
- Section.getType() == ELF::SHT_RELA)
- continue;
- SectionIndexMap[&Section] = Index++;
- const MCSectionELF *RelSection = RelMap.lookup(&Section);
- if (RelSection)
- SectionIndexMap[RelSection] = Index++;
- }
-}
+void ELFObjectWriter::computeSymbolTable(
+ MCAssembler &Asm, const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const RevGroupMapTy &RevGroupMap) {
+ MCContext &Ctx = Asm.getContext();
+ // Symbol table
+ unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+ MCSectionELF *SymtabSection =
+ Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, EntrySize, "");
+ SymtabSection->setAlignment(is64Bit() ? 8 : 4);
+ SymbolTableIndex = addToSectionTable(SymtabSection);
-void
-ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const RevGroupMapTy &RevGroupMap,
- unsigned NumRegularSections) {
// FIXME: Is this the correct place to do this?
// FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
if (NeedsGOT) {
StringRef Name = "_GLOBAL_OFFSET_TABLE_";
- MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Asm.getContext().getOrCreateSymbol(Name);
MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
Data.setExternal(true);
MCELF::SetBinding(Data, ELF::STB_GLOBAL);
}
// Add the data for the symbols.
- for (MCSymbolData &SD : Asm.symbols()) {
- const MCSymbol &Symbol = SD.getSymbol();
+ bool HasLargeSectionIndex = false;
+ for (const MCSymbol &Symbol : Asm.symbols()) {
+ MCSymbolData &SD = Symbol.getData();
bool Used = UsedInReloc.count(&Symbol);
bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
bool isSignature = RevGroupMap.count(&Symbol);
- if (!isInSymtab(Layout, SD,
- Used || WeakrefUsed || isSignature,
+ if (!isInSymtab(Layout, Symbol, Used || WeakrefUsed || isSignature,
Renames.count(&Symbol)))
continue;
ELFSymbolData MSD;
- MSD.SymbolData = &SD;
+ MSD.Symbol = &Symbol;
const MCSymbol *BaseSymbol = Layout.getBaseSymbol(Symbol);
// Undefined symbols are global, but this is the first place we
// are able to set it.
- bool Local = isLocal(SD, Used);
+ bool Local = isLocal(Symbol, Used);
if (!Local && MCELF::GetBinding(SD) == ELF::STB_LOCAL) {
assert(BaseSymbol);
MCSymbolData &BaseData = Asm.getSymbolData(*BaseSymbol);
@@ -1022,10 +957,13 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
assert(!Local);
MSD.SectionIndex = ELF::SHN_COMMON;
} else if (BaseSymbol->isUndefined()) {
- if (isSignature && !Used)
- MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap.lookup(&Symbol));
- else
+ if (isSignature && !Used) {
+ MSD.SectionIndex = RevGroupMap.lookup(&Symbol);
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ HasLargeSectionIndex = true;
+ } else {
MSD.SectionIndex = ELF::SHN_UNDEF;
+ }
if (!Used && WeakrefUsed)
MCELF::SetBinding(SD, ELF::STB_WEAK);
} else {
@@ -1033,18 +971,47 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
static_cast<const MCSectionELF&>(BaseSymbol->getSection());
MSD.SectionIndex = SectionIndexMap.lookup(&Section);
assert(MSD.SectionIndex && "Invalid section index!");
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ HasLargeSectionIndex = true;
}
- // The @@@ in symbol version is replaced with @ in undefined symbols and
- // @@ in defined ones.
+ // The @@@ in symbol version is replaced with @ in undefined symbols and @@
+ // in defined ones.
+ //
+ // FIXME: All name handling should be done before we get to the writer,
+ // including dealing with GNU-style version suffixes. Fixing this isn't
+ // trivial.
+ //
+ // We thus have to be careful to not perform the symbol version replacement
+ // blindly:
+ //
+ // The ELF format is used on Windows by the MCJIT engine. Thus, on
+ // Windows, the ELFObjectWriter can encounter symbols mangled using the MS
+ // Visual Studio C++ name mangling scheme. Symbols mangled using the MSVC
+ // C++ name mangling can legally have "@@@" as a sub-string. In that case,
+    // the ELFObjectWriter should not interpret the "@@@" sub-string as
+ // specifying GNU-style symbol versioning. The ELFObjectWriter therefore
+ // checks for the MSVC C++ name mangling prefix which is either "?", "@?",
+ // "__imp_?" or "__imp_@?".
+ //
+ // It would have been interesting to perform the MS mangling prefix check
+    // only when the target triple is of the form *-pc-windows-elf. But it
+ // seems that this information is not easily accessible from the
+ // ELFObjectWriter.
StringRef Name = Symbol.getName();
- SmallString<32> Buf;
- size_t Pos = Name.find("@@@");
- if (Pos != StringRef::npos) {
- Buf += Name.substr(0, Pos);
- unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
- Buf += Name.substr(Pos + Skip);
- Name = Buf;
+ if (!Name.startswith("?") && !Name.startswith("@?") &&
+ !Name.startswith("__imp_?") && !Name.startswith("__imp_@?")) {
+ // This symbol isn't following the MSVC C++ name mangling convention. We
+ // can thus safely interpret the @@@ in symbol names as specifying symbol
+ // versioning.
+ SmallString<32> Buf;
+ size_t Pos = Name.find("@@@");
+ if (Pos != StringRef::npos) {
+ Buf += Name.substr(0, Pos);
+ unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+ Buf += Name.substr(Pos + Skip);
+ Name = Buf;
+ }
}
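(As a standalone illustration of the replacement just performed -- a hypothetical helper; the writer itself edits the StringRef in place via the SmallString buffer:

    static std::string applyVersionSuffix(StringRef Name, bool IsUndefined) {
      size_t Pos = Name.find("@@@");
      if (Pos == StringRef::npos)
        return Name.str();
      // Keep one '@' for undefined symbols and two for defined ones:
      // "f@@@VER" -> "f@VER" (undefined) or "f@@VER" (defined).
      unsigned Skip = IsUndefined ? 2 : 1;
      std::string Out = Name.substr(0, Pos).str();
      Out += Name.substr(Pos + Skip).str();
      return Out;
    })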
// Sections have their own string table
@@ -1059,6 +1026,13 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
ExternalSymbolData.push_back(MSD);
}
+ if (HasLargeSectionIndex) {
+ MCSectionELF *SymtabShndxSection =
+ Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
+ SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection);
+ SymtabShndxSection->setAlignment(4);
+ }
+
for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
StrTabBuilder.add(*i);
@@ -1068,7 +1042,7 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
FileSymbolData.push_back(StrTabBuilder.getOffset(*i));
for (ELFSymbolData &MSD : LocalSymbolData)
- MSD.StringIndex = MCELF::GetType(*MSD.SymbolData) == ELF::STT_SECTION
+ MSD.StringIndex = MCELF::GetType(MSD.Symbol->getData()) == ELF::STT_SECTION
? 0
: StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData &MSD : ExternalSymbolData)
@@ -1085,57 +1059,44 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
// symbols with non-local bindings.
unsigned Index = FileSymbolData.size() + 1;
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- LocalSymbolData[i].SymbolData->setIndex(Index++);
+ LocalSymbolData[i].Symbol->setIndex(Index++);
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ ExternalSymbolData[i].Symbol->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+ UndefinedSymbolData[i].Symbol->setIndex(Index++);
}
-void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
- MCAsmLayout &Layout,
- RelMapTy &RelMap) {
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
- if (Relocations[&SD].empty())
- continue;
-
- MCContext &Ctx = Asm.getContext();
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(SD.getSection());
+MCSectionELF *
+ELFObjectWriter::createRelocationSection(MCContext &Ctx,
+ const MCSectionELF &Sec) {
+ if (Relocations[&Sec].empty())
+ return nullptr;
- const StringRef SectionName = Section.getSectionName();
- std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
- RelaSectionName += SectionName;
+ const StringRef SectionName = Sec.getSectionName();
+ std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
- unsigned EntrySize;
- if (hasRelocationAddend())
- EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
- else
- EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+ unsigned EntrySize;
+ if (hasRelocationAddend())
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
- unsigned Flags = 0;
- StringRef Group = "";
- if (Section.getFlags() & ELF::SHF_GROUP) {
- Flags = ELF::SHF_GROUP;
- Group = Section.getGroup()->getName();
- }
+ unsigned Flags = 0;
+ if (Sec.getFlags() & ELF::SHF_GROUP)
+ Flags = ELF::SHF_GROUP;
- const MCSectionELF *RelaSection =
- Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
- ELF::SHT_RELA : ELF::SHT_REL, Flags,
- SectionKind::getReadOnly(),
- EntrySize, Group);
- RelMap[&Section] = RelaSection;
- Asm.getOrCreateSectionData(*RelaSection);
- }
+ MCSectionELF *RelaSection = Ctx.createELFRelSection(
+ RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL,
+ Flags, EntrySize, Sec.getGroup(), &Sec);
+ RelaSection->setAlignment(is64Bit() ? 8 : 4);
+ return RelaSection;
}
static SmallVector<char, 128>
-getUncompressedData(MCAsmLayout &Layout,
- MCSectionData::FragmentListType &Fragments) {
+getUncompressedData(const MCAsmLayout &Layout,
+ const MCSection::FragmentListType &Fragments) {
SmallVector<char, 128> UncompressedData;
for (const MCFragment &F : Fragments) {
const SmallVectorImpl<char> *Contents;
@@ -1164,7 +1125,7 @@ getUncompressedData(MCAsmLayout &Layout,
static bool
prependCompressionHeader(uint64_t Size,
SmallVectorImpl<char> &CompressedContents) {
- static const StringRef Magic = "ZLIB";
+ const StringRef Magic = "ZLIB";
if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size())
return false;
if (sys::IsLittleEndianHost)
@@ -1178,123 +1139,41 @@ prependCompressionHeader(uint64_t Size,
return true;
}
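(When prependCompressionHeader succeeds, the section contents begin with a fixed 12-byte header. A sketch of the layout it builds, zlib-gnu style with the uncompressed size stored big-endian:

    // [0..3]   the bytes 'Z' 'L' 'I' 'B'
    // [4..11]  uncompressed size as a big-endian uint64_t
    // [12.. ]  zlib-compressed payload
    void prependHeader(uint64_t Size, std::vector<char> &Contents) {
      char Hdr[12] = {'Z', 'L', 'I', 'B'};
      for (int I = 0; I < 8; ++I)               // serialize size big-endian
        Hdr[4 + I] = char((Size >> (8 * (7 - I))) & 0xff);
      Contents.insert(Contents.begin(), Hdr, Hdr + 12);
    })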
-// Return a single fragment containing the compressed contents of the whole
-// section. Null if the section was not compressed for any reason.
-static std::unique_ptr<MCDataFragment>
-getCompressedFragment(MCAsmLayout &Layout,
- MCSectionData::FragmentListType &Fragments) {
- std::unique_ptr<MCDataFragment> CompressedFragment(new MCDataFragment());
+void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
+ const MCAsmLayout &Layout) {
+ MCSectionELF &Section = static_cast<MCSectionELF &>(Sec);
+ StringRef SectionName = Section.getSectionName();
+
+ // Compressing debug_frame requires handling alignment fragments which is
+ // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow
+ // for writing to arbitrary buffers) for little benefit.
+ if (!Asm.getContext().getAsmInfo()->compressDebugSections() ||
+ !SectionName.startswith(".debug_") || SectionName == ".debug_frame") {
+ Asm.writeSectionData(&Section, Layout);
+ return;
+ }
- // Gather the uncompressed data from all the fragments, recording the
- // alignment fragment, if seen, and any fixups.
+ // Gather the uncompressed data from all the fragments.
+ const MCSection::FragmentListType &Fragments = Section.getFragmentList();
SmallVector<char, 128> UncompressedData =
getUncompressedData(Layout, Fragments);
- SmallVectorImpl<char> &CompressedContents = CompressedFragment->getContents();
-
+ SmallVector<char, 128> CompressedContents;
zlib::Status Success = zlib::compress(
StringRef(UncompressedData.data(), UncompressedData.size()),
CompressedContents);
- if (Success != zlib::StatusOK)
- return nullptr;
-
- if (!prependCompressionHeader(UncompressedData.size(), CompressedContents))
- return nullptr;
-
- return CompressedFragment;
-}
-
-typedef DenseMap<const MCSectionData *, std::vector<MCSymbolData *>>
-DefiningSymbolMap;
-
-static void UpdateSymbols(const MCAsmLayout &Layout,
- const std::vector<MCSymbolData *> &Symbols,
- MCFragment &NewFragment) {
- for (MCSymbolData *Sym : Symbols) {
- Sym->setOffset(Sym->getOffset() +
- Layout.getFragmentOffset(Sym->getFragment()));
- Sym->setFragment(&NewFragment);
+ if (Success != zlib::StatusOK) {
+ Asm.writeSectionData(&Section, Layout);
+ return;
}
-}
-
-static void CompressDebugSection(MCAssembler &Asm, MCAsmLayout &Layout,
- const DefiningSymbolMap &DefiningSymbols,
- const MCSectionELF &Section,
- MCSectionData &SD) {
- StringRef SectionName = Section.getSectionName();
- MCSectionData::FragmentListType &Fragments = SD.getFragmentList();
-
- std::unique_ptr<MCDataFragment> CompressedFragment =
- getCompressedFragment(Layout, Fragments);
- // Leave the section as-is if the fragments could not be compressed.
- if (!CompressedFragment)
+ if (!prependCompressionHeader(UncompressedData.size(), CompressedContents)) {
+ Asm.writeSectionData(&Section, Layout);
return;
-
- // Update the fragment+offsets of any symbols referring to fragments in this
- // section to refer to the new fragment.
- auto I = DefiningSymbols.find(&SD);
- if (I != DefiningSymbols.end())
- UpdateSymbols(Layout, I->second, *CompressedFragment);
-
- // Invalidate the layout for the whole section since it will have new and
- // different fragments now.
- Layout.invalidateFragmentsFrom(&Fragments.front());
- Fragments.clear();
-
- // Complete the initialization of the new fragment
- CompressedFragment->setParent(&SD);
- CompressedFragment->setLayoutOrder(0);
- Fragments.push_back(CompressedFragment.release());
-
- // Rename from .debug_* to .zdebug_*
+ }
Asm.getContext().renameELFSection(&Section,
(".z" + SectionName.drop_front(1)).str());
-}
-
-void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm,
- MCAsmLayout &Layout) {
- if (!Asm.getContext().getAsmInfo()->compressDebugSections())
- return;
-
- DefiningSymbolMap DefiningSymbols;
-
- for (MCSymbolData &SD : Asm.symbols())
- if (MCFragment *F = SD.getFragment())
- DefiningSymbols[F->getParent()].push_back(&SD);
-
- for (MCSectionData &SD : Asm) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(SD.getSection());
- StringRef SectionName = Section.getSectionName();
-
- // Compressing debug_frame requires handling alignment fragments which is
- // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow
- // for writing to arbitrary buffers) for little benefit.
- if (!SectionName.startswith(".debug_") || SectionName == ".debug_frame")
- continue;
-
- CompressDebugSection(Asm, Layout, DefiningSymbols, Section, SD);
- }
-}
-
-void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap) {
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(SD.getSection());
-
- const MCSectionELF *RelaSection = RelMap.lookup(&Section);
- if (!RelaSection)
- continue;
- MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
- RelaSD.setAlignment(is64Bit() ? 8 : 4);
-
- MCDataFragment *F = new MCDataFragment(&RelaSD);
- WriteRelocationsFragment(Asm, F, &*it);
- }
+ OS << CompressedContents;
}
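(The fallbacks above mean a section is only renamed to .zdebug_* when compression actually succeeds and pays for its header. The same decision, sketched against the plain zlib C API rather than llvm::zlib:

    #include <zlib.h>
    #include <vector>

    // Returns true and fills Out only when the compressed payload plus the
    // 12-byte ZLIB header would be smaller than the original data.
    bool tryCompress(const std::vector<char> &Data, std::vector<char> &Out) {
      uLongf DestLen = compressBound(Data.size());
      Out.resize(DestLen);
      if (compress(reinterpret_cast<Bytef *>(Out.data()), &DestLen,
                   reinterpret_cast<const Bytef *>(Data.data()),
                   Data.size()) != Z_OK)
        return false;
      Out.resize(DestLen);
      return DestLen + 12 < Data.size();
    })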
void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
@@ -1315,30 +1194,13 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
WriteWord(EntrySize); // sh_entsize
}
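(For orientation, WriteSecHdrEntry's arguments map one-to-one onto the standard section header record; the Elf64 layout as a reference sketch:

    #include <cstdint>

    struct Elf64ShdrLayout {
      uint32_t sh_name;      // offset of the name in the string table
      uint32_t sh_type;
      uint64_t sh_flags;
      uint64_t sh_addr;      // 0 throughout a relocatable object
      uint64_t sh_offset;
      uint64_t sh_size;
      uint32_t sh_link;
      uint32_t sh_info;
      uint64_t sh_addralign;
      uint64_t sh_entsize;
    };)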
-// ELF doesn't require relocations to be in any order. We sort by the r_offset,
-// just to match gnu as for easier comparison. The use type is an arbitrary way
-// of making the sort deterministic.
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
- const ELFRelocationEntry &A = *AP;
- const ELFRelocationEntry &B = *BP;
- if (A.Offset != B.Offset)
- return B.Offset - A.Offset;
- if (B.Type != A.Type)
- return A.Type - B.Type;
- llvm_unreachable("ELFRelocs might be unstable!");
-}
-
-static void sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs) {
- array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
-}
+void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
+ const MCSectionELF &Sec) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[&Sec];
-void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
- MCDataFragment *F,
- const MCSectionData *SD) {
- std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
-
- sortRelocs(Asm, Relocs);
+ // Sort the relocation entries. Most targets just sort by Offset, but some
+ // (e.g., MIPS) have additional constraints.
+ TargetObjectWriter->sortRelocs(Asm, Relocs);
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const ELFRelocationEntry &Entry = Relocs[e - i - 1];
@@ -1346,171 +1208,59 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
Entry.Symbol ? getSymbolIndexInSymbolTable(Asm, Entry.Symbol) : 0;
if (is64Bit()) {
- write(*F, Entry.Offset);
+ write(Entry.Offset);
if (TargetObjectWriter->isN64()) {
- write(*F, uint32_t(Index));
+ write(uint32_t(Index));
- write(*F, TargetObjectWriter->getRSsym(Entry.Type));
- write(*F, TargetObjectWriter->getRType3(Entry.Type));
- write(*F, TargetObjectWriter->getRType2(Entry.Type));
- write(*F, TargetObjectWriter->getRType(Entry.Type));
+ write(TargetObjectWriter->getRSsym(Entry.Type));
+ write(TargetObjectWriter->getRType3(Entry.Type));
+ write(TargetObjectWriter->getRType2(Entry.Type));
+ write(TargetObjectWriter->getRType(Entry.Type));
} else {
struct ELF::Elf64_Rela ERE64;
ERE64.setSymbolAndType(Index, Entry.Type);
- write(*F, ERE64.r_info);
+ write(ERE64.r_info);
}
if (hasRelocationAddend())
- write(*F, Entry.Addend);
+ write(Entry.Addend);
} else {
- write(*F, uint32_t(Entry.Offset));
+ write(uint32_t(Entry.Offset));
struct ELF::Elf32_Rela ERE32;
ERE32.setSymbolAndType(Index, Entry.Type);
- write(*F, ERE32.r_info);
+ write(ERE32.r_info);
if (hasRelocationAddend())
- write(*F, uint32_t(Entry.Addend));
+ write(uint32_t(Entry.Addend));
}
}
}
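(For a target without extra ordering constraints, a sortRelocs along the lines of the deleted cmpRel -- descending offset, pairing with the reverse write loop above; this mirrors the old generic behavior, not any particular backend -- might be sketched as:

    #include <algorithm>
    #include <vector>

    static void sortRelocsByOffset(std::vector<ELFRelocationEntry> &Relocs) {
      std::sort(Relocs.begin(), Relocs.end(),
                [](const ELFRelocationEntry &A, const ELFRelocationEntry &B) {
                  if (A.Offset != B.Offset)
                    return A.Offset > B.Offset; // descending by r_offset
                  return A.Type < B.Type;       // deterministic tiebreaker
                });
    })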
-void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
- MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap) {
- MCContext &Ctx = Asm.getContext();
- MCDataFragment *F;
-
- unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
-
- // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
- const MCSectionELF *ShstrtabSection =
- Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
- SectionKind::getReadOnly());
- MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
- ShstrtabSD.setAlignment(1);
-
- const MCSectionELF *SymtabSection =
- Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
- SectionKind::getReadOnly(),
- EntrySize, "");
- MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
- SymtabSD.setAlignment(is64Bit() ? 8 : 4);
-
- const MCSectionELF *StrtabSection;
- StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
- SectionKind::getReadOnly());
- MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
- StrtabSD.setAlignment(1);
-
- ComputeIndexMap(Asm, SectionIndexMap, RelMap);
-
- ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection);
- SymbolTableIndex = SectionIndexMap.lookup(SymtabSection);
- StringTableIndex = SectionIndexMap.lookup(StrtabSection);
-
- // Symbol table
- F = new MCDataFragment(&SymtabSD);
- WriteSymbolTable(F, Asm, Layout, SectionIndexMap);
-
- F = new MCDataFragment(&StrtabSD);
- F->getContents().append(StrTabBuilder.data().begin(),
- StrTabBuilder.data().end());
-
- F = new MCDataFragment(&ShstrtabSD);
-
- // Section header string table.
- for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(it->getSection());
- ShStrTabBuilder.add(Section.getSectionName());
- }
- ShStrTabBuilder.finalize(StringTableBuilder::ELF);
- F->getContents().append(ShStrTabBuilder.data().begin(),
- ShStrTabBuilder.data().end());
-}
-
-void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
- MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap) {
- MCContext &Ctx = Asm.getContext();
-
- // Build the groups
- for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
- it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(it->getSection());
- if (!(Section.getFlags() & ELF::SHF_GROUP))
- continue;
-
- const MCSymbol *SignatureSymbol = Section.getGroup();
- Asm.getOrCreateSymbolData(*SignatureSymbol);
- const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
- if (!Group) {
- Group = Ctx.CreateELFGroupSection();
- MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
- Data.setAlignment(4);
- MCDataFragment *F = new MCDataFragment(&Data);
- write(*F, uint32_t(ELF::GRP_COMDAT));
- }
- GroupMap[Group] = SignatureSymbol;
- }
-
- ComputeIndexMap(Asm, SectionIndexMap, RelMap);
-
- // Add sections to the groups
- for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
- it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(it->getSection());
- if (!(Section.getFlags() & ELF::SHF_GROUP))
- continue;
- const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
- MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
- // FIXME: we could use the previous fragment
- MCDataFragment *F = new MCDataFragment(&Data);
- uint32_t Index = SectionIndexMap.lookup(&Section);
- write(*F, Index);
- }
+const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) {
+ const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1];
+ OS << StrTabBuilder.data();
+ return StrtabSection;
}
-void ELFObjectWriter::WriteSection(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- uint32_t GroupSymbolIndex,
- uint64_t Offset, uint64_t Size,
- uint64_t Alignment,
- const MCSectionELF &Section) {
+void ELFObjectWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex, uint64_t Offset,
+ uint64_t Size, const MCSectionELF &Section) {
uint64_t sh_link = 0;
uint64_t sh_info = 0;
switch(Section.getType()) {
- case ELF::SHT_DYNAMIC:
- sh_link = ShStrTabBuilder.getOffset(Section.getSectionName());
- sh_info = 0;
+ default:
+ // Nothing to do.
break;
+ case ELF::SHT_DYNAMIC:
+ llvm_unreachable("SHT_DYNAMIC in a relocatable object");
+
case ELF::SHT_REL:
case ELF::SHT_RELA: {
- const MCSectionELF *SymtabSection;
- const MCSectionELF *InfoSection;
- SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
- 0,
- SectionKind::getReadOnly());
- sh_link = SectionIndexMap.lookup(SymtabSection);
+ sh_link = SymbolTableIndex;
assert(sh_link && ".symtab not found");
-
- // Remove ".rel" and ".rela" prefixes.
- unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
- StringRef SectionName = Section.getSectionName().substr(SecNameLen);
- StringRef GroupName =
- Section.getGroup() ? Section.getGroup()->getName() : "";
-
- InfoSection = Asm.getContext().getELFSection(SectionName, ELF::SHT_PROGBITS,
- 0, SectionKind::getReadOnly(),
- 0, GroupName);
+ const MCSectionELF *InfoSection = Section.getAssociatedSection();
sh_info = SectionIndexMap.lookup(InfoSection);
break;
}
@@ -1525,286 +1275,225 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
sh_link = SymbolTableIndex;
break;
- case ELF::SHT_PROGBITS:
- case ELF::SHT_STRTAB:
- case ELF::SHT_NOBITS:
- case ELF::SHT_NOTE:
- case ELF::SHT_NULL:
- case ELF::SHT_ARM_ATTRIBUTES:
- case ELF::SHT_INIT_ARRAY:
- case ELF::SHT_FINI_ARRAY:
- case ELF::SHT_PREINIT_ARRAY:
- case ELF::SHT_X86_64_UNWIND:
- case ELF::SHT_MIPS_REGINFO:
- case ELF::SHT_MIPS_OPTIONS:
- case ELF::SHT_MIPS_ABIFLAGS:
- // Nothing to do.
- break;
-
case ELF::SHT_GROUP:
sh_link = SymbolTableIndex;
sh_info = GroupSymbolIndex;
break;
-
- default:
- llvm_unreachable("FIXME: sh_type value not supported!");
}
if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
- Section.getType() == ELF::SHT_ARM_EXIDX) {
- StringRef SecName(Section.getSectionName());
- if (SecName == ".ARM.exidx") {
- sh_link = SectionIndexMap.lookup(
- Asm.getContext().getELFSection(".text",
- ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
- SectionKind::getText()));
- } else if (SecName.startswith(".ARM.exidx")) {
- StringRef GroupName =
- Section.getGroup() ? Section.getGroup()->getName() : "";
- sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection(
- SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, SectionKind::getText(), 0,
- GroupName));
- }
- }
+ Section.getType() == ELF::SHT_ARM_EXIDX)
+ sh_link = SectionIndexMap.lookup(Section.getAssociatedSection());
- WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()),
- Section.getType(),
- Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
- Alignment, Section.getEntrySize());
+ WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getSectionName()),
+ Section.getType(), Section.getFlags(), 0, Offset, Size,
+ sh_link, sh_info, Section.getAlignment(),
+ Section.getEntrySize());
}
-bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) {
- return SD.getOrdinal() == ~UINT32_C(0) &&
- !SD.getSection().isVirtualSection();
-}
-
-uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) {
- uint64_t Ret = 0;
- for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
- ++i) {
- const MCFragment &F = *i;
- assert(F.getKind() == MCFragment::FT_Data);
- Ret += cast<MCDataFragment>(F).getContents().size();
- }
- return Ret;
-}
-
-uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD) {
- if (IsELFMetaDataSection(SD))
- return DataSectionSize(SD);
- return Layout.getSectionFileSize(&SD);
-}
-
-uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
- const MCSectionData &SD) {
- if (IsELFMetaDataSection(SD))
- return DataSectionSize(SD);
- return Layout.getSectionAddressSize(&SD);
-}
-
-void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSectionELF &Section) {
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
-
- uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
- WriteZeros(Padding);
-
- if (IsELFMetaDataSection(SD)) {
- for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
- ++i) {
- const MCFragment &F = *i;
- assert(F.getKind() == MCFragment::FT_Data);
- WriteBytes(cast<MCDataFragment>(F).getContents());
- }
- } else {
- Asm.writeSectionData(&SD, Layout);
- }
-}
-
-void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm,
- const GroupMapTy &GroupMap,
- const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const SectionOffsetMapTy &SectionOffsetMap) {
- const unsigned NumSections = Asm.size() + 1;
-
- std::vector<const MCSectionELF*> Sections;
- Sections.resize(NumSections - 1);
-
- for (SectionIndexMapTy::const_iterator i=
- SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
- const std::pair<const MCSectionELF*, uint32_t> &p = *i;
- Sections[p.second - 1] = p.first;
- }
+void ELFObjectWriter::writeSectionHeader(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetsTy &SectionOffsets) {
+ const unsigned NumSections = SectionTable.size();
// Null section first.
uint64_t FirstSectionSize =
- NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
- uint32_t FirstSectionLink =
- ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
- WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
-
- for (unsigned i = 0; i < NumSections - 1; ++i) {
- const MCSectionELF &Section = *Sections[i];
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ (NumSections + 1) >= ELF::SHN_LORESERVE ? NumSections + 1 : 0;
+ WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, 0, 0, 0, 0);
+
+ for (const MCSectionELF *Section : SectionTable) {
uint32_t GroupSymbolIndex;
- if (Section.getType() != ELF::SHT_GROUP)
+ unsigned Type = Section->getType();
+ if (Type != ELF::SHT_GROUP)
GroupSymbolIndex = 0;
else
- GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm,
- GroupMap.lookup(&Section));
+ GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, Section->getGroup());
- uint64_t Size = GetSectionAddressSize(Layout, SD);
-
- WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
- SectionOffsetMap.lookup(&Section), Size,
- SD.getAlignment(), Section);
- }
-}
-
-void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections) {
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() == ELF::SHT_GROUP)
- Sections.push_back(&Section);
- }
-
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() != ELF::SHT_GROUP &&
- Section.getType() != ELF::SHT_REL &&
- Section.getType() != ELF::SHT_RELA)
- Sections.push_back(&Section);
- }
+ const std::pair<uint64_t, uint64_t> &Offsets =
+ SectionOffsets.find(Section)->second;
+ uint64_t Size;
+ if (Type == ELF::SHT_NOBITS)
+ Size = Layout.getSectionAddressSize(Section);
+ else
+ Size = Offsets.second - Offsets.first;
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF &>(it->getSection());
- if (Section.getType() == ELF::SHT_REL ||
- Section.getType() == ELF::SHT_RELA)
- Sections.push_back(&Section);
+ writeSection(SectionIndexMap, GroupSymbolIndex, Offsets.first, Size,
+ *Section);
}
}
void ELFObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
- GroupMapTy GroupMap;
+ MCContext &Ctx = Asm.getContext();
+ MCSectionELF *StrtabSection =
+ Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0);
+ StringTableIndex = addToSectionTable(StrtabSection);
+
RevGroupMapTy RevGroupMap;
SectionIndexMapTy SectionIndexMap;
- unsigned NumUserSections = Asm.size();
+ std::map<const MCSymbol *, std::vector<const MCSectionELF *>> GroupMembers;
- CompressDebugSections(Asm, const_cast<MCAsmLayout &>(Layout));
+ // Write out the ELF header ...
+ writeHeader(Asm);
- DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
- CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+ // ... then the sections ...
+ SectionOffsetsTy SectionOffsets;
+ std::vector<MCSectionELF *> Groups;
+ std::vector<MCSectionELF *> Relocations;
+ for (MCSection &Sec : Asm) {
+ MCSectionELF &Section = static_cast<MCSectionELF &>(Sec);
- const unsigned NumUserAndRelocSections = Asm.size();
- CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
- RevGroupMap, SectionIndexMap, RelMap);
- const unsigned AllSections = Asm.size();
- const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections;
+ uint64_t Padding = OffsetToAlignment(OS.tell(), Section.getAlignment());
+ WriteZeros(Padding);
- unsigned NumRegularSections = NumUserSections + NumIndexedSections;
+ // Remember the offset into the file for this section.
+ uint64_t SecStart = OS.tell();
- // Compute symbol table information.
- computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap,
- NumRegularSections);
+ const MCSymbol *SignatureSymbol = Section.getGroup();
+ writeSectionData(Asm, Section, Layout);
- WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[&Section] = std::make_pair(SecStart, SecEnd);
- CreateMetadataSections(const_cast<MCAssembler&>(Asm),
- const_cast<MCAsmLayout&>(Layout),
- SectionIndexMap,
- RelMap);
+ MCSectionELF *RelSection = createRelocationSection(Ctx, Section);
- uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
- uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
- sizeof(ELF::Elf32_Ehdr);
- uint64_t FileOff = HeaderSize;
+ if (SignatureSymbol) {
+ Asm.getOrCreateSymbolData(*SignatureSymbol);
+ unsigned &GroupIdx = RevGroupMap[SignatureSymbol];
+ if (!GroupIdx) {
+ MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol);
+ GroupIdx = addToSectionTable(Group);
+ Group->setAlignment(4);
+ Groups.push_back(Group);
+ }
+ GroupMembers[SignatureSymbol].push_back(&Section);
+ if (RelSection)
+ GroupMembers[SignatureSymbol].push_back(RelSection);
+ }
- std::vector<const MCSectionELF*> Sections;
- ComputeSectionOrder(Asm, Sections);
- unsigned NumSections = Sections.size();
- SectionOffsetMapTy SectionOffsetMap;
- for (unsigned i = 0; i < NumRegularSections + 1; ++i) {
- const MCSectionELF &Section = *Sections[i];
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ SectionIndexMap[&Section] = addToSectionTable(&Section);
+ if (RelSection) {
+ SectionIndexMap[RelSection] = addToSectionTable(RelSection);
+ Relocations.push_back(RelSection);
+ }
+ }
- FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+ for (MCSectionELF *Group : Groups) {
+ uint64_t Padding = OffsetToAlignment(OS.tell(), Group->getAlignment());
+ WriteZeros(Padding);
// Remember the offset into the file for this section.
- SectionOffsetMap[&Section] = FileOff;
+ uint64_t SecStart = OS.tell();
+
+ const MCSymbol *SignatureSymbol = Group->getGroup();
+ assert(SignatureSymbol);
+ write(uint32_t(ELF::GRP_COMDAT));
+ for (const MCSectionELF *Member : GroupMembers[SignatureSymbol]) {
+ uint32_t SecIndex = SectionIndexMap.lookup(Member);
+ write(SecIndex);
+ }
- // Get the size of the section in the output file (including padding).
- FileOff += GetSectionFileSize(Layout, SD);
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[Group] = std::make_pair(SecStart, SecEnd);
}
- FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
-
- const unsigned SectionHeaderOffset = FileOff - HeaderSize;
-
- uint64_t SectionHeaderEntrySize = is64Bit() ?
- sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr);
- FileOff += (NumSections + 1) * SectionHeaderEntrySize;
-
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) {
- const MCSectionELF &Section = *Sections[i];
- const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ // Compute symbol table information.
+ computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap);
- FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
+ for (MCSectionELF *RelSection : Relocations) {
+ uint64_t Padding = OffsetToAlignment(OS.tell(), RelSection->getAlignment());
+ WriteZeros(Padding);
// Remember the offset into the file for this section.
- SectionOffsetMap[&Section] = FileOff;
+ uint64_t SecStart = OS.tell();
+
+ writeRelocations(Asm, *RelSection->getAssociatedSection());
- // Get the size of the section in the output file (including padding).
- FileOff += GetSectionFileSize(Layout, SD);
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd);
}
- // Write out the ELF header ...
- WriteHeader(Asm, SectionHeaderOffset, NumSections + 1);
+ writeSymbolTable(Ctx, Layout, SectionOffsets);
- // ... then the regular sections ...
- // + because of .shstrtab
- for (unsigned i = 0; i < NumRegularSections + 1; ++i)
- WriteDataSectionData(Asm, Layout, *Sections[i]);
+ {
+ uint64_t SecStart = OS.tell();
+ const MCSectionELF *Sec = createStringTable(Ctx);
+ uint64_t SecEnd = OS.tell();
+ SectionOffsets[Sec] = std::make_pair(SecStart, SecEnd);
+ }
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
WriteZeros(Padding);
+ const unsigned SectionHeaderOffset = OS.tell();
+
// ... then the section header table ...
- WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
- SectionOffsetMap);
+ writeSectionHeader(Asm, Layout, SectionIndexMap, SectionOffsets);
+
+ uint16_t NumSections = (SectionTable.size() + 1 >= ELF::SHN_LORESERVE)
+ ? (uint16_t)ELF::SHN_UNDEF
+ : SectionTable.size() + 1;
+ if (sys::IsLittleEndianHost != IsLittleEndian)
+ sys::swapByteOrder(NumSections);
+ unsigned NumSectionsOffset;
+
+ if (is64Bit()) {
+ uint64_t Val = SectionHeaderOffset;
+ if (sys::IsLittleEndianHost != IsLittleEndian)
+ sys::swapByteOrder(Val);
+ OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf64_Ehdr, e_shoff));
+ NumSectionsOffset = offsetof(ELF::Elf64_Ehdr, e_shnum);
+ } else {
+ uint32_t Val = SectionHeaderOffset;
+ if (sys::IsLittleEndianHost != IsLittleEndian)
+ sys::swapByteOrder(Val);
+ OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val),
+ offsetof(ELF::Elf32_Ehdr, e_shoff));
+ NumSectionsOffset = offsetof(ELF::Elf32_Ehdr, e_shnum);
+ }
+ OS.pwrite(reinterpret_cast<char *>(&NumSections), sizeof(NumSections),
+ NumSectionsOffset);
+}
- // ... and then the remaining sections ...
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
- WriteDataSectionData(Asm, Layout, *Sections[i]);
+bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
+ bool InSet, bool IsPCRel) const {
+ if (IsPCRel) {
+ assert(!InSet);
+ if (::isWeak(SymA.getData()))
+ return false;
+ }
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
+ InSet, IsPCRel);
}
-bool
-ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
- if (DataA.getFlags() & ELF_STB_Weak || MCELF::GetType(DataA) == ELF::STT_GNU_IFUNC)
+bool ELFObjectWriter::isWeak(const MCSymbol &Sym) const {
+ const MCSymbolData &SD = Sym.getData();
+ if (::isWeak(SD))
+ return true;
+
+ // It is invalid to replace a reference to a global in a comdat
+ // with a reference to a local, since out-of-comdat references
+ // to a local are forbidden.
+ // We could try to return false for more cases, like the reference
+ // being in the same comdat or Sym being an alias to another global,
+ // but it is not clear if it is worth the effort.
+ if (MCELF::GetBinding(SD) != ELF::STB_GLOBAL)
return false;
- return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
- Asm, DataA, FB,InSet, IsPCRel);
+
+ if (!Sym.isInSection())
+ return false;
+
+ const auto &Sec = cast<MCSectionELF>(Sym.getSection());
+ return Sec.getGroup();
}
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
bool IsLittleEndian) {
return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 04b8042..b61f5b1 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -39,6 +39,7 @@ MCAsmInfo::MCAsmInfo() {
CommentString = "#";
LabelSuffix = ":";
UseAssignmentForEHBegin = false;
+ NeedsLocalForSize = false;
PrivateGlobalPrefix = "L";
PrivateLabelPrefix = PrivateGlobalPrefix;
LinkerPrivateGlobalPrefix = "";
@@ -68,6 +69,7 @@ MCAsmInfo::MCAsmInfo() {
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = true;
LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
+ HasFunctionAlignment = true;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
HasIdentDirective = false;
@@ -88,6 +90,7 @@ MCAsmInfo::MCAsmInfo() {
DwarfRegNumForCFI = false;
NeedsDwarfSectionOffsetDirective = false;
UseParensForSymbolVariant = false;
+ UseLogicalShr = true;
// FIXME: Clang's logic should be synced with the logic used to initialize
// this member and the two implementations should be merged.
@@ -129,7 +132,7 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
MCContext &Context = Streamer.getContext();
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context);
- MCSymbol *PCSym = Context.CreateTempSymbol();
+ MCSymbol *PCSym = Context.createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
return MCBinaryExpr::CreateSub(Res, PC, Context);
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index bb3f0d3..97fc76a 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -36,6 +36,10 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
NeedsDwarfSectionOffsetDirective = true;
UseIntegratedAssembler = true;
+
+ // FIXME: For now keep the previous behavior, AShr. Need to double-check
+ // other COFF-targeting assemblers and change this if necessary.
+ UseLogicalShr = false;
}
void MCAsmInfoMicrosoft::anchor() { }
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
index 04cc0ff..bb90ff2 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -16,7 +16,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
using namespace llvm;
bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
@@ -27,25 +26,14 @@ bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
// contain.
// Sections holding 2-byte strings require symbols in order to be atomized.
// There is no dedicated section for 4-byte strings.
- if (SMO.getKind().isMergeable1ByteCString())
+ if (SMO.getType() == MachO::S_CSTRING_LITERALS)
return false;
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_classname" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
- return false;
-
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_methname" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
- return false;
-
- if (SMO.getSegmentName() == "__TEXT" &&
- SMO.getSectionName() == "__objc_methtype" &&
- SMO.getType() == MachO::S_CSTRING_LITERALS)
+ if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring")
return false;
- if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring")
+ if (SMO.getSegmentName() == "__DATA" &&
+ SMO.getSectionName() == "__objc_classrefs")
return false;
switch (SMO.getType()) {
@@ -105,4 +93,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
UseIntegratedAssembler = true;
SetDirectiveSuppressesReloc = true;
+
+ // FIXME: For now, keep the previous behavior, AShr, matching the behavior
+ // of as(1) (both -q and -Q: resp. LLVM and gas v1.38).
+ // If/when this changes, the AArch64 Darwin special case can go away.
+ UseLogicalShr = false;
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoELF.cpp b/contrib/llvm/lib/MC/MCAsmInfoELF.cpp
index 2fe626e..2bff6e0 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -20,10 +20,8 @@ using namespace llvm;
void MCAsmInfoELF::anchor() { }
-const MCSection *
-MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
+MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
}
MCAsmInfoELF::MCAsmInfoELF() {
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 1fe0f63..cabe63b 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -36,11 +36,10 @@ using namespace llvm;
namespace {
-class MCAsmStreamer : public MCStreamer {
-protected:
+class MCAsmStreamer final : public MCStreamer {
+ std::unique_ptr<formatted_raw_ostream> OSOwner;
formatted_raw_ostream &OS;
const MCAsmInfo *MAI;
-private:
std::unique_ptr<MCInstPrinter> InstPrinter;
std::unique_ptr<MCCodeEmitter> Emitter;
std::unique_ptr<MCAsmBackend> AsmBackend;
@@ -57,16 +56,18 @@ private:
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
public:
- MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
+ MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend, bool showInst)
- : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
- InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
- CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) {
- if (InstPrinter && IsVerboseAsm)
- InstPrinter->setCommentStream(CommentStream);
+ : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner),
+ MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter),
+ AsmBackend(asmbackend), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm), ShowInst(showInst),
+ UseDwarfDirectory(useDwarfDirectory) {
+ assert(InstPrinter);
+ if (IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
}
inline void EmitEOL() {
@@ -114,8 +115,7 @@ public:
/// @name MCStreamer Interface
/// @{
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override;
+ void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
void EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override;
void EmitLabel(MCSymbol *Symbol) override;
@@ -150,11 +150,11 @@ public:
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override;
- void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) override;
+ void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment = 0) override;
void EmitBytes(StringRef Data) override;
@@ -267,7 +267,7 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
}
CommentStream.flush();
- StringRef Comments = CommentToEmit.str();
+ StringRef Comments = CommentToEmit;
assert(Comments.back() == '\n' &&
"Comment array not newline terminated");
@@ -297,7 +297,7 @@ void MCAsmStreamer::emitRawComment(const Twine &T, bool TabPrefix) {
EmitEOL();
}
-void MCAsmStreamer::ChangeSection(const MCSection *Section,
+void MCAsmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
Section->PrintSwitchToSection(*MAI, OS, Subsection);
@@ -542,7 +542,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
EmitEOL();
}
-void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
if (Symbol)
AssignSection(Symbol, Section);
@@ -565,7 +565,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
// .tbss sym, size, align
// This assumes that the symbol has already been mangled from the original,
// e.g. _a.
-void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
AssignSection(Symbol, Section);
@@ -946,7 +946,7 @@ void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
}
void MCAsmStreamer::EmitRegisterName(int64_t Register) {
- if (InstPrinter && !MAI->useDwarfRegNumForCFI()) {
+ if (!MAI->useDwarfRegNumForCFI()) {
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
unsigned LLVMRegister = MRI->getLLVMRegNum(Register, true);
InstPrinter->printRegName(OS, LLVMRegister);
@@ -1102,7 +1102,7 @@ void MCAsmStreamer::EmitWinEHHandlerData() {
// We only do this so the section switch that terminates the handler
// data block is visible.
WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo();
- if (const MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
+ if (MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
CurFrame->Function, getContext()))
SwitchSectionNoChange(XData);
@@ -1167,7 +1167,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
- Emitter->EncodeInstruction(Inst, VecOS, Fixups, STI);
+ Emitter->encodeInstruction(Inst, VecOS, Fixups, STI);
VecOS.flush();
// If we are showing fixups, create symbolic markers in the encoded
@@ -1256,15 +1256,12 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &S
// Show the MCInst if enabled.
if (ShowInst) {
- Inst.dump_pretty(GetCommentOS(), MAI, InstPrinter.get(), "\n ");
+ Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
- // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
- if (InstPrinter)
- InstPrinter->printInst(&Inst, OS, "");
- else
- Inst.print(OS, MAI);
+ InstPrinter->printInst(&Inst, OS, "", STI);
+
EmitEOL();
}
@@ -1314,10 +1311,10 @@ void MCAsmStreamer::FinishImpl() {
}
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
- formatted_raw_ostream &OS,
+ std::unique_ptr<formatted_raw_ostream> OS,
bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *IP, MCCodeEmitter *CE,
MCAsmBackend *MAB, bool ShowInst) {
- return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE,
- MAB, ShowInst);
+ return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm,
+ useDwarfDirectory, IP, CE, MAB, ShowInst);
}
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index e3c2443..868b0f1 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -69,19 +69,19 @@ MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
{
// Compute the section layout order. Virtual sections must go last.
for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (!it->getSection().isVirtualSection())
+ if (!it->isVirtualSection())
SectionOrder.push_back(&*it);
for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (it->getSection().isVirtualSection())
+ if (it->isVirtualSection())
SectionOrder.push_back(&*it);
}
bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
- const MCSectionData &SD = *F->getParent();
- const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+ const MCSection *Sec = F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(Sec);
if (!LastValid)
return false;
- assert(LastValid->getParent() == F->getParent());
+ assert(LastValid->getParent() == Sec);
return F->getLayoutOrder() <= LastValid->getLayoutOrder();
}
@@ -92,16 +92,14 @@ void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
// Otherwise, reset the last valid fragment to the previous fragment
// (if this is the first fragment, it will be NULL).
- const MCSectionData &SD = *F->getParent();
- LastValidFragment[&SD] = F->getPrevNode();
+ LastValidFragment[F->getParent()] = F->getPrevNode();
}
void MCAsmLayout::ensureValid(const MCFragment *F) const {
- MCSectionData &SD = *F->getParent();
-
- MCFragment *Cur = LastValidFragment[&SD];
+ MCSection *Sec = F->getParent();
+ MCFragment *Cur = LastValidFragment[Sec];
if (!Cur)
- Cur = &*SD.begin();
+ Cur = Sec->begin();
else
Cur = Cur->getNextNode();
@@ -120,41 +118,36 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
}
// Simple getSymbolOffset helper for the non-variable case.
-static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbolData &SD,
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
bool ReportError, uint64_t &Val) {
+ const MCSymbolData &SD = S.getData();
if (!SD.getFragment()) {
if (ReportError)
report_fatal_error("unable to evaluate offset to undefined symbol '" +
- SD.getSymbol().getName() + "'");
+ S.getName() + "'");
return false;
}
Val = Layout.getFragmentOffset(SD.getFragment()) + SD.getOffset();
return true;
}
-static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
- const MCSymbolData *SD, bool ReportError,
- uint64_t &Val) {
- const MCSymbol &S = SD->getSymbol();
-
+static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
+ bool ReportError, uint64_t &Val) {
if (!S.isVariable())
- return getLabelOffset(Layout, *SD, ReportError, Val);
+ return getLabelOffset(Layout, S, ReportError, Val);
// If SD is a variable, evaluate it.
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout, nullptr))
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
uint64_t Offset = Target.getConstant();
- const MCAssembler &Asm = Layout.getAssembler();
-
const MCSymbolRefExpr *A = Target.getSymA();
if (A) {
uint64_t ValA;
- if (!getLabelOffset(Layout, Asm.getSymbolData(A->getSymbol()), ReportError,
- ValA))
+ if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
return false;
Offset += ValA;
}
@@ -162,8 +155,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
const MCSymbolRefExpr *B = Target.getSymB();
if (B) {
uint64_t ValB;
- if (!getLabelOffset(Layout, Asm.getSymbolData(B->getSymbol()), ReportError,
- ValB))
+ if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
return false;
Offset -= ValB;
}
@@ -172,13 +164,13 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
return true;
}
-bool MCAsmLayout::getSymbolOffset(const MCSymbolData *SD, uint64_t &Val) const {
- return getSymbolOffsetImpl(*this, SD, false, Val);
+bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const {
+ return getSymbolOffsetImpl(*this, S, false, Val);
}
-uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const {
uint64_t Val;
- getSymbolOffsetImpl(*this, SD, true, Val);
+ getSymbolOffsetImpl(*this, S, true, Val);
return Val;
}
@@ -188,12 +180,12 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
const MCExpr *Expr = Symbol.getVariableValue();
MCValue Value;
- if (!Expr->EvaluateAsValue(Value, this, nullptr))
+ if (!Expr->evaluateAsValue(Value, *this))
llvm_unreachable("Invalid Expression");
const MCSymbolRefExpr *RefB = Value.getSymB();
if (RefB)
- Assembler.getContext().FatalError(
+ Assembler.getContext().reportFatalError(
SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
"' could not be evaluated in a subtraction expression");
@@ -201,26 +193,37 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
if (!A)
return nullptr;
- return &A->getSymbol();
+ const MCSymbol &ASym = A->getSymbol();
+ const MCAssembler &Asm = getAssembler();
+ const MCSymbolData &ASD = Asm.getSymbolData(ASym);
+ if (ASD.isCommon()) {
+ // FIXME: we should probably add a SMLoc to MCExpr.
+ Asm.getContext().reportFatalError(SMLoc(),
+ "Common symbol " + ASym.getName() +
+ " cannot be used in assignment expr");
+ }
+
+ return &ASym;
}
-uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
// The size is the last fragment's end offset.
- const MCFragment &F = SD->getFragmentList().back();
+ const MCFragment &F = Sec->getFragmentList().back();
return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
}
-uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
+uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const {
// Virtual sections have no file size.
- if (SD->getSection().isVirtualSection())
+ if (Sec->isVirtualSection())
return 0;
// Otherwise, the file size is the same as the address space size.
- return getSectionAddressSize(SD);
+ return getSectionAddressSize(Sec);
}
-uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F,
- uint64_t FOffset, uint64_t FSize) {
+uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
+ const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize) {
uint64_t BundleSize = Assembler.getBundleAlignSize();
assert(BundleSize > 0 &&
"computeBundlePadding should only be called if bundling is enabled");
@@ -267,9 +270,8 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) {
MCFragment::~MCFragment() {
}
-MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(nullptr), Offset(~UINT64_C(0))
-{
+MCFragment::MCFragment(FragmentType Kind, MCSection *Parent)
+ : Kind(Kind), Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
if (Parent)
Parent->getFragmentList().push_back(this);
}
@@ -286,84 +288,6 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
/* *** */
-MCSectionData::MCSectionData() : Section(nullptr) {}
-
-MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
- : Section(&_Section),
- Ordinal(~UINT32_C(0)),
- Alignment(1),
- BundleLockState(NotBundleLocked),
- BundleLockNestingDepth(0),
- BundleGroupBeforeFirstInst(false),
- HasInstructions(false)
-{
- if (A)
- A->getSectionList().push_back(this);
-}
-
-MCSectionData::iterator
-MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
- if (Subsection == 0 && SubsectionFragmentMap.empty())
- return end();
-
- SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI =
- std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(),
- std::make_pair(Subsection, (MCFragment *)nullptr));
- bool ExactMatch = false;
- if (MI != SubsectionFragmentMap.end()) {
- ExactMatch = MI->first == Subsection;
- if (ExactMatch)
- ++MI;
- }
- iterator IP;
- if (MI == SubsectionFragmentMap.end())
- IP = end();
- else
- IP = MI->second;
- if (!ExactMatch && Subsection != 0) {
- // The GNU as documentation claims that subsections have an alignment of 4,
- // although this appears not to be the case.
- MCFragment *F = new MCDataFragment();
- SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
- getFragmentList().insert(IP, F);
- F->setParent(this);
- }
- return IP;
-}
-
-void MCSectionData::setBundleLockState(BundleLockStateType NewState) {
- if (NewState == NotBundleLocked) {
- if (BundleLockNestingDepth == 0) {
- report_fatal_error("Mismatched bundle_lock/unlock directives");
- }
- if (--BundleLockNestingDepth == 0) {
- BundleLockState = NotBundleLocked;
- }
- return;
- }
-
- // If any of the directives is an align_to_end directive, the whole nested
- // group is align_to_end. So don't downgrade from align_to_end to just locked.
- if (BundleLockState != BundleLockedAlignToEnd) {
- BundleLockState = NewState;
- }
- ++BundleLockNestingDepth;
-}
-
-/* *** */
-
-MCSymbolData::MCSymbolData() : Symbol(nullptr) {}
-
-MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
- uint64_t _Offset, MCAssembler *A)
- : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
- SymbolSize(nullptr), CommonAlign(-1U), Flags(0), Index(0) {
- if (A)
- A->getSymbolList().push_back(this);
-}
-
-/* *** */
-
MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
raw_ostream &OS_)
@@ -379,8 +303,6 @@ MCAssembler::~MCAssembler() {
void MCAssembler::reset() {
Sections.clear();
Symbols.clear();
- SectionMap.clear();
- SymbolMap.clear();
IndirectSymbols.clear();
DataRegions.clear();
LinkerOptions.clear();
@@ -425,6 +347,16 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
return true;
}
+void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) {
+ assert(Sym.isTemporary());
+ LocalsUsedInReloc.insert(&Sym);
+}
+
+bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const {
+ assert(Sym.isTemporary());
+ return LocalsUsedInReloc.count(&Sym);
+}
+
bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())
@@ -434,39 +366,29 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
if (!Symbol.isInSection())
return false;
- // Otherwise, check if the section requires symbols even for temporary labels.
- return getBackend().doesSectionRequireSymbols(Symbol.getSection());
+ if (isLocalUsedInReloc(Symbol))
+ return true;
+
+ return false;
}
-const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const {
// Linker visible symbols define atoms.
- if (isSymbolLinkerVisible(SD->getSymbol()))
- return SD;
+ if (isSymbolLinkerVisible(S))
+ return &S;
// Absolute and undefined symbols have no defining atom.
- if (!SD->getFragment())
+ if (!S.getData().getFragment())
return nullptr;
// Non-linker visible symbols in sections which can't be atomized have no
// defining atom.
if (!getContext().getAsmInfo()->isSectionAtomizableBySymbols(
- SD->getFragment()->getParent()->getSection()))
+ *S.getData().getFragment()->getParent()))
return nullptr;
// Otherwise, return the atom for the containing fragment.
- return SD->getFragment()->getAtom();
-}
-
-// Try to fully compute Expr to an absolute value and if that fails produce
-// a relocatable expr.
-// FIXME: Should this be the behavior of EvaluateAsRelocatable itself?
-static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout,
- const MCFixup &Fixup, MCValue &Target) {
- if (Expr.EvaluateAsValue(Target, &Layout, &Fixup)) {
- if (Target.isAbsolute())
- return true;
- }
- return Expr.EvaluateAsRelocatable(Target, &Layout, &Fixup);
+ return S.getData().getFragment()->getAtom();
}
bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
@@ -478,8 +400,8 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
const MCExpr *Expr = Fixup.getValue();
- if (!evaluate(*Expr, Layout, Fixup, Target))
- getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
+ if (!Expr->EvaluateAsRelocatable(Target, &Layout, &Fixup))
+ getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
@@ -493,14 +415,11 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
} else {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol &SA = A->getSymbol();
- if (A->getKind() != MCSymbolRefExpr::VK_None ||
- SA.AliasedSymbol().isUndefined()) {
+ if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) {
IsResolved = false;
} else {
- const MCSymbolData &DataA = getSymbolData(SA);
- IsResolved =
- getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
- *DF, false, true);
+ IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl(
+ *this, SA, *DF, false, true);
}
}
} else {
@@ -510,14 +429,14 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
Value = Target.getConstant();
if (const MCSymbolRefExpr *A = Target.getSymA()) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
if (Sym.isDefined())
- Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+ Value += Layout.getSymbolOffset(Sym);
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = B->getSymbol();
if (Sym.isDefined())
- Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
+ Value -= Layout.getSymbolOffset(Sym);
}
@@ -630,7 +549,12 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
// The fragment's offset will point to after the padding, and its computed
// size won't include the padding.
//
- if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
+ // When the -mc-relax-all flag is used, we optimize bundling by writing the
+ // bundle padding directly into fragments when the instructions are emitted
+ // inside the streamer.
+ //
+ if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() &&
+ F->hasInstructions()) {
assert(isa<MCEncodedFragment>(F) &&
"Only MCEncodedFragment implementations have instructions");
uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
@@ -638,7 +562,8 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
if (FSize > Assembler.getBundleAlignSize())
report_fatal_error("Fragment can't be larger than a bundle size");
- uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize);
+ uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F,
+ F->Offset, FSize);
if (RequiredBundlePadding > UINT8_MAX)
report_fatal_error("Padding cannot exceed 255 bytes");
F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
@@ -653,24 +578,18 @@ static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
OW->WriteBytes(EF.getContents());
}
-/// \brief Write the fragment \p F to the output file.
-static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment &F) {
- MCObjectWriter *OW = &Asm.getWriter();
-
- // FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
-
+void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize,
+ MCObjectWriter *OW) const {
// Should NOP padding be written out before this fragment?
unsigned BundlePadding = F.getBundlePadding();
if (BundlePadding > 0) {
- assert(Asm.isBundlingEnabled() &&
+ assert(isBundlingEnabled() &&
"Writing bundle padding with disabled bundling");
assert(F.hasInstructions() &&
"Writing bundle padding for a fragment without instructions");
- unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
- if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
+ unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize);
+ if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) {
// If the padding itself crosses a bundle boundary, it must be emitted
// in 2 pieces, since even nop instructions must not cross boundaries.
// v--------------v <- BundleAlignSize
@@ -679,16 +598,27 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
// | Prev |####|####| F |
// ----------------------------
// ^-------------------^ <- TotalLength
- unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
- if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
+ unsigned DistanceToBoundary = TotalLength - getBundleAlignSize();
+ if (!getBackend().writeNopData(DistanceToBoundary, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(DistanceToBoundary) + " bytes");
BundlePadding -= DistanceToBoundary;
}
- if (!Asm.getBackend().writeNopData(BundlePadding, OW))
+ if (!getBackend().writeNopData(BundlePadding, OW))
report_fatal_error("unable to write NOP sequence of " +
Twine(BundlePadding) + " bytes");
}
+}
+
+/// \brief Write the fragment \p F to the output file.
+static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
+
+ Asm.writeFragmentPadding(F, FragmentSize, OW);
// This variable (and its dummy usage) exists to participate in the assert at
// the end of the function.
@@ -773,7 +703,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
- OW->WriteBytes(LF.getContents().str());
+ OW->WriteBytes(LF.getContents());
break;
}
@@ -789,12 +719,12 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_Dwarf: {
const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
- OW->WriteBytes(OF.getContents().str());
+ OW->WriteBytes(OF.getContents());
break;
}
case MCFragment::FT_DwarfFrame: {
const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
- OW->WriteBytes(CF.getContents().str());
+ OW->WriteBytes(CF.getContents());
break;
}
}
@@ -803,15 +733,15 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
"The stream should advance by fragment size");
}
-void MCAssembler::writeSectionData(const MCSectionData *SD,
+void MCAssembler::writeSectionData(const MCSection *Sec,
const MCAsmLayout &Layout) const {
// Ignore virtual sections.
- if (SD->getSection().isVirtualSection()) {
- assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+ if (Sec->isVirtualSection()) {
+ assert(Layout.getSectionFileSize(Sec) == 0 && "Invalid size for section!");
// Check that contents are only things legal inside a virtual section.
- for (MCSectionData::const_iterator it = SD->begin(),
- ie = SD->end(); it != ie; ++it) {
+ for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie;
+ ++it) {
switch (it->getKind()) {
default: llvm_unreachable("Invalid fragment in virtual section!");
case MCFragment::FT_Data: {
@@ -823,7 +753,7 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
"Cannot have fixups in virtual section!");
for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
if (DF.getContents()[i]) {
- if (auto *ELFSec = dyn_cast<const MCSectionELF>(&SD->getSection()))
+ if (auto *ELFSec = dyn_cast<const MCSectionELF>(Sec))
report_fatal_error("non-zero initializer found in section '" +
ELFSec->getSectionName() + "'");
else
@@ -852,12 +782,12 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
uint64_t Start = getWriter().getStream().tell();
(void)Start;
- for (MCSectionData::const_iterator it = SD->begin(), ie = SD->end();
- it != ie; ++it)
+ for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie;
+ ++it)
writeFragment(*this, Layout, *it);
assert(getWriter().getStream().tell() - Start ==
- Layout.getSectionAddressSize(SD));
+ Layout.getSectionAddressSize(Sec));
}
std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
@@ -892,18 +822,18 @@ void MCAssembler::Finish() {
// Create dummy fragments to eliminate any empty sections; this simplifies
// layout.
if (it->getFragmentList().empty())
- new MCDataFragment(it);
+ new MCDataFragment(&*it);
it->setOrdinal(SectionIndex++);
}
// Assign layout order indices to sections and fragments.
for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
- MCSectionData *SD = Layout.getSectionOrder()[i];
- SD->setLayoutOrder(i);
+ MCSection *Sec = Layout.getSectionOrder()[i];
+ Sec->setLayoutOrder(i);
unsigned FragmentIndex = 0;
- for (MCSectionData::iterator iFrag = SD->begin(), iFragEnd = SD->end();
+ for (MCSection::iterator iFrag = Sec->begin(), iFragEnd = Sec->end();
iFrag != iFragEnd; ++iFrag)
iFrag->setLayoutOrder(FragmentIndex++);
}
@@ -931,8 +861,8 @@ void MCAssembler::Finish() {
// Evaluate and apply the fixups, generating relocation entries as necessary.
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
- for (MCSectionData::iterator it2 = it->begin(),
- ie2 = it->end(); it2 != ie2; ++it2) {
+ for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2;
+ ++it2) {
MCEncodedFragmentWithFixups *F =
dyn_cast<MCEncodedFragmentWithFixups>(it2);
if (F) {
@@ -1005,7 +935,7 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups, F.getSubtargetInfo());
+ getEmitter().encodeInstruction(Relaxed, VecOS, Fixups, F.getSubtargetInfo());
VecOS.flush();
// Update the fragment.
@@ -1018,7 +948,10 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
uint64_t OldSize = LF.getContents().size();
- int64_t Value = LF.getValue().evaluateKnownAbsolute(Layout);
+ int64_t Value;
+ bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout);
+ if (!Abs)
+ report_fatal_error("sleb128 and uleb128 expressions must be absolute");
SmallString<8> &Data = LF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
@@ -1034,7 +967,10 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCDwarfLineAddrFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
- int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
+ int64_t AddrDelta;
+ bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a line delta with an invalid expression");
+ (void) Abs;
int64_t LineDelta;
LineDelta = DF.getLineDelta();
SmallString<8> &Data = DF.getContents();
@@ -1049,7 +985,10 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
- int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
+ int64_t AddrDelta;
+ bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created call frame with an invalid expression");
+ (void) Abs;
SmallString<8> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
@@ -1058,7 +997,7 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) {
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) {
// Holds the first fragment which needed relaxing during this layout. It will
// remain NULL if none were relaxed.
// When a fragment is relaxed, all the fragments following it should get
@@ -1066,7 +1005,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) {
MCFragment *FirstRelaxedFragment = nullptr;
// Attempt to relax all the fragments in the section.
- for (MCSectionData::iterator I = SD.begin(), IE = SD.end(); I != IE; ++I) {
+ for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
// Check if this is a fragment that needs relaxation.
bool RelaxedFrag = false;
switch(I->getKind()) {
@@ -1105,8 +1044,8 @@ bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
- MCSectionData &SD = *it;
- while (layoutSectionOnce(Layout, SD))
+ MCSection &Sec = *it;
+ while (layoutSectionOnce(Layout, Sec))
WasRelaxed = true;
}
@@ -1245,27 +1184,14 @@ void MCFragment::dump() {
OS << ">";
}
-void MCSectionData::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCSectionData";
- OS << " Alignment:" << getAlignment()
- << " Fragments:[\n ";
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- if (it != begin()) OS << ",\n ";
- it->dump();
- }
- OS << "]>";
-}
-
void MCSymbolData::dump() const {
raw_ostream &OS = llvm::errs();
- OS << "<MCSymbolData Symbol:" << getSymbol()
+ OS << "<MCSymbolData"
<< " Fragment:" << getFragment();
if (!isCommon())
OS << " Offset:" << getOffset();
- OS << " Flags:" << getFlags() << " Index:" << getIndex();
+ OS << " Flags:" << getFlags();
if (isCommon())
OS << " (common, size:" << getCommonSize()
<< " align: " << getCommonAlignment() << ")";
@@ -1290,7 +1216,11 @@ void MCAssembler::dump() {
for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
if (it != symbol_begin()) OS << ",\n ";
+ OS << "(";
it->dump();
+ OS << ", Index:" << it->getIndex() << ", ";
+ it->getData().dump();
+ OS << ")";
}
OS << "]>\n";
}
diff --git a/contrib/llvm/lib/MC/MCCodeGenInfo.cpp b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp
index d9dcfd0..347ec2c 100644
--- a/contrib/llvm/lib/MC/MCCodeGenInfo.cpp
+++ b/contrib/llvm/lib/MC/MCCodeGenInfo.cpp
@@ -15,7 +15,7 @@
#include "llvm/MC/MCCodeGenInfo.h"
using namespace llvm;
-void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM,
+void MCCodeGenInfo::initMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL) {
RelocationModel = RM;
CMModel = CM;
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index b41e6d8..1f2f034 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCLabel.h"
@@ -18,6 +19,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -33,7 +35,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
bool DoAutoReset)
: SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(),
- Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0),
+ Symbols(Allocator), UsedNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4),
AllowTemporaryLabels(true), DwarfCompileUnitID(0),
@@ -53,7 +55,6 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
}
MCContext::~MCContext() {
-
if (AutoReset)
reset();
@@ -61,7 +62,7 @@ MCContext::~MCContext() {
// we don't need to free them here.
// If the stream for the .secure_log_unique directive was created free it.
- delete (raw_ostream*)SecureLog;
+ delete (raw_ostream *)SecureLog;
}
//===----------------------------------------------------------------------===//
@@ -69,6 +70,14 @@ MCContext::~MCContext() {
//===----------------------------------------------------------------------===//
void MCContext::reset() {
+ // Call the destructors so the fragments are freed
+ for (auto &I : ELFUniquingMap)
+ I.second->~MCSectionELF();
+ for (auto &I : COFFUniquingMap)
+ I.second->~MCSectionCOFF();
+ for (auto &I : MachOUniquingMap)
+ I.second->~MCSectionMachO();
+
UsedNames.clear();
Symbols.clear();
Allocator.Reset();
@@ -76,17 +85,17 @@ void MCContext::reset() {
CompilationDir.clear();
MainFileName.clear();
MCDwarfLineTablesCUMap.clear();
- SectionStartEndSyms.clear();
+ SectionsForRanges.clear();
MCGenDwarfLabelEntries.clear();
DwarfDebugFlags = StringRef();
DwarfCompileUnitID = 0;
- CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0);
+ CurrentDwarfLoc = MCDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0);
MachOUniquingMap.clear();
ELFUniquingMap.clear();
COFFUniquingMap.clear();
- NextUniqueID = 0;
+ NextID.clear();
AllowTemporaryLabels = true;
DwarfLocSeen = false;
GenDwarfForAssembly = false;
@@ -97,13 +106,15 @@ void MCContext::reset() {
// Symbol Manipulation
//===----------------------------------------------------------------------===//
-MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
- assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
+ SmallString<128> NameSV;
+ StringRef NameRef = Name.toStringRef(NameSV);
- MCSymbol *&Sym = Symbols[Name];
+ assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
+ MCSymbol *&Sym = Symbols[NameRef];
if (!Sym)
- Sym = CreateSymbol(Name);
+ Sym = CreateSymbol(NameRef, false);
return Sym;
}
@@ -122,7 +133,7 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
}
auto NameIter = UsedNames.insert(std::make_pair(Name, true)).first;
- Sym = new (*this) MCSymbol(NameIter->getKey(), /*isTemporary*/ false);
+ Sym = new (*this) MCSymbol(&*NameIter, /*isTemporary*/ false);
if (!OldSym)
OldSym = Sym;
@@ -130,53 +141,66 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
return Sym;
}
-MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName) {
- return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "frameallocation_" + FuncName);
+MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
+ unsigned Idx) {
+ return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+ "$frame_escape_" + Twine(Idx));
}
-MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) {
+ return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+ "$parent_frame_offset");
+}
+
+MCSymbol *MCContext::getOrCreateLSDASymbol(StringRef FuncName) {
+ return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + "__ehtable$" +
+ FuncName);
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
// Determine whether this is an assembler temporary or normal label, if used.
- bool isTemporary = false;
+ bool IsTemporary = false;
if (AllowTemporaryLabels)
- isTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
+ IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
+
+ if (IsTemporary && AlwaysAddSuffix && !UseNamesOnTempLabels)
+ return new (*this) MCSymbol(nullptr, true);
- auto NameEntry = UsedNames.insert(std::make_pair(Name, true));
- if (!NameEntry.second) {
- assert(isTemporary && "Cannot rename non-temporary symbols");
- SmallString<128> NewName = Name;
- do {
+ SmallString<128> NewName = Name;
+ bool AddSuffix = AlwaysAddSuffix;
+ unsigned &NextUniqueID = NextID[Name];
+ for (;;) {
+ if (AddSuffix) {
NewName.resize(Name.size());
raw_svector_ostream(NewName) << NextUniqueID++;
- NameEntry = UsedNames.insert(std::make_pair(NewName, true));
- } while (!NameEntry.second);
+ }
+ auto NameEntry = UsedNames.insert(std::make_pair(NewName, true));
+ if (NameEntry.second) {
+ // Ok, we found a name. Have the MCSymbol object itself refer to the copy
+ // of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result = new (*this) MCSymbol(&*NameEntry.first, IsTemporary);
+ return Result;
+ }
+ assert(IsTemporary && "Cannot rename non-temporary symbols");
+ AddSuffix = true;
}
-
- // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
- // to the copy of the string that is embedded in the UsedNames entry.
- MCSymbol *Result =
- new (*this) MCSymbol(NameEntry.first->getKey(), isTemporary);
-
- return Result;
+ llvm_unreachable("Infinite loop");
}
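The loop above is plain name uniquing: the first request wins the name outright (or gets an immediate numeric suffix when AlwaysAddSuffix is set), and each collision retries with the next value of a per-name counter. A minimal sketch of the same idea in isolation, using STL containers and a hypothetical helper name:

    #include <map>
    #include <set>
    #include <string>

    std::string uniquify(const std::string &Name, bool AlwaysAddSuffix,
                         std::set<std::string> &Used,
                         std::map<std::string, unsigned> &NextID) {
      unsigned &Counter = NextID[Name];
      bool AddSuffix = AlwaysAddSuffix;
      for (;;) {
        std::string Candidate = Name;
        if (AddSuffix)
          Candidate += std::to_string(Counter++);
        if (Used.insert(Candidate).second)
          return Candidate; // First claim of this exact name wins.
        AddSuffix = true;   // Collision: retry with a numeric suffix.
      }
    }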
-MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) {
SmallString<128> NameSV;
- return GetOrCreateSymbol(Name.toStringRef(NameSV));
+ raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
+ return CreateSymbol(NameSV, AlwaysAddSuffix);
}
-MCSymbol *MCContext::CreateLinkerPrivateTempSymbol() {
+MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
SmallString<128> NameSV;
- raw_svector_ostream(NameSV)
- << MAI->getLinkerPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
- return CreateSymbol(NameSV);
+ raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp";
+ return CreateSymbol(NameSV, true);
}
-MCSymbol *MCContext::CreateTempSymbol() {
- SmallString<128> NameSV;
- raw_svector_ostream(NameSV)
- << MAI->getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
- return CreateSymbol(NameSV);
+MCSymbol *MCContext::createTempSymbol() {
+ return createTempSymbol("tmp", true);
}
unsigned MCContext::NextInstance(unsigned LocalLabelVal) {
@@ -197,16 +221,16 @@ MCSymbol *MCContext::getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
unsigned Instance) {
MCSymbol *&Sym = LocalSymbols[std::make_pair(LocalLabelVal, Instance)];
if (!Sym)
- Sym = CreateTempSymbol();
+ Sym = createTempSymbol();
return Sym;
}
-MCSymbol *MCContext::CreateDirectionalLocalSymbol(unsigned LocalLabelVal) {
+MCSymbol *MCContext::createDirectionalLocalSymbol(unsigned LocalLabelVal) {
unsigned Instance = NextInstance(LocalLabelVal);
return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance);
}
-MCSymbol *MCContext::GetDirectionalLocalSymbol(unsigned LocalLabelVal,
+MCSymbol *MCContext::getDirectionalLocalSymbol(unsigned LocalLabelVal,
bool Before) {
unsigned Instance = GetInstance(LocalLabelVal);
if (!Before)
@@ -214,24 +238,20 @@ MCSymbol *MCContext::GetDirectionalLocalSymbol(unsigned LocalLabelVal,
return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance);
}
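These two entry points back the classic numeric local labels of assembly; a generic illustration (syntax is assembler-dependent), written as comments:

    // 1:       defining "1:" calls createDirectionalLocalSymbol(1),
    //          bumping the instance counter to N
    //   b 1b   a backward use resolves via getDirectionalLocalSymbol(1, true)
    //          to the existing instance N
    //   b 1f   a forward use resolves via getDirectionalLocalSymbol(1, false)
    //          to the not-yet-defined instance N + 1
    // 1:       the next definition becomes instance N + 1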
-MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
- return Symbols.lookup(Name);
-}
-
-MCSymbol *MCContext::LookupSymbol(const Twine &Name) const {
+MCSymbol *MCContext::lookupSymbol(const Twine &Name) const {
SmallString<128> NameSV;
- Name.toVector(NameSV);
- return LookupSymbol(NameSV.str());
+ StringRef NameRef = Name.toStringRef(NameSV);
+ return Symbols.lookup(NameRef);
}
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//
-const MCSectionMachO *MCContext::
-getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2, SectionKind Kind) {
+MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind,
+ const char *BeginSymName) {
// We unique sections by their segment/section pair. The returned section
// may not have the same flags as the requested section, if so this should be
@@ -244,107 +264,149 @@ getMachOSection(StringRef Segment, StringRef Section,
Name += Section;
// Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = MachOUniquingMap[Name.str()];
- if (Entry) return Entry;
+ MCSectionMachO *&Entry = MachOUniquingMap[Name];
+ if (Entry)
+ return Entry;
+
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
// Otherwise, return a new section.
return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
- Reserved2, Kind);
-}
-
-const MCSectionELF *MCContext::
-getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind) {
- return getELFSection(Section, Type, Flags, Kind, 0, "");
+ Reserved2, Kind, Begin);
}
-void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
+void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) {
StringRef GroupName;
if (const MCSymbol *Group = Section->getGroup())
GroupName = Group->getName();
- ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName));
- auto I =
- ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName),
- Section)).first;
- StringRef CachedName = I->first.first;
- const_cast<MCSectionELF*>(Section)->setSectionName(CachedName);
+ unsigned UniqueID = Section->getUniqueID();
+ ELFUniquingMap.erase(
+ ELFSectionKey{Section->getSectionName(), GroupName, UniqueID});
+ auto I = ELFUniquingMap.insert(std::make_pair(
+ ELFSectionKey{Name, GroupName, UniqueID},
+ Section))
+ .first;
+ StringRef CachedName = I->first.SectionName;
+ const_cast<MCSectionELF *>(Section)->setSectionName(CachedName);
+}
+
+MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbol *Group,
+ const MCSectionELF *Associated) {
+ StringMap<bool>::iterator I;
+ bool Inserted;
+ std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true));
+
+ return new (*this)
+ MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(),
+ EntrySize, Group, true, nullptr, Associated);
}
-const MCSectionELF *MCContext::
-getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, unsigned EntrySize, StringRef Group) {
+MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ StringRef Group, unsigned UniqueID,
+ const char *BeginSymName) {
+ MCSymbol *GroupSym = nullptr;
+ if (!Group.empty())
+ GroupSym = getOrCreateSymbol(Group);
+
+ return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID,
+ BeginSymName, nullptr);
+}
+
+MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbol *GroupSym,
+ unsigned UniqueID,
+ const char *BeginSymName,
+ const MCSectionELF *Associated) {
+ StringRef Group = "";
+ if (GroupSym)
+ Group = GroupSym->getName();
// Do the lookup, if we have a hit, return it.
auto IterBool = ELFUniquingMap.insert(
- std::make_pair(SectionGroupPair(Section, Group), nullptr));
+ std::make_pair(ELFSectionKey{Section, Group, UniqueID}, nullptr));
auto &Entry = *IterBool.first;
- if (!IterBool.second) return Entry.second;
+ if (!IterBool.second)
+ return Entry.second;
- // Possibly refine the entry size first.
- if (!EntrySize) {
- EntrySize = MCSectionELF::DetermineEntrySize(Kind);
- }
+ StringRef CachedName = Entry.first.SectionName;
- MCSymbol *GroupSym = nullptr;
- if (!Group.empty())
- GroupSym = GetOrCreateSymbol(Group);
+ SectionKind Kind;
+ if (Flags & ELF::SHF_EXECINSTR)
+ Kind = SectionKind::getText();
+ else
+ Kind = SectionKind::getReadOnly();
- StringRef CachedName = Entry.first.first;
- MCSectionELF *Result = new (*this)
- MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym);
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
+
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize,
+ GroupSym, UniqueID, Begin, Associated);
Entry.second = Result;
return Result;
}
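Note that the section kind is now derived from the ELF flags instead of being passed in, and sections are uniqued by (name, group, unique ID). A hedged usage sketch, with section names and flags chosen purely for illustration and Ctx assumed to be an MCContext:

    // Repeated lookups with the same key return the cached MCSectionELF.
    MCSectionELF *Text =
        Ctx.getELFSection(".text", ELF::SHT_PROGBITS,
                          ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); // -> getText()
    MCSectionELF *RO =
        Ctx.getELFSection(".rodata", ELF::SHT_PROGBITS,
                          ELF::SHF_ALLOC);                      // -> getReadOnly()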
-const MCSectionELF *MCContext::CreateELFGroupSection() {
- MCSectionELF *Result =
- new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
- SectionKind::getReadOnly(), 4, nullptr);
+MCSectionELF *MCContext::createELFGroupSection(const MCSymbol *Group) {
+ MCSectionELF *Result = new (*this)
+ MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
+ Group, ~0, nullptr, nullptr);
return Result;
}
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
- unsigned Characteristics,
- SectionKind Kind,
- StringRef COMDATSymName,
- int Selection) {
- // Do the lookup, if we have a hit, return it.
+MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind,
+ StringRef COMDATSymName, int Selection,
+ const char *BeginSymName) {
+ MCSymbol *COMDATSymbol = nullptr;
+ if (!COMDATSymName.empty()) {
+ COMDATSymbol = getOrCreateSymbol(COMDATSymName);
+ COMDATSymName = COMDATSymbol->getName();
+ }
- SectionGroupTriple T(Section, COMDATSymName, Selection);
+ // Do the lookup, if we have a hit, return it.
+ COFFSectionKey T{Section, COMDATSymName, Selection};
auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr));
auto Iter = IterBool.first;
if (!IterBool.second)
return Iter->second;
- MCSymbol *COMDATSymbol = nullptr;
- if (!COMDATSymName.empty())
- COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
- StringRef CachedName = std::get<0>(Iter->first);
- MCSectionCOFF *Result = new (*this)
- MCSectionCOFF(CachedName, Characteristics, COMDATSymbol, Selection, Kind);
+ StringRef CachedName = Iter->first.SectionName;
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(
+ CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin);
Iter->second = Result;
return Result;
}
-const MCSectionCOFF *
-MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
- SectionKind Kind) {
- return getCOFFSection(Section, Characteristics, Kind, "", 0);
+MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind,
+ const char *BeginSymName) {
+ return getCOFFSection(Section, Characteristics, Kind, "", 0, BeginSymName);
}
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
- SectionGroupTriple T(Section, "", 0);
+MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
+ COFFSectionKey T{Section, "", 0};
auto Iter = COFFUniquingMap.find(T);
if (Iter == COFFUniquingMap.end())
return nullptr;
return Iter->second;
}
-const MCSectionCOFF *
-MCContext::getAssociativeCOFFSection(const MCSectionCOFF *Sec,
- const MCSymbol *KeySym) {
+MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
+ const MCSymbol *KeySym) {
// Return the normal section if we don't have to be associative.
if (!KeySym)
return Sec;
@@ -362,11 +424,11 @@ MCContext::getAssociativeCOFFSection(const MCSectionCOFF *Sec,
// Dwarf Management
//===----------------------------------------------------------------------===//
-/// GetDwarfFile - takes a file name an number to place in the dwarf file and
+/// getDwarfFile - takes a file name and number to place in the dwarf file and
/// directory tables. If the file number has already been allocated it is an
/// error and zero is returned and the client reports the error, else the
/// allocated file number is returned. The file numbers may be in any order.
-unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
+unsigned MCContext::getDwarfFile(StringRef Directory, StringRef FileName,
unsigned FileNumber, unsigned CUID) {
MCDwarfLineTable &Table = MCDwarfLineTablesCUMap[CUID];
return Table.getFile(Directory, FileName, FileNumber);
@@ -375,37 +437,26 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// currently is assigned and false otherwise.
bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
- const SmallVectorImpl<MCDwarfFile>& MCDwarfFiles = getMCDwarfFiles(CUID);
- if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles = getMCDwarfFiles(CUID);
+ if (FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
return !MCDwarfFiles[FileNumber].Name.empty();
}
-/// finalizeDwarfSections - Emit end symbols for each non-empty code section.
-/// Also remove empty sections from SectionStartEndSyms, to avoid generating
+/// Remove empty sections from SectionStartEndSyms, to avoid generating
/// useless debug info for them.
void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
- MCContext &context = MCOS.getContext();
-
- auto sec = SectionStartEndSyms.begin();
- while (sec != SectionStartEndSyms.end()) {
- assert(sec->second.first && "Start symbol must be set by now");
- MCOS.SwitchSection(sec->first);
- if (MCOS.mayHaveInstructions()) {
- MCSymbol *SectionEndSym = context.CreateTempSymbol();
- MCOS.EmitLabel(SectionEndSym);
- sec->second.second = SectionEndSym;
- ++sec;
- } else {
- MapVector<const MCSection *, std::pair<MCSymbol *, MCSymbol *> >::iterator
- to_erase = sec;
- sec = SectionStartEndSyms.erase(to_erase);
- }
+ std::vector<MCSection *> Keep;
+ for (MCSection *Sec : SectionsForRanges) {
+ if (MCOS.mayHaveInstructions(*Sec))
+ Keep.push_back(Sec);
}
+ SectionsForRanges.clear();
+ SectionsForRanges.insert(Keep.begin(), Keep.end());
}
-void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const {
+void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const {
// If we have a source manager and a location, use it. Otherwise just
// use the generic report_fatal_error().
if (!SrcMgr || Loc == SMLoc())
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index d27d83b..716d76a 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -33,22 +33,22 @@ using namespace llvm;
// disassembler context. If not, it returns NULL.
//
LLVMDisasmContextRef
-LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
+LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
const char *Features, void *DisInfo, int TagType,
LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
// Get the target.
std::string Error;
- const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
if (!TheTarget)
return nullptr;
- const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
+ const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT);
if (!MRI)
return nullptr;
// Get the assembler info needed to setup the MCContext.
- const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple);
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT);
if (!MAI)
return nullptr;
@@ -56,8 +56,8 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
if (!MII)
return nullptr;
- const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
- Features);
+ const MCSubtargetInfo *STI =
+ TheTarget->createMCSubtargetInfo(TT, CPU, Features);
if (!STI)
return nullptr;
@@ -72,25 +72,24 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
return nullptr;
std::unique_ptr<MCRelocationInfo> RelInfo(
- TheTarget->createMCRelocationInfo(Triple, *Ctx));
+ TheTarget->createMCRelocationInfo(TT, *Ctx));
if (!RelInfo)
return nullptr;
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
- Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release()));
+ TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
- *MAI, *MII, *MRI, *STI);
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(
+ Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI);
if (!IP)
return nullptr;
- LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
- GetOpInfo, SymbolLookUp,
- TheTarget, MAI, MRI,
- STI, MII, Ctx, DisAsm, IP);
+ LLVMDisasmContext *DC =
+ new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp,
+ TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP);
if (!DC)
return nullptr;
@@ -98,19 +97,19 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
return DC;
}
-LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
- void *DisInfo, int TagType,
- LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp){
- return LLVMCreateDisasmCPUFeatures(Triple, CPU, "", DisInfo, TagType,
- GetOpInfo, SymbolLookUp);
+LLVMDisasmContextRef
+LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
}
-LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
+LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
- return LLVMCreateDisasmCPUFeatures(Triple, "", "", DisInfo, TagType,
- GetOpInfo, SymbolLookUp);
+ return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
}
//
@@ -268,7 +267,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
SmallVector<char, 64> InsnStr;
raw_svector_ostream OS(InsnStr);
formatted_raw_ostream FormattedOS(OS);
- IP->printInst(&Inst, FormattedOS, AnnotationsStr);
+ IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo());
if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency)
emitLatency(DC, Inst);
@@ -312,11 +311,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
const MCAsmInfo *MAI = DC->getAsmInfo();
const MCInstrInfo *MII = DC->getInstrInfo();
const MCRegisterInfo *MRI = DC->getRegisterInfo();
- const MCSubtargetInfo *STI = DC->getSubtargetInfo();
int AsmPrinterVariant = MAI->getAssemblerDialect();
AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0;
MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter(
- AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
+ Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI);
if (IP) {
DC->setIP(IP);
DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant);
diff --git a/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 0145623..1262e2a 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -87,7 +87,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
Add = MCSymbolRefExpr::Create(Sym, Ctx);
} else {
Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx);
@@ -98,7 +98,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, Ctx);
} else {
Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
@@ -136,7 +136,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
if (!Expr)
return false;
- MI.addOperand(MCOperand::CreateExpr(Expr));
+ MI.addOperand(MCOperand::createExpr(Expr));
return true;
}
@@ -186,13 +186,11 @@ void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
namespace llvm {
MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
- void *DisInfo,
- MCContext *Ctx,
- MCRelocationInfo *RelInfo) {
+ void *DisInfo, MCContext *Ctx,
+ std::unique_ptr<MCRelocationInfo> &&RelInfo) {
assert(Ctx && "No MCContext given for symbolic disassembly");
- return new MCExternalSymbolizer(*Ctx,
- std::unique_ptr<MCRelocationInfo>(RelInfo),
- GetOpInfo, SymbolLookUp, DisInfo);
+ return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
+ SymbolLookUp, DisInfo);
}
}
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 5effb01..a7e83f6 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -64,12 +64,12 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
// and if there is information from the last .loc directive that has yet to have
// a line entry made for it is made.
//
-void MCLineEntry::Make(MCObjectStreamer *MCOS, const MCSection *Section) {
+void MCLineEntry::Make(MCObjectStreamer *MCOS, MCSection *Section) {
if (!MCOS->getContext().getDwarfLocSeen())
return;
// Create a symbol in the current section for use in the line entry.
- MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+ MCSymbol *LineSym = MCOS->getContext().createTempSymbol();
// Set the value of the symbol to use for the MCLineEntry.
MCOS->EmitLabel(LineSym);
@@ -80,7 +80,7 @@ void MCLineEntry::Make(MCObjectStreamer *MCOS, const MCSection *Section) {
MCLineEntry LineEntry(LineSym, DwarfLoc);
// clear DwarfLocSeen saying the current .loc info is now used.
- MCOS->getContext().ClearDwarfLocSeen();
+ MCOS->getContext().clearDwarfLocSeen();
// Add the line entry to this section's entries.
MCOS->getContext()
@@ -115,7 +115,7 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
// in the LineSection.
//
static inline void
-EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section,
+EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section,
const MCLineSection::MCLineEntryCollection &LineEntries) {
unsigned FileNum = 1;
unsigned LastLine = 1;
@@ -179,28 +179,19 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section,
}
// Emit a DW_LNE_end_sequence for the end of the section.
- // Using the pointer Section create a temporary label at the end of the
- // section and use that and the LastLabel to compute the address delta
- // and use INT64_MAX as the line delta which is the signal that this is
- // actually a DW_LNE_end_sequence.
+ // Use the section end label to compute the address delta and use INT64_MAX
+ // as the line delta which is the signal that this is actually a
+ // DW_LNE_end_sequence.
+ MCSymbol *SectionEnd = MCOS->endSection(Section);
- // Switch to the section to be able to create a symbol at its end.
- // TODO: keep track of the last subsection so that this symbol appears in the
- // correct place.
- MCOS->SwitchSection(Section);
+  // Switch back to the dwarf line section, in case endSection had to switch
+  // the section.
+ // section.
+ MCContext &Ctx = MCOS->getContext();
+ MCOS->SwitchSection(Ctx.getObjectFileInfo()->getDwarfLineSection());
- MCContext &context = MCOS->getContext();
- // Create a symbol at the end of the section.
- MCSymbol *SectionEnd = context.CreateTempSymbol();
- // Set the value of the symbol, as we are at the end of the section.
- MCOS->EmitLabel(SectionEnd);
-
- // Switch back the dwarf line section.
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
-
- const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
+ const MCAsmInfo *AsmInfo = Ctx.getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
- asmInfo->getPointerSize());
+ AsmInfo->getPointerSize());
}
//
@@ -243,7 +234,8 @@ std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS)
0, // length of DW_LNS_set_epilogue_begin
1 // DW_LNS_set_isa
};
- assert(array_lengthof(StandardOpcodeLengths) == (DWARF2_LINE_OPCODE_BASE - 1));
+ assert(array_lengthof(StandardOpcodeLengths) ==
+ (DWARF2_LINE_OPCODE_BASE - 1));
return Emit(MCOS, StandardOpcodeLengths);
}
@@ -253,7 +245,7 @@ static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) {
if (Context.getAsmInfo()->hasAggressiveSymbolFolding())
return Expr;
- MCSymbol *ABS = Context.CreateTempSymbol();
+ MCSymbol *ABS = Context.createTempSymbol();
OS.EmitAssignment(ABS, Expr);
return MCSymbolRefExpr::Create(ABS, Context);
}
@@ -272,12 +264,12 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
// Create a symbol at the beginning of the line table.
MCSymbol *LineStartSym = Label;
if (!LineStartSym)
- LineStartSym = context.CreateTempSymbol();
+ LineStartSym = context.createTempSymbol();
// Set the value of the symbol, as we are at the start of the line table.
MCOS->EmitLabel(LineStartSym);
// Create a symbol for the end of the section (to be set when we get there).
- MCSymbol *LineEndSym = context.CreateTempSymbol();
+ MCSymbol *LineEndSym = context.createTempSymbol();
// The first 4 bytes is the total length of the information for this
// compilation unit (not including these 4 bytes for the length).
@@ -288,7 +280,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
MCOS->EmitIntValue(2, 2);
// Create a symbol for the end of the prologue (to be set when we get there).
- MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+ MCSymbol *ProEndSym = context.createTempSymbol(); // Lprologue_end
// Length of the prologue, is the next 4 bytes. Which is the start of the
// section to the end of the prologue. Not including the 4 bytes for the
@@ -368,8 +360,10 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory,
FileNumber = SourceIdMap.size() + 1;
assert((MCDwarfFiles.empty() || FileNumber == MCDwarfFiles.size()) &&
"Don't mix autonumbered and explicit numbered line table usage");
+ SmallString<256> Buffer;
auto IterBool = SourceIdMap.insert(
- std::make_pair((Directory + Twine('\0') + FileName).str(), FileNumber));
+ std::make_pair((Directory + Twine('\0') + FileName).toStringRef(Buffer),
+ FileNumber));
if (!IterBool.second)
return IterBool.first->second;
}
@@ -446,7 +440,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
if (LineDelta == INT64_MAX) {
if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
OS << char(dwarf::DW_LNS_const_add_pc);
- else {
+ else if (AddrDelta) {
OS << char(dwarf::DW_LNS_advance_pc);
encodeULEB128(AddrDelta, OS);
}
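With the new `else if (AddrDelta)` guard, an end_sequence whose address delta is zero no longer emits a redundant advance opcode. A worked byte-level example (opcode values per the DWARF spec):

    // AddrDelta == 0, LineDelta == INT64_MAX (the end_sequence marker):
    //   before: 02 00 00 01 01  // DW_LNS_advance_pc, ULEB128(0), extended op
    //   after:  00 01 01        // DW_LNS_extended_op, length 1,
    //                           // DW_LNE_end_sequence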
@@ -616,9 +610,9 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
// Now emit the table of pairs of PointerSize'ed values for the section
// addresses and sizes.
- for (const auto &sec : Sections) {
- MCSymbol *StartSymbol = sec.second.first;
- MCSymbol *EndSymbol = sec.second.second;
+ for (MCSection *Sec : Sections) {
+ const MCSymbol *StartSymbol = Sec->getBeginSymbol();
+ MCSymbol *EndSymbol = Sec->getEndSymbol(context);
assert(StartSymbol && "StartSymbol must not be NULL");
assert(EndSymbol && "EndSymbol must not be NULL");
@@ -648,9 +642,9 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// Create a symbol at the start and end of this section used in here for the
// expression to calculate the length in the header.
- MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCSymbol *InfoStart = context.createTempSymbol();
MCOS->EmitLabel(InfoStart);
- MCSymbol *InfoEnd = context.CreateTempSymbol();
+ MCSymbol *InfoEnd = context.createTempSymbol();
// First part: the header.
@@ -705,8 +699,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
const auto TextSection = Sections.begin();
assert(TextSection != Sections.end() && "No text section found");
- MCSymbol *StartSymbol = TextSection->second.first;
- MCSymbol *EndSymbol = TextSection->second.second;
+ MCSymbol *StartSymbol = (*TextSection)->getBeginSymbol();
+ MCSymbol *EndSymbol = (*TextSection)->getEndSymbol(context);
assert(StartSymbol && "StartSymbol must not be NULL");
assert(EndSymbol && "EndSymbol must not be NULL");
@@ -811,10 +805,9 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) {
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
- for (const auto sec : Sections) {
-
- MCSymbol *StartSymbol = sec.second.first;
- MCSymbol *EndSymbol = sec.second.second;
+ for (MCSection *Sec : Sections) {
+ const MCSymbol *StartSymbol = Sec->getBeginSymbol();
+ MCSymbol *EndSymbol = Sec->getEndSymbol(context);
assert(StartSymbol && "StartSymbol must not be NULL");
assert(EndSymbol && "EndSymbol must not be NULL");
@@ -871,18 +864,18 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
if (CreateDwarfSectionSymbols) {
- InfoSectionSymbol = context.CreateTempSymbol();
+ InfoSectionSymbol = context.createTempSymbol();
MCOS->EmitLabel(InfoSectionSymbol);
}
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
if (CreateDwarfSectionSymbols) {
- AbbrevSectionSymbol = context.CreateTempSymbol();
+ AbbrevSectionSymbol = context.createTempSymbol();
MCOS->EmitLabel(AbbrevSectionSymbol);
}
if (UseRangesSection) {
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
if (CreateDwarfSectionSymbols) {
- RangesSectionSymbol = context.CreateTempSymbol();
+ RangesSectionSymbol = context.createTempSymbol();
MCOS->EmitLabel(RangesSectionSymbol);
}
}
@@ -940,7 +933,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
// values so that they don't have things like an ARM thumb bit from the
// original symbol. So when used they won't get a low bit set after
// relocation.
- MCSymbol *Label = context.CreateTempSymbol();
+ MCSymbol *Label = context.createTempSymbol();
MCOS->EmitLabel(Label);
// Create an entry for the info and add it to the other entries.
@@ -1007,11 +1000,13 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
namespace {
class FrameEmitterImpl {
int CFAOffset;
+ int InitialCFAOffset;
bool IsEH;
const MCSymbol *SectionStart;
public:
FrameEmitterImpl(bool isEH)
- : CFAOffset(0), IsEH(isEH), SectionStart(nullptr) {}
+ : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) {
+ }
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
@@ -1045,11 +1040,16 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) {
void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+ auto *MRI = Streamer.getContext().getRegisterInfo();
switch (Instr.getOperation()) {
case MCCFIInstruction::OpRegister: {
unsigned Reg1 = Instr.getRegister();
unsigned Reg2 = Instr.getRegister2();
+ if (!IsEH) {
+ Reg1 = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg1, true), false);
+ Reg2 = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg2, true), false);
+ }
Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
Streamer.EmitULEB128IntValue(Reg1);
Streamer.EmitULEB128IntValue(Reg2);
@@ -1082,8 +1082,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
return;
}
case MCCFIInstruction::OpDefCfa: {
+ unsigned Reg = Instr.getRegister();
+ if (!IsEH)
+ Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false);
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
- Streamer.EmitULEB128IntValue(Instr.getRegister());
+ Streamer.EmitULEB128IntValue(Reg);
CFAOffset = -Instr.getOffset();
Streamer.EmitULEB128IntValue(CFAOffset);
@@ -1091,8 +1094,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
}
case MCCFIInstruction::OpDefCfaRegister: {
+ unsigned Reg = Instr.getRegister();
+ if (!IsEH)
+ Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false);
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
- Streamer.EmitULEB128IntValue(Instr.getRegister());
+ Streamer.EmitULEB128IntValue(Reg);
return;
}
@@ -1103,6 +1109,9 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
Instr.getOperation() == MCCFIInstruction::OpRelOffset;
unsigned Reg = Instr.getRegister();
+ if (!IsEH)
+ Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false);
+
int Offset = Instr.getOffset();
if (IsRelative)
Offset -= CFAOffset;
@@ -1136,6 +1145,8 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
}
case MCCFIInstruction::OpRestore: {
unsigned Reg = Instr.getRegister();
+ if (!IsEH)
+ Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false);
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
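The recurring three-line pattern above exists because MCCFIInstruction stores EH-flavored DWARF register numbers, while .debug_frame wants the non-EH numbering. A hedged sketch of the translation as a standalone helper (hypothetical name):

    unsigned toDebugFrameReg(const MCRegisterInfo &MRI, unsigned EHReg) {
      // Map EH-DWARF -> LLVM register, then LLVM -> debug-DWARF.
      int LLVMReg = MRI.getLLVMRegNum(EHReg, /*isEH=*/true);
      return (unsigned)MRI.getDwarfRegNum(LLVMReg, /*isEH=*/false);
    }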
@@ -1234,6 +1245,20 @@ void FrameEmitterImpl::EmitCompactUnwind(MCObjectStreamer &Streamer,
Streamer.EmitIntValue(0, Size); // No LSDA
}
+static unsigned getCIEVersion(bool IsEH, unsigned DwarfVersion) {
+ if (IsEH)
+ return 1;
+ switch (DwarfVersion) {
+ case 2:
+ return 1;
+ case 3:
+ return 3;
+ case 4:
+ return 4;
+ }
+ llvm_unreachable("Unknown version");
+}
+
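Illustrative expectations for the helper above (hypothetical checks, not part of the patch):

    assert(getCIEVersion(/*IsEH=*/true, 4) == 1);  // .eh_frame CIEs stay v1
    assert(getCIEVersion(/*IsEH=*/false, 2) == 1); // DWARF 2 uses CIE version 1
    assert(getCIEVersion(/*IsEH=*/false, 3) == 3);
    assert(getCIEVersion(/*IsEH=*/false, 4) == 4);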
const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
@@ -1245,10 +1270,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
const MCRegisterInfo *MRI = context.getRegisterInfo();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
- MCSymbol *sectionStart = context.CreateTempSymbol();
+ MCSymbol *sectionStart = context.createTempSymbol();
streamer.EmitLabel(sectionStart);
- MCSymbol *sectionEnd = context.CreateTempSymbol();
+ MCSymbol *sectionEnd = context.createTempSymbol();
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
@@ -1260,9 +1285,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
streamer.EmitIntValue(CIE_ID, 4);
// Version
- // For DWARF2, we use CIE version 1
- // For DWARF3+, we use CIE version 3
- uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 1 : 3;
+ uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
streamer.EmitIntValue(CIEVersion, 1);
// Augmentation String
@@ -1276,10 +1299,18 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
Augmentation += "R";
if (IsSignalFrame)
Augmentation += "S";
- streamer.EmitBytes(Augmentation.str());
+ streamer.EmitBytes(Augmentation);
}
streamer.EmitIntValue(0, 1);
+ if (CIEVersion >= 4) {
+ // Address Size
+ streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
+
+ // Segment Descriptor Size
+ streamer.EmitIntValue(0, 1);
+ }
+
// Code Alignment Factor
streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
@@ -1290,10 +1321,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
if (CIEVersion == 1) {
assert(MRI->getRARegister() <= 255 &&
"DWARF 2 encodes return_address_register in one byte");
- streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1);
+ streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
} else {
streamer.EmitULEB128IntValue(
- MRI->getDwarfRegNum(MRI->getRARegister(), true));
+ MRI->getDwarfRegNum(MRI->getRARegister(), IsEH));
}
// Augmentation Data Length (optional)
@@ -1337,6 +1368,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
EmitCFIInstructions(streamer, Instructions, nullptr);
}
+ InitialCFAOffset = CFAOffset;
+
// Padding
streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
@@ -1348,10 +1381,12 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame) {
MCContext &context = streamer.getContext();
- MCSymbol *fdeStart = context.CreateTempSymbol();
- MCSymbol *fdeEnd = context.CreateTempSymbol();
+ MCSymbol *fdeStart = context.createTempSymbol();
+ MCSymbol *fdeEnd = context.createTempSymbol();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
+ CFAOffset = InitialCFAOffset;
+
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
emitAbsValue(streamer, Length, 4);
@@ -1485,12 +1520,12 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
if (!NeedsEHFrameSection) return;
- const MCSection &Section =
- IsEH ? *const_cast<MCObjectFileInfo*>(MOFI)->getEHFrameSection() :
- *MOFI->getDwarfFrameSection();
+ MCSection &Section =
+ IsEH ? *const_cast<MCObjectFileInfo *>(MOFI)->getEHFrameSection()
+ : *MOFI->getDwarfFrameSection();
Streamer.SwitchSection(&Section);
- MCSymbol *SectionStart = Context.CreateTempSymbol();
+ MCSymbol *SectionStart = Context.createTempSymbol();
Streamer.EmitLabel(SectionStart);
Emitter.setSectionStart(SectionStart);
diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp
index 386c209..3690634 100644
--- a/contrib/llvm/lib/MC/MCELF.cpp
+++ b/contrib/llvm/lib/MC/MCELF.cpp
@@ -21,7 +21,7 @@ namespace llvm {
void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
- Binding == ELF::STB_WEAK);
+ Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
}
@@ -29,7 +29,7 @@ void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) {
unsigned MCELF::GetBinding(const MCSymbolData &SD) {
uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
- Binding == ELF::STB_WEAK);
+ Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE);
return Binding;
}
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
index 84176dc..dc3d6c3 100644
--- a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -28,3 +28,24 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
return false;
}
+
+// ELF doesn't require relocations to be in any order. We sort by the Offset,
+// just to match gnu as for easier comparison. The use of the Type field is an
+// arbitrary way of making the sort deterministic.
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
+ const ELFRelocationEntry &A = *AP;
+ const ELFRelocationEntry &B = *BP;
+ if (A.Offset != B.Offset)
+ return B.Offset - A.Offset;
+ if (B.Type != A.Type)
+ return A.Type - B.Type;
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
+}
+
+
+void
+MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
+}
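Because the comparator returns B.Offset - A.Offset, array_pod_sort leaves the array in descending offset order, with ties broken by ascending type. A hypothetical illustration with (Offset, Type) pairs:

    //   in:  (0x10, 2) (0x04, 7) (0x10, 1)
    //   out: (0x10, 1) (0x10, 2) (0x04, 7)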
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index bdc4a84..653a1d2 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -32,13 +33,60 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+bool MCELFStreamer::isBundleLocked() const {
+ return getCurrentSectionData()->isBundleLocked();
+}
+
MCELFStreamer::~MCELFStreamer() {
}
+void MCELFStreamer::mergeFragment(MCDataFragment *DF,
+ MCEncodedFragmentWithFixups *EF) {
+ MCAssembler &Assembler = getAssembler();
+
+ if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
+ uint64_t FSize = EF->getContents().size();
+
+ if (FSize > Assembler.getBundleAlignSize())
+ report_fatal_error("Fragment can't be larger than a bundle size");
+
+ uint64_t RequiredBundlePadding = computeBundlePadding(
+ Assembler, EF, DF->getContents().size(), FSize);
+
+ if (RequiredBundlePadding > UINT8_MAX)
+ report_fatal_error("Padding cannot exceed 255 bytes");
+
+ if (RequiredBundlePadding > 0) {
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS);
+
+ EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
+
+ Assembler.writeFragmentPadding(*EF, FSize, OW);
+ VecOS.flush();
+ delete OW;
+
+ DF->getContents().append(Code.begin(), Code.end());
+ }
+ }
+
+ flushPendingLabels(DF, DF->getContents().size());
+
+ for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
+ EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
+ DF->getContents().size());
+ DF->getFixups().push_back(EF->getFixups()[i]);
+ }
+ DF->setHasInstructions(true);
+ DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
+}
+
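A worked example of the padding computation in mergeFragment, assuming 16-byte bundles:

    // DF->getContents().size() == 12  // bytes already pending in the section
    // FSize == 6                      // size of the just-encoded instruction
    // 12 + 6 would cross the 16-byte bundle boundary, so
    // RequiredBundlePadding == 16 - 12 == 4, and four bytes of padding are
    // appended before the instruction's bytes.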
void MCELFStreamer::InitSections(bool NoExecStack) {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -87,13 +135,24 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("invalid assembler flag!");
}
-void MCELFStreamer::ChangeSection(const MCSection *Section,
+// If bundle alignment is used and there are any instructions in the section, it
+// needs to be aligned to at least the bundle size.
+static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
+ MCSection *Section) {
+ if (Section && Assembler.isBundlingEnabled() && Section->hasInstructions() &&
+ Section->getAlignment() < Assembler.getBundleAlignSize())
+ Section->setAlignment(Assembler.getBundleAlignSize());
+}
+
+void MCELFStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
- MCSectionData *CurSection = getCurrentSectionData();
- if (CurSection && CurSection->isBundleLocked())
+ MCSection *CurSection = getCurrentSectionData();
+ if (CurSection && isBundleLocked())
report_fatal_error("Unterminated .bundle_lock when changing a section");
MCAssembler &Asm = getAssembler();
+ // Ensure the previous section gets aligned if necessary.
+ setSectionAlignmentForBundling(Asm, CurSection);
auto *SectionELF = static_cast<const MCSectionELF *>(Section);
const MCSymbol *Grp = SectionELF->getGroup();
if (Grp)
@@ -122,12 +181,11 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user
// provided type).
static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
- unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
- ELF::STT_GNU_IFUNC, ELF::STT_TLS};
- for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) {
- if (T1 == TypeOrdering[i])
+ for (unsigned Type : {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
+ ELF::STT_GNU_IFUNC, ELF::STT_TLS}) {
+ if (T1 == Type)
return T2;
- if (T2 == TypeOrdering[i])
+ if (T2 == Type)
return T1;
}
@@ -143,7 +201,7 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// important for matching the string table that 'as' generates.
IndirectSymbolData ISD;
ISD.Symbol = Symbol;
- ISD.SectionData = getCurrentSectionData();
+ ISD.Section = getCurrentSectionData();
getAssembler().getIndirectSymbols().push_back(ISD);
return true;
}
@@ -171,10 +229,16 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
return false;
case MCSA_NoDeadStrip:
- case MCSA_ELF_TypeGnuUniqueObject:
// Ignore for now.
break;
+ case MCSA_ELF_TypeGnuUniqueObject:
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), ELF::STT_OBJECT));
+ MCELF::SetBinding(SD, ELF::STB_GNU_UNIQUE);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
+ break;
+
case MCSA_Global:
MCELF::SetBinding(SD, ELF::STB_GLOBAL);
SD.setExternal(true);
@@ -253,15 +317,12 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
MCELF::SetType(SD, ELF::STT_OBJECT);
if (MCELF::GetBinding(SD) == ELF_STB_Local) {
- const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
- ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getBSS());
+ MCSection *Section = getAssembler().getContext().getELFSection(
+ ".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
AssignSection(Symbol, Section);
- struct LocalCommon L = {&SD, Size, ByteAlignment};
+ struct LocalCommon L = {Symbol, Size, ByteAlignment};
LocalCommons.push_back(L);
} else {
SD.setCommon(Size, ByteAlignment);
@@ -287,7 +348,7 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
const SMLoc &Loc) {
- if (getCurrentSectionData()->isBundleLocked())
+ if (isBundleLocked())
report_fatal_error("Emitting values inside a locked bundle is forbidden");
fixSymbolsInTLSFixups(Value);
MCObjectStreamer::EmitValueImpl(Value, Size, Loc);
@@ -297,7 +358,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
- if (getCurrentSectionData()->isBundleLocked())
+ if (isBundleLocked())
report_fatal_error("Emitting values inside a locked bundle is forbidden");
MCObjectStreamer::EmitValueToAlignment(ByteAlignment, Value,
ValueSize, MaxBytesToEmit);
@@ -311,9 +372,8 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {
}
void MCELFStreamer::EmitIdent(StringRef IdentString) {
- const MCSection *Comment = getAssembler().getContext().getELFSection(
- ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS,
- SectionKind::getReadOnly(), 1, "");
+ MCSection *Comment = getAssembler().getContext().getELFSection(
+ ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
PushSection();
SwitchSection(Comment);
if (!SeenIdent) {
@@ -422,7 +482,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI);
+ Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
VecOS.flush();
for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
@@ -446,12 +506,21 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
MCDataFragment *DF;
if (Assembler.isBundlingEnabled()) {
- MCSectionData *SD = getCurrentSectionData();
- if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
+ MCSection &Sec = *getCurrentSectionData();
+ if (Assembler.getRelaxAll() && isBundleLocked())
+ // If the -mc-relax-all flag is used and we are bundle-locked, we re-use
+ // the current bundle group.
+ DF = BundleGroups.back();
+ else if (Assembler.getRelaxAll() && !isBundleLocked())
+ // When not in a bundle-locked group and the -mc-relax-all flag is used,
+ // we create a new temporary fragment which will be later merged into
+ // the current fragment.
+ DF = new MCDataFragment();
+ else if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst())
// If we are bundle-locked, we re-use the current fragment.
// The bundle-locking directive ensures this is a new data fragment.
DF = cast<MCDataFragment>(getCurrentFragment());
- else if (!SD->isBundleLocked() && Fixups.size() == 0) {
+ else if (!isBundleLocked() && Fixups.size() == 0) {
// Optimize memory usage by emitting the instruction to a
// MCCompactEncodedInstFragment when not in a bundle-locked group and
// there are no fixups registered.
@@ -463,7 +532,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
DF = new MCDataFragment();
insert(DF);
}
- if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
+ if (Sec.getBundleLockState() == MCSection::BundleLockedAlignToEnd) {
// If this fragment is for a group marked "align_to_end", set a flag
// in the fragment. This can happen after the fragment has already been
// created if there are nested bundle_align groups and an inner one
@@ -473,7 +542,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
// We're now emitting an instruction in a bundle group, so this flag has
// to be turned off.
- SD->setBundleGroupBeforeFirstInst(false);
+ Sec.setBundleGroupBeforeFirstInst(false);
} else {
DF = getOrCreateDataFragment();
}
@@ -485,6 +554,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
}
DF->setHasInstructions(true);
DF->getContents().append(Code.begin(), Code.end());
+
+ if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
+ if (!isBundleLocked()) {
+ mergeFragment(getOrCreateDataFragment(), DF);
+ delete DF;
+ }
+ }
}
void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
@@ -498,59 +574,88 @@ void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
}
void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
- MCSectionData *SD = getCurrentSectionData();
+ MCSection &Sec = *getCurrentSectionData();
// Sanity checks
//
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_lock forbidden when bundling is disabled");
- if (!SD->isBundleLocked())
- SD->setBundleGroupBeforeFirstInst(true);
+ if (!isBundleLocked())
+ Sec.setBundleGroupBeforeFirstInst(true);
- SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
- MCSectionData::BundleLocked);
+ if (getAssembler().getRelaxAll() && !isBundleLocked()) {
+ // TODO: drop the lock state and set directly in the fragment
+ MCDataFragment *DF = new MCDataFragment();
+ BundleGroups.push_back(DF);
+ }
+
+ Sec.setBundleLockState(AlignToEnd ? MCSection::BundleLockedAlignToEnd
+ : MCSection::BundleLocked);
}
void MCELFStreamer::EmitBundleUnlock() {
- MCSectionData *SD = getCurrentSectionData();
+ MCSection &Sec = *getCurrentSectionData();
// Sanity checks
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_unlock forbidden when bundling is disabled");
- else if (!SD->isBundleLocked())
+ else if (!isBundleLocked())
report_fatal_error(".bundle_unlock without matching lock");
- else if (SD->isBundleGroupBeforeFirstInst())
+ else if (Sec.isBundleGroupBeforeFirstInst())
report_fatal_error("Empty bundle-locked group is forbidden");
- SD->setBundleLockState(MCSectionData::NotBundleLocked);
+ // When the -mc-relax-all flag is used, we emit instructions to fragments
+  // stored on a stack. When the bundle unlock is emitted, we pop a fragment
+  // from the stack and merge it into the one below.
+ if (getAssembler().getRelaxAll()) {
+ assert(!BundleGroups.empty() && "There are no bundle groups");
+ MCDataFragment *DF = BundleGroups.back();
+
+ // FIXME: Use BundleGroups to track the lock state instead.
+ Sec.setBundleLockState(MCSection::NotBundleLocked);
+
+ // FIXME: Use more separate fragments for nested groups.
+ if (!isBundleLocked()) {
+ mergeFragment(getOrCreateDataFragment(), DF);
+ BundleGroups.pop_back();
+ delete DF;
+ }
+
+ if (Sec.getBundleLockState() != MCSection::BundleLockedAlignToEnd)
+ getOrCreateDataFragment()->setAlignToBundleEnd(false);
+ } else
+ Sec.setBundleLockState(MCSection::NotBundleLocked);
}
void MCELFStreamer::Flush() {
for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
e = LocalCommons.end();
i != e; ++i) {
- MCSymbolData *SD = i->SD;
+ const MCSymbol &Symbol = *i->Symbol;
uint64_t Size = i->Size;
unsigned ByteAlignment = i->ByteAlignment;
- const MCSymbol &Symbol = SD->getSymbol();
- const MCSection &Section = Symbol.getSection();
+ MCSection &Section = Symbol.getSection();
- MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
- new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
+ getAssembler().registerSection(Section);
+ new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section);
- MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
- SD->setFragment(F);
+ MCFragment *F = new MCFillFragment(0, 0, Size, &Section);
+ Symbol.getData().setFragment(F);
// Update the maximum alignment of the section if necessary.
- if (ByteAlignment > SectData.getAlignment())
- SectData.setAlignment(ByteAlignment);
+ if (ByteAlignment > Section.getAlignment())
+ Section.setAlignment(ByteAlignment);
}
LocalCommons.clear();
}
void MCELFStreamer::FinishImpl() {
+ // Ensure the last section gets aligned if necessary.
+ MCSection *CurSection = getCurrentSectionData();
+ setSectionAlignmentForBundling(getAssembler(), CurSection);
+
EmitFrames(nullptr);
Flush();
@@ -559,7 +664,7 @@ void MCELFStreamer::FinishImpl() {
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll) {
MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
if (RelaxAll)
@@ -591,12 +696,12 @@ void MCELFStreamer::EndCOFFSymbolDef() {
llvm_unreachable("ELF doesn't support this directive");
}
-void MCELFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+void MCELFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("ELF doesn't support this directive");
}
-void MCELFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+void MCELFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("ELF doesn't support this directive");
}
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index 709dc6b..7f048d7 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -89,6 +89,7 @@ void MCExpr::print(raw_ostream &OS) const {
OS << '+';
break;
+ case MCBinaryExpr::AShr: OS << ">>"; break;
case MCBinaryExpr::And: OS << '&'; break;
case MCBinaryExpr::Div: OS << '/'; break;
case MCBinaryExpr::EQ: OS << "=="; break;
@@ -96,6 +97,7 @@ void MCExpr::print(raw_ostream &OS) const {
case MCBinaryExpr::GTE: OS << ">="; break;
case MCBinaryExpr::LAnd: OS << "&&"; break;
case MCBinaryExpr::LOr: OS << "||"; break;
+ case MCBinaryExpr::LShr: OS << ">>"; break;
case MCBinaryExpr::LT: OS << '<'; break;
case MCBinaryExpr::LTE: OS << "<="; break;
case MCBinaryExpr::Mod: OS << '%'; break;
@@ -103,7 +105,6 @@ void MCExpr::print(raw_ostream &OS) const {
case MCBinaryExpr::NE: OS << "!="; break;
case MCBinaryExpr::Or: OS << '|'; break;
case MCBinaryExpr::Shl: OS << "<<"; break;
- case MCBinaryExpr::Shr: OS << ">>"; break;
case MCBinaryExpr::Sub: OS << '-'; break;
case MCBinaryExpr::Xor: OS << '^'; break;
}
@@ -123,7 +124,7 @@ void MCExpr::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCExpr::dump() const {
- print(dbgs());
+ dbgs() << *this;
dbgs() << '\n';
}
#endif
@@ -163,7 +164,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
MCContext &Ctx) {
- return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
+ return Create(Ctx.getOrCreateSymbol(Name), Kind, Ctx);
}
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
@@ -192,6 +193,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_GOTPAGE: return "GOTPAGE";
case VK_GOTPAGEOFF: return "GOTPAGEOFF";
case VK_SECREL: return "SECREL32";
+ case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
case VK_ARM_NONE: return "none";
case VK_ARM_TARGET1: return "target1";
@@ -281,6 +283,18 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Mips_PCREL_HI16: return "PCREL_HI16";
case VK_Mips_PCREL_LO16: return "PCREL_LO16";
case VK_COFF_IMGREL32: return "IMGREL";
+ case VK_Hexagon_PCREL: return "PCREL";
+ case VK_Hexagon_LO16: return "LO16";
+ case VK_Hexagon_HI16: return "HI16";
+ case VK_Hexagon_GPREL: return "GPREL";
+ case VK_Hexagon_GD_GOT: return "GDGOT";
+ case VK_Hexagon_LD_GOT: return "LDGOT";
+ case VK_Hexagon_GD_PLT: return "GDPLT";
+ case VK_Hexagon_LD_PLT: return "LDPLT";
+ case VK_Hexagon_IE: return "IE";
+ case VK_Hexagon_IE_GOT: return "IEGOT";
+ case VK_TPREL: return "tprel";
+ case VK_DTPREL: return "dtprel";
}
llvm_unreachable("Invalid variant kind");
}
@@ -311,6 +325,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("gotpageoff", VK_GOTPAGEOFF)
.Case("imgrel", VK_COFF_IMGREL32)
.Case("secrel32", VK_SECREL)
+ .Case("size", VK_SIZE)
.Case("l", VK_PPC_LO)
.Case("h", VK_PPC_HI)
.Case("ha", VK_PPC_HA)
@@ -404,13 +419,10 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
}
-int64_t MCExpr::evaluateKnownAbsolute(const MCAsmLayout &Layout) const {
- int64_t Res;
- bool Abs =
- evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, true);
- (void)Abs;
- assert(Abs && "Not actually absolute");
- return Res;
+bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr,
+ true);
}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
@@ -433,8 +445,8 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
return true;
}
- bool IsRelocatable = EvaluateAsRelocatableImpl(
- Value, Asm, Layout, nullptr, Addrs, InSet, /*ForceVarExpansion*/ false);
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
// Record the current value.
Res = Value.getConstant();
@@ -443,13 +455,10 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
}
/// \brief Helper method for \see EvaluateSymbolAdd().
-static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
- const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs,
- bool InSet,
- const MCSymbolRefExpr *&A,
- const MCSymbolRefExpr *&B,
- int64_t &Addend) {
+static void AttemptToFoldSymbolOffsetDifference(
+ const MCAssembler *Asm, const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet, const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B, int64_t &Addend) {
if (!A || !B)
return;
@@ -482,15 +491,15 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
if (!Layout)
return;
- const MCSectionData &SecA = *AD.getFragment()->getParent();
- const MCSectionData &SecB = *BD.getFragment()->getParent();
+ const MCSection &SecA = *AD.getFragment()->getParent();
+ const MCSection &SecB = *BD.getFragment()->getParent();
if ((&SecA != &SecB) && !Addrs)
return;
// Eagerly evaluate.
- Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
- Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+ Addend += Layout->getSymbolOffset(A->getSymbol()) -
+ Layout->getSymbolOffset(B->getSymbol());
if (Addrs && (&SecA != &SecB))
Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
@@ -523,13 +532,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
/// They might look redundant, but this function can be used before layout
/// is done (see the object streamer for example) and having the Asm argument
/// lets us avoid relaxations early.
-static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
- const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs,
- bool InSet,
- const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
- const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
- MCValue &Res) {
+static bool
+EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS,
+ const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B,
+ int64_t RHS_Cst, MCValue &Res) {
// FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
// about dealing with modifiers. This will ultimately bite us, one day.
const MCSymbolRefExpr *LHS_A = LHS.getSymA();
@@ -587,21 +594,28 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCFixup *Fixup) const {
MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
- false, /*ForceVarExpansion*/ false);
+ false);
}
-bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
- MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
- false, /*ForceVarExpansion*/ true);
+bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
+ MCAssembler *Assembler = &Layout.getAssembler();
+ return EvaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
+ true);
+}
+
+static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+ if (InSet)
+ return true;
+ if (!Asm)
+ return false;
+ return !Asm->getWriter().isWeak(Sym);
}
bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const MCFixup *Fixup,
- const SectionAddrMap *Addrs, bool InSet,
- bool ForceVarExpansion) const {
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
++stats::MCExprEvaluate;
switch (getKind()) {
@@ -618,28 +632,24 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCSymbol &Sym = SRE->getSymbol();
// Evaluate recursively if this is a variable.
- if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
+ canExpand(Sym, Asm, InSet)) {
+ bool IsMachO = SRE->hasSubsectionsViaSymbols();
if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
- Res, Asm, Layout, Fixup, Addrs, true, ForceVarExpansion)) {
+ Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
+ if (!IsMachO)
+ return true;
+
const MCSymbolRefExpr *A = Res.getSymA();
const MCSymbolRefExpr *B = Res.getSymB();
-
- if (SRE->hasSubsectionsViaSymbols()) {
- // FIXME: This is small hack. Given
- // a = b + 4
- // .long a
- // the OS X assembler will completely drop the 4. We should probably
- // include it in the relocation or produce an error if that is not
- // possible.
- if (!A && !B)
- return true;
- } else {
- if (ForceVarExpansion)
- return true;
- bool IsSymbol = A && A->getSymbol().isDefined();
- if (!IsSymbol)
- return true;
- }
+ // FIXME: This is a small hack. Given
+ // a = b + 4
+ // .long a
+ // the OS X assembler will completely drop the 4. We should probably
+ // include it in the relocation or produce an error if that is not
+ // possible.
+ if (!A && !B)
+ return true;
}
}
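
Two behavioural changes meet in the hunk above. First, the new canExpand() gate stops the evaluator from looking through a weak variable outside a set, since a weak definition may be replaced at link time. Second, the Mach-O subsections-via-symbols quirk is folded into the InSet flag: the recursion now passes InSet || IsMachO, non-Mach-O callers return as soon as the variable evaluates, and the old "drop the addend" FIXME path survives only for Mach-O. A standalone sketch of the expansion gate (an illustration, not the LLVM code itself):

    #include <cstdio>

    // Mirrors canExpand() from the hunk above using plain bools.
    static bool canExpand(bool InSet, bool HaveAsm, bool SymIsWeak) {
      if (InSet)
        return true;        // folding inside a set/assignment is always safe
      if (!HaveAsm)
        return false;       // no assembler: cannot query weakness, stay safe
      return !SymIsWeak;    // weak definitions may be overridden at link time
    }

    int main() {
      std::printf("%d\n", canExpand(false, true, true));  // 0: do not expand
      std::printf("%d\n", canExpand(true, true, true));   // 1: inside a set
    }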
@@ -651,9 +661,8 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion))
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
+ Addrs, InSet))
return false;
switch (AUE->getOpcode()) {
@@ -686,12 +695,10 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion) ||
- !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion))
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
+ Addrs, InSet))
return false;
// We only support a few operations on non-constant expressions, handle
@@ -704,23 +711,22 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
// Negate RHS and add.
return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymB(), RHSValue.getSymA(),
- -RHSValue.getConstant(),
- Res);
+ -RHSValue.getConstant(), Res);
case MCBinaryExpr::Add:
return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymA(), RHSValue.getSymB(),
- RHSValue.getConstant(),
- Res);
+ RHSValue.getConstant(), Res);
}
}
// FIXME: We need target hooks for the evaluation. It may be limited in
- // width, and gas defines the result of comparisons and right shifts
- // differently from Apple as.
+ // width, and gas defines the result of comparisons differently from
+ // Apple as.
int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
int64_t Result = 0;
switch (ABE->getOpcode()) {
+ case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
case MCBinaryExpr::Add: Result = LHS + RHS; break;
case MCBinaryExpr::And: Result = LHS & RHS; break;
case MCBinaryExpr::Div: Result = LHS / RHS; break;
@@ -729,6 +735,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::GTE: Result = LHS >= RHS; break;
case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
case MCBinaryExpr::LOr: Result = LHS || RHS; break;
+ case MCBinaryExpr::LShr: Result = uint64_t(LHS) >> uint64_t(RHS); break;
case MCBinaryExpr::LT: Result = LHS < RHS; break;
case MCBinaryExpr::LTE: Result = LHS <= RHS; break;
case MCBinaryExpr::Mod: Result = LHS % RHS; break;
@@ -736,7 +743,6 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::NE: Result = LHS != RHS; break;
case MCBinaryExpr::Or: Result = LHS | RHS; break;
case MCBinaryExpr::Shl: Result = LHS << RHS; break;
- case MCBinaryExpr::Shr: Result = LHS >> RHS; break;
case MCBinaryExpr::Sub: Result = LHS - RHS; break;
case MCBinaryExpr::Xor: Result = LHS ^ RHS; break;
}
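
The single Shr case is split into AShr and LShr, and the FIXME above no longer mentions right shifts because both semantics are now modelled explicitly instead of being papered over. The two only differ for negative left operands, as this self-contained program shows:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t V = -8;
      // AShr: shift the signed value; the sign bit is replicated on the usual
      // two's-complement targets (implementation-defined before C++20).
      std::printf("%lld\n", (long long)(V >> 1));                     // -4
      // LShr: shift the unsigned bit pattern; zero-filled from the left.
      std::printf("%llu\n", (unsigned long long)(uint64_t(V) >> 1));
    }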
@@ -749,7 +755,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
llvm_unreachable("Invalid assembly expression kind!");
}
-const MCSection *MCExpr::FindAssociatedSection() const {
+MCSection *MCExpr::FindAssociatedSection() const {
switch (getKind()) {
case Target:
// We never look through target specific expressions.
@@ -773,8 +779,8 @@ const MCSection *MCExpr::FindAssociatedSection() const {
case Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
- const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
- const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+ MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
+ MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
// If either section is absolute, return the other.
if (LHS_S == MCSymbol::AbsolutePseudoSection)
@@ -782,6 +788,10 @@ const MCSection *MCExpr::FindAssociatedSection() const {
if (RHS_S == MCSymbol::AbsolutePseudoSection)
return LHS_S;
+ // Not always correct, but probably the best we can do without more context.
+ if (BE->getOpcode() == MCBinaryExpr::Sub)
+ return MCSymbol::AbsolutePseudoSection;
+
// Otherwise, return the first non-null section.
return LHS_S ? LHS_S : RHS_S;
}
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
index d7b80f5..7ef69be 100644
--- a/contrib/llvm/lib/MC/MCInst.cpp
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+void MCOperand::print(raw_ostream &OS) const {
OS << "<MCOperand ";
if (!isValid())
OS << "INVALID";
@@ -34,22 +34,21 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCOperand::dump() const {
- print(dbgs(), nullptr);
+ print(dbgs());
dbgs() << "\n";
}
#endif
-void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+void MCInst::print(raw_ostream &OS) const {
OS << "<MCInst " << getOpcode();
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
OS << " ";
- getOperand(i).print(OS, MAI);
+ getOperand(i).print(OS);
}
OS << ">";
}
-void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
- const MCInstPrinter *Printer,
+void MCInst::dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer,
StringRef Separator) const {
OS << "<MCInst #" << getOpcode();
@@ -59,14 +58,14 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
OS << Separator;
- getOperand(i).print(OS, MAI);
+ getOperand(i).print(OS);
}
OS << ">";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCInst::dump() const {
- print(dbgs(), nullptr);
+ print(dbgs());
dbgs() << "\n";
}
#endif
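
The MCAsmInfo parameter was unused by these printers, so print() and dump_pretty() drop it and dump() simply streams to dbgs(). A hypothetical out-of-tree debugging helper would update like so (a fragment against the new signatures shown above, not a complete program):

    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpOperands(const llvm::MCInst &Inst) {
      for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
        Inst.getOperand(I).print(llvm::errs());  // no MCAsmInfo* argument
        llvm::errs() << '\n';
      }
    }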
diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp
index ba71245..0dc3121 100644
--- a/contrib/llvm/lib/MC/MCInstPrinter.cpp
+++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp
@@ -69,11 +69,11 @@ static bool needsLeadingZero(uint64_t Value)
return false;
}
-format_object1<int64_t> MCInstPrinter::formatDec(const int64_t Value) const {
+format_object<int64_t> MCInstPrinter::formatDec(int64_t Value) const {
return format("%" PRId64, Value);
}
-format_object1<int64_t> MCInstPrinter::formatHex(const int64_t Value) const {
+format_object<int64_t> MCInstPrinter::formatHex(int64_t Value) const {
switch(PrintHexStyle) {
case HexStyle::C:
if (Value < 0)
@@ -96,7 +96,7 @@ format_object1<int64_t> MCInstPrinter::formatHex(const int64_t Value) const {
llvm_unreachable("unsupported print style");
}
-format_object1<uint64_t> MCInstPrinter::formatHex(const uint64_t Value) const {
+format_object<uint64_t> MCInstPrinter::formatHex(uint64_t Value) const {
switch(PrintHexStyle) {
case HexStyle::C:
return format("0x%" PRIx64, Value);
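
format_object1 was one of several fixed-arity helper classes that this release folds into a single variadic format_object template; only the return types change, not the printed output, so call sites keep streaming the result unchanged. For example (a fragment; IP stands for any constructed MCInstPrinter and OS for an llvm::raw_ostream):

    OS << IP.formatDec(42);            // "42"
    OS << IP.formatHex(int64_t(255));  // "0xff" under HexStyle::C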
diff --git a/contrib/llvm/lib/MC/MCInstrDesc.cpp b/contrib/llvm/lib/MC/MCInstrDesc.cpp
new file mode 100644
index 0000000..decc2d8
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCInstrDesc.cpp
@@ -0,0 +1,70 @@
+//===------ llvm/MC/MCInstrDesc.cpp - Instruction Descriptors -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines methods on the MCOperandInfo and MCInstrDesc classes, which
+// are used to describe target instructions and their operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) const {
+ if (ComplexDeprecationInfo)
+ return ComplexDeprecationInfo(MI, STI, Info);
+ if (DeprecatedFeature != -1 && STI.getFeatureBits()[DeprecatedFeature]) {
+ // FIXME: it would be nice to include the subtarget feature here.
+ Info = "deprecated";
+ return true;
+ }
+ return false;
+}
+bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI,
+ const MCRegisterInfo &RI) const {
+ if (isBranch() || isCall() || isReturn() || isIndirectBranch())
+ return true;
+ unsigned PC = RI.getProgramCounter();
+ if (PC == 0)
+ return false;
+ if (hasDefOfPhysReg(MI, PC, RI))
+ return true;
+ // A variadic instruction may define PC in the variable operand list.
+ // There's currently no indication of which entries in a variable
+ // list are defs and which are uses. While that's the case, this function
+ // needs to assume they're defs in order to be conservatively correct.
+ for (int i = NumOperands, e = MI.getNumOperands(); i != e; ++i) {
+ if (MI.getOperand(i).isReg() &&
+ RI.isSubRegisterEq(PC, MI.getOperand(i).getReg()))
+ return true;
+ }
+ return false;
+}
+
+bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg,
+ const MCRegisterInfo *MRI) const {
+ if (const uint16_t *ImpDefs = ImplicitDefs)
+ for (; *ImpDefs; ++ImpDefs)
+ if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs)))
+ return true;
+ return false;
+}
+
+bool MCInstrDesc::hasDefOfPhysReg(const MCInst &MI, unsigned Reg,
+ const MCRegisterInfo &RI) const {
+ for (int i = 0, e = NumDefs; i != e; ++i)
+ if (MI.getOperand(i).isReg() &&
+ RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
+ return true;
+ return hasImplicitDefOfPhysReg(Reg, &RI);
+}
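
MCInstrDesc.cpp is a new file giving these descriptor helpers an out-of-line home in lib/MC, where MC-layer clients such as disassemblers can reach them. mayAffectControlFlow() is deliberately conservative: any explicit, implicit, or variadic-operand definition of the program counter counts as control flow, because variadic operand lists do not distinguish defs from uses. Typical client-side use (a fragment; Desc is assumed to come from MCInstrInfo::get(Inst.getOpcode()) and MRI to be the target's MCRegisterInfo):

    if (Desc.mayAffectControlFlow(Inst, MRI)) {
      // Conservatively end the current code region: this may branch.
    }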
diff --git a/contrib/llvm/lib/MC/MCLinkerOptimizationHint.cpp b/contrib/llvm/lib/MC/MCLinkerOptimizationHint.cpp
index 7739878..2c9c67c 100644
--- a/contrib/llvm/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/contrib/llvm/lib/MC/MCLinkerOptimizationHint.cpp
@@ -9,7 +9,7 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/Support/LEB128.h"
using namespace llvm;
@@ -25,11 +25,9 @@ using namespace llvm;
void MCLOHDirective::Emit_impl(raw_ostream &OutStream,
const MachObjectWriter &ObjWriter,
const MCAsmLayout &Layout) const {
- const MCAssembler &Asm = Layout.getAssembler();
encodeULEB128(Kind, OutStream);
encodeULEB128(Args.size(), OutStream);
for (LOHArgs::const_iterator It = Args.begin(), EndIt = Args.end();
It != EndIt; ++It)
- encodeULEB128(ObjWriter.getSymbolAddress(&Asm.getSymbolData(**It), Layout),
- OutStream);
+ encodeULEB128(ObjWriter.getSymbolAddress(**It, Layout), OutStream);
}
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index a147c3d..6297340 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -39,6 +40,9 @@ private:
/// need for local relocations. False by default.
bool LabelSections;
+ bool DWARFMustBeAtTheEnd;
+ bool CreatedADWARFSection;
+
/// HasSectionLabel - map of which sections have already had a non-local
/// label emitted to them. Used so we don't emit extraneous linker local
/// labels in the middle of the section.
@@ -50,10 +54,10 @@ private:
void EmitDataRegionEnd();
public:
- MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, bool label)
- : MCObjectStreamer(Context, MAB, OS, Emitter),
- LabelSections(label) {}
+ MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label)
+ : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label),
+ DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {}
/// state management
void reset() override {
@@ -64,7 +68,7 @@ public:
/// @name MCStreamer Interface
/// @{
- void ChangeSection(const MCSection *Sect, const MCExpr *Subsect) override;
+ void ChangeSection(MCSection *Sect, const MCExpr *Subsect) override;
void EmitLabel(MCSymbol *Symbol) override;
void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
@@ -94,9 +98,9 @@ public:
}
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override;
- void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size,
+ void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment = 0) override;
void EmitFileDirective(StringRef Filename) override {
@@ -119,14 +123,47 @@ public:
} // end anonymous namespace.
-void MCMachOStreamer::ChangeSection(const MCSection *Section,
+static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
+ // These sections are created by the assembler itself after the end of
+ // the .s file.
+ StringRef SegName = MSec.getSegmentName();
+ StringRef SecName = MSec.getSectionName();
+
+ if (SegName == "__LD" && SecName == "__compact_unwind")
+ return true;
+
+ if (SegName == "__IMPORT") {
+ if (SecName == "__jump_table")
+ return true;
+
+ if (SecName == "__pointers")
+ return true;
+ }
+
+ if (SegName == "__TEXT" && SecName == "__eh_frame")
+ return true;
+
+ if (SegName == "__DATA" && SecName == "__nl_symbol_ptr")
+ return true;
+
+ return false;
+}
+
+void MCMachOStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
// Change the section normally.
- MCObjectStreamer::ChangeSection(Section, Subsection);
+ bool Created = MCObjectStreamer::changeSectionImpl(Section, Subsection);
+ const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section);
+ StringRef SegName = MSec.getSegmentName();
+ if (SegName == "__DWARF")
+ CreatedADWARFSection = true;
+ else if (Created && DWARFMustBeAtTheEnd && !canGoAfterDWARF(MSec))
+ assert(!CreatedADWARFSection && "Creating regular section after DWARF");
+
// Output a linker-local symbol so we don't need section-relative local
// relocations. The linker hates us when we do that.
if (LabelSections && !HasSectionLabel[Section]) {
- MCSymbol *Label = getContext().CreateLinkerPrivateTempSymbol();
+ MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
EmitLabel(Label);
HasSectionLabel[Section] = true;
}
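
Mach-O object files place __DWARF content at the end, so once a DWARF section exists, opening an ordinary section afterwards is a layering bug; the new assertion enforces that, with canGoAfterDWARF() whitelisting the few sections the assembler itself synthesizes after the .s input ends. The whitelist restated compactly (a standalone illustration, not the LLVM code):

    #include <cassert>
    #include <string>

    static bool canGoAfterDWARF(const std::string &Seg,
                                const std::string &Sec) {
      return (Seg == "__LD" && Sec == "__compact_unwind") ||
             (Seg == "__IMPORT" &&
              (Sec == "__jump_table" || Sec == "__pointers")) ||
             (Seg == "__TEXT" && Sec == "__eh_frame") ||
             (Seg == "__DATA" && Sec == "__nl_symbol_ptr");
    }

    int main() {
      assert(canGoAfterDWARF("__TEXT", "__eh_frame"));
      assert(!canGoAfterDWARF("__TEXT", "__text"));
    }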
@@ -171,7 +208,7 @@ void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
if (!getAssembler().getBackend().hasDataInCodeSupport())
return;
// Create a temporary label to mark the start of the data region.
- MCSymbol *Start = getContext().CreateTempSymbol();
+ MCSymbol *Start = getContext().createTempSymbol();
EmitLabel(Start);
// Record the region for the object writer to use.
DataRegionData Data = { Kind, Start, nullptr };
@@ -183,11 +220,11 @@ void MCMachOStreamer::EmitDataRegionEnd() {
if (!getAssembler().getBackend().hasDataInCodeSupport())
return;
std::vector<DataRegionData> &Regions = getAssembler().getDataRegions();
- assert(Regions.size() && "Mismatched .end_data_region!");
+ assert(!Regions.empty() && "Mismatched .end_data_region!");
DataRegionData &Data = Regions.back();
assert(!Data.End && "Mismatched .end_data_region!");
// Create a temporary label to mark the end of the data region.
- Data.End = getContext().CreateTempSymbol();
+ Data.End = getContext().createTempSymbol();
EmitLabel(Data.End);
}
@@ -250,7 +287,7 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// important for matching the string table that 'as' generates.
IndirectSymbolData ISD;
ISD.Symbol = Symbol;
- ISD.SectionData = getCurrentSectionData();
+ ISD.Section = getCurrentSectionData();
getAssembler().getIndirectSymbols().push_back(ISD);
return true;
}
@@ -364,9 +401,9 @@ void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
Symbol, Size, ByteAlignment);
}
-void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
- MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+ getAssembler().registerSection(*Section);
// The symbol may not be present, which only creates the section.
if (!Symbol)
@@ -381,21 +418,21 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
// Emit an align fragment if necessary.
if (ByteAlignment != 1)
- new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section);
- MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ MCFragment *F = new MCFillFragment(0, 0, Size, Section);
SD.setFragment(F);
AssignSection(Symbol, Section);
// Update the maximum alignment on the zero fill section if necessary.
- if (ByteAlignment > SectData.getAlignment())
- SectData.setAlignment(ByteAlignment);
+ if (ByteAlignment > Section->getAlignment())
+ Section->setAlignment(ByteAlignment);
}
// This should always be called with the thread local bss section. Like the
// .zerofill directive this doesn't actually switch sections on us.
-void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+void MCMachOStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
EmitZerofill(Section, Symbol, Size, ByteAlignment);
return;
@@ -408,7 +445,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI);
+ getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
VecOS.flush();
// Add the fixups and data.
@@ -427,13 +464,13 @@ void MCMachOStreamer::FinishImpl() {
// First, scan the symbol table to build a lookup table from fragments to
// defining symbols.
- DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
- for (MCSymbolData &SD : getAssembler().symbols()) {
- if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()) &&
- SD.getFragment()) {
+ DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
+ for (const MCSymbol &Symbol : getAssembler().symbols()) {
+ MCSymbolData &SD = Symbol.getData();
+ if (getAssembler().isSymbolLinkerVisible(Symbol) && SD.getFragment()) {
// An atom defining symbol should never be internal to a fragment.
assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!");
- DefiningSymbolMap[SD.getFragment()] = &SD;
+ DefiningSymbolMap[SD.getFragment()] = &Symbol;
}
}
@@ -441,11 +478,11 @@ void MCMachOStreamer::FinishImpl() {
// symbol.
for (MCAssembler::iterator it = getAssembler().begin(),
ie = getAssembler().end(); it != ie; ++it) {
- MCSymbolData *CurrentAtom = nullptr;
- for (MCSectionData::iterator it2 = it->begin(),
- ie2 = it->end(); it2 != ie2; ++it2) {
- if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
- CurrentAtom = SD;
+ const MCSymbol *CurrentAtom = nullptr;
+ for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2;
+ ++it2) {
+ if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(it2))
+ CurrentAtom = Symbol;
it2->setAtom(CurrentAtom);
}
}
@@ -454,10 +491,11 @@ void MCMachOStreamer::FinishImpl() {
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool DWARFMustBeAtTheEnd,
bool LabelSections) {
- MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, LabelSections);
+ MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE,
+ DWARFMustBeAtTheEnd, LabelSections);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
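
createMachOStreamer() now takes a raw_pwrite_stream, reflecting a theme of this merge: Mach-O emission patches earlier bytes of the output after the body is written, which a plain forward-only raw_ostream cannot express. Construction would look roughly like this (a sketch against the signature above; Ctx, MAB, and CE stand for a prepared MCContext, MCAsmBackend, and MCCodeEmitter, and raw_fd_ostream derives from raw_pwrite_stream in this release):

    std::error_code EC;
    llvm::raw_fd_ostream OS("out.o", EC, llvm::sys::fs::F_None);
    llvm::MCStreamer *S = llvm::createMachOStreamer(
        Ctx, MAB, OS, CE, /*RelaxAll=*/false,
        /*DWARFMustBeAtTheEnd=*/true, /*LabelSections=*/false);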
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index fc56728..eb2d912 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -31,7 +31,7 @@ namespace {
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override {}
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override {}
void EmitGPRel32Value(const MCExpr *Value) override {}
};
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index 858181d..e99f036 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -183,82 +183,60 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
// Debug Information.
DwarfAccelNamesSection =
- Ctx->getMachOSection("__DWARF", "__apple_names",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_names", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "names_begin");
DwarfAccelObjCSection =
- Ctx->getMachOSection("__DWARF", "__apple_objc",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_objc", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "objc_begin");
// 16 character section limit...
DwarfAccelNamespaceSection =
- Ctx->getMachOSection("__DWARF", "__apple_namespac",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_namespac", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "namespac_begin");
DwarfAccelTypesSection =
- Ctx->getMachOSection("__DWARF", "__apple_types",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_types", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "types_begin");
DwarfAbbrevSection =
- Ctx->getMachOSection("__DWARF", "__debug_abbrev",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_abbrev", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_abbrev");
DwarfInfoSection =
- Ctx->getMachOSection("__DWARF", "__debug_info",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_info", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_info");
DwarfLineSection =
- Ctx->getMachOSection("__DWARF", "__debug_line",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_line", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_line");
DwarfFrameSection =
- Ctx->getMachOSection("__DWARF", "__debug_frame",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_frame", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubnames",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_pubnames", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfGnuPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfGnuPubTypesSection =
- Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfStrSection =
- Ctx->getMachOSection("__DWARF", "__debug_str",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "info_string");
DwarfLocSection =
- Ctx->getMachOSection("__DWARF", "__debug_loc",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_debug_loc");
DwarfARangesSection =
- Ctx->getMachOSection("__DWARF", "__debug_aranges",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_aranges", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfRangesSection =
- Ctx->getMachOSection("__DWARF", "__debug_ranges",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- Ctx->getMachOSection("__DWARF", "__debug_macinfo",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "debug_range");
DwarfDebugInlineSection =
- Ctx->getMachOSection("__DWARF", "__debug_inlined",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- StackMapSection =
- Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
+ 0, SectionKind::getMetadata());
TLSExtraDataSection = TLSTLVSection;
}
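
Besides reflowing, this Mach-O hunk threads a new trailing string ("section_info", "info_string", and so on) into getMachOSection(); judging by the companion MCSection changes in this merge, it names a begin symbol for the section so DWARF emission can reference section starts generically instead of through per-target labels. The unused DwarfMacroInfoSection is dropped here as well. A consumer would then do something like this (hypothetical fragment; getBeginSymbol() is assumed from those companion changes):

    // Assumed API: returns the symbol created from "section_info" above.
    llvm::MCSymbol *InfoBegin = DwarfInfoSection->getBeginSymbol();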
@@ -359,6 +337,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
break;
+ case Triple::sparcel:
case Triple::sparc:
if (RelocM == Reloc::PIC_) {
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -416,83 +395,54 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
// ELF
- BSSSection =
- Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getBSS());
-
- TextSection =
- Ctx->getELFSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC,
- SectionKind::getText());
-
- DataSection =
- Ctx->getELFSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+ TextSection = Ctx->getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
+
+ DataSection = Ctx->getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
ReadOnlySection =
- Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
+ Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
TLSDataSection =
- Ctx->getELFSection(".tdata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_TLS |
- ELF::SHF_WRITE,
- SectionKind::getThreadData());
-
- TLSBSSSection =
- Ctx->getELFSection(".tbss", ELF::SHT_NOBITS,
- ELF::SHF_ALLOC | ELF::SHF_TLS |
- ELF::SHF_WRITE,
- SectionKind::getThreadBSS());
-
- DataRelSection =
- Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- DataRelLocalSection =
- Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRelLocal());
-
- DataRelROSection =
- Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRel());
-
- DataRelROLocalSection =
- Ctx->getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRelLocal());
+ Ctx->getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE);
+
+ TLSBSSSection = Ctx->getELFSection(
+ ".tbss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE);
+
+ DataRelSection = Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ DataRelLocalSection = Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ DataRelROSection = Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ DataRelROLocalSection = Ctx->getELFSection(
+ ".data.rel.ro.local", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE);
MergeableConst4Section =
- Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst4());
+ Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 4, "");
MergeableConst8Section =
- Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst8());
+ Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 8, "");
MergeableConst16Section =
- Ctx->getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_MERGE,
- SectionKind::getMergeableConst16());
+ Ctx->getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 16, "");
- StaticCtorSection =
- Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ StaticCtorSection = Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
- StaticDtorSection =
- Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ StaticDtorSection = Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
// Exception Handling Sections.
@@ -500,162 +450,112 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
// it contains relocatable pointers. In PIC mode, this is probably a big
// runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
- LSDASection =
- Ctx->getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
+ LSDASection = Ctx->getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
COFFDebugSymbolsSection = nullptr;
// Debug Info Sections.
- DwarfAbbrevSection =
- Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ DwarfAbbrevSection = Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ "section_abbrev");
DwarfInfoSection =
- Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfLineSection =
- Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfFrameSection =
- Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, "section_info");
+ DwarfLineSection = Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0);
+ DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0);
DwarfPubNamesSection =
- Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0);
DwarfPubTypesSection =
- Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0);
DwarfGnuPubNamesSection =
- Ctx->getELFSection(".debug_gnu_pubnames", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_gnu_pubnames", ELF::SHT_PROGBITS, 0);
DwarfGnuPubTypesSection =
- Ctx->getELFSection(".debug_gnu_pubtypes", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_gnu_pubtypes", ELF::SHT_PROGBITS, 0);
DwarfStrSection =
- Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
- ELF::SHF_MERGE | ELF::SHF_STRINGS,
- SectionKind::getMergeable1ByteCString());
- DwarfLocSection =
- Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+ DwarfLocSection = Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0);
DwarfARangesSection =
- Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0);
DwarfRangesSection =
- Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range");
// DWARF5 Experimental Debug Info
// Accelerator Tables
DwarfAccelNamesSection =
- Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0, "names_begin");
DwarfAccelObjCSection =
- Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfAccelNamespaceSection =
- Ctx->getELFSection(".apple_namespaces", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0, "objc_begin");
+ DwarfAccelNamespaceSection = Ctx->getELFSection(
+ ".apple_namespaces", ELF::SHT_PROGBITS, 0, "namespac_begin");
DwarfAccelTypesSection =
- Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0, "types_begin");
// Fission Sections
DwarfInfoDWOSection =
- Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0);
DwarfTypesDWOSection =
- Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, 0);
DwarfAbbrevDWOSection =
- Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0);
DwarfStrDWOSection =
- Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
- ELF::SHF_MERGE | ELF::SHF_STRINGS,
- SectionKind::getMergeable1ByteCString());
+ Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
DwarfLineDWOSection =
- Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0);
DwarfLocDWOSection =
- Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0, "skel_loc");
DwarfStrOffDWOSection =
- Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0);
DwarfAddrSection =
- Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
StackMapSection =
- Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getMetadata());
-
+ Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
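
The ELF rewrite removes the SectionKind argument entirely: getELFSection() now derives the kind from the SHT type and SHF flags, mergeable sections pass an explicit entry size and group name instead, and the same begin-symbol names seen in the Mach-O hunk ("section_abbrev", "names_begin", ...) appear on the debug sections that need them. Post-change call shapes (a fragment; Ctx as above, with the usual MCContext and ELF headers included):

    auto *RO = Ctx->getELFSection(".rodata", ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC);
    auto *C8 = Ctx->getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE,
                                  /*EntrySize=*/8, /*Group=*/"");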
-
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
CommDirectiveSupportsAlignment = true;
// COFF
- BSSSection =
- Ctx->getCOFFSection(".bss",
- COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getBSS());
- TextSection =
- Ctx->getCOFFSection(".text",
- (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT
- : (COFF::SectionCharacteristics)0) |
- COFF::IMAGE_SCN_CNT_CODE |
- COFF::IMAGE_SCN_MEM_EXECUTE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getText());
- DataSection =
- Ctx->getCOFFSection(".data",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- ReadOnlySection =
- Ctx->getCOFFSection(".rdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ BSSSection = Ctx->getCOFFSection(
+ ".bss", COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ TextSection = Ctx->getCOFFSection(
+ ".text",
+ (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0) |
+ COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection = Ctx->getCOFFSection(
+ ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection = Ctx->getCOFFSection(
+ ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
- Ctx->getCOFFSection(".CRT$XCU",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ Ctx->getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
StaticDtorSection =
- Ctx->getCOFFSection(".CRT$XTX",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ Ctx->getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
} else {
- StaticCtorSection =
- Ctx->getCOFFSection(".ctors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- StaticDtorSection =
- Ctx->getCOFFSection(".dtors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ StaticCtorSection = Ctx->getCOFFSection(
+ ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ StaticDtorSection = Ctx->getCOFFSection(
+ ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
@@ -675,163 +575,149 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// Debug info.
COFFDebugSymbolsSection =
- Ctx->getCOFFSection(".debug$S",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DwarfAbbrevSection =
- Ctx->getCOFFSection(".debug_abbrev",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfInfoSection =
- Ctx->getCOFFSection(".debug_info",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLineSection =
- Ctx->getCOFFSection(".debug_line",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfFrameSection =
- Ctx->getCOFFSection(".debug_frame",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getCOFFSection(".debug_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- Ctx->getCOFFSection(".debug_pubtypes",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfGnuPubNamesSection =
- Ctx->getCOFFSection(".debug_gnu_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfGnuPubTypesSection =
- Ctx->getCOFFSection(".debug_gnu_pubtypes",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrSection =
- Ctx->getCOFFSection(".debug_str",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLocSection =
- Ctx->getCOFFSection(".debug_loc",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfARangesSection =
- Ctx->getCOFFSection(".debug_aranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfRangesSection =
- Ctx->getCOFFSection(".debug_ranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- Ctx->getCOFFSection(".debug_macinfo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfInfoDWOSection =
- Ctx->getCOFFSection(".debug_info.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfTypesDWOSection =
- Ctx->getCOFFSection(".debug_types.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAbbrevDWOSection =
- Ctx->getCOFFSection(".debug_abbrev.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrDWOSection =
- Ctx->getCOFFSection(".debug_str.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLineDWOSection =
- Ctx->getCOFFSection(".debug_line.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLocDWOSection =
- Ctx->getCOFFSection(".debug_loc.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrOffDWOSection =
- Ctx->getCOFFSection(".debug_str_offsets.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DwarfAddrSection =
- Ctx->getCOFFSection(".debug_addr",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DwarfAccelNamesSection =
- Ctx->getCOFFSection(".apple_names",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelNamespaceSection =
- Ctx->getCOFFSection(".apple_namespaces",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelTypesSection =
- Ctx->getCOFFSection(".apple_types",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelObjCSection =
- Ctx->getCOFFSection(".apple_objc",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug$S", COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
- DrectveSection =
- Ctx->getCOFFSection(".drectve",
- COFF::IMAGE_SCN_LNK_INFO |
- COFF::IMAGE_SCN_LNK_REMOVE,
- SectionKind::getMetadata());
-
- PDataSection =
- Ctx->getCOFFSection(".pdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-
- XDataSection =
- Ctx->getCOFFSection(".xdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-
- TLSDataSection =
- Ctx->getCOFFSection(".tls$",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ DwarfAbbrevSection = Ctx->getCOFFSection(
+ ".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_abbrev");
+ DwarfInfoSection = Ctx->getCOFFSection(
+ ".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_info");
+ DwarfLineSection = Ctx->getCOFFSection(
+ ".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_line");
+
+ DwarfFrameSection = Ctx->getCOFFSection(
+ ".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection = Ctx->getCOFFSection(
+ ".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection = Ctx->getCOFFSection(
+ ".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfGnuPubNamesSection = Ctx->getCOFFSection(
+ ".debug_gnu_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfGnuPubTypesSection = Ctx->getCOFFSection(
+ ".debug_gnu_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection = Ctx->getCOFFSection(
+ ".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "info_string");
+ DwarfLocSection = Ctx->getCOFFSection(
+ ".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_debug_loc");
+ DwarfARangesSection = Ctx->getCOFFSection(
+ ".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection = Ctx->getCOFFSection(
+ ".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "debug_range");
+ DwarfInfoDWOSection = Ctx->getCOFFSection(
+ ".debug_info.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_info_dwo");
+ DwarfTypesDWOSection = Ctx->getCOFFSection(
+ ".debug_types.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_types_dwo");
+ DwarfAbbrevDWOSection = Ctx->getCOFFSection(
+ ".debug_abbrev.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_abbrev_dwo");
+ DwarfStrDWOSection = Ctx->getCOFFSection(
+ ".debug_str.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "skel_string");
+ DwarfLineDWOSection = Ctx->getCOFFSection(
+ ".debug_line.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocDWOSection = Ctx->getCOFFSection(
+ ".debug_loc.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "skel_loc");
+ DwarfStrOffDWOSection = Ctx->getCOFFSection(
+ ".debug_str_offsets.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfAddrSection = Ctx->getCOFFSection(
+ ".debug_addr",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "addr_sec");
+ DwarfAccelNamesSection = Ctx->getCOFFSection(
+ ".apple_names",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "names_begin");
+ DwarfAccelNamespaceSection = Ctx->getCOFFSection(
+ ".apple_namespaces",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "namespac_begin");
+ DwarfAccelTypesSection = Ctx->getCOFFSection(
+ ".apple_types",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "types_begin");
+ DwarfAccelObjCSection = Ctx->getCOFFSection(
+ ".apple_objc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "objc_begin");
+
+ DrectveSection = Ctx->getCOFFSection(
+ ".drectve", COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+
+ PDataSection = Ctx->getCOFFSection(
+ ".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+
+ XDataSection = Ctx->getCOFFSection(
+ ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+
+ TLSDataSection = Ctx->getCOFFSection(
+ ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
@@ -868,7 +754,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
Arch == Triple::aarch64 ||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
Arch == Triple::UnknownArch) &&
- (TT.isOSDarwin() || TT.isOSBinFormatMachO())) {
+ TT.isOSBinFormatMachO()) {
Env = IsMachO;
InitMachOMCObjectFileInfo(TT);
} else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
@@ -882,9 +768,9 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
}
}
-const MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
+MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
- SectionKind::getMetadata(), 0, utostr(Hash));
+ 0, utostr(Hash));
}
void MCObjectFileInfo::InitEHFrameSection() {
@@ -898,9 +784,7 @@ void MCObjectFileInfo::InitEHFrameSection() {
SectionKind::getReadOnly());
else if (Env == IsELF)
EHFrameSection =
- Ctx->getELFSection(".eh_frame", EHSectionType,
- EHSectionFlags,
- SectionKind::getDataRel());
+ Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
else
EHFrameSection =
Ctx->getCOFFSection(".eh_frame",
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 08fe501..176f5e7 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -20,21 +20,17 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_)
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter_)
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
*TAB.createObjectWriter(OS), OS)),
CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {}
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_,
- MCAssembler *_Assembler)
- : MCStreamer(Context), Assembler(_Assembler), CurSectionData(nullptr),
- EmitEHFrame(true), EmitDebugFrame(false) {}
-
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
delete &Assembler->getEmitter();
@@ -42,7 +38,7 @@ MCObjectStreamer::~MCObjectStreamer() {
delete Assembler;
}
-void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
+void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
if (PendingLabels.size()) {
if (!F) {
F = new MCDataFragment();
@@ -51,17 +47,40 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
}
for (MCSymbolData *SD : PendingLabels) {
SD->setFragment(F);
- SD->setOffset(0);
+ SD->setOffset(FOffset);
}
PendingLabels.clear();
}
}
+bool MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
+ const MCSymbol *Lo,
+ unsigned Size) {
+ // Must have symbol data.
+ if (!Assembler->hasSymbolData(*Hi) || !Assembler->hasSymbolData(*Lo))
+ return false;
+ auto &HiD = Assembler->getSymbolData(*Hi);
+ auto &LoD = Assembler->getSymbolData(*Lo);
+
+ // Must both be assigned to the same (valid) fragment.
+ if (!HiD.getFragment() || HiD.getFragment() != LoD.getFragment())
+ return false;
+
+ // Must be a data fragment.
+ if (!isa<MCDataFragment>(HiD.getFragment()))
+ return false;
+
+ assert(HiD.getOffset() >= LoD.getOffset() &&
+ "Expected Hi to be greater than Lo");
+ EmitIntValue(HiD.getOffset() - LoD.getOffset(), Size);
+ return true;
+}
+
void MCObjectStreamer::reset() {
if (Assembler)
Assembler->reset();
CurSectionData = nullptr;
- CurInsertionPoint = MCSectionData::iterator();
+ CurInsertionPoint = MCSection::iterator();
EmitEHFrame = true;
EmitDebugFrame = false;
PendingLabels.clear();
@@ -92,7 +111,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
- if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
+ if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() &&
+ F->hasInstructions())) {
F = new MCDataFragment();
insert(F);
}
@@ -123,19 +143,19 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
return;
}
DF->getFixups().push_back(
- MCFixup::Create(DF->getContents().size(), Value,
+ MCFixup::create(DF->getContents().size(), Value,
MCFixup::getKindForSize(Size, false), Loc));
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
// We need to create a local symbol to avoid relocations.
- Frame.Begin = getContext().CreateTempSymbol();
+ Frame.Begin = getContext().createTempSymbol();
EmitLabel(Frame.Begin);
}
void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
- Frame.End = getContext().CreateTempSymbol();
+ Frame.End = getContext().createTempSymbol();
EmitLabel(Frame.End);
}
@@ -148,7 +168,9 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
// If there is a current fragment, mark the symbol as pointing into it.
// Otherwise queue the label and set its fragment pointer when we emit the
// next fragment.
- if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) {
+ auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (F && !(getAssembler().isBundlingEnabled() &&
+ getAssembler().getRelaxAll())) {
SD.setFragment(F);
SD.setOffset(F->getContents().size());
} else {
@@ -179,12 +201,18 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
report_fatal_error("This file format doesn't support weak aliases.");
}
-void MCObjectStreamer::ChangeSection(const MCSection *Section,
+void MCObjectStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
+ changeSectionImpl(Section, Subsection);
+}
+
+bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
+ const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
flushPendingLabels(nullptr);
- CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+ bool Created = getAssembler().registerSection(*Section);
+ CurSectionData = Section;
int64_t IntSubsection = 0;
if (Subsection &&
@@ -193,7 +221,8 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section,
if (IntSubsection < 0 || IntSubsection > 8192)
report_fatal_error("Subsection number out of range");
CurInsertionPoint =
- CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
+ CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
+ return Created;
}
void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -201,12 +230,16 @@ void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
MCStreamer::EmitAssignment(Symbol, Value);
}
+bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
+ return Sec.hasInstructions();
+}
+
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
MCStreamer::EmitInstruction(Inst, STI);
- MCSectionData *SD = getCurrentSectionData();
- SD->setHasInstructions(true);
+ MCSection *Sec = getCurrentSectionData();
+ Sec->setHasInstructions(true);
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
@@ -225,7 +258,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
// group. We want to emit all such instructions into the same data
// fragment.
if (Assembler.getRelaxAll() ||
- (Assembler.isBundlingEnabled() && SD->isBundleLocked())) {
+ (Assembler.isBundlingEnabled() && Sec->isBundleLocked())) {
MCInst Relaxed;
getAssembler().getBackend().relaxInstruction(Inst, Relaxed);
while (getAssembler().getBackend().mayNeedRelaxation(Relaxed))
@@ -240,6 +273,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
+ if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled())
+ llvm_unreachable("All instructions should have already been relaxed");
+
// Always create a new, separate fragment here, because its size can change
// during relaxation.
MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI);
@@ -247,7 +283,7 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
SmallString<128> Code;
raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups(),
+ getAssembler().getEmitter().encodeInstruction(Inst, VecOS, IF->getFixups(),
STI);
VecOS.flush();
IF->getContents().append(Code.begin(), Code.end());
@@ -348,8 +384,9 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
// Update the maximum alignment on the current section if necessary.
- if (ByteAlignment > getCurrentSectionData()->getAlignment())
- getCurrentSectionData()->setAlignment(ByteAlignment);
+ MCSection *CurSec = getCurrentSection().first;
+ if (ByteAlignment > CurSec->getAlignment())
+ CurSec->setAlignment(ByteAlignment);
}
void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
@@ -366,7 +403,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
return false;
}
- MCSymbol *CurrentPos = getContext().CreateTempSymbol();
+ MCSymbol *CurrentPos = getContext().createTempSymbol();
EmitLabel(CurrentPos);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Ref =
@@ -384,7 +421,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
@@ -393,7 +430,7 @@ void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
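
The two changes above work together: flushPendingLabels now records each pending label's offset inside its fragment, which is what lets the new emitAbsoluteSymbolDiff fold Hi - Lo into a plain integer whenever both labels landed in the same data fragment. A standalone sketch of that folding rule, with hypothetical Fragment/Label types standing in for the MC ones:

// Editor's sketch (standalone model, not LLVM API): once labels carry a
// (fragment, offset) pair, Hi - Lo can be folded to a constant whenever both
// labels were attached to the same data fragment.
#include <cstdint>
#include <iostream>

struct Fragment { int Id; };

struct Label {
  const Fragment *Frag; // set when the label is flushed to a fragment
  uint64_t Offset;      // offset within Frag (the new FOffset parameter)
};

// Returns true and writes the folded value when the difference is known now;
// false means the caller must fall back to a fixup/relocation.
bool emitAbsoluteSymbolDiff(const Label &Hi, const Label &Lo, uint64_t &Out) {
  if (!Hi.Frag || Hi.Frag != Lo.Frag)
    return false; // different (or not yet assigned) fragments
  if (Hi.Offset < Lo.Offset)
    return false; // model: expect Hi to follow Lo, as the assert does above
  Out = Hi.Offset - Lo.Offset;
  return true;
}

int main() {
  Fragment F{0};
  Label Lo{&F, 8}, Hi{&F, 24};
  uint64_t Size;
  if (emitAbsoluteSymbolDiff(Hi, Lo, Size))
    std::cout << "folded to " << Size << "\n"; // prints "folded to 16"
}
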
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
index 94d7cd6..5cc629b 100644
--- a/contrib/llvm/lib/MC/MCObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -17,11 +17,9 @@ using namespace llvm;
MCObjectWriter::~MCObjectWriter() {
}
-bool
-MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
- const MCSymbolRefExpr *A,
- const MCSymbolRefExpr *B,
- bool InSet) const {
+bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
+ const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
+ bool InSet) const {
// Modified symbol references cannot be resolved.
if (A->getKind() != MCSymbolRefExpr::VK_None ||
B->getKind() != MCSymbolRefExpr::VK_None)
@@ -29,7 +27,7 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
const MCSymbol &SA = A->getSymbol();
const MCSymbol &SB = B->getSymbol();
- if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ if (SA.isUndefined() || SB.isUndefined())
return false;
const MCSymbolData &DataA = Asm.getSymbolData(SA);
@@ -37,20 +35,17 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
if(!DataA.getFragment() || !DataB.getFragment())
return false;
- return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
- *DataB.getFragment(),
- InSet,
- false);
+ return IsSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *DataB.getFragment(),
+ InSet, false);
}
-bool
-MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
- const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
- const MCSection &SecB = FB.getParent()->getSection();
+bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
+ bool InSet, bool IsPCRel) const {
+ const MCSection &SecA = SymA.getSection();
+ const MCSection &SecB = *FB.getParent();
// On ELF and COFF A - B is absolute if A and B are in the same section.
return &SecA == &SecB;
}
+
+bool MCObjectWriter::isWeak(const MCSymbol &) const { return false; }
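
The simplified implementation keeps only the rule stated in the comment: on ELF and COFF, A - B is absolute exactly when both symbols are defined in the same section. A minimal sketch of that predicate, under hypothetical Section/Symbol types:

// Editor's sketch (standalone, not LLVM API): the difference A - B is
// resolvable at assembly time exactly when both symbols are defined and
// live in the same section; otherwise the linker may move one of them.
struct Section { int Id; };

struct Symbol {
  const Section *Sec = nullptr; // nullptr models an undefined symbol
};

bool differenceIsAbsolute(const Symbol &A, const Symbol &B) {
  if (!A.Sec || !B.Sec)
    return false;        // undefined symbols can never be resolved here
  return A.Sec == B.Sec; // same section: their distance is fixed
}
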
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index 5c8ec66..b983d99 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -21,7 +21,7 @@
#include <cstdlib>
using namespace llvm;
-AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index e2a4fc1..1e805fd 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -111,8 +111,8 @@ struct ParseStatementInfo {
/// \brief The concrete assembly parser instance.
class AsmParser : public MCAsmParser {
- AsmParser(const AsmParser &) LLVM_DELETED_FUNCTION;
- void operator=(const AsmParser &) LLVM_DELETED_FUNCTION;
+ AsmParser(const AsmParser &) = delete;
+ void operator=(const AsmParser &) = delete;
private:
AsmLexer Lexer;
MCContext &Ctx;
@@ -147,6 +147,9 @@ private:
/// Boolean tracking whether macro substitution is enabled.
unsigned MacrosEnabledFlag : 1;
+ /// \brief Keeps track of how many .macro directives have been instantiated.
+ unsigned NumOfMacroInstantiations;
+
/// Flag tracking whether any errors have been encountered.
unsigned HadError : 1;
@@ -175,7 +178,7 @@ private:
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
- virtual ~AsmParser();
+ ~AsmParser() override;
bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
@@ -184,6 +187,10 @@ public:
ExtensionDirectiveMap[Directive] = Handler;
}
+ void addAliasForDirective(StringRef Directive, StringRef Alias) override {
+ DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
+ }
+
public:
/// @name MCAsmParser Interface
/// {
@@ -247,7 +254,7 @@ private:
ArrayRef<MCAsmMacroParameter> Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
- ArrayRef<MCAsmMacroArgument> A,
+ ArrayRef<MCAsmMacroArgument> A, bool EnableAtPseudoVariable,
const SMLoc &L);
/// \brief Are macros enabled in the parser?
@@ -319,6 +326,9 @@ private:
bool parseAssignment(StringRef Name, bool allow_redef,
bool NoDeadStrip = false);
+ unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind);
+
bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
@@ -339,8 +349,8 @@ private:
DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB,
- DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
- DK_ELSEIF, DK_ELSE, DK_ENDIF,
+ DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFNES, DK_IFDEF, DK_IFNDEF,
+ DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF,
DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
@@ -435,8 +445,8 @@ private:
bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".ifc" or ".ifnc", depending on ExpectEqual.
bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
- // ".ifeqs"
- bool parseDirectiveIfeqs(SMLoc DirectiveLoc);
+ // ".ifeqs" or ".ifnes", depending on ExpectEqual.
+ bool parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual);
// ".ifdef" or ".ifndef", depending on expect_defined
bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
@@ -486,10 +496,10 @@ extern MCAsmParserExtension *createCOFFAsmParser();
enum { DEFAULT_ADDRSPACE = 0 };
-AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
- const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- PlatformParser(nullptr), CurBuffer(_SM.getMainFileID()),
+AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI)
+ : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
+ PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0),
AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
// Save the old handler.
@@ -500,7 +510,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
// Initialize the platform / file format parser.
- switch (_Ctx.getObjectFileInfo()->getObjectFileType()) {
+ switch (Ctx.getObjectFileInfo()->getObjectFileType()) {
case MCObjectFileInfo::IsCOFF:
PlatformParser.reset(createCOFFAsmParser());
break;
@@ -515,6 +525,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
PlatformParser->Initialize(*this);
initializeDirectiveKindMap();
+
+ NumOfMacroInstantiations = 0;
}
AsmParser::~AsmParser() {
@@ -618,12 +630,13 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
- MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ MCSymbol *SectionStartSym = getContext().createTempSymbol();
getStreamer().EmitLabel(SectionStartSym);
- auto InsertResult = getContext().addGenDwarfSection(
- getStreamer().getCurrentSection().first);
- assert(InsertResult.second && ".text section should not have debug info yet");
- InsertResult.first->second.first = SectionStartSym;
+ MCSection *Sec = getStreamer().getCurrentSection().first;
+ bool InsertResult = getContext().addGenDwarfSection(Sec);
+ assert(InsertResult && ".text section should not have debug info yet");
+ (void)InsertResult;
+ Sec->setBeginSymbol(SectionStartSym);
getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective(
0, StringRef(), getContext().getMainFileName()));
}
@@ -786,7 +799,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
if (Lexer.getMAI().getDollarIsPC()) {
// This is a '$' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
- MCSymbol *Sym = Ctx.CreateTempSymbol();
+ MCSymbol *Sym = Ctx.createTempSymbol();
Out.EmitLabel(Sym);
Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
getContext());
@@ -843,7 +856,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
}
- MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
@@ -881,7 +894,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
if (IDVal == "f" || IDVal == "b") {
MCSymbol *Sym =
- Ctx.GetDirectionalLocalSymbol(IntVal, IDVal == "b");
+ Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
@@ -902,7 +915,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Dot: {
// This is a '.' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
- MCSymbol *Sym = Ctx.CreateTempSymbol();
+ MCSymbol *Sym = Ctx.createTempSymbol();
Out.EmitLabel(Sym);
Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
EndLoc = Lexer.getTok().getEndLoc();
@@ -1063,8 +1076,8 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
return false;
}
-static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
- MCBinaryExpr::Opcode &Kind) {
+unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
+ MCBinaryExpr::Opcode &Kind) {
switch (K) {
default:
return 0; // not a binop.
@@ -1116,7 +1129,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::Shl;
return 4;
case AsmToken::GreaterGreater:
- Kind = MCBinaryExpr::Shr;
+ Kind = MAI.shouldUseLogicalShr() ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
return 4;
// High Intermediate Precedence: +, -
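
Making getBinOpPrecedence a member lets it consult MAI, because '>>' genuinely differs by target: some assemblers treat it as a logical shift, others as an arithmetic one. A standalone sketch of the two evaluations (the arithmetic case relies on the usual sign-extending behavior of '>>' on negative signed values):

// Editor's sketch (standalone): why '>>' cannot be evaluated one way for all
// targets. On a negative 64-bit value the two shifts disagree.
#include <cstdint>
#include <iostream>

int64_t evalShr(int64_t A, unsigned B, bool UseLogicalShr) {
  if (UseLogicalShr)
    return (int64_t)((uint64_t)A >> B); // LShr: zero-fills the high bits
  return A >> B;                        // AShr: replicates the sign bit
}

int main() {
  std::cout << evalShr(-16, 2, true) << '\n';  // 4611686018427387900
  std::cout << evalShr(-16, 2, false) << '\n'; // -4
}
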
@@ -1244,9 +1257,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_IFC:
return parseDirectiveIfc(IDLoc, true);
case DK_IFEQS:
- return parseDirectiveIfeqs(IDLoc);
+ return parseDirectiveIfeqs(IDLoc, true);
case DK_IFNC:
return parseDirectiveIfc(IDLoc, false);
+ case DK_IFNES:
+ return parseDirectiveIfeqs(IDLoc, false);
case DK_IFDEF:
return parseDirectiveIfdef(IDLoc, true);
case DK_IFNDEF:
@@ -1295,9 +1310,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
IDVal.size(), RewrittenLabel));
IDVal = RewrittenLabel;
}
- Sym = getContext().GetOrCreateSymbol(IDVal);
+ Sym = getContext().getOrCreateSymbol(IDVal);
} else
- Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
Sym->redefineIfPossible();
@@ -1758,7 +1773,8 @@ static bool isIdentifierChar(char c) {
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
- ArrayRef<MCAsmMacroArgument> A, const SMLoc &L) {
+ ArrayRef<MCAsmMacroArgument> A,
+ bool EnableAtPseudoVariable, const SMLoc &L) {
unsigned NParameters = Parameters.size();
bool HasVararg = NParameters ? Parameters.back().Vararg : false;
if ((!IsDarwin || NParameters != 0) && NParameters != A.size())
@@ -1824,36 +1840,47 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
Pos += 2;
} else {
unsigned I = Pos + 1;
- while (isIdentifierChar(Body[I]) && I + 1 != End)
+
+ // Check for the \@ pseudo-variable.
+ if (EnableAtPseudoVariable && Body[I] == '@' && I + 1 != End)
++I;
+ else
+ while (isIdentifierChar(Body[I]) && I + 1 != End)
+ ++I;
const char *Begin = Body.data() + Pos + 1;
StringRef Argument(Begin, I - (Pos + 1));
unsigned Index = 0;
- for (; Index < NParameters; ++Index)
- if (Parameters[Index].Name == Argument)
- break;
- if (Index == NParameters) {
- if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
- Pos += 3;
- else {
- OS << '\\' << Argument;
- Pos = I;
- }
+ if (Argument == "@") {
+ OS << NumOfMacroInstantiations;
+ Pos += 2;
} else {
- bool VarargParameter = HasVararg && Index == (NParameters - 1);
- for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
- ie = A[Index].end();
- it != ie; ++it)
- // We expect no quotes around the string's contents when
- // parsing for varargs.
- if (it->getKind() != AsmToken::String || VarargParameter)
- OS << it->getString();
- else
- OS << it->getStringContents();
-
- Pos += 1 + Argument.size();
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index].Name == Argument)
+ break;
+
+ if (Index == NParameters) {
+ if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
+ Pos += 3;
+ else {
+ OS << '\\' << Argument;
+ Pos = I;
+ }
+ } else {
+ bool VarargParameter = HasVararg && Index == (NParameters - 1);
+ for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
+ ie = A[Index].end();
+ it != ie; ++it)
+ // We expect no quotes around the string's contents when
+ // parsing for varargs.
+ if (it->getKind() != AsmToken::String || VarargParameter)
+ OS << it->getString();
+ else
+ OS << it->getStringContents();
+
+ Pos += 1 + Argument.size();
+ }
}
}
// Update the scan point.
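
The restructured loop above adds one case ahead of the ordinary parameter lookup: \@ expands to NumOfMacroInstantiations, giving each macro body a counter it can use for unique local labels. A standalone sketch of just that substitution, assuming a hypothetical expandBody helper in place of the full parser:

// Editor's sketch (standalone, not the parser itself): a minimal expander
// that replaces the \@ pseudo-variable with a running instantiation count,
// the way NumOfMacroInstantiations is spliced in above.
#include <iostream>
#include <string>

static unsigned NumOfMacroInstantiations = 0;

std::string expandBody(const std::string &Body) {
  std::string Out;
  for (size_t Pos = 0; Pos < Body.size(); ++Pos) {
    if (Body[Pos] == '\\' && Pos + 1 < Body.size() && Body[Pos + 1] == '@') {
      Out += std::to_string(NumOfMacroInstantiations);
      ++Pos; // skip the '@'
    } else {
      Out += Body[Pos];
    }
  }
  return Out;
}

int main() {
  // Each instantiation gets a distinct suffix, e.g. for unique local labels.
  for (int I = 0; I < 2; ++I) {
    std::cout << expandBody("label_\\@: nop\n"); // label_0:, then label_1:
    ++NumOfMacroInstantiations;
  }
}
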
@@ -2111,7 +2138,7 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
StringRef Body = M->Body;
raw_svector_ostream OS(Buf);
- if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc()))
+ if (expandMacro(OS, Body, M->Parameters, A, true, getTok().getLoc()))
return true;
// We include the .endmacro in the buffer as our cue to exit the macro
@@ -2127,6 +2154,8 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size());
ActiveMacros.push_back(MI);
+ ++NumOfMacroInstantiations;
+
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
@@ -2189,7 +2218,7 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
// Validate that the LHS is allowed to be a variable (either it has not been
// used as a symbol, or it is an absolute symbol).
- MCSymbol *Sym = getContext().LookupSymbol(Name);
+ MCSymbol *Sym = getContext().lookupSymbol(Name);
if (Sym) {
// Diagnose assignment to a label.
//
@@ -2218,7 +2247,7 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
}
return false;
} else
- Sym = getContext().GetOrCreateSymbol(Name);
+ Sym = getContext().getOrCreateSymbol(Name);
Sym->setRedefinable(allow_redef);
@@ -2791,7 +2820,7 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
- if (getContext().getGenDwarfForAssembly() == true)
+ if (getContext().getGenDwarfForAssembly())
Error(DirectiveLoc,
"input can't have .file dwarf directives when -g is "
"used to generate dwarf debug info for assembly code");
@@ -3158,7 +3187,7 @@ bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
if (parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (IsPersonality)
getStreamer().EmitCFIPersonality(Sym, Encoding);
@@ -3275,7 +3304,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
while (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (Parameters.size() && Parameters.back().Vararg)
+ if (!Parameters.empty() && Parameters.back().Vararg)
return Error(Lexer.getLoc(),
"Vararg parameter '" + Parameters.back().Name +
"' should be last one in the list of parameters.");
@@ -3672,7 +3701,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (parseIdentifier(Name))
return Error(Loc, "expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
if (Sym->isTemporary())
@@ -3705,7 +3734,7 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -3943,9 +3972,12 @@ bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
/// parseDirectiveIfeqs
/// ::= .ifeqs string1, string2
-bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) {
if (Lexer.isNot(AsmToken::String)) {
- TokError("expected string parameter for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected string parameter for '.ifeqs' directive");
+ else
+ TokError("expected string parameter for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3954,7 +3986,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
Lex();
if (Lexer.isNot(AsmToken::Comma)) {
- TokError("expected comma after first string for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected comma after first string for '.ifeqs' directive");
+ else
+ TokError("expected comma after first string for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3962,7 +3997,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
Lex();
if (Lexer.isNot(AsmToken::String)) {
- TokError("expected string parameter for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected string parameter for '.ifeqs' directive");
+ else
+ TokError("expected string parameter for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3972,7 +4010,7 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
- TheCondState.CondMet = String1 == String2;
+ TheCondState.CondMet = ExpectEqual == (String1 == String2);
TheCondState.Ignore = !TheCondState.CondMet;
return false;
@@ -3993,7 +4031,7 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
Lex();
- MCSymbol *Sym = getContext().LookupSymbol(Name);
+ MCSymbol *Sym = getContext().lookupSymbol(Name);
if (expect_defined)
TheCondState.CondMet = (Sym && !Sym->isUndefined());
@@ -4219,6 +4257,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".ifc"] = DK_IFC;
DirectiveKindMap[".ifeqs"] = DK_IFEQS;
DirectiveKindMap[".ifnc"] = DK_IFNC;
+ DirectiveKindMap[".ifnes"] = DK_IFNES;
DirectiveKindMap[".ifdef"] = DK_IFDEF;
DirectiveKindMap[".ifndef"] = DK_IFNDEF;
DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
@@ -4362,7 +4401,8 @@ bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
while (Count--) {
- if (expandMacro(OS, M->Body, None, None, getTok().getLoc()))
+ // Note that the AtPseudoVariable is disabled for instantiations of .rep(t).
+ if (expandMacro(OS, M->Body, None, None, false, getTok().getLoc()))
return true;
}
instantiateMacroLikeBody(M, DirectiveLoc, OS);
@@ -4401,7 +4441,9 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
raw_svector_ostream OS(Buf);
for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
- if (expandMacro(OS, M->Body, Parameter, *i, getTok().getLoc()))
+ // Note that the AtPseudoVariable is enabled for instantiations of .irp.
+ // This is undocumented, but GAS seems to support it.
+ if (expandMacro(OS, M->Body, Parameter, *i, true, getTok().getLoc()))
return true;
}
@@ -4448,7 +4490,9 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
MCAsmMacroArgument Arg;
Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1)));
- if (expandMacro(OS, M->Body, Parameter, Arg, getTok().getLoc()))
+ // Note that the AtPseudoVariable is enabled for instantiations of .irpc.
+ // This is undocumented, but GAS seems to support it.
+ if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc()))
return true;
}
@@ -4594,7 +4638,7 @@ bool AsmParser::parseMSInlineAsm(
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
- OutputConstraints.push_back('=' + Operand.getConstraint().str());
+ OutputConstraints.push_back(("=" + Operand.getConstraint()).str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
} else {
InputDecls.push_back(OpDecl);
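
With the new ExpectEqual parameter, one routine serves both .ifeqs and .ifnes, and the whole semantic difference collapses into the final comparison ExpectEqual == (String1 == String2). A standalone sketch of that condition:

// Editor's sketch (standalone): the shared .ifeqs/.ifnes condition. One
// boolean parameter folds the two directives into a single comparison.
#include <cassert>
#include <string>

bool condMet(bool ExpectEqual, const std::string &S1, const std::string &S2) {
  return ExpectEqual == (S1 == S2);
}

int main() {
  assert(condMet(true, "a", "a"));   // .ifeqs "a","a" -> taken
  assert(!condMet(true, "a", "b"));  // .ifeqs "a","b" -> skipped
  assert(condMet(false, "a", "b"));  // .ifnes "a","b" -> taken
  assert(!condMet(false, "a", "a")); // .ifnes "a","a" -> skipped
}
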
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 18bdb03..82f7f22 100644
--- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -272,7 +272,7 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().EmitSymbolAttribute(Sym, Attr);
@@ -398,7 +398,7 @@ bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
if (getParser().parseIdentifier(SymbolName))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
getStreamer().BeginCOFFSymbolDef(Sym);
@@ -446,7 +446,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
getStreamer().EmitCOFFSecRel32(Symbol);
@@ -461,7 +461,7 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
getStreamer().EmitCOFFSectionIndex(Symbol);
@@ -524,7 +524,7 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
getStreamer().EmitWinCFIStartProc(Symbol);
@@ -568,7 +568,7 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID);
+ MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID);
Lex();
getStreamer().EmitWinEHHandler(handler, unwind, except);
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 3ea745e..dc664e8 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -407,7 +407,7 @@ bool DarwinAsmParser::parseDirectiveDesc(StringRef, SMLoc) {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.desc' directive");
@@ -444,7 +444,7 @@ bool DarwinAsmParser::parseDirectiveIndirectSymbol(StringRef, SMLoc Loc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in .indirect_symbol directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
if (Sym->isTemporary())
@@ -519,7 +519,7 @@ bool DarwinAsmParser::parseDirectiveLsym(StringRef, SMLoc) {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.lsym' directive");
@@ -626,7 +626,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.secure_log_unique' directive");
- if (getContext().getSecureLogUsed() != false)
+ if (getContext().getSecureLogUsed())
return Error(IDLoc, ".secure_log_unique specified multiple times");
// Get the secure log path.
@@ -695,7 +695,7 @@ bool DarwinAsmParser::parseDirectiveTBSS(StringRef, SMLoc) {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -778,7 +778,7 @@ bool DarwinAsmParser::parseDirectiveZerofill(StringRef, SMLoc) {
return TokError("expected identifier in directive");
// handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(IDStr);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e302004..87b15ff 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -174,7 +174,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().EmitSymbolAttribute(Sym, Attr);
@@ -199,8 +199,7 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
return true;
}
- getStreamer().SwitchSection(getContext().getELFSection(
- Section, Type, Flags, Kind),
+ getStreamer().SwitchSection(getContext().getELFSection(Section, Type, Flags),
Subsection);
return false;
@@ -210,7 +209,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -269,40 +268,6 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
return false;
}
-static SectionKind computeSectionKind(unsigned Flags, unsigned ElemSize) {
- if (Flags & ELF::SHF_EXECINSTR)
- return SectionKind::getText();
- if (Flags & ELF::SHF_TLS)
- return SectionKind::getThreadData();
- if (Flags & ELF::SHF_MERGE) {
- if (Flags & ELF::SHF_STRINGS) {
- switch (ElemSize) {
- default:
- break;
- case 1:
- return SectionKind::getMergeable1ByteCString();
- case 2:
- return SectionKind::getMergeable2ByteCString();
- case 4:
- return SectionKind::getMergeable4ByteCString();
- }
- } else {
- switch (ElemSize) {
- default:
- break;
- case 4:
- return SectionKind::getMergeableConst4();
- case 8:
- return SectionKind::getMergeableConst8();
- case 16:
- return SectionKind::getMergeableConst16();
- }
- }
- }
-
- return SectionKind::getDataRel();
-}
-
static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
unsigned flags = 0;
@@ -413,6 +378,8 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
unsigned Flags = 0;
const MCExpr *Subsection = nullptr;
bool UseLastGroup = false;
+ StringRef UniqueStr;
+ int64_t UniqueID = ~0;
// Set the defaults first.
if (SectionName == ".fini" || SectionName == ".init" ||
@@ -497,6 +464,22 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
return TokError("Linkage must be 'comdat'");
}
}
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (getParser().parseIdentifier(UniqueStr))
+ return TokError("expected identifier in directive");
+ if (UniqueStr != "unique")
+ return TokError("expected 'unique'");
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected commma");
+ Lex();
+ if (getParser().parseAbsoluteExpression(UniqueID))
+ return true;
+ if (UniqueID < 0)
+ return TokError("unique id must be positive");
+ if (!isUInt<32>(UniqueID) || UniqueID == ~0U)
+ return TokError("unique id is too large");
+ }
}
}
@@ -544,22 +527,19 @@ EndStmt:
}
}
- SectionKind Kind = computeSectionKind(Flags, Size);
- const MCSection *ELFSection = getContext().getELFSection(
- SectionName, Type, Flags, Kind, Size, GroupName);
+ MCSection *ELFSection = getContext().getELFSection(SectionName, Type, Flags,
+ Size, GroupName, UniqueID);
getStreamer().SwitchSection(ELFSection, Subsection);
if (getContext().getGenDwarfForAssembly()) {
- auto &Sections = getContext().getGenDwarfSectionSyms();
- auto InsertResult = Sections.insert(
- std::make_pair(ELFSection, std::make_pair(nullptr, nullptr)));
- if (InsertResult.second) {
+ bool InsertResult = getContext().addGenDwarfSection(ELFSection);
+ if (InsertResult) {
if (getContext().getDwarfVersion() <= 2)
Warning(loc, "DWARF2 only supports one section per compilation unit");
- MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol();
+ MCSymbol *SectionStartSymbol = getContext().createTempSymbol();
getStreamer().EmitLabel(SectionStartSymbol);
- InsertResult.first->second.first = SectionStartSymbol;
+ ELFSection->setBeginSymbol(SectionStartSymbol);
}
}
@@ -600,7 +580,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// NOTE the comma is optional in all cases. It is only documented as being
// optional in the first case, however, GAS will silently treat the comma as
@@ -679,8 +659,8 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
if (AliasName.find('@') == StringRef::npos)
return TokError("expected a '@' in the name");
- MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
getStreamer().EmitAssignment(Alias, Value);
@@ -697,9 +677,7 @@ bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) {
Lex();
- const MCSection *Note =
- getContext().getELFSection(".note", ELF::SHT_NOTE, 0,
- SectionKind::getReadOnly());
+ MCSection *Note = getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
getStreamer().PushSection();
getStreamer().SwitchSection(Note);
@@ -731,9 +709,9 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+ MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName);
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().EmitWeakReference(Alias, Sym);
return false;
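
The new trailing clause accepts ".section name, ..., unique, N" and validates N before it becomes the section's UniqueID. A standalone sketch of those range checks, reusing the diagnostics from the hunk above:

// Editor's sketch (standalone): the range checks behind the new
// ".section name, ..., unique, N" clause. ~0U is reserved as the
// "not unique" marker, so it is rejected along with anything over 32 bits.
#include <cstdint>
#include <string>

bool validateUniqueID(int64_t ID, std::string &Err) {
  if (ID < 0) {
    Err = "unique id must be positive";
    return false;
  }
  if (ID > int64_t(UINT32_MAX) || uint32_t(ID) == ~0U) {
    Err = "unique id is too large";
    return false;
  }
  return true;
}
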
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
index ccf4a7d..04f932b 100644
--- a/contrib/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -8,8 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -17,6 +19,90 @@ using namespace llvm;
// MCSection
//===----------------------------------------------------------------------===//
+MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
+ : Begin(Begin), HasInstructions(false), Variant(V), Kind(K) {}
+
+MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
+ if (!End)
+ End = Ctx.createTempSymbol("sec_end", true);
+ return End;
+}
+
+bool MCSection::hasEnded() const { return End && End->isInSection(); }
+
MCSection::~MCSection() {
}
+void MCSection::setBundleLockState(BundleLockStateType NewState) {
+ if (NewState == NotBundleLocked) {
+ if (BundleLockNestingDepth == 0) {
+ report_fatal_error("Mismatched bundle_lock/unlock directives");
+ }
+ if (--BundleLockNestingDepth == 0) {
+ BundleLockState = NotBundleLocked;
+ }
+ return;
+ }
+
+ // If any of the directives is an align_to_end directive, the whole nested
+ // group is align_to_end. So don't downgrade from align_to_end to just locked.
+ if (BundleLockState != BundleLockedAlignToEnd) {
+ BundleLockState = NewState;
+ }
+ ++BundleLockNestingDepth;
+}
+
+MCSection::iterator
+MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
+ if (Subsection == 0 && SubsectionFragmentMap.empty())
+ return end();
+
+ SmallVectorImpl<std::pair<unsigned, MCFragment *>>::iterator MI =
+ std::lower_bound(SubsectionFragmentMap.begin(),
+ SubsectionFragmentMap.end(),
+ std::make_pair(Subsection, (MCFragment *)nullptr));
+ bool ExactMatch = false;
+ if (MI != SubsectionFragmentMap.end()) {
+ ExactMatch = MI->first == Subsection;
+ if (ExactMatch)
+ ++MI;
+ }
+ iterator IP;
+ if (MI == SubsectionFragmentMap.end())
+ IP = end();
+ else
+ IP = MI->second;
+ if (!ExactMatch && Subsection != 0) {
+ // The GNU as documentation claims that subsections have an alignment of 4,
+ // although this appears not to be the case.
+ MCFragment *F = new MCDataFragment();
+ SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
+ getFragmentList().insert(IP, F);
+ F->setParent(this);
+ }
+
+ return IP;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MCSection::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSection";
+ OS << " Fragments:[\n ";
+ for (auto it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin())
+ OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>";
+}
+#endif
+
+MCSection::iterator MCSection::begin() { return Fragments.begin(); }
+
+MCSection::iterator MCSection::end() { return Fragments.end(); }
+
+MCSection::reverse_iterator MCSection::rbegin() { return Fragments.rbegin(); }
+
+MCSection::reverse_iterator MCSection::rend() { return Fragments.rend(); }
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index a29bb97..3cd8453 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -24,6 +24,9 @@ MCSectionELF::~MCSectionELF() {} // anchor.
bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
+ if (isUnique())
+ return false;
+
// FIXME: Does .section .bss/.data/.text work everywhere??
if (Name == ".text" || Name == ".data" ||
(Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS()))
@@ -144,6 +147,10 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
printName(OS, Group->getName());
OS << ",comdat";
}
+
+ if (isUnique())
+ OS << ",unique," << UniqueID;
+
OS << '\n';
if (Subsection)
@@ -157,13 +164,3 @@ bool MCSectionELF::UseCodeAlign() const {
bool MCSectionELF::isVirtualSection() const {
return getType() == ELF::SHT_NOBITS;
}
-
-unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
- if (Kind.isMergeable1ByteCString()) return 1;
- if (Kind.isMergeable2ByteCString()) return 2;
- if (Kind.isMergeable4ByteCString()) return 4;
- if (Kind.isMergeableConst4()) return 4;
- if (Kind.isMergeableConst8()) return 8;
- if (Kind.isMergeableConst16()) return 16;
- return 0;
-}
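
Unique sections also change the textual output: the .section directive can never be omitted for them, since several distinct sections may now share one name and only the ",unique,<id>" suffix distinguishes them. A standalone sketch of that printing (flags and attributes elided for brevity):

// Editor's sketch (standalone): two distinct sections with the same name
// stay distinguishable in assembly output only through their unique ids.
#include <iostream>
#include <string>

void printSwitch(std::ostream &OS, const std::string &Name, bool IsUnique,
                 unsigned UniqueID) {
  OS << "\t.section\t" << Name;
  if (IsUnique)
    OS << ",unique," << UniqueID;
  OS << '\n';
}

int main() {
  printSwitch(std::cout, ".text", true, 1); // .section .text,unique,1
  printSwitch(std::cout, ".text", true, 2); // a second, distinct .text
}
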
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
index 46beda4..c9f1591 100644
--- a/contrib/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -70,8 +70,10 @@ ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC)
};
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
- unsigned TAA, unsigned reserved2, SectionKind K)
- : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ unsigned TAA, unsigned reserved2, SectionKind K,
+ MCSymbol *Begin)
+ : MCSection(SV_MachO, K, Begin), TypeAndAttributes(TAA),
+ Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
"Segment or section string too long");
for (unsigned i = 0; i != 16; ++i) {
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index f11ee66..9e0cc6b 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -14,8 +14,10 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
#include "llvm/Support/ErrorHandling.h"
@@ -144,7 +146,7 @@ void MCStreamer::EmitZeros(uint64_t NumBytes) {
unsigned MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
StringRef Directory,
StringRef Filename, unsigned CUID) {
- return getContext().GetDwarfFile(Directory, Filename, FileNo, CUID);
+ return getContext().getDwarfFile(Directory, Filename, FileNo, CUID);
}
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -161,7 +163,7 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) {
if (!Table.getLabel()) {
StringRef Prefix = Context.getAsmInfo()->getPrivateGlobalPrefix();
Table.setLabel(
- Context.GetOrCreateSymbol(Prefix + "line_table_start" + Twine(CUID)));
+ Context.getOrCreateSymbol(Prefix + "line_table_start" + Twine(CUID)));
}
return Table.getLabel();
}
@@ -186,7 +188,7 @@ void MCStreamer::InitSections(bool NoExecStack) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
-void MCStreamer::AssignSection(MCSymbol *Symbol, const MCSection *Section) {
+void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) {
if (Section)
Symbol->setSection(*Section);
else
@@ -250,7 +252,7 @@ void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
MCSymbol *MCStreamer::EmitCFICommon() {
EnsureValidDwarfFrame();
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
return Label;
}
@@ -397,7 +399,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End)
report_fatal_error("Starting a function before ending the previous one!");
- MCSymbol *StartProc = getContext().CreateTempSymbol();
+ MCSymbol *StartProc = getContext().createTempSymbol();
EmitLabel(StartProc);
WinFrameInfos.push_back(new WinEH::FrameInfo(Symbol, StartProc));
@@ -409,7 +411,7 @@ void MCStreamer::EmitWinCFIEndProc() {
if (CurrentWinFrameInfo->ChainedParent)
report_fatal_error("Not all chained regions terminated!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
CurrentWinFrameInfo->End = Label;
}
@@ -417,7 +419,7 @@ void MCStreamer::EmitWinCFIEndProc() {
void MCStreamer::EmitWinCFIStartChained() {
EnsureValidWinFrameInfo();
- MCSymbol *StartProc = getContext().CreateTempSymbol();
+ MCSymbol *StartProc = getContext().createTempSymbol();
EmitLabel(StartProc);
WinFrameInfos.push_back(new WinEH::FrameInfo(CurrentWinFrameInfo->Function,
@@ -430,7 +432,7 @@ void MCStreamer::EmitWinCFIEndChained() {
if (!CurrentWinFrameInfo->ChainedParent)
report_fatal_error("End of a chained region outside a chained region!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
CurrentWinFrameInfo->End = Label;
@@ -461,7 +463,7 @@ void MCStreamer::EmitWinEHHandlerData() {
void MCStreamer::EmitWinCFIPushReg(unsigned Register) {
EnsureValidWinFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register);
@@ -477,7 +479,7 @@ void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) {
if (Offset > 240)
report_fatal_error("Frame offset must be less than or equal to 240!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst =
@@ -493,7 +495,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size) {
if (Size & 7)
report_fatal_error("Misaligned stack allocation!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst = Win64EH::Instruction::Alloc(Label, Size);
@@ -505,7 +507,7 @@ void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) {
if (Offset & 7)
report_fatal_error("Misaligned saved register offset!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst =
@@ -518,7 +520,7 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
if (Offset & 0x0F)
report_fatal_error("Misaligned saved vector register offset!");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst =
@@ -531,7 +533,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code) {
if (CurrentWinFrameInfo->Instructions.size() > 0)
report_fatal_error("If present, PushMachFrame must be the first UOP");
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
WinEH::Instruction Inst = Win64EH::Instruction::PushMachFrame(Label, Code);
@@ -541,7 +543,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code) {
void MCStreamer::EmitWinCFIEndProlog() {
EnsureValidWinFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
+ MCSymbol *Label = getContext().createTempSymbol();
EmitLabel(Label);
CurrentWinFrameInfo->PrologEnd = Label;
@@ -638,9 +640,9 @@ void MCStreamer::EmitCOFFSymbolType(int Type) {}
void MCStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
-void MCStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {}
-void MCStreamer::ChangeSection(const MCSection *, const MCExpr *) {}
+void MCStreamer::ChangeSection(MCSection *, const MCExpr *) {}
void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
void MCStreamer::EmitBytes(StringRef Data) {}
void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
@@ -661,3 +663,29 @@ void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
void MCStreamer::FinishImpl() {}
void MCStreamer::EmitBundleUnlock() {}
+
+void MCStreamer::SwitchSection(MCSection *Section, const MCExpr *Subsection) {
+ assert(Section && "Cannot switch to a null section!");
+ MCSectionSubPair curSection = SectionStack.back().first;
+ SectionStack.back().second = curSection;
+ if (MCSectionSubPair(Section, Subsection) != curSection) {
+ SectionStack.back().first = MCSectionSubPair(Section, Subsection);
+ assert(!Section->hasEnded() && "Section already ended");
+ ChangeSection(Section, Subsection);
+ MCSymbol *Sym = Section->getBeginSymbol();
+ if (Sym && !Sym->isInSection())
+ EmitLabel(Sym);
+ }
+}
+
+MCSymbol *MCStreamer::endSection(MCSection *Section) {
+ // TODO: keep track of the last subsection so that this symbol appears in the
+ // correct place.
+ MCSymbol *Sym = Section->getEndSymbol(Context);
+ if (Sym->isInSection())
+ return Sym;
+
+ SwitchSection(Section);
+ EmitLabel(Sym);
+ return Sym;
+}
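
SwitchSection and the new endSection cooperate through per-section begin/end symbols: the begin label is emitted on first entry into the section, and the end label exactly once, on demand. A standalone model of the end-symbol half, with hypothetical Symbol/Section/Streamer types:

// Editor's sketch (standalone model, not LLVM API): sections lazily create
// an end symbol, and endSection emits its label only if it has not been
// placed yet, so repeated calls return the same fixed position.
#include <memory>
#include <string>
#include <vector>

struct Symbol {
  std::string Name;
  bool InSection; // set once the label has been emitted somewhere
};

struct Section {
  std::string Name;
  std::unique_ptr<Symbol> End; // created lazily, like getEndSymbol(Ctx)
  Symbol *getEndSymbol() {
    if (!End)
      End.reset(new Symbol{Name + "_end", false});
    return End.get();
  }
  bool hasEnded() const { return End && End->InSection; }
};

struct Streamer {
  std::vector<std::string> Output;
  void emitLabel(Symbol &S) {
    S.InSection = true;
    Output.push_back(S.Name + ":");
  }
  Symbol *endSection(Section &Sec) {
    Symbol *Sym = Sec.getEndSymbol();
    if (Sym->InSection)
      return Sym;    // already placed; emitting it again would move it
    emitLabel(*Sym); // the real code switches to Sec before emitting
    return Sym;
  }
};
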
diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
index b8e42bd..6abdd3a 100644
--- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -35,7 +35,7 @@ MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
}
void
-MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef C, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF,
ArrayRef<SubtargetFeatureKV> PD,
const SubtargetInfoKV *ProcSched,
@@ -46,6 +46,7 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
const unsigned *OC,
const unsigned *FP) {
TargetTriple = TT;
+ CPU = C;
ProcFeatures = PF;
ProcDesc = PD;
ProcSchedModels = ProcSched;
@@ -62,14 +63,19 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
/// ToggleFeature - Toggle a feature and returns the re-computed feature
/// bits. This version does not change the implied bits.
-uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+FeatureBitset MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+ FeatureBits.flip(FB);
+ return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
FeatureBits ^= FB;
return FeatureBits;
}
/// ToggleFeature - Toggle a feature and returns the re-computed feature
/// bits. This version will also change all implied bits.
-uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
SubtargetFeatures Features;
FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
return FeatureBits;
@@ -92,9 +98,10 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
const SubtargetInfoKV *Found =
std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
- errs() << "'" << CPU
- << "' is not a recognized processor for this target"
- << " (ignoring processor)\n";
+ if (CPU != "help") // Don't error if the user asked for help.
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
return MCSchedModel::GetDefaultSchedModel();
}
assert(Found->Value && "Missing processor SchedModel value");
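
Feature bits have outgrown a single uint64_t, so both ToggleFeature overloads now return a FeatureBitset: one flips a single numbered bit, the other XORs in a whole mask. A standalone sketch using std::bitset (the 192-bit width is illustrative only):

// Editor's sketch (standalone): the two toggle styles over a wide bitset,
// mirroring the pair of overloads in the hunk above.
#include <bitset>

using FeatureBitset = std::bitset<192>; // width is illustrative

FeatureBitset FeatureBits;

FeatureBitset toggleFeature(unsigned Bit) {
  FeatureBits.flip(Bit); // single feature, addressed by index
  return FeatureBits;
}

FeatureBitset toggleFeature(const FeatureBitset &Mask) {
  FeatureBits ^= Mask;   // whole mask at once
  return FeatureBits;
}
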
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
index 2416525..ddc3814 100644
--- a/contrib/llvm/lib/MC/MCSymbol.cpp
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -14,8 +14,7 @@
using namespace llvm;
// Sentinel value for the absolute pseudo section.
-const MCSection *MCSymbol::AbsolutePseudoSection =
- reinterpret_cast<const MCSection *>(1);
+MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
static bool isAcceptableChar(char C) {
if ((C < 'a' || C > 'z') &&
@@ -39,29 +38,11 @@ static bool NameNeedsQuoting(StringRef Str) {
return false;
}
-const MCSymbol &MCSymbol::AliasedSymbol() const {
- const MCSymbol *S = this;
- while (S->isVariable()) {
- const MCExpr *Value = S->getVariableValue();
- if (Value->getKind() != MCExpr::SymbolRef)
- return *S;
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
- S = &Ref->getSymbol();
- }
- return *S;
-}
-
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
this->Value = Value;
-
- // Variables should always be marked as in the same "section" as the value.
- const MCSection *Section = Value->FindAssociatedSection();
- if (Section)
- setSection(*Section);
- else
- setUndefined();
+ this->Section = nullptr;
}
void MCSymbol::print(raw_ostream &OS) const {
@@ -69,6 +50,10 @@ void MCSymbol::print(raw_ostream &OS) const {
// some targets support quoting names with funny characters. If the name
// contains a funny character, then print it quoted.
StringRef Name = getName();
+ if (Name.empty()) {
+ OS << "\"\"";
+ return;
+ }
if (!NameNeedsQuoting(Name)) {
OS << Name;
return;
@@ -88,7 +73,5 @@ void MCSymbol::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MCSymbol::dump() const {
- print(dbgs());
-}
+void MCSymbol::dump() const { dbgs() << *this; }
#endif
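
With AliasedSymbol() gone, setVariableValue no longer eagerly copies a section out of the value; a variable symbol's section is instead found on demand by walking the chain of symbol references it points at. A standalone model of that lazy walk, under hypothetical Section/Symbol types:

// Editor's sketch (standalone model, not LLVM API): resolve a variable
// symbol's section lazily by following A = B = C ... to the final target,
// instead of caching the section at assignment time.
struct Section;

struct Symbol {
  Section *Sec = nullptr;        // direct definition, if any
  const Symbol *Value = nullptr; // set for variable symbols (A = B)

  Section *getSection() const {
    const Symbol *S = this;
    while (S->Value) // walk the reference chain to its end
      S = S->Value;
    return S->Sec;   // may still be null: the target is undefined
  }
};
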
diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp
index 9dfc56e..495a2b6 100644
--- a/contrib/llvm/lib/MC/MCValue.cpp
+++ b/contrib/llvm/lib/MC/MCValue.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+void MCValue::print(raw_ostream &OS) const {
if (isAbsolute()) {
OS << getConstant();
return;
@@ -26,11 +26,11 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
if (getRefKind())
OS << ':' << getRefKind() << ':';
- getSymA()->print(OS);
+ OS << *getSymA();
if (getSymB()) {
OS << " - ";
- getSymB()->print(OS);
+ OS << *getSymB();
}
if (getConstant())
@@ -39,7 +39,7 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCValue::dump() const {
- print(dbgs(), nullptr);
+ print(dbgs());
}
#endif
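
MCValue::print drops its unused MCAsmInfo parameter, and symbol printing shrinks from getSymA()->print(OS) to OS << *getSymA(). The enabling idiom is a stream operator defined once in terms of print(); a self-contained toy showing the shape, with stand-in types rather than the MC classes:

    #include <iostream>

    struct Symbol { // stand-in for MCSymbol
      const char *Name;
      void print(std::ostream &OS) const { OS << Name; }
    };

    // Define stream insertion once in terms of print(); every call site
    // can then use << instead of an explicit print(OS) call.
    std::ostream &operator<<(std::ostream &OS, const Symbol &S) {
      S.print(OS);
      return OS;
    }

    int main() {
      Symbol A{"_main"};
      std::cout << A << '\n'; // prints _main
    }
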
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
index dfadb3c..f87ea67 100644
--- a/contrib/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -153,7 +153,7 @@ static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
return;
MCContext &context = streamer.getContext();
- MCSymbol *Label = context.CreateTempSymbol();
+ MCSymbol *Label = context.createTempSymbol();
streamer.EmitValueToAlignment(4);
streamer.EmitLabel(Label);
@@ -224,16 +224,14 @@ void UnwindEmitter::Emit(MCStreamer &Streamer) const {
// Emit the unwind info structs first.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
- const MCSection *XData =
- getXDataSection(CFI->Function, Context);
+ MCSection *XData = getXDataSection(CFI->Function, Context);
Streamer.SwitchSection(XData);
EmitUnwindInfo(Streamer, CFI);
}
// Now emit RUNTIME_FUNCTION entries.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
- const MCSection *PData =
- getPDataSection(CFI->Function, Context);
+ MCSection *PData = getPDataSection(CFI->Function, Context);
Streamer.SwitchSection(PData);
EmitRuntimeFunction(Streamer, CFI);
}
@@ -244,8 +242,7 @@ void UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
// Switch sections (the static function above is meant to be called from
// here and from Emit()).
MCContext &context = Streamer.getContext();
- const MCSection *xdataSect =
- getXDataSection(info->Function, context);
+ MCSection *xdataSect = getXDataSection(info->Function, context);
Streamer.SwitchSection(xdataSect);
llvm::EmitUnwindInfo(Streamer, info);
diff --git a/contrib/llvm/lib/MC/MCWinEH.cpp b/contrib/llvm/lib/MC/MCWinEH.cpp
index 47eaf0f..d5d9ead 100644
--- a/contrib/llvm/lib/MC/MCWinEH.cpp
+++ b/contrib/llvm/lib/MC/MCWinEH.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
#include "llvm/Support/COFF.h"
@@ -24,9 +25,10 @@ namespace WinEH {
/// associated with that comdat. If the code described is not in the main .text
/// section, make a new section for it. Otherwise use the main unwind info
/// section.
-static const MCSection *getUnwindInfoSection(
- StringRef SecName, const MCSectionCOFF *UnwindSec, const MCSymbol *Function,
- MCContext &Context) {
+static MCSection *getUnwindInfoSection(StringRef SecName,
+ MCSectionCOFF *UnwindSec,
+ const MCSymbol *Function,
+ MCContext &Context) {
if (Function && Function->isInSection()) {
// If Function is in a COMDAT, get or create an unwind info section in that
// COMDAT group.
@@ -58,16 +60,16 @@ static const MCSection *getUnwindInfoSection(
}
-const MCSection *UnwindEmitter::getPDataSection(const MCSymbol *Function,
- MCContext &Context) {
- const MCSectionCOFF *PData =
+MCSection *UnwindEmitter::getPDataSection(const MCSymbol *Function,
+ MCContext &Context) {
+ MCSectionCOFF *PData =
cast<MCSectionCOFF>(Context.getObjectFileInfo()->getPDataSection());
return getUnwindInfoSection(".pdata", PData, Function, Context);
}
-const MCSection *UnwindEmitter::getXDataSection(const MCSymbol *Function,
- MCContext &Context) {
- const MCSectionCOFF *XData =
+MCSection *UnwindEmitter::getXDataSection(const MCSymbol *Function,
+ MCContext &Context) {
+ MCSectionCOFF *XData =
cast<MCSectionCOFF>(Context.getObjectFileInfo()->getXDataSection());
return getUnwindInfoSection(".xdata", XData, Function, Context);
}
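
getUnwindInfoSection keys the placement of .pdata/.xdata on whether the function lives in a COMDAT: if so, the unwind info gets an associative section in the same group so the linker discards both together; otherwise the shared section is used. A toy decision procedure, with all names assumed:

    #include <string>

    struct SectionKey {
      std::string Name;  // ".pdata" or ".xdata"
      std::string Group; // COMDAT key; empty when not in a group
    };

    // Pick the unwind-info section for a function, mirroring the COMDAT
    // handling in getUnwindInfoSection above.
    SectionKey pickUnwindSection(const std::string &Main,
                                 const std::string &ComdatKey) {
      if (!ComdatKey.empty())
        return {Main, ComdatKey};     // associative section in the group
      return {Main, std::string()};   // the shared section
    }
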
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index d3751bd..ce34ba0 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;
@@ -38,15 +39,14 @@ void MachObjectWriter::reset() {
MCObjectWriter::reset();
}
-bool MachObjectWriter::
-doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
+bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
// Undefined symbols are always extern.
- if (SD->getSymbol().isUndefined())
+ if (S.isUndefined())
return true;
// References to weak definitions require external relocation entries; the
// definition may not always be the one in the same object file.
- if (SD->getFlags() & SF_WeakDefinition)
+ if (S.getData().getFlags() & SF_WeakDefinition)
return true;
// Otherwise, we can use an internal relocation.
@@ -55,8 +55,7 @@ doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
bool MachObjectWriter::
MachSymbolData::operator<(const MachSymbolData &RHS) const {
- return SymbolData->getSymbol().getName() <
- RHS.SymbolData->getSymbol().getName();
+ return Symbol->getName() < RHS.Symbol->getName();
}
bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
@@ -69,13 +68,11 @@ bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
const MCAsmLayout &Layout) const {
return getSectionAddress(Fragment->getParent()) +
- Layout.getFragmentOffset(Fragment);
+ Layout.getFragmentOffset(Fragment);
}
-uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
const MCAsmLayout &Layout) const {
- const MCSymbol &S = SD->getSymbol();
-
// If this is a variable, then recursively evaluate now.
if (S.isVariable()) {
if (const MCConstantExpr *C =
@@ -98,29 +95,27 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
uint64_t Address = Target.getConstant();
if (Target.getSymA())
- Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
- Target.getSymA()->getSymbol()), Layout);
+ Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
if (Target.getSymB())
- Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
- Target.getSymB()->getSymbol()), Layout);
+ Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
return Address;
}
- return getSectionAddress(SD->getFragment()->getParent()) +
- Layout.getSymbolOffset(SD);
+ return getSectionAddress(S.getData().getFragment()->getParent()) +
+ Layout.getSymbolOffset(S);
}
-uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
const MCAsmLayout &Layout) const {
- uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
- unsigned Next = SD->getLayoutOrder() + 1;
+ uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
+ unsigned Next = Sec->getLayoutOrder() + 1;
if (Next >= Layout.getSectionOrder().size())
return 0;
- const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
- if (NextSD.getSection().isVirtualSection())
+ const MCSection &NextSec = *Layout.getSectionOrder()[Next];
+ if (NextSec.isVirtualSection())
return 0;
- return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+ return OffsetToAlignment(EndAddr, NextSec.getAlignment());
}
void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
@@ -199,15 +194,15 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
void MachObjectWriter::WriteSection(const MCAssembler &Asm,
const MCAsmLayout &Layout,
- const MCSectionData &SD,
- uint64_t FileOffset,
+ const MCSection &Sec, uint64_t FileOffset,
uint64_t RelocationsStart,
unsigned NumRelocations) {
- uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+ uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
+ const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
// The offset is unused for virtual sections.
- if (SD.getSection().isVirtualSection()) {
- assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+ if (Section.isVirtualSection()) {
+ assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
FileOffset = 0;
}
@@ -217,28 +212,27 @@ void MachObjectWriter::WriteSection(const MCAssembler &Asm,
uint64_t Start = OS.tell();
(void) Start;
- const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
WriteBytes(Section.getSectionName(), 16);
WriteBytes(Section.getSegmentName(), 16);
if (is64Bit()) {
- Write64(getSectionAddress(&SD)); // address
+ Write64(getSectionAddress(&Sec)); // address
Write64(SectionSize); // size
} else {
- Write32(getSectionAddress(&SD)); // address
+ Write32(getSectionAddress(&Sec)); // address
Write32(SectionSize); // size
}
Write32(FileOffset);
unsigned Flags = Section.getTypeAndAttributes();
- if (SD.hasInstructions())
+ if (Section.hasInstructions())
Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
- assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
- Write32(Log2_32(SD.getAlignment()));
+ assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(Section.getAlignment()));
Write32(NumRelocations ? RelocationsStart : 0);
Write32(NumRelocations);
Write32(Flags);
- Write32(IndirectSymBase.lookup(&SD)); // reserved1
+ Write32(IndirectSymBase.lookup(&Sec)); // reserved1
Write32(Section.getStubSize()); // reserved2
if (is64Bit())
Write32(0); // reserved3
@@ -306,37 +300,51 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
MachObjectWriter::MachSymbolData *
MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
for (auto &Entry : LocalSymbolData)
- if (&Entry.SymbolData->getSymbol() == &Sym)
+ if (Entry.Symbol == &Sym)
return &Entry;
for (auto &Entry : ExternalSymbolData)
- if (&Entry.SymbolData->getSymbol() == &Sym)
+ if (Entry.Symbol == &Sym)
return &Entry;
for (auto &Entry : UndefinedSymbolData)
- if (&Entry.SymbolData->getSymbol() == &Sym)
+ if (Entry.Symbol == &Sym)
return &Entry;
return nullptr;
}
+const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
+ const MCSymbol *S = &Sym;
+ while (S->isVariable()) {
+ const MCExpr *Value = S->getVariableValue();
+ const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
+ if (!Ref)
+ return *S;
+ S = &Ref->getSymbol();
+ }
+ return *S;
+}
+
void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
const MCAsmLayout &Layout) {
- MCSymbolData &Data = *MSD.SymbolData;
- const MCSymbol *Symbol = &Data.getSymbol();
- const MCSymbol *AliasedSymbol = &Symbol->AliasedSymbol();
+ const MCSymbol *Symbol = MSD.Symbol;
+ MCSymbolData &Data = Symbol->getData();
+ const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
uint8_t SectionIndex = MSD.SectionIndex;
uint8_t Type = 0;
uint16_t Flags = Data.getFlags();
uint64_t Address = 0;
bool IsAlias = Symbol != AliasedSymbol;
+ const MCSymbol &OrigSymbol = *Symbol;
MachSymbolData *AliaseeInfo;
if (IsAlias) {
AliaseeInfo = findSymbolData(*AliasedSymbol);
if (AliaseeInfo)
SectionIndex = AliaseeInfo->SectionIndex;
Symbol = AliasedSymbol;
+ // FIXME: Should this update Data as well? Do we need OrigSymbol at all?
}
// Set the N_TYPE bits. See <mach-o/nlist.h>.
@@ -364,7 +372,7 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
if (IsAlias && Symbol->isUndefined())
Address = AliaseeInfo->StringIndex;
else if (Symbol->isDefined())
- Address = getSymbolAddress(&Data, Layout);
+ Address = getSymbolAddress(OrigSymbol, Layout);
else if (Data.isCommon()) {
// Common symbols are encoded with the size in the address
// field, and their alignment in the flags.
@@ -448,14 +456,11 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand(
assert(OS.tell() - Start == Size);
}
-
-void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+void MachObjectWriter::RecordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- bool &IsPCRel,
- uint64_t &FixedValue) {
+ const MCFixup &Fixup, MCValue Target,
+ bool &IsPCRel, uint64_t &FixedValue) {
TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
Target, FixedValue);
}
@@ -472,8 +477,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
// or stub section.
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
ie = Asm.indirect_symbol_end(); it != ie; ++it) {
- const MCSectionMachO &Section =
- cast<MCSectionMachO>(it->SectionData->getSection());
+ const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
@@ -488,14 +492,13 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
unsigned IndirectIndex = 0;
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
- const MCSectionMachO &Section =
- cast<MCSectionMachO>(it->SectionData->getSection());
+ const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS)
continue;
// Initialize the section indirect symbol base, if necessary.
- IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex));
+ IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
Asm.getOrCreateSymbolData(*it->Symbol);
}
@@ -504,15 +507,14 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
IndirectIndex = 0;
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
- const MCSectionMachO &Section =
- cast<MCSectionMachO>(it->SectionData->getSection());
+ const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_SYMBOL_STUBS)
continue;
// Initialize the section indirect symbol base, if necessary.
- IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex));
+ IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
// Set the symbol type to undefined lazy, but only on construction.
//
@@ -534,12 +536,11 @@ void MachObjectWriter::ComputeSymbolTable(
unsigned Index = 1;
for (MCAssembler::iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it, ++Index)
- SectionIndexMap[&it->getSection()] = Index;
+ SectionIndexMap[&*it] = Index;
assert(Index <= 256 && "Too many sections!");
// Build the string table.
- for (MCSymbolData &SD : Asm.symbols()) {
- const MCSymbol &Symbol = SD.getSymbol();
+ for (const MCSymbol &Symbol : Asm.symbols()) {
if (!Asm.isSymbolLinkerVisible(Symbol))
continue;
@@ -552,8 +553,8 @@ void MachObjectWriter::ComputeSymbolTable(
// The particular order that we collect and then sort the symbols is chosen to
// match 'as'. Even though it doesn't matter for correctness, this is
// important for letting us diff .o files.
- for (MCSymbolData &SD : Asm.symbols()) {
- const MCSymbol &Symbol = SD.getSymbol();
+ for (const MCSymbol &Symbol : Asm.symbols()) {
+ MCSymbolData &SD = Symbol.getData();
// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(Symbol))
@@ -563,7 +564,7 @@ void MachObjectWriter::ComputeSymbolTable(
continue;
MachSymbolData MSD;
- MSD.SymbolData = &SD;
+ MSD.Symbol = &Symbol;
MSD.StringIndex = StringTable.getOffset(Symbol.getName());
if (Symbol.isUndefined()) {
@@ -580,8 +581,8 @@ void MachObjectWriter::ComputeSymbolTable(
}
// Now add the data for local symbols.
- for (MCSymbolData &SD : Asm.symbols()) {
- const MCSymbol &Symbol = SD.getSymbol();
+ for (const MCSymbol &Symbol : Asm.symbols()) {
+ MCSymbolData &SD = Symbol.getData();
// Ignore non-linker visible symbols.
if (!Asm.isSymbolLinkerVisible(Symbol))
@@ -591,7 +592,7 @@ void MachObjectWriter::ComputeSymbolTable(
continue;
MachSymbolData MSD;
- MSD.SymbolData = &SD;
+ MSD.Symbol = &Symbol;
MSD.StringIndex = StringTable.getOffset(Symbol.getName());
if (Symbol.isAbsolute()) {
@@ -611,44 +612,43 @@ void MachObjectWriter::ComputeSymbolTable(
// Set the symbol indices.
Index = 0;
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- LocalSymbolData[i].SymbolData->setIndex(Index++);
+ LocalSymbolData[i].Symbol->setIndex(Index++);
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ ExternalSymbolData[i].Symbol->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+ UndefinedSymbolData[i].Symbol->setIndex(Index++);
+
+ for (const MCSection &Section : Asm) {
+ std::vector<RelAndSymbol> &Relocs = Relocations[&Section];
+ for (RelAndSymbol &Rel : Relocs) {
+ if (!Rel.Sym)
+ continue;
+
+ // Set the Index and the IsExtern bit.
+ unsigned Index = Rel.Sym->getIndex();
+ assert(isInt<24>(Index));
+ if (IsLittleEndian)
+ Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
+ else
+ Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
+ }
+ }
}
void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartAddress = 0;
- const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+ const SmallVectorImpl<MCSection *> &Order = Layout.getSectionOrder();
for (int i = 0, n = Order.size(); i != n ; ++i) {
- const MCSectionData *SD = Order[i];
- StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
- SectionAddress[SD] = StartAddress;
- StartAddress += Layout.getSectionAddressSize(SD);
+ const MCSection *Sec = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, Sec->getAlignment());
+ SectionAddress[Sec] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(Sec);
// Explicitly pad the section to match the alignment requirements of the
// following one. This is for 'gas' compatibility; it shouldn't
// strictly be necessary.
- StartAddress += getPaddingSize(SD, Layout);
- }
-}
-
-void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (MCSymbolData &SD : Asm.symbols()) {
- if (!SD.getSymbol().isVariable())
- continue;
-
- // Is the variable is a symbol difference (SA - SB + C) expression,
- // and neither symbol is external, mark the variable as absolute.
- const MCExpr *Expr = SD.getSymbol().getVariableValue();
- MCValue Value;
- if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) {
- if (Value.getSymA() && Value.getSymB())
- const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute();
- }
+ StartAddress += getPaddingSize(Sec, Layout);
}
}
@@ -658,22 +658,11 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// Create symbol data for any indirect symbols.
BindIndirectSymbols(Asm);
-
- // Mark symbol difference expressions in variables (from .set or = directives)
- // as absolute.
- markAbsoluteVariableSymbols(Asm, Layout);
-
- // Compute symbol table information and bind symbol indices.
- ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
- UndefinedSymbolData);
}
-bool MachObjectWriter::
-IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
+bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
+ bool InSet, bool IsPCRel) const {
if (InSet)
return true;
@@ -682,11 +671,9 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// - addr(atom(B)) - offset(B)
// and the offsets are not relocatable, so the fixup is fully resolved when
// addr(atom(A)) - addr(atom(B)) == 0.
- const MCSymbolData *A_Base = nullptr, *B_Base = nullptr;
-
- const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSymbol &SA = findAliasedSymbol(SymA);
const MCSection &SecA = SA.getSection();
- const MCSection &SecB = FB.getParent()->getSection();
+ const MCSection &SecB = *FB.getParent();
if (IsPCRel) {
// The simple (Darwin, except on x86_64) way of dealing with this was to
@@ -736,11 +723,8 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
if (!FA)
return false;
- A_Base = FA->getAtom();
- B_Base = FB.getAtom();
-
// If the atoms are the same, they are guaranteed to have the same address.
- if (A_Base == B_Base)
+ if (FA->getAtom() == FB.getAtom())
return true;
// Otherwise, we can't prove this is fully resolved.
@@ -749,6 +733,10 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
void MachObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ // Compute symbol table information and bind symbol indices.
+ ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+
unsigned NumSections = Asm.size();
const MCAssembler::VersionMinInfoType &VersionInfo =
Layout.getAssembler().getVersionMinInfo();
@@ -808,15 +796,15 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t VMSize = 0;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
- uint64_t Address = getSectionAddress(&SD);
- uint64_t Size = Layout.getSectionAddressSize(&SD);
- uint64_t FileSize = Layout.getSectionFileSize(&SD);
- FileSize += getPaddingSize(&SD, Layout);
+ const MCSection &Sec = *it;
+ uint64_t Address = getSectionAddress(&Sec);
+ uint64_t Size = Layout.getSectionAddressSize(&Sec);
+ uint64_t FileSize = Layout.getSectionFileSize(&Sec);
+ FileSize += getPaddingSize(&Sec, Layout);
VMSize = std::max(VMSize, Address + Size);
- if (SD.getSection().isVirtualSection())
+ if (it->isVirtualSection())
continue;
SectionDataSize = std::max(SectionDataSize, Address + Size);
@@ -839,9 +827,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
+ std::vector<RelAndSymbol> &Relocs = Relocations[&*it];
unsigned NumRelocs = Relocs.size();
- uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(&*it);
WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
}
@@ -918,11 +906,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- Asm.writeSectionData(it, Layout);
+ MCSection &Sec = *it;
+ Asm.writeSectionData(&Sec, Layout);
- uint64_t Pad = getPaddingSize(it, Layout);
- for (unsigned int i = 0; i < Pad; ++i)
- Write8(0);
+ uint64_t Pad = getPaddingSize(&Sec, Layout);
+ WriteZeros(Pad);
}
// Write the extra padding.
@@ -933,10 +921,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
ie = Asm.end(); it != ie; ++it) {
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
- std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
+ std::vector<RelAndSymbol> &Relocs = Relocations[&*it];
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- Write32(Relocs[e - i - 1].r_word0);
- Write32(Relocs[e - i - 1].r_word1);
+ Write32(Relocs[e - i - 1].MRE.r_word0);
+ Write32(Relocs[e - i - 1].MRE.r_word1);
}
}
@@ -945,12 +933,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
it = Asm.data_region_begin(), ie = Asm.data_region_end();
it != ie; ++it) {
const DataRegionData *Data = &(*it);
- uint64_t Start =
- getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start),
- Layout);
- uint64_t End =
- getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End),
- Layout);
+ uint64_t Start = getSymbolAddress(*Data->Start, Layout);
+ uint64_t End = getSymbolAddress(*Data->End, Layout);
DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
<< " start: " << Start << "(" << Data->Start->getName() << ")"
<< " end: " << End << "(" << Data->End->getName() << ")"
@@ -981,7 +965,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Indirect symbols in the non-lazy symbol pointer section have some
// special handling.
const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ static_cast<const MCSectionMachO &>(*it->Section);
if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
// If this symbol is defined and internal, mark it as such.
if (it->Symbol->isDefined() &&
@@ -994,7 +978,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
}
}
- Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+ Write32(it->Symbol->getIndex());
}
// FIXME: Check that offsets match computed ones.
@@ -1013,7 +997,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
}
MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
bool IsLittleEndian) {
return new MachObjectWriter(MOTW, OS, IsLittleEndian);
}
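
The MachObjectWriter rewrite replaces the MCSymbolData plumbing with MCSymbol and, more subtly, makes relocations carry the referenced symbol (RelAndSymbol) so the index bits of r_word1 can be spliced in only after ComputeSymbolTable has assigned final indices. That splice, restated as a helper; the masks and shifts are copied from the hunk above, not independently verified:

    #include <cassert>
    #include <cstdint>

    // Patch a symbol index plus the "extern" flag into a Mach-O relocation
    // word once symbol indices are final.
    uint32_t patchRelWord1(uint32_t Word1, uint32_t Index, bool LittleEndian) {
      assert(Index < (1u << 24) && "symbol index must fit in 24 bits");
      if (LittleEndian)
        return (Word1 & (~0u << 24)) | Index | (1u << 27);
      return (Word1 & 0xff) | (Index << 8) | (1u << 4);
    }
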
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
index 587be54..78006e0 100644
--- a/contrib/llvm/lib/MC/SubtargetFeature.cpp
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -81,11 +81,12 @@ static std::string Join(const std::vector<std::string> &V) {
}
/// Adding features.
-void SubtargetFeatures::AddFeature(StringRef String) {
- // Don't add empty features or features we already have.
+void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
+ // Don't add empty features.
if (!String.empty())
// Convert to lowercase, prepend flag if we don't already have a flag.
- Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower());
+ Features.push_back(hasFlag(String) ? String.lower()
+ : (Enable ? "+" : "-") + String.lower());
}
/// Find KV in array using binary search.
@@ -150,12 +151,12 @@ std::string SubtargetFeatures::getString() const {
/// feature, set it.
///
static
-void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void SetImpliedBits(FeatureBitset &Bits, const SubtargetFeatureKV *FeatureEntry,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
for (auto &FE : FeatureTable) {
if (FeatureEntry->Value == FE.Value) continue;
- if (FeatureEntry->Implies & FE.Value) {
+ if ((FeatureEntry->Implies & FE.Value).any()) {
Bits |= FE.Value;
SetImpliedBits(Bits, &FE, FeatureTable);
}
@@ -166,12 +167,13 @@ void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// feature, clear it.
///
static
-void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void ClearImpliedBits(FeatureBitset &Bits,
+ const SubtargetFeatureKV *FeatureEntry,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
for (auto &FE : FeatureTable) {
if (FeatureEntry->Value == FE.Value) continue;
- if (FE.Implies & FeatureEntry->Value) {
+ if ((FE.Implies & FeatureEntry->Value).any()) {
Bits &= ~FE.Value;
ClearImpliedBits(Bits, &FE, FeatureTable);
}
@@ -180,8 +182,8 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// ToggleFeature - Toggle a feature and return the newly updated feature
/// bits.
-uint64_t
-SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature,
+FeatureBitset
+SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
// Find feature in table.
@@ -191,7 +193,6 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature,
if (FeatureEntry) {
if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
Bits &= ~FeatureEntry->Value;
-
// For each feature that implies this, clear it.
ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
} else {
@@ -212,13 +213,13 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature,
/// getFeatureBits - Get the feature bits for a CPU.
///
-uint64_t
+FeatureBitset
SubtargetFeatures::getFeatureBits(StringRef CPU,
ArrayRef<SubtargetFeatureKV> CPUTable,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
if (CPUTable.empty() || FeatureTable.empty())
- return 0;
+ return FeatureBitset();
#ifndef NDEBUG
for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
@@ -230,7 +231,8 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
"CPU features table is not sorted");
}
#endif
- uint64_t Bits = 0; // Resulting bits
+ // Resulting bits
+ FeatureBitset Bits;
// Check if help is needed
if (CPU == "help")
@@ -247,7 +249,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
// Set the feature implied by this CPU feature, if any.
for (auto &FE : FeatureTable) {
- if (CPUEntry->Value & FE.Value)
+ if ((CPUEntry->Value & FE.Value).any())
SetImpliedBits(Bits, &FE, FeatureTable);
}
} else {
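
SubtargetFeature.cpp gets the matching FeatureBitset conversion; the one semantic wrinkle is that a bitset intersection must be tested with .any() instead of relying on integer truthiness. The implied-bits closure is otherwise unchanged. A compact sketch of that propagation with simplified table types (self-identity is checked by address here, where the original compares Value fields):

    #include <bitset>
    #include <cstddef>
    #include <vector>

    constexpr std::size_t kMaxFeatures = 128; // assumed width
    using FeatureBitset = std::bitset<kMaxFeatures>;

    struct FeatureEntry {
      FeatureBitset Value;   // the feature's own bit(s)
      FeatureBitset Implies; // features enabled along with it
    };

    // Recursively set everything E implies, as SetImpliedBits does above;
    // (A & B).any() replaces the old integer A & B test.
    void setImpliedBits(FeatureBitset &Bits, const FeatureEntry &E,
                        const std::vector<FeatureEntry> &Table) {
      for (const FeatureEntry &FE : Table) {
        if (&FE == &E)
          continue;
        if ((E.Implies & FE.Value).any()) {
          Bits |= FE.Value;
          setImpliedBits(Bits, FE, Table);
        }
      }
    }
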
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index ec0e0f7..c945085 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -50,7 +50,7 @@ enum AuxiliaryType {
};
struct AuxSymbol {
- AuxiliaryType AuxType;
+ AuxiliaryType AuxType;
COFF::Auxiliary Aux;
};
@@ -63,14 +63,14 @@ public:
typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols;
- name Name;
- int Index;
+ name Name;
+ int Index;
AuxiliarySymbols Aux;
- COFFSymbol *Other;
- COFFSection *Section;
- int Relocations;
+ COFFSymbol *Other;
+ COFFSection *Section;
+ int Relocations;
- MCSymbolData const *MCData;
+ const MCSymbol *MC;
COFFSymbol(StringRef name);
void set_name_offset(uint32_t Offset);
@@ -81,7 +81,7 @@ public:
// This class contains staging data for a COFF relocation entry.
struct COFFRelocation {
COFF::relocation Data;
- COFFSymbol *Symb;
+ COFFSymbol *Symb;
COFFRelocation() : Symb(nullptr) {}
static size_t size() { return COFF::RelocationSize; }
@@ -93,11 +93,11 @@ class COFFSection {
public:
COFF::section Header;
- std::string Name;
- int Number;
- MCSectionData const *MCData;
- COFFSymbol *Symbol;
- relocations Relocations;
+ std::string Name;
+ int Number;
+ MCSectionCOFF const *MCSection;
+ COFFSymbol *Symbol;
+ relocations Relocations;
COFFSection(StringRef name);
static size_t size();
@@ -105,29 +105,28 @@ public:
class WinCOFFObjectWriter : public MCObjectWriter {
public:
-
- typedef std::vector<std::unique_ptr<COFFSymbol>> symbols;
+ typedef std::vector<std::unique_ptr<COFFSymbol>> symbols;
typedef std::vector<std::unique_ptr<COFFSection>> sections;
- typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
typedef DenseMap<MCSection const *, COFFSection *> section_map;
std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
// Root level file contents.
COFF::header Header;
- sections Sections;
- symbols Symbols;
+ sections Sections;
+ symbols Symbols;
StringTableBuilder Strings;
// Maps used during object file creation.
section_map SectionMap;
- symbol_map SymbolMap;
+ symbol_map SymbolMap;
bool UseBigObj;
- WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
-
+ WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_pwrite_stream &OS);
+
void reset() override {
memset(&Header, 0, sizeof(Header));
Header.Machine = TargetObjectWriter->getMachine();
@@ -140,14 +139,14 @@ public:
}
COFFSymbol *createSymbol(StringRef Name);
- COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+ COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
template <typename object_t, typename list_t>
object_t *createCOFFEntity(StringRef Name, list_t &List);
- void DefineSection(MCSectionData const &SectionData);
- void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler,
+ void defineSection(MCSectionCOFF const &Sec);
+ void DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler,
const MCAsmLayout &Layout);
void SetSymbolName(COFFSymbol &S);
@@ -171,11 +170,13 @@ public:
const MCAsmLayout &Layout) override;
bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
+ const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ bool isWeak(const MCSymbol &Sym) const override;
+
+ void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
@@ -193,11 +194,8 @@ static inline void write_uint32_le(void *Data, uint32_t Value) {
// Symbol class implementation
COFFSymbol::COFFSymbol(StringRef name)
- : Name(name.begin(), name.end())
- , Other(nullptr)
- , Section(nullptr)
- , Relocations(0)
- , MCData(nullptr) {
+ : Name(name.begin(), name.end()), Other(nullptr), Section(nullptr),
+ Relocations(0), MC(nullptr) {
memset(&Data, 0, sizeof(Data));
}
@@ -216,22 +214,22 @@ bool COFFSymbol::should_keep() const {
return true;
// if it has relocations pointing at it, keep it
- if (Relocations > 0) {
+ if (Relocations > 0) {
assert(Section->Number != -1 && "Sections with relocations must be real!");
return true;
}
// if the section it's in is being dropped, drop it
if (Section->Number == -1)
- return false;
+ return false;
// if it is the section symbol, keep it
if (Section->Symbol == this)
return true;
// if it's temporary, drop it
- if (MCData && MCData->getSymbol().isTemporary())
- return false;
+ if (MC && MC->isTemporary())
+ return false;
// otherwise, keep it
return true;
@@ -241,21 +239,17 @@ bool COFFSymbol::should_keep() const {
// Section class implementation
COFFSection::COFFSection(StringRef name)
- : Name(name)
- , MCData(nullptr)
- , Symbol(nullptr) {
+ : Name(name), MCSection(nullptr), Symbol(nullptr) {
memset(&Header, 0, sizeof(Header));
}
-size_t COFFSection::size() {
- return COFF::SectionSize;
-}
+size_t COFFSection::size() { return COFF::SectionSize; }
//------------------------------------------------------------------------------
// WinCOFFObjectWriter class implementation
WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
- raw_ostream &OS)
+ raw_pwrite_stream &OS)
: MCObjectWriter(OS, true), TargetObjectWriter(MOTW) {
memset(&Header, 0, sizeof(Header));
@@ -283,8 +277,7 @@ COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
/// A template used to look up or create a symbol/section, and initialize it if
/// needed.
template <typename object_t, typename list_t>
-object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
- list_t &List) {
+object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name, list_t &List) {
List.push_back(make_unique<object_t>(Name));
return List.back().get();
@@ -292,15 +285,9 @@ object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
-void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
- assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
- && "Got non-COFF section in the COFF backend!");
- // FIXME: Not sure how to verify this (at least in a debug build).
- MCSectionCOFF const &Sec =
- static_cast<MCSectionCOFF const &>(SectionData.getSection());
-
+void WinCOFFObjectWriter::defineSection(MCSectionCOFF const &Sec) {
COFFSection *coff_section = createSection(Sec.getSectionName());
- COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
if (Sec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
if (const MCSymbol *S = Sec.getCOMDATSymbol()) {
COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S);
@@ -323,37 +310,66 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
coff_section->Header.Characteristics = Sec.getCharacteristics();
uint32_t &Characteristics = coff_section->Header.Characteristics;
- switch (SectionData.getAlignment()) {
- case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
- case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
- case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
- case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
- case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
- case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
- case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
- case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
- case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
- case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
- case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
- case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
- case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
- case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ switch (Sec.getAlignment()) {
+ case 1:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES;
+ break;
+ case 2:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES;
+ break;
+ case 4:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES;
+ break;
+ case 8:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES;
+ break;
+ case 16:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES;
+ break;
+ case 32:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES;
+ break;
+ case 64:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES;
+ break;
+ case 128:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES;
+ break;
+ case 256:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES;
+ break;
+ case 512:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES;
+ break;
+ case 1024:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES;
+ break;
+ case 2048:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES;
+ break;
+ case 4096:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES;
+ break;
+ case 8192:
+ Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES;
+ break;
default:
llvm_unreachable("unsupported section alignment");
}
// Bind internal COFF section to MC section.
- coff_section->MCData = &SectionData;
- SectionMap[&SectionData.getSection()] = coff_section;
+ coff_section->MCSection = &Sec;
+ SectionMap[&Sec] = coff_section;
}
-static uint64_t getSymbolValue(const MCSymbolData &Data,
+static uint64_t getSymbolValue(const MCSymbol &Symbol,
const MCAsmLayout &Layout) {
+ const MCSymbolData &Data = Symbol.getData();
if (Data.isCommon() && Data.isExternal())
return Data.getCommonSize();
uint64_t Res;
- if (!Layout.getSymbolOffset(&Data, Res))
+ if (!Layout.getSymbolOffset(Symbol, Res))
return 0;
return Res;
@@ -361,33 +377,30 @@ static uint64_t getSymbolValue(const MCSymbolData &Data,
/// This function takes a symbol data object from the assembler
/// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
MCAssembler &Assembler,
const MCAsmLayout &Layout) {
- MCSymbol const &Symbol = SymbolData.getSymbol();
COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
SymbolMap[&Symbol] = coff_symbol;
- if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+ if (Symbol.getData().getFlags() & COFF::SF_WeakExternal) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
if (Symbol.isVariable()) {
const MCSymbolRefExpr *SymRef =
- dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue());
+ dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue());
if (!SymRef)
report_fatal_error("Weak externals may only alias symbols");
coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol());
} else {
- std::string WeakName = std::string(".weak.")
- + Symbol.getName().str()
- + ".default";
+ std::string WeakName = (".weak." + Symbol.getName() + ".default").str();
COFFSymbol *WeakDefault = createSymbol(WeakName);
WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
- WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
- WeakDefault->Data.Type = 0;
- WeakDefault->Data.Value = 0;
+ WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ WeakDefault->Data.Type = 0;
+ WeakDefault->Data.Value = 0;
coff_symbol->Other = WeakDefault;
}
@@ -397,22 +410,21 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
coff_symbol->Aux[0].AuxType = ATWeakExternal;
coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
- COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
- coff_symbol->MCData = &SymbolData;
+ coff_symbol->MC = &Symbol;
} else {
const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol);
const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
- coff_symbol->Data.Value = getSymbolValue(ResSymData, Layout);
+ coff_symbol->Data.Value = getSymbolValue(Symbol, Layout);
- coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
// If no storage class was specified in the streamer, define it here.
if (coff_symbol->Data.StorageClass == 0) {
- bool IsExternal =
- ResSymData.isExternal() ||
- (!ResSymData.getFragment() && !ResSymData.getSymbol().isVariable());
+ bool IsExternal = ResSymData.isExternal() ||
+ (!ResSymData.getFragment() && !Symbol.isVariable());
coff_symbol->Data.StorageClass = IsExternal
? COFF::IMAGE_SYM_CLASS_EXTERNAL
@@ -424,8 +436,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
} else {
const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
if (BaseData.getFragment()) {
- COFFSection *Sec =
- SectionMap[&BaseData.getFragment()->getParent()->getSection()];
+ COFFSection *Sec = SectionMap[BaseData.getFragment()->getParent()];
if (coff_symbol->Section && coff_symbol->Section != Sec)
report_fatal_error("conflicting sections for symbol");
@@ -434,7 +445,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
}
}
- coff_symbol->MCData = &ResSymData;
+ coff_symbol->MC = &Symbol;
}
}
@@ -446,7 +457,7 @@ static const uint64_t MaxBase64Offset = 0xFFFFFFFFFULL; // 64^6, including 0
// Encode a string table entry offset in base 64, padded to 6 chars, and
// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
// Buffer must be at least 8 bytes. No terminating null appended.
-static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
+static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
"Illegal section name encoding for value");
@@ -457,7 +468,7 @@ static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
Buffer[0] = '/';
Buffer[1] = '/';
- char* Ptr = Buffer + 7;
+ char *Ptr = Buffer + 7;
for (unsigned i = 0; i < 6; ++i) {
unsigned Rem = Value % 64;
Value /= 64;
@@ -474,7 +485,7 @@ void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
} else if (StringTableEntry <= Max7DecimalOffset) {
// With seven digits, we have to skip the terminating null. Because
// sprintf always appends it, we use a larger temporary buffer.
- char buffer[9] = { };
+ char buffer[9] = {};
std::sprintf(buffer, "/%d", unsigned(StringTableEntry));
std::memcpy(S.Header.Name, buffer, 8);
} else if (StringTableEntry <= MaxBase64Offset) {
@@ -515,8 +526,8 @@ bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol,
}
bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
- return (S->Header.Characteristics
- & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+ return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) ==
+ 0;
}
//------------------------------------------------------------------------------
@@ -529,8 +540,7 @@ void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
WriteLE16(COFF::BigObjHeader::MinBigObjectVersion);
WriteLE16(Header.Machine);
WriteLE32(Header.TimeDateStamp);
- for (uint8_t MagicChar : COFF::BigObjMagic)
- Write8(MagicChar);
+ WriteBytes(StringRef(COFF::BigObjMagic, sizeof(COFF::BigObjMagic)));
WriteLE32(0);
WriteLE32(0);
WriteLE32(0);
@@ -563,10 +573,10 @@ void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
}
void WinCOFFObjectWriter::WriteAuxiliarySymbols(
- const COFFSymbol::AuxiliarySymbols &S) {
- for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
- i != e; ++i) {
- switch(i->AuxType) {
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for (COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch (i->AuxType) {
case ATFunctionDefinition:
WriteLE32(i->Aux.FunctionDefinition.TagIndex);
WriteLE32(i->Aux.FunctionDefinition.TotalSize);
@@ -641,54 +651,69 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
- DefineSection(Section);
+ defineSection(static_cast<const MCSectionCOFF &>(Section));
- for (MCSymbolData &SD : Asm.symbols())
- if (ExportSymbol(SD.getSymbol(), Asm))
- DefineSymbol(SD, Asm, Layout);
+ for (const MCSymbol &Symbol : Asm.symbols())
+ if (ExportSymbol(Symbol, Asm))
+ DefineSymbol(Symbol, Asm, Layout);
}
bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
- const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB,
+ const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
// MS LINK expects to be able to replace all references to a function with a
// thunk to implement their /INCREMENTAL feature. Make sure we don't optimize
// away any relocations to functions.
- if ((((DataA.getFlags() & COFF::SF_TypeMask) >> COFF::SF_TypeShift) >>
+ if ((((SymA.getData().getFlags() & COFF::SF_TypeMask) >>
+ COFF::SF_TypeShift) >>
COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
return false;
- return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, FB,
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
}
-void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- bool &IsPCRel,
- uint64_t &FixedValue) {
+bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const {
+ const MCSymbolData &SD = Sym.getData();
+ if (!SD.isExternal())
+ return false;
+
+ if (!Sym.isInSection())
+ return false;
+
+ const auto &Sec = cast<MCSectionCOFF>(Sym.getSection());
+ if (!Sec.getCOMDATSymbol())
+ return false;
+
+ // It looks like for COFF it is invalid to replace a reference to a global
+ // in a comdat with a reference to a local.
+ // FIXME: Add a specification reference if available.
+ return true;
+}
+
+void WinCOFFObjectWriter::RecordRelocation(
+ MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");
const MCSymbol &Symbol = Target.getSymA()->getSymbol();
- const MCSymbol &A = Symbol.AliasedSymbol();
+ const MCSymbol &A = Symbol;
if (!Asm.hasSymbolData(A))
- Asm.getContext().FatalError(
- Fixup.getLoc(),
- Twine("symbol '") + A.getName() + "' can not be undefined");
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Twine("symbol '") + A.getName() +
+ "' can not be undefined");
const MCSymbolData &A_SD = Asm.getSymbolData(A);
- MCSectionData const *SectionData = Fragment->getParent();
+ MCSection *Section = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
- assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
+ assert(SectionMap.find(Section) != SectionMap.end() &&
"Section must already have been defined in ExecutePostLayoutBinding!");
- assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
+ assert(SymbolMap.find(&A) != SymbolMap.end() &&
"Symbol must already have been defined in ExecutePostLayoutBinding!");
- COFFSection *coff_section = SectionMap[&SectionData->getSection()];
- COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+ COFFSection *coff_section = SectionMap[Section];
+ COFFSymbol *coff_symbol = SymbolMap[&A];
const MCSymbolRefExpr *SymB = Target.getSymB();
bool CrossSection = false;
@@ -696,13 +721,13 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCSymbol *B = &SymB->getSymbol();
const MCSymbolData &B_SD = Asm.getSymbolData(*B);
if (!B_SD.getFragment())
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
Twine("symbol '") + B->getName() +
"' can not be undefined in a subtraction expression");
if (!A_SD.getFragment())
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
Twine("symbol '") + Symbol.getName() +
"' can not be undefined in a subtraction expression");
@@ -710,13 +735,13 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
CrossSection = &Symbol.getSection() != &B->getSection();
// Offset of the symbol in the section
- int64_t OffsetOfB = Layout.getSymbolOffset(&B_SD);
+ int64_t OffsetOfB = Layout.getSymbolOffset(*B);
// In the case where we have SymbA and SymB, we just need to store the delta
// between the two symbols. Update FixedValue to account for the delta, and
// skip recording the relocation.
if (!CrossSection) {
- int64_t OffsetOfA = Layout.getSymbolOffset(&A_SD);
+ int64_t OffsetOfA = Layout.getSymbolOffset(A);
FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant();
return;
}
@@ -725,7 +750,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
int64_t OffsetOfRelocation =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
- FixedValue = OffsetOfRelocation - OffsetOfB;
+ FixedValue = (OffsetOfRelocation - OffsetOfB) + Target.getConstant();
} else {
FixedValue = Target.getConstant();
}
@@ -736,18 +761,19 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
// Turn relocations for temporary symbols into section relocations.
- if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
+ if (coff_symbol->MC->isTemporary() || CrossSection) {
Reloc.Symb = coff_symbol->Section->Symbol;
- FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->getFragment()) +
- coff_symbol->MCData->getOffset();
+ FixedValue +=
+ Layout.getFragmentOffset(coff_symbol->MC->getData().getFragment()) +
+ coff_symbol->MC->getData().getOffset();
} else
Reloc.Symb = coff_symbol;
++Reloc.Symb->Relocations;
Reloc.Data.VirtualAddress += Fixup.getOffset();
- Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup,
- CrossSection);
+ Reloc.Data.Type = TargetObjectWriter->getRelocType(
+ Target, Fixup, CrossSection, Asm.getBackend());
// FIXME: Can anyone explain what this does other than adjust for the size
// of the offset?
@@ -768,9 +794,9 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
break;
case COFF::IMAGE_REL_ARM_BRANCH11:
case COFF::IMAGE_REL_ARM_BLX11:
- // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
- // pre-ARMv7, which implicitly rules it out of ARMNT (it would be valid
- // for Windows CE).
+ // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
+ // pre-ARMv7, which implicitly rules it out of ARMNT (it would be valid
+ // for Windows CE).
case COFF::IMAGE_REL_ARM_BRANCH24:
case COFF::IMAGE_REL_ARM_BLX24:
case COFF::IMAGE_REL_ARM_MOV32A:
@@ -827,8 +853,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
Header.NumberOfSections = NumberOfSections;
Header.NumberOfSymbols = 0;
- for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end();
- FI != FE; ++FI) {
+ for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end(); FI != FE;
+ ++FI) {
// round up to calculate the number of auxiliary symbols required
unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
unsigned Count = (FI->size() + SymbolSize - 1) / SymbolSize;
@@ -902,8 +928,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
continue;
- const MCSectionCOFF &MCSec =
- static_cast<const MCSectionCOFF &>(Section->MCData->getSection());
+ const MCSectionCOFF &MCSec = *Section->MCSection;
const MCSymbol *COMDAT = MCSec.getCOMDATSymbol();
assert(COMDAT);
@@ -919,10 +944,10 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
if (Assoc->Number == -1)
continue;
- Section->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc];
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number =
+ SectionIndices[Assoc];
}
-
// Assign file offsets to COFF object file structures.
unsigned offset = 0;
@@ -934,7 +959,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
offset += COFF::SectionSize * Header.NumberOfSections;
for (const auto &Section : Asm) {
- COFFSection *Sec = SectionMap[&Section.getSection()];
+ COFFSection *Sec = SectionMap[&Section];
if (Sec->Number == -1)
continue;
@@ -942,6 +967,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
if (IsPhysicalSection(Sec)) {
+ // Align the section data to a four byte boundary.
+ offset = RoundUpToAlignment(offset, 4);
Sec->Header.PointerToRawData = offset;
offset += Sec->Header.SizeOfRawData;
@@ -979,9 +1006,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
"Section's symbol's aux symbol must be a Section Definition!");
Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
Aux.Aux.SectionDefinition.NumberOfRelocations =
- Sec->Header.NumberOfRelocations;
+ Sec->Header.NumberOfRelocations;
Aux.Aux.SectionDefinition.NumberOfLinenumbers =
- Sec->Header.NumberOfLineNumbers;
+ Sec->Header.NumberOfLineNumbers;
}
Header.PointerToSymbolTable = offset;
@@ -1004,18 +1031,24 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
}
}
- for (i = Sections.begin(), ie = Sections.end(),
- j = Asm.begin(), je = Asm.end();
+ for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(),
+ je = Asm.end();
(i != ie) && (j != je); ++i, ++j) {
if ((*i)->Number == -1)
continue;
if ((*i)->Header.PointerToRawData != 0) {
- assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ assert(OS.tell() <= (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.writeSectionData(j, Layout);
+ unsigned SectionDataPadding = (*i)->Header.PointerToRawData - OS.tell();
+ assert(SectionDataPadding < 4 &&
+ "Should only need at most three bytes of padding!");
+
+ WriteZeros(SectionDataPadding);
+
+ Asm.writeSectionData(&*j, Layout);
}
if ((*i)->Relocations.size() > 0) {
@@ -1050,9 +1083,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
OS.write(Strings.data().data(), Strings.data().size());
}
-MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
- Machine(Machine_) {
-}
+MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
+ : Machine(Machine_) {}
// Pin the vtable to this file.
void MCWinCOFFObjectTargetWriter::anchor() {}
@@ -1060,9 +1092,8 @@ void MCWinCOFFObjectTargetWriter::anchor() {}
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
-namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
- raw_ostream &OS) {
- return new WinCOFFObjectWriter(MOTW, OS);
- }
+MCObjectWriter *
+llvm::createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_pwrite_stream &OS) {
+ return new WinCOFFObjectWriter(MOTW, OS);
}
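
Beyond the MCSymbolData retirement, two behavioural changes ride along in WinCOFFObjectWriter: isWeak() keeps references to COMDAT globals from being folded away, and raw section data is now placed on 4-byte boundaries, with up to three zero pad bytes written before each section body. The offset bookkeeping for that padding, as a sketch; roundUp stands in for the llvm::RoundUpToAlignment helper of this era:

    #include <cstdint>

    // Round Value up to a power-of-two alignment.
    uint64_t roundUp(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    // Assign a section's raw-data file offset with 4-byte alignment; the
    // writer later emits (PointerToRawData - tell()) zeros, which this
    // keeps to at most three bytes.
    uint64_t placeSectionData(uint64_t Offset, uint64_t RawDataSize,
                              uint64_t &PointerToRawData) {
      Offset = roundUp(Offset, 4);
      PointerToRawData = Offset;
      return Offset + RawDataSize;
    }
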
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
index 41a3da7..d2fbd37 100644
--- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
namespace llvm {
MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &CE, raw_ostream &OS)
+ MCCodeEmitter &CE, raw_pwrite_stream &OS)
: MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {}
void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
@@ -49,7 +49,7 @@ void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI);
+ getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
VecOS.flush();
// Add the fixups and data.
@@ -161,7 +161,7 @@ void MCWinCOFFStreamer::EndCOFFSymbolDef() {
void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
- MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_2);
+ MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2);
DF->getFixups().push_back(Fixup);
DF->getContents().resize(DF->getContents().size() + 2, 0);
}
@@ -169,7 +169,7 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
- MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_4);
+ MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_4);
DF->getFixups().push_back(Fixup);
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
@@ -219,10 +219,10 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
assert(!Symbol->isInSection() && "Symbol must not already have a section!");
- const MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
- MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
- if (SectionData.getAlignment() < ByteAlignment)
- SectionData.setAlignment(ByteAlignment);
+ MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
+ getAssembler().registerSection(*Section);
+ if (Section->getAlignment() < ByteAlignment)
+ Section->setAlignment(ByteAlignment);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
SD.setExternal(false);
@@ -230,23 +230,21 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
AssignSection(Symbol, Section);
if (ByteAlignment != 1)
- new MCAlignFragment(ByteAlignment, /*_Value=*/0, /*_ValueSize=*/0,
- ByteAlignment, &SectionData);
+ new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0,
+ ByteAlignment, Section);
- MCFillFragment *Fragment =
- new MCFillFragment(/*_Value=*/0, /*_ValueSize=*/0, Size, &SectionData);
+ MCFillFragment *Fragment = new MCFillFragment(
+ /*Value=*/0, /*ValueSize=*/0, Size, Section);
SD.setFragment(Fragment);
}
-void MCWinCOFFStreamer::EmitZerofill(const MCSection *Section,
- MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("not implemented");
}
-void MCWinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section,
- MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("not implemented");
}
@@ -269,7 +267,7 @@ void MCWinCOFFStreamer::FinishImpl() {
LLVM_ATTRIBUTE_NORETURN
void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
- getContext().FatalError(SMLoc(), Msg);
+ getContext().reportFatalError(SMLoc(), Msg);
}
}
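The signature changes above swap raw_ostream for raw_pwrite_stream: an object writer needs to patch fields (such as file offsets) whose values are only known after later data has been written. A toy sketch of the idea; this is an in-memory stand-in, not LLVM's raw_pwrite_stream:

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <string>

    class PatchableBuffer {
      std::string Data;
    public:
      void write(const char *Ptr, size_t Size) { Data.append(Ptr, Size); }
      // Overwrite Size bytes at Offset without moving the write cursor.
      void pwrite(const char *Ptr, size_t Size, size_t Offset) {
        assert(Offset + Size <= Data.size() && "patch must stay in bounds");
        std::memcpy(&Data[Offset], Ptr, Size);
      }
      size_t tell() const { return Data.size(); }
    };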
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
index 5aada91..fb91eed 100644
--- a/contrib/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -20,6 +20,7 @@
using namespace llvm;
using namespace object;
+using namespace llvm::support::endian;
static const char *const Magic = "!<arch>\n";
static const char *const ThinMagic = "!<thin>\n";
@@ -110,6 +111,10 @@ uint64_t Archive::Child::getSize() const {
return Data.size() - StartOfFile;
}
+uint64_t Archive::Child::getRawSize() const {
+ return getHeader()->getSize();
+}
+
Archive::Child Archive::Child::getNext() const {
size_t SpaceToSkip = Data.size();
// If it's odd, add 1 to make it even.
@@ -125,6 +130,13 @@ Archive::Child Archive::Child::getNext() const {
return Child(Parent, NextLoc);
}
+uint64_t Archive::Child::getChildOffset() const {
+ const char *a = Parent->Data.getBuffer().data();
+ const char *c = Data.data();
+ uint64_t offset = c - a;
+ return offset;
+}
+
ErrorOr<StringRef> Archive::Child::getName() const {
StringRef name = getRawName();
// Check if it's a special name.
@@ -151,7 +163,7 @@ ErrorOr<StringRef> Archive::Child::getName() const {
return object_error::parse_failed;
// GNU long file names end with a /.
- if (Parent->kind() == K_GNU) {
+ if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
StringRef::size_type End = StringRef(addr).find('/');
return StringRef(addr, End);
}
@@ -262,8 +274,16 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
return;
}
- if (Name == "/") {
+ // MIPS 64-bit ELF archives use a special symbol table format, marked
+ // by an `ar_name` field equal to "/SYM64/". For a detailed description,
+ // see page 96 of the following document:
+ // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
+
+ bool has64SymTable = false;
+ if (Name == "/" || Name == "/SYM64/") {
SymbolTable = i;
+ if (Name == "/SYM64/")
+ has64SymTable = true;
++i;
if (i == e) {
@@ -274,7 +294,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
}
if (Name == "//") {
- Format = K_GNU;
+ Format = has64SymTable ? K_MIPS64 : K_GNU;
StringTable = i;
++i;
FirstRegular = i;
@@ -283,7 +303,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
}
if (Name[0] != '/') {
- Format = K_GNU;
+ Format = has64SymTable ? K_MIPS64 : K_GNU;
FirstRegular = i;
ec = object_error::success;
return;
@@ -337,11 +357,16 @@ StringRef Archive::Symbol::getName() const {
ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
const char *Buf = Parent->SymbolTable->getBuffer().begin();
- const char *Offsets = Buf + 4;
+ const char *Offsets = Buf;
+ if (Parent->kind() == K_MIPS64)
+ Offsets += sizeof(uint64_t);
+ else
+ Offsets += sizeof(uint32_t);
uint32_t Offset = 0;
if (Parent->kind() == K_GNU) {
- Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
- + SymbolIndex);
+ Offset = read32be(Offsets + SymbolIndex * 4);
+ } else if (Parent->kind() == K_MIPS64) {
+ Offset = read64be(Offsets + SymbolIndex * 8);
} else if (Parent->kind() == K_BSD) {
// The SymbolIndex is an index into the ranlib structs that start at
// Offsets (the first uint32_t is the number of bytes of the ranlib
@@ -349,36 +374,29 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
// being a string table offset and the second being the offset into
// the archive of the member that defines the symbol. Which is what
// is needed here.
- Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) +
- (SymbolIndex * 2) + 1);
+ Offset = read32le(Offsets + SymbolIndex * 8 + 4);
} else {
- uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
-
// Skip offsets.
- Buf += sizeof(support::ulittle32_t)
- + (MemberCount * sizeof(support::ulittle32_t));
-
- uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+ uint32_t MemberCount = read32le(Buf);
+ Buf += MemberCount * 4 + 4;
+ uint32_t SymbolCount = read32le(Buf);
if (SymbolIndex >= SymbolCount)
return object_error::parse_failed;
// Skip SymbolCount to get to the indices table.
- const char *Indices = Buf + sizeof(support::ulittle32_t);
+ const char *Indices = Buf + 4;
// Get the index of the offset in the file member offset table for this
// symbol.
- uint16_t OffsetIndex =
- *(reinterpret_cast<const support::ulittle16_t*>(Indices)
- + SymbolIndex);
+ uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
// Subtract 1 since OffsetIndex is 1 based.
--OffsetIndex;
if (OffsetIndex >= MemberCount)
return object_error::parse_failed;
- Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
- + OffsetIndex);
+ Offset = read32le(Offsets + OffsetIndex * 4);
}
const char *Loc = Parent->getData().begin() + Offset;
@@ -404,8 +422,7 @@ Archive::Symbol Archive::Symbol::getNext() const {
// the string table followed by the string table.
const char *Buf = Parent->SymbolTable->getBuffer().begin();
uint32_t RanlibCount = 0;
- RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) /
- (sizeof(uint32_t) * 2);
+ RanlibCount = read32le(Buf) / 8;
// If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
// don't change the t.StringIndex as we don't want to reference a ranlib
// past RanlibCount.
@@ -413,10 +430,8 @@ Archive::Symbol Archive::Symbol::getNext() const {
const char *Ranlibs = Buf + 4;
uint32_t CurRanStrx = 0;
uint32_t NextRanStrx = 0;
- CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
- (t.SymbolIndex * 2));
- NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
- ((t.SymbolIndex + 1) * 2));
+ CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
+ NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
t.StringIndex -= CurRanStrx;
t.StringIndex += NextRanStrx;
}
@@ -436,8 +451,11 @@ Archive::symbol_iterator Archive::symbol_begin() const {
const char *buf = SymbolTable->getBuffer().begin();
if (kind() == K_GNU) {
uint32_t symbol_count = 0;
- symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ symbol_count = read32be(buf);
buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_MIPS64) {
+ uint64_t symbol_count = read64be(buf);
+ buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
} else if (kind() == K_BSD) {
// The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
// which is the number of bytes of ranlib structs that follow. The ranlib
@@ -446,11 +464,10 @@ Archive::symbol_iterator Archive::symbol_begin() const {
// define the symbol. After that the next uint32_t is the byte count of
// the string table followed by the string table.
uint32_t ranlib_count = 0;
- ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
- (sizeof(uint32_t) * 2);
+ ranlib_count = read32le(buf) / 8;
const char *ranlibs = buf + 4;
uint32_t ran_strx = 0;
- ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs));
+ ran_strx = read32le(ranlibs);
buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
// Skip the byte count of the string table.
buf += sizeof(uint32_t);
@@ -458,9 +475,9 @@ Archive::symbol_iterator Archive::symbol_begin() const {
} else {
uint32_t member_count = 0;
uint32_t symbol_count = 0;
- member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ member_count = read32le(buf);
buf += 4 + (member_count * 4); // Skip offsets.
- symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ symbol_count = read32le(buf);
buf += 4 + (symbol_count * 2); // Skip indices.
}
uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
@@ -470,22 +487,21 @@ Archive::symbol_iterator Archive::symbol_begin() const {
Archive::symbol_iterator Archive::symbol_end() const {
if (!hasSymbolTable())
return symbol_iterator(Symbol(this, 0, 0));
+ return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
+}
+uint32_t Archive::getNumberOfSymbols() const {
const char *buf = SymbolTable->getBuffer().begin();
- uint32_t symbol_count = 0;
- if (kind() == K_GNU) {
- symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
- } else if (kind() == K_BSD) {
- symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
- (sizeof(uint32_t) * 2);
- } else {
- uint32_t member_count = 0;
- member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (member_count * 4); // Skip offsets.
- symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- }
- return symbol_iterator(
- Symbol(this, symbol_count, 0));
+ if (kind() == K_GNU)
+ return read32be(buf);
+ if (kind() == K_MIPS64)
+ return read64be(buf);
+ if (kind() == K_BSD)
+ return read32le(buf) / 8;
+ uint32_t member_count = 0;
+ member_count = read32le(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ return read32le(buf);
}
Archive::child_iterator Archive::findSym(StringRef name) const {
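The Archive changes above replace reinterpret_casts of packed endian types with explicit read16le/read32le/read32be/read64be calls plus manual byte offsets, and add the MIPS64 "/SYM64/" symbol table, which stores 64-bit big-endian counts and offsets. A portable sketch of such readers, independent of llvm::support::endian:

    #include <cstdint>

    // Read a 32-bit little-endian value from a possibly unaligned position.
    static uint32_t read32le(const char *P) {
      const unsigned char *B = reinterpret_cast<const unsigned char *>(P);
      return uint32_t(B[0]) | uint32_t(B[1]) << 8 | uint32_t(B[2]) << 16 |
             uint32_t(B[3]) << 24;
    }

    // Read a 32-bit big-endian value, as in GNU archive symbol tables.
    static uint32_t read32be(const char *P) {
      const unsigned char *B = reinterpret_cast<const unsigned char *>(P);
      return uint32_t(B[0]) << 24 | uint32_t(B[1]) << 16 | uint32_t(B[2]) << 8 |
             uint32_t(B[3]);
    }

    // Read a 64-bit big-endian value, as in the MIPS64 "/SYM64/" table.
    static uint64_t read64be(const char *P) {
      return uint64_t(read32be(P)) << 32 | read32be(P + 4);
    }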
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
index c56eeb1..a2b167a 100644
--- a/contrib/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -58,6 +58,7 @@ ErrorOr<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
+ case sys::fs::file_magic::macho_kext_bundle:
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index cde6fdc..74709c8 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -190,7 +190,9 @@ std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
Result = SymbolRef::ST_Data;
} else if (Symb.isFileRecord()) {
Result = SymbolRef::ST_File;
- } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG) {
+ } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG ||
+ Symb.isSectionDefinition()) {
+ // TODO: perhaps we need a new symbol type ST_Section.
Result = SymbolRef::ST_Debug;
} else if (!COFF::isReservedSectionNumber(SectionNumber)) {
const coff_section *Section = nullptr;
@@ -238,59 +240,10 @@ std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
uint64_t &Result) const {
COFFSymbolRef Symb = getCOFFSymbol(Ref);
- if (Symb.isAnyUndefined()) {
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
- if (Symb.isCommon()) {
+ if (Symb.isCommon())
Result = Symb.getValue();
- return object_error::success;
- }
-
- // Let's attempt to get the size of the symbol by looking at the address of
- // the symbol after the symbol in question.
- uint64_t SymbAddr;
- if (std::error_code EC = getSymbolAddress(Ref, SymbAddr))
- return EC;
- int32_t SectionNumber = Symb.getSectionNumber();
- if (COFF::isReservedSectionNumber(SectionNumber)) {
- // Absolute and debug symbols aren't sorted in any interesting way.
- Result = 0;
- return object_error::success;
- }
- const section_iterator SecEnd = section_end();
- uint64_t AfterAddr = UnknownAddressOrSize;
- for (const symbol_iterator &SymbI : symbols()) {
- section_iterator SecI = SecEnd;
- if (std::error_code EC = SymbI->getSection(SecI))
- return EC;
- // Check the symbol's section, skip it if it's in the wrong section.
- // First, make sure it is in any section.
- if (SecI == SecEnd)
- continue;
- // Second, make sure it is in the same section as the symbol in question.
- if (!sectionContainsSymbol(SecI->getRawDataRefImpl(), Ref))
- continue;
- uint64_t Addr;
- if (std::error_code EC = SymbI->getAddress(Addr))
- return EC;
- // We want to compare our symbol in question with the closest possible
- // symbol that comes after.
- if (AfterAddr > Addr && Addr > SymbAddr)
- AfterAddr = Addr;
- }
- if (AfterAddr == UnknownAddressOrSize) {
- // No symbol comes after this one, assume that everything after our symbol
- // is part of it.
- const coff_section *Section = nullptr;
- if (std::error_code EC = getSection(SectionNumber, Section))
- return EC;
- Result = Section->SizeOfRawData - Symb.getValue();
- } else {
- // Take the difference between our symbol and the symbol that comes after
- // our symbol.
- Result = AfterAddr - SymbAddr;
- }
+ else
+ Result = UnknownAddressOrSize;
return object_error::success;
}
@@ -359,12 +312,17 @@ bool COFFObjectFile::isSectionData(DataRefImpl Ref) const {
bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ return (Sec->Characteristics & BssFlags) == BssFlags;
}
bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ // In COFF, a virtual section won't have any in-file
+ // content, so the file pointer to the content will be zero.
+ return Sec->PointerToRawData == 0;
}
bool COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
@@ -1037,6 +995,19 @@ symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(Ref, this));
}
+section_iterator COFFObjectFile::getRelocationSection(DataRefImpl Rel) const {
+ symbol_iterator Sym = getRelocationSymbol(Rel);
+ if (Sym == symbol_end())
+ return section_end();
+ COFFSymbolRef Symb = getCOFFSymbol(*Sym);
+ if (!Symb.isSection())
+ return section_end();
+ section_iterator Res(section_end());
+ if (getSymbolSection(Sym->getRawDataRefImpl(), Res))
+ return section_end();
+ return Res;
+}
+
std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
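The isSectionBSS change above is an all-bits-set test: (Characteristics & Mask) == Mask requires every flag in the mask, whereas the old test against IMAGE_SCN_CNT_UNINITIALIZED_DATA alone matched on any overlap. The distinction in isolation:

    #include <cstdint>

    // True only if *all* bits of Mask are set in Flags (the new, strict test).
    static bool hasAllFlags(uint32_t Flags, uint32_t Mask) {
      return (Flags & Mask) == Mask;
    }

    // True if *any* bit of Mask is set in Flags (the old, looser test).
    static bool hasAnyFlag(uint32_t Flags, uint32_t Mask) {
      return (Flags & Mask) != 0;
    }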
diff --git a/contrib/llvm/lib/Object/ELFYAML.cpp b/contrib/llvm/lib/Object/ELFYAML.cpp
index 7e35fb5..78087d6 100644
--- a/contrib/llvm/lib/Object/ELFYAML.cpp
+++ b/contrib/llvm/lib/Object/ELFYAML.cpp
@@ -13,6 +13,7 @@
#include "llvm/Object/ELFYAML.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MipsABIFlags.h"
namespace llvm {
@@ -30,6 +31,7 @@ ScalarEnumerationTraits<ELFYAML::ELF_ET>::enumeration(IO &IO,
ECase(ET_DYN)
ECase(ET_CORE)
#undef ECase
+ IO.enumFallback<Hex16>(Value);
}
void
@@ -234,6 +236,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
ECase(ELFOSABI_NSK)
ECase(ELFOSABI_AROS)
ECase(ELFOSABI_FENIXOS)
+ ECase(ELFOSABI_CLOUDABI)
ECase(ELFOSABI_C6000_ELFABI)
ECase(ELFOSABI_C6000_LINUX)
ECase(ELFOSABI_ARM)
@@ -264,10 +267,33 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_MIPS_CPIC)
BCase(EF_MIPS_ABI2)
BCase(EF_MIPS_32BITMODE)
+ BCase(EF_MIPS_FP64)
BCase(EF_MIPS_NAN2008)
- BCase(EF_MIPS_ABI_O32)
BCase(EF_MIPS_MICROMIPS)
BCase(EF_MIPS_ARCH_ASE_M16)
+ BCase(EF_MIPS_ARCH_ASE_MDMX)
+ BCaseMask(EF_MIPS_ABI_O32, EF_MIPS_ABI)
+ BCaseMask(EF_MIPS_ABI_O64, EF_MIPS_ABI)
+ BCaseMask(EF_MIPS_ABI_EABI32, EF_MIPS_ABI)
+ BCaseMask(EF_MIPS_ABI_EABI64, EF_MIPS_ABI)
+ BCaseMask(EF_MIPS_MACH_3900, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_4010, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_4100, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_4650, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_4120, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_4111, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_SB1, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_OCTEON, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_XLR, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_OCTEON2, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_OCTEON3, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_5400, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_5900, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_5500, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_9000, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_LS2E, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_LS2F, EF_MIPS_MACH)
+ BCaseMask(EF_MIPS_MACH_LS3A, EF_MIPS_MACH)
BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH)
BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH)
BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH)
@@ -414,6 +440,16 @@ void ScalarBitSetTraits<ELFYAML::ELF_STO>::bitset(IO &IO,
#undef BCaseMask
}
+void ScalarEnumerationTraits<ELFYAML::ELF_RSS>::enumeration(
+ IO &IO, ELFYAML::ELF_RSS &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+ ECase(RSS_UNDEF)
+ ECase(RSS_GP)
+ ECase(RSS_GP0)
+ ECase(RSS_LOC)
+#undef ECase
+}
+
void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
IO &IO, ELFYAML::ELF_REL &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
@@ -444,6 +480,93 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
#undef ELF_RELOC
}
+void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
+ IO &IO, ELFYAML::MIPS_AFL_REG &Value) {
+#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X);
+ ECase(REG_NONE)
+ ECase(REG_32)
+ ECase(REG_64)
+ ECase(REG_128)
+#undef ECase
+}
+
+void ScalarEnumerationTraits<ELFYAML::MIPS_ABI_FP>::enumeration(
+ IO &IO, ELFYAML::MIPS_ABI_FP &Value) {
+#define ECase(X) IO.enumCase(Value, #X, Mips::Val_GNU_MIPS_ABI_##X);
+ ECase(FP_ANY)
+ ECase(FP_DOUBLE)
+ ECase(FP_SINGLE)
+ ECase(FP_SOFT)
+ ECase(FP_OLD_64)
+ ECase(FP_XX)
+ ECase(FP_64)
+ ECase(FP_64A)
+#undef ECase
+}
+
+void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_EXT>::enumeration(
+ IO &IO, ELFYAML::MIPS_AFL_EXT &Value) {
+#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X);
+ ECase(EXT_NONE)
+ ECase(EXT_XLR)
+ ECase(EXT_OCTEON2)
+ ECase(EXT_OCTEONP)
+ ECase(EXT_LOONGSON_3A)
+ ECase(EXT_OCTEON)
+ ECase(EXT_5900)
+ ECase(EXT_4650)
+ ECase(EXT_4010)
+ ECase(EXT_4100)
+ ECase(EXT_3900)
+ ECase(EXT_10000)
+ ECase(EXT_SB1)
+ ECase(EXT_4111)
+ ECase(EXT_4120)
+ ECase(EXT_5400)
+ ECase(EXT_5500)
+ ECase(EXT_LOONGSON_2E)
+ ECase(EXT_LOONGSON_2F)
+ ECase(EXT_OCTEON3)
+#undef ECase
+}
+
+void ScalarEnumerationTraits<ELFYAML::MIPS_ISA>::enumeration(
+ IO &IO, ELFYAML::MIPS_ISA &Value) {
+ IO.enumCase(Value, "MIPS1", 1);
+ IO.enumCase(Value, "MIPS2", 2);
+ IO.enumCase(Value, "MIPS3", 3);
+ IO.enumCase(Value, "MIPS4", 4);
+ IO.enumCase(Value, "MIPS5", 5);
+ IO.enumCase(Value, "MIPS32", 32);
+ IO.enumCase(Value, "MIPS64", 64);
+}
+
+void ScalarBitSetTraits<ELFYAML::MIPS_AFL_ASE>::bitset(
+ IO &IO, ELFYAML::MIPS_AFL_ASE &Value) {
+#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_ASE_##X);
+ BCase(DSP)
+ BCase(DSPR2)
+ BCase(EVA)
+ BCase(MCU)
+ BCase(MDMX)
+ BCase(MIPS3D)
+ BCase(MT)
+ BCase(SMARTMIPS)
+ BCase(VIRT)
+ BCase(MSA)
+ BCase(MIPS16)
+ BCase(MICROMIPS)
+ BCase(XPA)
+#undef BCase
+}
+
+void ScalarBitSetTraits<ELFYAML::MIPS_AFL_FLAGS1>::bitset(
+ IO &IO, ELFYAML::MIPS_AFL_FLAGS1 &Value) {
+#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_FLAGS1_##X);
+ BCase(ODDSPREG)
+#undef BCase
+}
+
void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
ELFYAML::FileHeader &FileHdr) {
IO.mapRequired("Class", FileHdr.Class);
@@ -495,6 +618,7 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Address", Section.Address, Hex64(0));
IO.mapOptional("Link", Section.Link, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
+ IO.mapOptional("Info", Section.Info, StringRef());
}
static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
@@ -505,10 +629,39 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Info", Section.Info, StringRef());
IO.mapOptional("Relocations", Section.Relocations);
}
+static void groupSectionMapping(IO &IO, ELFYAML::Group &group) {
+ commonSectionMapping(IO, group);
+ IO.mapRequired("Members", group.Members);
+}
+
+void MappingTraits<ELFYAML::SectionOrType>::mapping(
+ IO &IO, ELFYAML::SectionOrType &sectionOrType) {
+ IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::MipsABIFlags &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Version", Section.Version, Hex16(0));
+ IO.mapRequired("ISA", Section.ISALevel);
+ IO.mapOptional("ISARevision", Section.ISARevision, Hex8(0));
+ IO.mapOptional("ISAExtension", Section.ISAExtension,
+ ELFYAML::MIPS_AFL_EXT(Mips::AFL_EXT_NONE));
+ IO.mapOptional("ASEs", Section.ASEs, ELFYAML::MIPS_AFL_ASE(0));
+ IO.mapOptional("FpABI", Section.FpABI,
+ ELFYAML::MIPS_ABI_FP(Mips::Val_GNU_MIPS_ABI_FP_ANY));
+ IO.mapOptional("GPRSize", Section.GPRSize,
+ ELFYAML::MIPS_AFL_REG(Mips::AFL_REG_NONE));
+ IO.mapOptional("CPR1Size", Section.CPR1Size,
+ ELFYAML::MIPS_AFL_REG(Mips::AFL_REG_NONE));
+ IO.mapOptional("CPR2Size", Section.CPR2Size,
+ ELFYAML::MIPS_AFL_REG(Mips::AFL_REG_NONE));
+ IO.mapOptional("Flags1", Section.Flags1, ELFYAML::MIPS_AFL_FLAGS1(0));
+ IO.mapOptional("Flags2", Section.Flags2, Hex32(0));
+}
+
void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
IO &IO, std::unique_ptr<ELFYAML::Section> &Section) {
ELFYAML::ELF_SHT sectionType;
@@ -524,6 +677,16 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::RelocationSection());
sectionMapping(IO, *cast<ELFYAML::RelocationSection>(Section.get()));
break;
+ case ELF::SHT_GROUP:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::Group());
+ groupSectionMapping(IO, *cast<ELFYAML::Group>(Section.get()));
+ break;
+ case ELF::SHT_MIPS_ABIFLAGS:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::MipsABIFlags());
+ sectionMapping(IO, *cast<ELFYAML::MipsABIFlags>(Section.get()));
+ break;
default:
if (!IO.outputting())
Section.reset(new ELFYAML::RawContentSection());
@@ -539,12 +702,49 @@ StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
return "Section size must be greater or equal to the content size";
}
+namespace {
+struct NormalizedMips64RelType {
+ NormalizedMips64RelType(IO &)
+ : Type(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)),
+ Type2(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)),
+ Type3(ELFYAML::ELF_REL(ELF::R_MIPS_NONE)),
+ SpecSym(ELFYAML::ELF_REL(ELF::RSS_UNDEF)) {}
+ NormalizedMips64RelType(IO &, ELFYAML::ELF_REL Original)
+ : Type(Original & 0xFF), Type2(Original >> 8 & 0xFF),
+ Type3(Original >> 16 & 0xFF), SpecSym(Original >> 24 & 0xFF) {}
+
+ ELFYAML::ELF_REL denormalize(IO &) {
+ ELFYAML::ELF_REL Res = Type | Type2 << 8 | Type3 << 16 | SpecSym << 24;
+ return Res;
+ }
+
+ ELFYAML::ELF_REL Type;
+ ELFYAML::ELF_REL Type2;
+ ELFYAML::ELF_REL Type3;
+ ELFYAML::ELF_RSS SpecSym;
+};
+}
+
void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
ELFYAML::Relocation &Rel) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
+
IO.mapRequired("Offset", Rel.Offset);
IO.mapRequired("Symbol", Rel.Symbol);
- IO.mapRequired("Type", Rel.Type);
- IO.mapOptional("Addend", Rel.Addend);
+
+ if (Object->Header.Machine == ELFYAML::ELF_EM(ELF::EM_MIPS) &&
+ Object->Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64)) {
+ MappingNormalization<NormalizedMips64RelType, ELFYAML::ELF_REL> Key(
+ IO, Rel.Type);
+ IO.mapRequired("Type", Key->Type);
+ IO.mapOptional("Type2", Key->Type2, ELFYAML::ELF_REL(ELF::R_MIPS_NONE));
+ IO.mapOptional("Type3", Key->Type3, ELFYAML::ELF_REL(ELF::R_MIPS_NONE));
+ IO.mapOptional("SpecSym", Key->SpecSym, ELFYAML::ELF_RSS(ELF::RSS_UNDEF));
+ } else
+ IO.mapRequired("Type", Rel.Type);
+
+ IO.mapOptional("Addend", Rel.Addend, (int64_t)0);
}
void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
@@ -556,5 +756,11 @@ void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
IO.setContext(nullptr);
}
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_EXT)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_ASE)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_FLAGS1)
+
} // end namespace yaml
} // end namespace llvm
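NormalizedMips64RelType above exists because MIPS64 ELF packs three relocation types plus a special-symbol selector into the single 32-bit r_type field, one byte each. The packing itself is plain byte arithmetic; a stand-alone sketch:

    #include <cstdint>

    struct Mips64RelType {
      uint8_t Type, Type2, Type3, SpecSym;
    };

    static Mips64RelType unpack(uint32_t Raw) {
      return {uint8_t(Raw & 0xFF), uint8_t(Raw >> 8 & 0xFF),
              uint8_t(Raw >> 16 & 0xFF), uint8_t(Raw >> 24 & 0xFF)};
    }

    static uint32_t pack(const Mips64RelType &T) {
      return uint32_t(T.Type) | uint32_t(T.Type2) << 8 |
             uint32_t(T.Type3) << 16 | uint32_t(T.SpecSym) << 24;
    }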
diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp
index 423ed9e..c12c5d4 100644
--- a/contrib/llvm/lib/Object/IRObjectFile.cpp
+++ b/contrib/llvm/lib/Object/IRObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/Object/IRObjectFile.h"
#include "RecordStreamer.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/LLVMContext.h"
@@ -24,6 +25,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -35,12 +37,9 @@ using namespace object;
IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
: SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
- // If we have a DataLayout, setup a mangler.
- const DataLayout *DL = M->getDataLayout();
- if (!DL)
- return;
-
- Mang.reset(new Mangler(DL));
+ // Set up a mangler with the DataLayout.
+ const DataLayout &DL = M->getDataLayout();
+ Mang.reset(new Mangler(&DL));
const std::string &InlineAsm = M->getModuleInlineAsm();
if (InlineAsm.empty())
@@ -73,6 +72,7 @@ IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
MOFI.InitMCObjectFileInfo(Triple, Reloc::Default, CodeModel::Default, MCCtx);
std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx));
+ T->createNullTargetStreamer(*Streamer);
std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
SourceMgr SrcMgr;
@@ -301,7 +301,9 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object,
std::unique_ptr<MemoryBuffer> Buff(
MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
- ErrorOr<Module *> MOrErr = getLazyBitcodeModule(std::move(Buff), Context);
+ ErrorOr<Module *> MOrErr =
+ getLazyBitcodeModule(std::move(Buff), Context, nullptr,
+ /*ShouldLazyLoadMetadata*/ true);
if (std::error_code EC = MOrErr.getError())
return EC;
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index e476976..79f8100 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -38,8 +38,12 @@ namespace {
};
}
-template<typename T>
+template <typename T>
static T getStruct(const MachOObjectFile *O, const char *P) {
+ // Don't read before the beginning or past the end of the file
+ if (P < O->getData().begin() || P + sizeof(T) > O->getData().end())
+ report_fatal_error("Malformed MachO file.");
+
T Cmd;
memcpy(&Cmd, P, sizeof(T));
if (O->isLittleEndian() != sys::IsLittleEndianHost)
@@ -47,15 +51,26 @@ static T getStruct(const MachOObjectFile *O, const char *P) {
return Cmd;
}
+template <typename SegmentCmd>
+static uint32_t getSegmentLoadCommandNumSections(const SegmentCmd &S,
+ uint32_t Cmdsize) {
+ const unsigned SectionSize = sizeof(SegmentCmd);
+ if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize ||
+ S.nsects * SectionSize > Cmdsize - sizeof(S))
+ report_fatal_error(
+ "Number of sections too large for size of load command.");
+ return S.nsects;
+}
+
static uint32_t
getSegmentLoadCommandNumSections(const MachOObjectFile *O,
const MachOObjectFile::LoadCommandInfo &L) {
- if (O->is64Bit()) {
- MachO::segment_command_64 S = O->getSegment64LoadCommand(L);
- return S.nsects;
- }
- MachO::segment_command S = O->getSegmentLoadCommand(L);
- return S.nsects;
+ if (O->is64Bit())
+ return getSegmentLoadCommandNumSections(O->getSegment64LoadCommand(L),
+ L.C.cmdsize);
+
+ return getSegmentLoadCommandNumSections(O->getSegmentLoadCommand(L),
+ L.C.cmdsize);
}
static bool isPageZeroSegment(const MachOObjectFile *O,
@@ -233,8 +248,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, std::error_code &EC)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
- DataInCodeLoadCmd(nullptr), DyldInfoLoadCmd(nullptr),
- UuidLoadCmd(nullptr), HasPageZeroSegment(false) {
+ DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr),
+ DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr),
+ HasPageZeroSegment(false) {
uint32_t LoadCommandCount = this->getHeader().ncmds;
if (LoadCommandCount == 0)
return;
@@ -265,6 +281,13 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
}
DataInCodeLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) {
+ // Multiple linker optimization hint tables
+ if (LinkOptHintsLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
+ LinkOptHintsLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
// Multiple dyldinfo load commands
@@ -281,6 +304,12 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
}
UuidLoadCmd = Load.Ptr;
} else if (Load.C.cmd == SegmentLoadType) {
+ const unsigned SegmentLoadSize = this->is64Bit()
+ ? sizeof(MachO::segment_command_64)
+ : sizeof(MachO::segment_command);
+ if (Load.C.cmdsize < SegmentLoadSize)
+ report_fatal_error("Segment load command size is too small.");
+
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
for (unsigned J = 0; J < NumSections; ++J) {
const char *Sec = getSectionPtr(this, Load, J);
@@ -315,6 +344,9 @@ std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
StringRef StringTable = getStringTableData();
MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
const char *Start = &StringTable.data()[Entry.n_strx];
+ if (Start < getData().begin() || Start >= getData().end())
+ report_fatal_error(
+ "Symbol name entry points before beginning or past end of file.");
Res = StringRef(Start);
return object_error::success;
}
@@ -383,49 +415,13 @@ std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- uint64_t BeginOffset;
- uint64_t EndOffset = 0;
- uint8_t SectionIndex;
-
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
uint64_t Value;
getSymbolAddress(DRI, Value);
- if (Value == UnknownAddressOrSize) {
+ uint32_t flags = getSymbolFlags(DRI);
+ if (flags & SymbolRef::SF_Common)
+ Result = Value;
+ else
Result = UnknownAddressOrSize;
- return object_error::success;
- }
-
- BeginOffset = Value;
-
- SectionIndex = Entry.n_sect;
- if (!SectionIndex) {
- uint32_t flags = getSymbolFlags(DRI);
- if (flags & SymbolRef::SF_Common)
- Result = Value;
- else
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
- // Unfortunately symbols are unsorted so we need to touch all
- // symbols from load command
- for (const SymbolRef &Symbol : symbols()) {
- DataRefImpl DRI = Symbol.getRawDataRefImpl();
- Entry = getSymbolTableEntryBase(this, DRI);
- getSymbolAddress(DRI, Value);
- if (Value == UnknownAddressOrSize)
- continue;
- if (Entry.n_sect == SectionIndex && Value > BeginOffset)
- if (!EndOffset || Value < EndOffset)
- EndOffset = Value;
- }
- if (!EndOffset) {
- DataRefImpl Sec;
- Sec.d.a = SectionIndex-1;
- uint64_t Size = getSectionSize(Sec);
- EndOffset = getSectionAddress(Sec);
- EndOffset += Size;
- }
- Result = EndOffset - BeginOffset;
return object_error::success;
}
@@ -478,6 +474,9 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
if (Value && Value != UnknownAddressOrSize)
Result |= SymbolRef::SF_Common;
}
+
+ if (!(MachOType & MachO::N_PEXT))
+ Result |= SymbolRef::SF_Exported;
}
if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
@@ -502,6 +501,8 @@ std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
} else {
DataRefImpl DRI;
DRI.d.a = index - 1;
+ if (DRI.d.a >= Sections.size())
+ report_fatal_error("getSymbolSection: Invalid section index.");
Res = section_iterator(SectionRef(DRI, this));
}
@@ -675,6 +676,11 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(Sym, this));
}
+section_iterator
+MachOObjectFile::getRelocationSection(DataRefImpl Rel) const {
+ return section_iterator(getAnyRelocationSection(getRelocation(Rel)));
+}
+
std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
MachO::any_relocation_info RE = getRelocation(Rel);
@@ -1201,7 +1207,8 @@ basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
return basic_symbol_iterator(SymbolRef(DRI, this));
MachO::symtab_command Symtab = getSymtabLoadCommand();
- assert(Index < Symtab.nsyms && "Requested symbol index is out of range.");
+ if (Index >= Symtab.nsyms)
+ report_fatal_error("Requested symbol index is out of range.");
unsigned SymbolTableEntrySize =
is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
@@ -1501,7 +1508,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const {
if (Stack.size() != Other.Stack.size())
return false;
// Not equal if different cumulative strings.
- if (!CumulativeString.str().equals(Other.CumulativeString.str()))
+ if (!CumulativeString.equals(Other.CumulativeString))
return false;
// Equal if all nodes in both stacks match.
for (unsigned i=0; i < Stack.size(); ++i) {
@@ -1523,7 +1530,7 @@ uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) {
}
StringRef ExportEntry::name() const {
- return CumulativeString.str();
+ return CumulativeString;
}
uint64_t ExportEntry::flags() const {
@@ -2105,6 +2112,7 @@ MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char>
MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
+ assert(Sec.d.a < Sections.size() && "Should have detected this earlier");
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
return makeArrayRef(Base->sectname);
@@ -2112,6 +2120,7 @@ MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
ArrayRef<char>
MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
+ assert(Sec.d.a < Sections.size() && "Should have detected this earlier");
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
return makeArrayRef(Base->segname);
@@ -2184,7 +2193,7 @@ MachOObjectFile::getAnyRelocationType(
}
SectionRef
-MachOObjectFile::getRelocationSection(
+MachOObjectFile::getAnyRelocationSection(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
return *section_end();
@@ -2202,6 +2211,8 @@ MachOObjectFile::getFirstLoadCommandInfo() const {
sizeof(MachO::mach_header);
Load.Ptr = getPtr(this, HeaderSize);
Load.C = getStruct<MachO::load_command>(this, Load.Ptr);
+ if (Load.C.cmdsize < 8)
+ report_fatal_error("Load command with size < 8 bytes.");
return Load;
}
@@ -2210,14 +2221,18 @@ MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
MachOObjectFile::LoadCommandInfo Next;
Next.Ptr = L.Ptr + L.C.cmdsize;
Next.C = getStruct<MachO::load_command>(this, Next.Ptr);
+ if (Next.C.cmdsize < 8)
+ report_fatal_error("Load command with size < 8 bytes.");
return Next;
}
MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const {
+ assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
return getStruct<MachO::section>(this, Sections[DRI.d.a]);
}
MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
+ assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
return getStruct<MachO::section_64>(this, Sections[DRI.d.a]);
}
@@ -2455,6 +2470,21 @@ MachOObjectFile::getDataInCodeLoadCommand() const {
return Cmd;
}
+MachO::linkedit_data_command
+MachOObjectFile::getLinkOptHintsLoadCommand() const {
+ if (LinkOptHintsLoadCmd)
+ return getStruct<MachO::linkedit_data_command>(this, LinkOptHintsLoadCmd);
+
+ // If there is no LinkOptHintsLoadCmd, return a load command with zeroed
+ // fields.
+ MachO::linkedit_data_command Cmd;
+ Cmd.cmd = MachO::LC_LINKER_OPTIMIZATION_HINT;
+ Cmd.cmdsize = sizeof(MachO::linkedit_data_command);
+ Cmd.dataoff = 0;
+ Cmd.datasize = 0;
+ return Cmd;
+}
+
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
if (!DyldInfoLoadCmd)
return ArrayRef<uint8_t>();
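The Mach-O hardening above follows two patterns worth isolating: never copy a struct out of the file buffer without first checking the range, and never compute count * size without guarding against 32-bit wraparound. A sketch of both, using an exception where the original calls report_fatal_error:

    #include <cstdint>
    #include <cstring>
    #include <limits>
    #include <stdexcept>

    // Bounds-checked struct read from [Begin, End).
    template <typename T>
    static T checkedRead(const char *Begin, const char *End, const char *P) {
      if (P < Begin || P + sizeof(T) > End)
        throw std::runtime_error("malformed Mach-O file");
      T Value;
      std::memcpy(&Value, P, sizeof(T));
      return Value;
    }

    // Overflow-safe "NumSections * SectionSize fits in Available":
    // test the division first so the multiplication cannot wrap.
    static bool sectionsFit(uint32_t NumSections, uint32_t SectionSize,
                            uint32_t Available) {
      return SectionSize != 0 &&
             NumSections <= std::numeric_limits<uint32_t>::max() / SectionSize &&
             NumSections * SectionSize <= Available;
    }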
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index fd78271..01b7654 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -76,6 +76,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) {
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
+ case sys::fs::file_magic::macho_kext_bundle:
return createMachOObjectFile(Object);
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp
index 081fadd..42dbd3e 100644
--- a/contrib/llvm/lib/Object/RecordStreamer.cpp
+++ b/contrib/llvm/lib/Object/RecordStreamer.cpp
@@ -89,7 +89,7 @@ bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
return true;
}
-void RecordStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+void RecordStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
markDefined(*Symbol);
}
diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h
index 7dacbdf..d861061 100644
--- a/contrib/llvm/lib/Object/RecordStreamer.h
+++ b/contrib/llvm/lib/Object/RecordStreamer.h
@@ -33,7 +33,7 @@ public:
void EmitLabel(MCSymbol *Symbol) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, uint64_t Size,
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp
index de98a12..854e68e 100644
--- a/contrib/llvm/lib/Object/SymbolicFile.cpp
+++ b/contrib/llvm/lib/Object/SymbolicFile.cpp
@@ -53,6 +53,7 @@ ErrorOr<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
+ case sys::fs::file_magic::macho_kext_bundle:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return ObjectFile::createObjectFile(Object, Type);
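Binary.cpp, ObjectFile.cpp, and SymbolicFile.cpp each gain the same one-line case: every dispatch switch over the detected file magic must learn the new macho_kext_bundle kind, and missing one such switch is exactly the kind of bug this guards against. The shape of that dispatch, with hypothetical names:

    enum class FileMagic { MachOObject, MachOKextBundle, COFFObject, Unknown };

    static const char *classify(FileMagic Magic) {
      switch (Magic) {
      case FileMagic::MachOObject:
      case FileMagic::MachOKextBundle: // newly recognized kind
        return "mach-o";
      case FileMagic::COFFObject:
        return "coff";
      default:
        return "unknown";
      }
    }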
diff --git a/contrib/llvm/lib/Option/Arg.cpp b/contrib/llvm/lib/Option/Arg.cpp
index 4c8da58..ac00073 100644
--- a/contrib/llvm/lib/Option/Arg.cpp
+++ b/contrib/llvm/lib/Option/Arg.cpp
@@ -17,22 +17,21 @@
using namespace llvm;
using namespace llvm::opt;
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
-}
-
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
- const char *Value0, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {}
+
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
+ const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {
Values.push_back(Value0);
}
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
- const char *Value0, const char *Value1, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
+ const char *Value1, const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {
Values.push_back(Value0);
Values.push_back(Value1);
}
@@ -83,15 +82,13 @@ void Arg::renderAsInput(const ArgList &Args, ArgStringList &Output) const {
return;
}
- for (unsigned i = 0, e = getNumValues(); i != e; ++i)
- Output.push_back(getValue(i));
+ Output.append(Values.begin(), Values.end());
}
void Arg::render(const ArgList &Args, ArgStringList &Output) const {
switch (getOption().getRenderStyle()) {
case Option::RenderValuesStyle:
- for (unsigned i = 0, e = getNumValues(); i != e; ++i)
- Output.push_back(getValue(i));
+ Output.append(Values.begin(), Values.end());
break;
case Option::RenderCommaJoinedStyle: {
@@ -109,14 +106,12 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const {
case Option::RenderJoinedStyle:
Output.push_back(Args.GetOrMakeJoinedArgString(
getIndex(), getSpelling(), getValue(0)));
- for (unsigned i = 1, e = getNumValues(); i != e; ++i)
- Output.push_back(getValue(i));
+ Output.append(Values.begin() + 1, Values.end());
break;
case Option::RenderSeparateStyle:
Output.push_back(Args.MakeArgString(getSpelling()));
- for (unsigned i = 0, e = getNumValues(); i != e; ++i)
- Output.push_back(getValue(i));
+ Output.append(Values.begin(), Values.end());
break;
}
}
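The Arg.cpp rewrites collapse index-based copy loops into single range appends over the value list; same behavior, fewer bounds to get wrong. The pattern in isolation:

    #include <vector>

    static void appendValues(const std::vector<const char *> &Values,
                             std::vector<const char *> &Output) {
      // Equivalent to: for (i = 0; i != Values.size(); ++i) push_back(Values[i]);
      Output.insert(Output.end(), Values.begin(), Values.end());
    }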
diff --git a/contrib/llvm/lib/Option/ArgList.cpp b/contrib/llvm/lib/Option/ArgList.cpp
index b1a916d..b771a18 100644
--- a/contrib/llvm/lib/Option/ArgList.cpp
+++ b/contrib/llvm/lib/Option/ArgList.cpp
@@ -63,6 +63,26 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const {
return nullptr;
}
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2))
+ return *it;
+ return nullptr;
+}
+
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) || (*it)->getOption().matches(Id3))
+ return *it;
+ return nullptr;
+}
+
Arg *ArgList::getLastArg(OptSpecifier Id) const {
Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
@@ -253,8 +273,8 @@ void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1, OptSpecifier Id2) const {
for (auto Arg : filtered(Id0, Id1, Id2)) {
Arg->claim();
- for (unsigned i = 0, e = Arg->getNumValues(); i != e; ++i)
- Output.push_back(Arg->getValue(i));
+ const auto &Values = Arg->getValues();
+ Output.append(Values.begin(), Values.end());
}
}
@@ -285,11 +305,6 @@ void ArgList::ClaimAllArgs() const {
(*it)->claim();
}
-const char *ArgList::MakeArgString(const Twine &T) const {
- SmallString<256> Str;
- return MakeArgString(T.toStringRef(Str));
-}
-
const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
StringRef LHS,
StringRef RHS) const {
@@ -334,19 +349,18 @@ unsigned InputArgList::MakeIndex(StringRef String0,
return Index0;
}
-const char *InputArgList::MakeArgString(StringRef Str) const {
+const char *InputArgList::MakeArgStringRef(StringRef Str) const {
return getArgString(MakeIndex(Str));
}
//
-DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
- : BaseArgs(_BaseArgs) {
-}
+DerivedArgList::DerivedArgList(const InputArgList &BaseArgs)
+ : BaseArgs(BaseArgs) {}
DerivedArgList::~DerivedArgList() {}
-const char *DerivedArgList::MakeArgString(StringRef Str) const {
+const char *DerivedArgList::MakeArgStringRef(StringRef Str) const {
return BaseArgs.MakeArgString(Str);
}
@@ -381,7 +395,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
- unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
+ unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str());
SynthesizedArgs.push_back(make_unique<Arg>(
Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index,
BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
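The new getLastArgNoClaim overloads above all share one shape: walk the argument list from the back and return the first (i.e. last-seen) argument matching any of the given options. A generic sketch with a predicate in place of N OptSpecifier overloads:

    #include <vector>

    template <typename Arg, typename Pred>
    static Arg *findLastArg(const std::vector<Arg *> &Args, Pred Matches) {
      for (auto It = Args.rbegin(), End = Args.rend(); It != End; ++It)
        if (Matches(**It))
          return *It; // first hit in reverse order == last argument
      return nullptr;
    }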
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index dca02c17..96ba183 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -84,15 +84,11 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) {
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
-OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos,
- bool _IgnoreCase)
- : OptionInfos(_OptionInfos),
- NumOptionInfos(_NumOptionInfos),
- IgnoreCase(_IgnoreCase),
- TheInputOptionID(0),
- TheUnknownOptionID(0),
- FirstSearchableIndex(0)
-{
+OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos,
+ bool IgnoreCase)
+ : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos),
+ IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0),
+ FirstSearchableIndex(0) {
// Explicitly zero initialize the error to work around a bug in array
// value-initialization on MinGW with gcc 4.3.5.
diff --git a/contrib/llvm/lib/Option/Option.cpp b/contrib/llvm/lib/Option/Option.cpp
index cdc63c3..221414d 100644
--- a/contrib/llvm/lib/Option/Option.cpp
+++ b/contrib/llvm/lib/Option/Option.cpp
@@ -35,9 +35,6 @@ Option::Option(const OptTable::Info *info, const OptTable *owner)
}
}
-Option::~Option() {
-}
-
void Option::dump() const {
llvm::errs() << "<";
switch (getKind()) {
@@ -128,6 +125,11 @@ Arg *Option::accept(const ArgList &Args,
Val += strlen(Val) + 1;
}
}
+
+ if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs())
+ // A Flag alias for a Joined option must provide an argument.
+ A->getValues().push_back("");
+
return A;
}
case JoinedClass: {
diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp
new file mode 100644
index 0000000..ba71320
--- /dev/null
+++ b/contrib/llvm/lib/Passes/PassBuilder.cpp
@@ -0,0 +1,412 @@
+//===- Parsing, selection, and construction of pass pipelines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief No-op module pass which does nothing.
+struct NoOpModulePass {
+ PreservedAnalyses run(Module &M) { return PreservedAnalyses::all(); }
+ static StringRef name() { return "NoOpModulePass"; }
+};
+
+/// \brief No-op module analysis.
+struct NoOpModuleAnalysis {
+ struct Result {};
+ Result run(Module &) { return Result(); }
+ static StringRef name() { return "NoOpModuleAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpModuleAnalysis::PassID;
+
+/// \brief No-op CGSCC pass which does nothing.
+struct NoOpCGSCCPass {
+ PreservedAnalyses run(LazyCallGraph::SCC &C) {
+ return PreservedAnalyses::all();
+ }
+ static StringRef name() { return "NoOpCGSCCPass"; }
+};
+
+/// \brief No-op CGSCC analysis.
+struct NoOpCGSCCAnalysis {
+ struct Result {};
+ Result run(LazyCallGraph::SCC &) { return Result(); }
+ static StringRef name() { return "NoOpCGSCCAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpCGSCCAnalysis::PassID;
+
+/// \brief No-op function pass which does nothing.
+struct NoOpFunctionPass {
+ PreservedAnalyses run(Function &F) { return PreservedAnalyses::all(); }
+ static StringRef name() { return "NoOpFunctionPass"; }
+};
+
+/// \brief No-op function analysis.
+struct NoOpFunctionAnalysis {
+ struct Result {};
+ Result run(Function &) { return Result(); }
+ static StringRef name() { return "NoOpFunctionAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpFunctionAnalysis::PassID;
+
+} // End anonymous namespace.
+
+void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ MAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ CGAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ FAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+#ifndef NDEBUG
+static bool isModulePassName(StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+#endif
+
+static bool isCGSCCPassName(StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+
+static bool isFunctionPassName(StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ MPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ MPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ CGPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ CGPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
+ StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ FPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ FPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ FPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ FPM.addPass(std::move(NestedFPM));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseFunctionPassName(FPM, PipelineText.substr(0, End)))
+ return false;
+ if (VerifyEachPass)
+ FPM.addPass(VerifierPass());
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("cgscc(")) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("cgscc("));
+ if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(std::move(NestedCGPM));
+ } else if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(NestedFPM)));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseCGSCCPassName(CGPM, PipelineText.substr(0, End)))
+ return false;
+ // FIXME: No verifier support for CGSCC passes!
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("module(")) {
+ ModulePassManager NestedMPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("module("));
+ if (!parseModulePassPipeline(NestedMPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Now add the nested manager as a module pass.
+ MPM.addPass(std::move(NestedMPM));
+ } else if (PipelineText.startswith("cgscc(")) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("cgscc("));
+ if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(std::move(NestedCGPM)));
+ } else if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(NestedFPM)));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseModulePassName(MPM, PipelineText.substr(0, End)))
+ return false;
+ if (VerifyEachPass)
+ MPM.addPass(VerifierPass());
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+// Primary pass pipeline description parsing routine.
+// FIXME: Should this routine accept a TargetMachine or require the caller to
+// pre-populate the analysis managers with target-specific stuff?
+bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+ StringRef PipelineText, bool VerifyEachPass,
+ bool DebugLogging) {
+ // By default, try to parse the pipeline as if it were within an implicit
+ // 'module(...)' pass pipeline. If this will parse at all, it needs to
+ // consume the entire string.
+ if (parseModulePassPipeline(MPM, PipelineText, VerifyEachPass, DebugLogging))
+ return PipelineText.empty();
+
+ // This isn't parsable as a module pipeline; look for the end of a pass name
+ // and directly drop down to that layer.
+ StringRef FirstName =
+ PipelineText.substr(0, PipelineText.find_first_of(",)"));
+ assert(!isModulePassName(FirstName) &&
+ "Already handled all module pipeline options.");
+
+ // If this looks like a CGSCC pass, parse the whole thing as a CGSCC
+ // pipeline.
+ if (isCGSCCPassName(FirstName)) {
+ CGSCCPassManager CGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(CGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ !PipelineText.empty())
+ return false;
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ return true;
+ }
+
+ // Similarly, if this looks like a Function pass, parse the whole thing as
+ // a Function pipeline.
+ if (isFunctionPassName(FirstName)) {
+ FunctionPassManager FPM(DebugLogging);
+ if (!parseFunctionPassPipeline(FPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ !PipelineText.empty())
+ return false;
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ return true;
+ }
+
+ return false;
+}
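For orientation, the grammar these routines accept is a comma-separated list
of pass names with optional module(...), cgscc(...), and function(...)
nesting. A minimal sketch of a driver, hypothetical except for the
parsePassPipeline signature above (the pass names come from PassRegistry.def
below):

    PassBuilder PB;
    ModulePassManager MPM(/*DebugLogging=*/false);
    // Builds: module -> CGSCC adaptor -> function adaptor -> two passes.
    bool Ok = PB.parsePassPipeline(
        MPM, "cgscc(function(instcombine,simplify-cfg))",
        /*VerifyEachPass=*/true, /*DebugLogging=*/false);

Note the fallback: a bare "instcombine,simplify-cfg" also parses, because
parsePassPipeline retries the text as an implicit function pipeline and wraps
the result in createModuleToFunctionPassAdaptor.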
diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def
new file mode 100644
index 0000000..d768a3a
--- /dev/null
+++ b/contrib/llvm/lib/Passes/PassRegistry.def
@@ -0,0 +1,77 @@
+//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the core LLVM
+// libraries. This file describes both transformation passes and analyses.
+// Analyses are registered while transformation passes have names registered
+// that can be used when providing a textual pass pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, CREATE_PASS)
+#endif
+MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs()))
+MODULE_PASS("verify", VerifierPass())
+#undef MODULE_PASS
+
+#ifndef CGSCC_ANALYSIS
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
+#endif
+CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
+#undef CGSCC_ANALYSIS
+
+#ifndef CGSCC_PASS
+#define CGSCC_PASS(NAME, CREATE_PASS)
+#endif
+CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
+#undef CGSCC_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
+#endif
+FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
+FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
+FUNCTION_ANALYSIS("loops", LoopAnalysis())
+FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
+FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+FUNCTION_ANALYSIS("targetir",
+ TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("instcombine", InstCombinePass())
+FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
+FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
+FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
+FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
+FUNCTION_PASS("verify", VerifierPass())
+FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
+#undef FUNCTION_PASS
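PassRegistry.def is an X-macro file: a client defines only the macros it
cares about, includes the file, and gets one expansion per entry; the empty
fallback definitions and trailing #undefs keep the remaining entries inert.
A sketch of a client that merely lists the registered function pass names
(illustrative only; the is*PassName and parse*PassName routines above are
the real clients):

    #define FUNCTION_PASS(NAME, CREATE_PASS) llvm::outs() << NAME << '\n';
    #include "PassRegistry.def"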
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
index 1752777..bbac5c2 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
@@ -15,11 +15,14 @@
#include "llvm/ProfileData/CoverageMapping.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ProfileData/CoverageMappingReader.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace coverage;
@@ -177,32 +180,47 @@ void FunctionRecordIterator::skipOtherFiles() {
*this = FunctionRecordIterator();
}
+/// Get the function name from the record, removing the filename prefix if
+/// necessary.
+static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) {
+ StringRef FunctionName = Record.FunctionName;
+ if (Record.Filenames.empty())
+ return FunctionName;
+ StringRef Filename = sys::path::filename(Record.Filenames[0]);
+ if (FunctionName.startswith(Filename))
+ FunctionName = FunctionName.drop_front(Filename.size() + 1);
+ return FunctionName;
+}
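Illustrative input and output for this helper, assuming the convention that
coverage records key file-local functions as "<file>:<name>":

    // Record.FunctionName == "foo.c:main"
    // Record.Filenames[0] == "/src/foo.c"
    // sys::path::filename(...) == "foo.c", a prefix of the name, so the
    // helper drops it plus the ':' separator:
    // getFuncNameWithoutPrefix(Record) == "main"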
+
ErrorOr<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(ObjectFileCoverageMappingReader &CoverageReader,
+CoverageMapping::load(CoverageMappingReader &CoverageReader,
IndexedInstrProfReader &ProfileReader) {
auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
std::vector<uint64_t> Counts;
for (const auto &Record : CoverageReader) {
+ CounterMappingContext Ctx(Record.Expressions);
+
Counts.clear();
if (std::error_code EC = ProfileReader.getFunctionCounts(
Record.FunctionName, Record.FunctionHash, Counts)) {
- if (EC != instrprof_error::hash_mismatch &&
- EC != instrprof_error::unknown_function)
+ if (EC == instrprof_error::hash_mismatch) {
+ Coverage->MismatchedFunctionCount++;
+ continue;
+ } else if (EC != instrprof_error::unknown_function)
return EC;
- Coverage->MismatchedFunctionCount++;
- continue;
+ Counts.assign(Record.MappingRegions.size(), 0);
}
+ Ctx.setCounts(Counts);
+
+ assert(!Record.MappingRegions.empty() && "Function has no regions");
- assert(Counts.size() != 0 && "Function's counts are empty");
- FunctionRecord Function(Record.FunctionName, Record.Filenames,
- Counts.front());
- CounterMappingContext Ctx(Record.Expressions, Counts);
+ FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
if (!ExecutionCount)
break;
- Function.CountedRegions.push_back(CountedRegion(Region, *ExecutionCount));
+ Function.pushRegion(Region, *ExecutionCount);
}
if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
Coverage->MismatchedFunctionCount++;
@@ -216,17 +234,21 @@ CoverageMapping::load(ObjectFileCoverageMappingReader &CoverageReader,
}
ErrorOr<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename) {
+CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename,
+ Triple::ArchType Arch) {
auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
- if (auto EC = CounterMappingBuff.getError())
+ if (std::error_code EC = CounterMappingBuff.getError())
return EC;
- ObjectFileCoverageMappingReader CoverageReader(CounterMappingBuff.get());
- if (auto EC = CoverageReader.readHeader())
+ auto CoverageReaderOrErr =
+ BinaryCoverageReader::create(CounterMappingBuff.get(), Arch);
+ if (std::error_code EC = CoverageReaderOrErr.getError())
return EC;
- std::unique_ptr<IndexedInstrProfReader> ProfileReader;
- if (auto EC = IndexedInstrProfReader::create(ProfileFilename, ProfileReader))
+ auto CoverageReader = std::move(CoverageReaderOrErr.get());
+ auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename);
+ if (auto EC = ProfileReaderOrErr.getError())
return EC;
- return load(CoverageReader, *ProfileReader);
+ auto ProfileReader = std::move(ProfileReaderOrErr.get());
+ return load(*CoverageReader, *ProfileReader);
}
namespace {
@@ -304,20 +326,22 @@ class SegmentBuilder {
public:
/// Build a list of CoverageSegments from a sorted list of Regions.
std::vector<CoverageSegment> buildSegments(ArrayRef<CountedRegion> Regions) {
+ const CountedRegion *PrevRegion = nullptr;
for (const auto &Region : Regions) {
// Pop any regions that end before this one starts.
while (!ActiveRegions.empty() &&
ActiveRegions.back()->endLoc() <= Region.startLoc())
popRegion();
- if (Segments.size() && Segments.back().Line == Region.LineStart &&
- Segments.back().Col == Region.ColumnStart) {
- if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion)
+ if (PrevRegion && PrevRegion->startLoc() == Region.startLoc() &&
+ PrevRegion->endLoc() == Region.endLoc()) {
+ if (Region.Kind == coverage::CounterMappingRegion::CodeRegion)
Segments.back().addCount(Region.ExecutionCount);
} else {
// Add this region to the stack.
ActiveRegions.push_back(&Region);
startSegment(Region);
}
+ PrevRegion = &Region;
}
// Pop any regions that are left in the stack.
while (!ActiveRegions.empty())
@@ -330,50 +354,47 @@ public:
std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
std::vector<StringRef> Filenames;
for (const auto &Function : getCoveredFunctions())
- for (const auto &Filename : Function.Filenames)
- Filenames.push_back(Filename);
+ Filenames.insert(Filenames.end(), Function.Filenames.begin(),
+ Function.Filenames.end());
std::sort(Filenames.begin(), Filenames.end());
auto Last = std::unique(Filenames.begin(), Filenames.end());
Filenames.erase(Last, Filenames.end());
return Filenames;
}
-static Optional<unsigned> findMainViewFileID(StringRef SourceFile,
- const FunctionRecord &Function) {
- llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
- llvm::SmallVector<bool, 8> FilenameEquivalence(Function.Filenames.size(),
- false);
+static SmallBitVector gatherFileIDs(StringRef SourceFile,
+ const FunctionRecord &Function) {
+ SmallBitVector FilenameEquivalence(Function.Filenames.size(), false);
for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
if (SourceFile == Function.Filenames[I])
FilenameEquivalence[I] = true;
+ return FilenameEquivalence;
+}
+
+static Optional<unsigned> findMainViewFileID(StringRef SourceFile,
+ const FunctionRecord &Function) {
+ SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true);
+ SmallBitVector FilenameEquivalence = gatherFileIDs(SourceFile, Function);
for (const auto &CR : Function.CountedRegions)
if (CR.Kind == CounterMappingRegion::ExpansionRegion &&
FilenameEquivalence[CR.FileID])
- IsExpandedFile[CR.ExpandedFileID] = true;
- for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
- if (FilenameEquivalence[I] && !IsExpandedFile[I])
- return I;
- return None;
+ IsNotExpandedFile[CR.ExpandedFileID] = false;
+ IsNotExpandedFile &= FilenameEquivalence;
+ int I = IsNotExpandedFile.find_first();
+ if (I == -1)
+ return None;
+ return I;
}
static Optional<unsigned> findMainViewFileID(const FunctionRecord &Function) {
- llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
+ SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true);
for (const auto &CR : Function.CountedRegions)
if (CR.Kind == CounterMappingRegion::ExpansionRegion)
- IsExpandedFile[CR.ExpandedFileID] = true;
- for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
- if (!IsExpandedFile[I])
- return I;
- return None;
-}
-
-static SmallSet<unsigned, 8> gatherFileIDs(StringRef SourceFile,
- const FunctionRecord &Function) {
- SmallSet<unsigned, 8> IDs;
- for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
- if (SourceFile == Function.Filenames[I])
- IDs.insert(I);
- return IDs;
+ IsNotExpandedFile[CR.ExpandedFileID] = false;
+ int I = IsNotExpandedFile.find_first();
+ if (I == -1)
+ return None;
+ return I;
}
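A self-contained sketch of the SmallBitVector idiom both overloads now share:
start with every file as a candidate, clear the files that are targets of
expansions, optionally mask by filename equivalence, and take the first
remaining set bit (the names here are illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallBitVector.h"

    static int firstNonExpandedFile(unsigned NumFiles,
                                    llvm::ArrayRef<unsigned> ExpandedIDs) {
      llvm::SmallBitVector IsNotExpanded(NumFiles, true);
      for (unsigned ID : ExpandedIDs)
        IsNotExpanded[ID] = false;       // clear expansion targets
      return IsNotExpanded.find_first(); // -1 if every file is expanded
    }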
/// Sort a nested sequence of regions from a single file.
@@ -401,7 +422,7 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) {
continue;
auto FileIDs = gatherFileIDs(Filename, Function);
for (const auto &CR : Function.CountedRegions)
- if (FileIDs.count(CR.FileID)) {
+ if (FileIDs.test(CR.FileID)) {
Regions.push_back(CR);
if (isExpansion(CR, *MainFileID))
FileCoverage.Expansions.emplace_back(CR, Function);
@@ -409,6 +430,7 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) {
}
sortNestedRegions(Regions.begin(), Regions.end());
+ DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n");
FileCoverage.Segments = SegmentBuilder().buildSegments(Regions);
return FileCoverage;
@@ -428,8 +450,8 @@ CoverageMapping::getInstantiations(StringRef Filename) {
for (const auto &InstantiationSet : InstantiationSetCollector) {
if (InstantiationSet.second.size() < 2)
continue;
- for (auto Function : InstantiationSet.second)
- Result.push_back(Function);
+ Result.insert(Result.end(), InstantiationSet.second.begin(),
+ InstantiationSet.second.end());
}
return Result;
}
@@ -450,6 +472,7 @@ CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) {
}
sortNestedRegions(Regions.begin(), Regions.end());
+ DEBUG(dbgs() << "Emitting segments for function: " << Function.Name << "\n");
FunctionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
return FunctionCoverage;
@@ -468,7 +491,39 @@ CoverageMapping::getCoverageForExpansion(const ExpansionRecord &Expansion) {
}
sortNestedRegions(Regions.begin(), Regions.end());
+ DEBUG(dbgs() << "Emitting segments for expansion of file " << Expansion.FileID
+ << "\n");
ExpansionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
return ExpansionCoverage;
}
+
+namespace {
+class CoverageMappingErrorCategoryType : public std::error_category {
+ const char *name() const LLVM_NOEXCEPT override { return "llvm.coveragemap"; }
+ std::string message(int IE) const override {
+ auto E = static_cast<coveragemap_error>(IE);
+ switch (E) {
+ case coveragemap_error::success:
+ return "Success";
+ case coveragemap_error::eof:
+ return "End of File";
+ case coveragemap_error::no_data_found:
+ return "No coverage data found";
+ case coveragemap_error::unsupported_version:
+ return "Unsupported coverage format version";
+ case coveragemap_error::truncated:
+ return "Truncated coverage data";
+ case coveragemap_error::malformed:
+ return "Malformed coverage data";
+ }
+ llvm_unreachable("A value of coveragemap_error has no message.");
+ }
+};
+}
+
+static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
+
+const std::error_category &llvm::coveragemap_category() {
+ return *ErrorCategory;
+}
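Assuming the usual std::is_error_code_enum specialization and make_error_code
overload in the CoverageMapping header, coveragemap_error values now convert
implicitly to std::error_code and render readable messages via this category:

    std::error_code EC = coveragemap_error::truncated;
    // EC.category().name() == "llvm.coveragemap"
    // EC.message()         == "Truncated coverage data"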
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
index 6476d28..cf6cd58 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
@@ -14,9 +14,12 @@
#include "llvm/ProfileData/CoverageMappingReader.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace coverage;
@@ -33,13 +36,13 @@ void CoverageMappingIterator::increment() {
std::error_code RawCoverageReader::readULEB128(uint64_t &Result) {
if (Data.size() < 1)
- return error(instrprof_error::truncated);
+ return coveragemap_error::truncated;
unsigned N = 0;
Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
if (N > Data.size())
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
Data = Data.substr(N);
- return success();
+ return std::error_code();
}
std::error_code RawCoverageReader::readIntMax(uint64_t &Result,
@@ -47,8 +50,8 @@ std::error_code RawCoverageReader::readIntMax(uint64_t &Result,
if (auto Err = readULEB128(Result))
return Err;
if (Result >= MaxPlus1)
- return error(instrprof_error::malformed);
- return success();
+ return coveragemap_error::malformed;
+ return std::error_code();
}
std::error_code RawCoverageReader::readSize(uint64_t &Result) {
@@ -56,8 +59,8 @@ std::error_code RawCoverageReader::readSize(uint64_t &Result) {
return Err;
// Sanity check the number.
if (Result > Data.size())
- return error(instrprof_error::malformed);
- return success();
+ return coveragemap_error::malformed;
+ return std::error_code();
}
std::error_code RawCoverageReader::readString(StringRef &Result) {
@@ -66,7 +69,7 @@ std::error_code RawCoverageReader::readString(StringRef &Result) {
return Err;
Result = Data.substr(0, Length);
Data = Data.substr(Length);
- return success();
+ return std::error_code();
}
std::error_code RawCoverageFilenamesReader::read() {
@@ -79,7 +82,7 @@ std::error_code RawCoverageFilenamesReader::read() {
return Err;
Filenames.push_back(Filename);
}
- return success();
+ return std::error_code();
}
std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
@@ -88,10 +91,10 @@ std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
switch (Tag) {
case Counter::Zero:
C = Counter::getZero();
- return success();
+ return std::error_code();
case Counter::CounterValueReference:
C = Counter::getCounter(Value >> Counter::EncodingTagBits);
- return success();
+ return std::error_code();
default:
break;
}
@@ -101,15 +104,15 @@ std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
case CounterExpression::Add: {
auto ID = Value >> Counter::EncodingTagBits;
if (ID >= Expressions.size())
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
C = Counter::getExpression(ID);
break;
}
default:
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
}
- return success();
+ return std::error_code();
}
std::error_code RawCoverageMappingReader::readCounter(Counter &C) {
@@ -119,7 +122,7 @@ std::error_code RawCoverageMappingReader::readCounter(Counter &C) {
return Err;
if (auto Err = decodeCounter(EncodedCounter, C))
return Err;
- return success();
+ return std::error_code();
}
static const unsigned EncodingExpansionRegionBit = 1
@@ -156,7 +159,7 @@ std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
ExpandedFileID = EncodedCounterAndRegion >>
Counter::EncodingCounterTagAndExpansionRegionTagBits;
if (ExpandedFileID >= NumFileIDs)
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
} else {
switch (EncodedCounterAndRegion >>
Counter::EncodingCounterTagAndExpansionRegionTagBits) {
@@ -167,23 +170,20 @@ std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
Kind = CounterMappingRegion::SkippedRegion;
break;
default:
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
}
}
}
// Read the source range.
- uint64_t LineStartDelta, CodeBeforeColumnStart, NumLines, ColumnEnd;
+ uint64_t LineStartDelta, ColumnStart, NumLines, ColumnEnd;
if (auto Err =
readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max()))
return Err;
- if (auto Err = readULEB128(CodeBeforeColumnStart))
+ if (auto Err = readULEB128(ColumnStart))
return Err;
- bool HasCodeBefore = CodeBeforeColumnStart & 1;
- uint64_t ColumnStart = CodeBeforeColumnStart >>
- CounterMappingRegion::EncodingHasCodeBeforeBits;
if (ColumnStart > std::numeric_limits<unsigned>::max())
- return error(instrprof_error::malformed);
+ return coveragemap_error::malformed;
if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
return Err;
if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
@@ -214,14 +214,13 @@ std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
});
MappingRegions.push_back(CounterMappingRegion(
- C, InferredFileID, LineStart, ColumnStart, LineStart + NumLines,
- ColumnEnd, HasCodeBefore, Kind));
- MappingRegions.back().ExpandedFileID = ExpandedFileID;
+ C, InferredFileID, ExpandedFileID, LineStart, ColumnStart,
+ LineStart + NumLines, ColumnEnd, Kind));
}
- return success();
+ return std::error_code();
}
-std::error_code RawCoverageMappingReader::read(CoverageMappingRecord &Record) {
+std::error_code RawCoverageMappingReader::read() {
// Read the virtual file mapping.
llvm::SmallVector<unsigned, 8> VirtualFileMapping;
@@ -287,42 +286,10 @@ std::error_code RawCoverageMappingReader::read(CoverageMappingRecord &Record) {
}
}
- Record.FunctionName = FunctionName;
- Record.Filenames = Filenames;
- Record.Expressions = Expressions;
- Record.MappingRegions = MappingRegions;
- return success();
-}
-
-ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
- StringRef FileName)
- : CurrentRecord(0) {
- auto File = llvm::object::ObjectFile::createObjectFile(FileName);
- if (!File)
- error(File.getError());
- else
- Object = std::move(File.get());
+ return std::error_code();
}
namespace {
-/// \brief The coverage mapping data for a single function.
-/// It points to the function's name.
-template <typename IntPtrT> struct CoverageMappingFunctionRecord {
- IntPtrT FunctionNamePtr;
- uint32_t FunctionNameSize;
- uint32_t CoverageMappingSize;
- uint64_t FunctionHash;
-};
-
-/// \brief The coverage mapping data for a single translation unit.
-/// It points to the array of function coverage mapping records and the encoded
-/// filenames array.
-template <typename IntPtrT> struct CoverageMappingTURecord {
- uint32_t FunctionRecordsSize;
- uint32_t FilenamesSize;
- uint32_t CoverageMappingsSize;
- uint32_t Version;
-};
/// \brief A helper structure to access the data from a section
/// in an object file.
@@ -334,220 +301,254 @@ struct SectionData {
if (auto Err = Section.getContents(Data))
return Err;
Address = Section.getAddress();
- return instrprof_error::success;
+ return std::error_code();
}
std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) {
if (Pointer < Address)
- return instrprof_error::malformed;
+ return coveragemap_error::malformed;
auto Offset = Pointer - Address;
if (Offset + Size > Data.size())
- return instrprof_error::malformed;
+ return coveragemap_error::malformed;
Result = Data.substr(Pointer - Address, Size);
- return instrprof_error::success;
+ return std::error_code();
}
};
}
-template <typename T>
+template <typename T, support::endianness Endian>
std::error_code readCoverageMappingData(
SectionData &ProfileNames, StringRef Data,
- std::vector<ObjectFileCoverageMappingReader::ProfileMappingRecord> &Records,
+ std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
std::vector<StringRef> &Filenames) {
+ using namespace support;
llvm::DenseSet<T> UniqueFunctionMappingData;
// Read the records in the coverage data section.
- while (!Data.empty()) {
- if (Data.size() < sizeof(CoverageMappingTURecord<T>))
- return instrprof_error::malformed;
- auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data());
- Data = Data.substr(sizeof(CoverageMappingTURecord<T>));
- switch (TU->Version) {
+ for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
+ if (Buf + 4 * sizeof(uint32_t) > End)
+ return coveragemap_error::malformed;
+ uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+
+ switch (Version) {
case CoverageMappingVersion1:
break;
default:
- return instrprof_error::unsupported_version;
+ return coveragemap_error::unsupported_version;
}
- auto Version = CoverageMappingVersion(TU->Version);
- // Get the function records.
- auto FunctionRecords =
- reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data());
- if (Data.size() <
- sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize)
- return instrprof_error::malformed;
- Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) *
- TU->FunctionRecordsSize);
+ // Skip past the function records, saving the start and end for later.
+ const char *FunBuf = Buf;
+ Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t));
+ const char *FunEnd = Buf;
// Get the filenames.
- if (Data.size() < TU->FilenamesSize)
- return instrprof_error::malformed;
- auto RawFilenames = Data.substr(0, TU->FilenamesSize);
- Data = Data.substr(TU->FilenamesSize);
+ if (Buf + FilenamesSize > End)
+ return coveragemap_error::malformed;
size_t FilenamesBegin = Filenames.size();
- RawCoverageFilenamesReader Reader(RawFilenames, Filenames);
+ RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames);
if (auto Err = Reader.read())
return Err;
-
- // Get the coverage mappings.
- if (Data.size() < TU->CoverageMappingsSize)
- return instrprof_error::malformed;
- auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize);
- Data = Data.substr(TU->CoverageMappingsSize);
-
- for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) {
- auto &MappingRecord = FunctionRecords[I];
-
- // Get the coverage mapping.
- if (CoverageMappings.size() < MappingRecord.CoverageMappingSize)
- return instrprof_error::malformed;
- auto Mapping =
- CoverageMappings.substr(0, MappingRecord.CoverageMappingSize);
- CoverageMappings =
- CoverageMappings.substr(MappingRecord.CoverageMappingSize);
+ Buf += FilenamesSize;
+
+ // We'll read the coverage mapping records in the loop below.
+ const char *CovBuf = Buf;
+ Buf += CoverageSize;
+ const char *CovEnd = Buf;
+ if (Buf > End)
+ return coveragemap_error::malformed;
+
+ while (FunBuf < FunEnd) {
+ // Read the function information
+ T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf);
+ uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
+ uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
+ uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf);
+
+ // Now use that to read the coverage data.
+ if (CovBuf + DataSize > CovEnd)
+ return coveragemap_error::malformed;
+ auto Mapping = StringRef(CovBuf, DataSize);
+ CovBuf += DataSize;
// Ignore this record if we already have a record that points to the same
- // function name.
- // This is useful to ignore the redundant records for the functions
- // with ODR linkage.
- if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr)
- .second)
+ // function name. This is useful to ignore the redundant records for the
+ // functions with ODR linkage.
+ if (!UniqueFunctionMappingData.insert(NamePtr).second)
continue;
- StringRef FunctionName;
- if (auto Err =
- ProfileNames.get(MappingRecord.FunctionNamePtr,
- MappingRecord.FunctionNameSize, FunctionName))
- return Err;
- Records.push_back(ObjectFileCoverageMappingReader::ProfileMappingRecord(
- Version, FunctionName, MappingRecord.FunctionHash, Mapping,
+
+ // Finally, grab the name and create a record.
+ StringRef FuncName;
+ if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName))
+ return EC;
+ Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
+ CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
FilenamesBegin, Filenames.size() - FilenamesBegin));
}
}
- return instrprof_error::success;
+ return std::error_code();
}
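A minimal sketch of the endian::readNext idiom this rewrite leans on: each
call decodes one value with the requested endianness and alignment and
advances the cursor past it, which is what lets the loop above walk the
record headers, filenames, and coverage blobs with plain pointers:

    #include "llvm/Support/Endian.h"

    // Reads a little-endian uint32_t and leaves Buf at the next field.
    static uint32_t takeU32LE(const char *&Buf) {
      using namespace llvm::support;
      return endian::readNext<uint32_t, little, unaligned>(Buf);
    }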
static const char *TestingFormatMagic = "llvmcovmtestdata";
-static std::error_code decodeTestingFormat(StringRef Data,
- SectionData &ProfileNames,
- StringRef &CoverageMapping) {
+static std::error_code loadTestingFormat(StringRef Data,
+ SectionData &ProfileNames,
+ StringRef &CoverageMapping,
+ uint8_t &BytesInAddress,
+ support::endianness &Endian) {
+ BytesInAddress = 8;
+ Endian = support::endianness::little;
+
Data = Data.substr(StringRef(TestingFormatMagic).size());
if (Data.size() < 1)
- return instrprof_error::truncated;
+ return coveragemap_error::truncated;
unsigned N = 0;
auto ProfileNamesSize =
decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
if (N > Data.size())
- return instrprof_error::malformed;
+ return coveragemap_error::malformed;
Data = Data.substr(N);
if (Data.size() < 1)
- return instrprof_error::truncated;
+ return coveragemap_error::truncated;
N = 0;
ProfileNames.Address =
decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
if (N > Data.size())
- return instrprof_error::malformed;
+ return coveragemap_error::malformed;
Data = Data.substr(N);
if (Data.size() < ProfileNamesSize)
- return instrprof_error::malformed;
+ return coveragemap_error::malformed;
ProfileNames.Data = Data.substr(0, ProfileNamesSize);
CoverageMapping = Data.substr(ProfileNamesSize);
- return instrprof_error::success;
+ return std::error_code();
}
-ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
- std::unique_ptr<MemoryBuffer> &ObjectBuffer, sys::fs::file_magic Type)
- : CurrentRecord(0) {
- if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) {
- // This is a special format used for testing.
- SectionData ProfileNames;
- StringRef CoverageMapping;
- if (auto Err = decodeTestingFormat(ObjectBuffer->getBuffer(), ProfileNames,
- CoverageMapping)) {
- error(Err);
- return;
- }
- error(readCoverageMappingData<uint64_t>(ProfileNames, CoverageMapping,
- MappingRecords, Filenames));
- Object = OwningBinary<ObjectFile>(std::unique_ptr<ObjectFile>(),
- std::move(ObjectBuffer));
- return;
+static ErrorOr<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) {
+ StringRef FoundName;
+ for (const auto &Section : OF.sections()) {
+ if (auto EC = Section.getName(FoundName))
+ return EC;
+ if (FoundName == Name)
+ return Section;
}
-
- auto File = object::ObjectFile::createObjectFile(
- ObjectBuffer->getMemBufferRef(), Type);
- if (!File)
- error(File.getError());
- else
- Object = OwningBinary<ObjectFile>(std::move(File.get()),
- std::move(ObjectBuffer));
+ return coveragemap_error::no_data_found;
}
-std::error_code ObjectFileCoverageMappingReader::readHeader() {
- const ObjectFile *OF = Object.getBinary();
- if (!OF)
- return getError();
- auto BytesInAddress = OF->getBytesInAddress();
- if (BytesInAddress != 4 && BytesInAddress != 8)
- return error(instrprof_error::malformed);
+static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
+ SectionData &ProfileNames,
+ StringRef &CoverageMapping,
+ uint8_t &BytesInAddress,
+ support::endianness &Endian,
+ Triple::ArchType Arch) {
+ auto BinOrErr = object::createBinary(ObjectBuffer);
+ if (std::error_code EC = BinOrErr.getError())
+ return EC;
+ auto Bin = std::move(BinOrErr.get());
+ std::unique_ptr<ObjectFile> OF;
+ if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) {
+ // If we have a universal binary, try to look up the object for the
+ // appropriate architecture.
+ auto ObjectFileOrErr = Universal->getObjectForArch(Arch);
+ if (std::error_code EC = ObjectFileOrErr.getError())
+ return EC;
+ OF = std::move(ObjectFileOrErr.get());
+ } else if (isa<object::ObjectFile>(Bin.get())) {
+ // For any other object file, upcast and take ownership.
+ OF.reset(cast<object::ObjectFile>(Bin.release()));
+ // If we've asked for a particular arch, make sure they match.
+ if (Arch != Triple::ArchType::UnknownArch && OF->getArch() != Arch)
+ return object_error::arch_not_found;
+ } else
+ // We can only handle object files.
+ return coveragemap_error::malformed;
+
+ // The coverage uses native pointer sizes for the object it's written in.
+ BytesInAddress = OF->getBytesInAddress();
+ Endian = OF->isLittleEndian() ? support::endianness::little
+ : support::endianness::big;
// Look for the sections that we are interested in.
- int FoundSectionCount = 0;
- SectionRef ProfileNames, CoverageMapping;
- for (const auto &Section : OF->sections()) {
- StringRef Name;
- if (auto Err = Section.getName(Name))
- return Err;
- if (Name == "__llvm_prf_names") {
- ProfileNames = Section;
- } else if (Name == "__llvm_covmap") {
- CoverageMapping = Section;
- } else
- continue;
- ++FoundSectionCount;
- }
- if (FoundSectionCount != 2)
- return error(instrprof_error::bad_header);
+ auto NamesSection = lookupSection(*OF, "__llvm_prf_names");
+ if (auto EC = NamesSection.getError())
+ return EC;
+ auto CoverageSection = lookupSection(*OF, "__llvm_covmap");
+ if (auto EC = CoverageSection.getError())
+ return EC;
// Get the contents of the given sections.
- StringRef Data;
- if (auto Err = CoverageMapping.getContents(Data))
- return Err;
- SectionData ProfileNamesData;
- if (auto Err = ProfileNamesData.load(ProfileNames))
- return Err;
+ if (std::error_code EC = CoverageSection->getContents(CoverageMapping))
+ return EC;
+ if (std::error_code EC = ProfileNames.load(*NamesSection))
+ return EC;
- // Load the data from the found sections.
- std::error_code Err;
- if (BytesInAddress == 4)
- Err = readCoverageMappingData<uint32_t>(ProfileNamesData, Data,
- MappingRecords, Filenames);
- else
- Err = readCoverageMappingData<uint64_t>(ProfileNamesData, Data,
- MappingRecords, Filenames);
- if (Err)
- return error(Err);
+ return std::error_code();
+}
- return success();
+ErrorOr<std::unique_ptr<BinaryCoverageReader>>
+BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
+ Triple::ArchType Arch) {
+ std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
+
+ SectionData Profile;
+ StringRef Coverage;
+ uint8_t BytesInAddress;
+ support::endianness Endian;
+ std::error_code EC;
+ if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
+ // This is a special format used for testing.
+ EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage,
+ BytesInAddress, Endian);
+ else
+ EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage,
+ BytesInAddress, Endian, Arch);
+ if (EC)
+ return EC;
+
+ if (BytesInAddress == 4 && Endian == support::endianness::little)
+ EC = readCoverageMappingData<uint32_t, support::endianness::little>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else if (BytesInAddress == 4 && Endian == support::endianness::big)
+ EC = readCoverageMappingData<uint32_t, support::endianness::big>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else if (BytesInAddress == 8 && Endian == support::endianness::little)
+ EC = readCoverageMappingData<uint64_t, support::endianness::little>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else if (BytesInAddress == 8 && Endian == support::endianness::big)
+ EC = readCoverageMappingData<uint64_t, support::endianness::big>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else
+ return coveragemap_error::malformed;
+ if (EC)
+ return EC;
+ return std::move(Reader);
}
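A hypothetical caller, mirroring what CoverageMapping::load above does with
this factory:

    auto BufOrErr = MemoryBuffer::getFileOrSTDIN("a.out");
    if (std::error_code EC = BufOrErr.getError())
      return EC;
    auto ReaderOrErr =
        BinaryCoverageReader::create(BufOrErr.get(), Triple::x86_64);
    if (std::error_code EC = ReaderOrErr.getError())
      return EC;
    for (const auto &Record : **ReaderOrErr)
      ; // consume CoverageMappingRecords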
std::error_code
-ObjectFileCoverageMappingReader::readNextRecord(CoverageMappingRecord &Record) {
+BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) {
if (CurrentRecord >= MappingRecords.size())
- return error(instrprof_error::eof);
+ return coveragemap_error::eof;
FunctionsFilenames.clear();
Expressions.clear();
MappingRegions.clear();
auto &R = MappingRecords[CurrentRecord];
RawCoverageMappingReader Reader(
- R.FunctionName, R.CoverageMapping,
- makeArrayRef(Filenames.data() + R.FilenamesBegin, R.FilenamesSize),
+ R.CoverageMapping,
+ makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize),
FunctionsFilenames, Expressions, MappingRegions);
- if (auto Err = Reader.read(Record))
+ if (auto Err = Reader.read())
return Err;
+
+ Record.FunctionName = R.FunctionName;
Record.FunctionHash = R.FunctionHash;
+ Record.Filenames = FunctionsFilenames;
+ Record.Expressions = Expressions;
+ Record.MappingRegions = MappingRegions;
+
++CurrentRecord;
- return success();
+ return std::error_code();
}
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
index 6969c2a..d90d2f5 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp
@@ -109,7 +109,7 @@ static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
void CoverageMappingWriter::write(raw_ostream &OS) {
// Sort the regions in an ascending order by the file id and the starting
// location.
- std::sort(MappingRegions.begin(), MappingRegions.end());
+ std::stable_sort(MappingRegions.begin(), MappingRegions.end());
// Write out the fileid -> filename mapping.
encodeULEB128(VirtualFileMapping.size(), OS);
@@ -172,11 +172,7 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
}
assert(I->LineStart >= PrevLineStart);
encodeULEB128(I->LineStart - PrevLineStart, OS);
- uint64_t CodeBeforeColumnStart =
- uint64_t(I->HasCodeBefore) |
- (uint64_t(I->ColumnStart)
- << CounterMappingRegion::EncodingHasCodeBeforeBits);
- encodeULEB128(CodeBeforeColumnStart, OS);
+ encodeULEB128(I->ColumnStart, OS);
assert(I->LineEnd >= I->LineStart);
encodeULEB128(I->LineEnd - I->LineStart, OS);
encodeULEB128(I->ColumnEnd, OS);
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 900dff9..92822a7 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -29,13 +29,13 @@ class InstrProfErrorCategoryType : public std::error_category {
case instrprof_error::eof:
return "End of File";
case instrprof_error::bad_magic:
- return "Invalid file format (bad magic)";
+ return "Invalid profile data (bad magic)";
case instrprof_error::bad_header:
- return "Invalid header";
+ return "Invalid profile data (file header is corrupt)";
case instrprof_error::unsupported_version:
- return "Unsupported format version";
+ return "Unsupported profiling format version";
case instrprof_error::unsupported_hash_type:
- return "Unsupported hash function";
+ return "Unsupported profiling hash";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
index 31ed130..3a5b266 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "InstrProfIndexed.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include <cassert>
@@ -25,12 +26,7 @@ setupMemoryBuffer(std::string Path) {
MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = BufferOrErr.getError())
return EC;
- auto Buffer = std::move(BufferOrErr.get());
-
- // Sanity check the file.
- if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
- return instrprof_error::too_large;
- return std::move(Buffer);
+ return std::move(BufferOrErr.get());
}
static std::error_code initializeReader(InstrProfReader &Reader) {
@@ -43,10 +39,16 @@ InstrProfReader::create(std::string Path) {
auto BufferOrError = setupMemoryBuffer(Path);
if (std::error_code EC = BufferOrError.getError())
return EC;
+ return InstrProfReader::create(std::move(BufferOrError.get()));
+}
- auto Buffer = std::move(BufferOrError.get());
- std::unique_ptr<InstrProfReader> Result;
+ErrorOr<std::unique_ptr<InstrProfReader>>
+InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+ // Sanity check the buffer.
+ if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+ return instrprof_error::too_large;
+ std::unique_ptr<InstrProfReader> Result;
// Create the reader.
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
@@ -64,21 +66,32 @@ InstrProfReader::create(std::string Path) {
return std::move(Result);
}
-std::error_code IndexedInstrProfReader::create(
- std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
+IndexedInstrProfReader::create(std::string Path) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path);
if (std::error_code EC = BufferOrError.getError())
return EC;
+ return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
+}
+
+
+ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
+IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+ // Sanity check the buffer.
+ if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+ return instrprof_error::too_large;
- auto Buffer = std::move(BufferOrError.get());
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return instrprof_error::bad_magic;
- Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+ auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
// Initialize the reader and return the result.
- return initializeReader(*Result);
+ if (std::error_code EC = initializeReader(*Result))
+ return EC;
+
+ return std::move(Result);
}
void InstrProfIterator::Increment() {
@@ -100,7 +113,7 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Read the function hash.
if (Line.is_at_end())
return error(instrprof_error::truncated);
- if ((Line++)->getAsInteger(10, Record.Hash))
+ if ((Line++)->getAsInteger(0, Record.Hash))
return error(instrprof_error::malformed);
// Read the number of counters.
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
index d4cde2e..2188543 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -106,7 +106,7 @@ InstrProfWriter::addFunctionCounts(StringRef FunctionName,
return instrprof_error::success;
}
-void InstrProfWriter::write(raw_fd_ostream &OS) {
+std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
// Populate the hash table generator.
@@ -128,7 +128,32 @@ void InstrProfWriter::write(raw_fd_ostream &OS) {
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS);
+ return std::make_pair(HashTableStartLoc, HashTableStart);
+}
+
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+ // Write the hash table.
+ auto TableStart = writeImpl(OS);
+
// Go back and fill in the hash table start.
- OS.seek(HashTableStartLoc);
- LE.write<uint64_t>(HashTableStart);
+ using namespace support;
+ OS.seek(TableStart.first);
+ endian::Writer<little>(OS).write<uint64_t>(TableStart.second);
+}
+
+std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
+ std::string Data;
+ llvm::raw_string_ostream OS(Data);
+ // Write the hash table.
+ auto TableStart = writeImpl(OS);
+ OS.flush();
+
+ // Go back and fill in the hash table start.
+ using namespace support;
+ uint64_t Bytes = endian::byte_swap<uint64_t, little>(TableStart.second);
+ Data.replace(TableStart.first, sizeof(uint64_t), (const char *)&Bytes,
+ sizeof(uint64_t));
+
+ // Return this in an aligned memory buffer.
+ return MemoryBuffer::getMemBufferCopy(Data);
}
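writeBuffer cannot seek on a string stream, so it back-patches the slot
instead; the idiom in isolation, with illustrative offsets and values:

    std::string Data;
    llvm::raw_string_ostream OS(Data);
    using namespace llvm::support;
    endian::Writer<little>(OS).write<uint64_t>(0); // fixed-width placeholder
    OS << "payload that determines the final value";
    OS.flush();
    uint64_t Real = 0x40;                          // known only after writing
    uint64_t LE = endian::byte_swap<uint64_t, little>(Real);
    Data.replace(0, sizeof(uint64_t), (const char *)&LE, sizeof(uint64_t));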
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index 393ecf4..4b0a0e5 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -1248,10 +1248,10 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
return false;
case rmTowardPositive:
- return sign == false;
+ return !sign;
case rmTowardNegative:
- return sign == true;
+ return sign;
}
llvm_unreachable("Invalid rounding mode found");
}
@@ -1430,7 +1430,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* Determine if the operation on the absolute values is effectively
an addition or subtraction. */
- subtract ^= (sign ^ rhs.sign) ? true : false;
+ subtract ^= static_cast<bool>(sign ^ rhs.sign);
/* Are we bigger exponent-wise than the RHS? */
bits = exponent - rhs.exponent;
@@ -3920,7 +3920,7 @@ APFloat::makeZero(bool Negative) {
APFloat llvm::scalbn(APFloat X, int Exp) {
if (X.isInfinity() || X.isZero() || X.isNaN())
- return std::move(X);
+ return X;
auto MaxExp = X.getSemantics().maxExponent;
auto MinExp = X.getSemantics().minExponent;
@@ -3932,5 +3932,5 @@ APFloat llvm::scalbn(APFloat X, int Exp) {
return APFloat::getZero(X.getSemantics(), X.isNegative());
X.exponent += Exp;
- return std::move(X);
+ return X;
}
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 0ddc2ab..23f89bb 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -162,7 +162,7 @@ APInt& APInt::operator=(uint64_t RHS) {
return clearUnusedBits();
}
-/// Profile - This method 'profiles' an APInt for use with FoldingSet.
+/// This method 'profiles' an APInt for use with FoldingSet.
void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(BitWidth);
@@ -176,7 +176,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(pVal[i]);
}
-/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// This function adds a single "digit" integer, y, to the multiple
/// "digit" integer array, x[]. x[] is modified to reflect the addition and
/// 1 is returned if there is a carry out, otherwise 0 is returned.
/// @returns the carry of the addition.
@@ -202,7 +202,7 @@ APInt& APInt::operator++() {
return clearUnusedBits();
}
-/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// This function subtracts a single "digit" (64-bit word), y, from
/// the multi-digit integer array, x[], propagating the borrowed 1 value until
 /// no further borrowing is needed or it runs out of "digits" in x. The result
/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
@@ -231,7 +231,7 @@ APInt& APInt::operator--() {
return clearUnusedBits();
}
-/// add - This function adds the integer array x to the integer array Y and
+/// This function adds the integer array x to the integer array Y and
/// places the result in dest.
/// @returns the carry out from the addition
/// @brief General addition of 64-bit integer arrays
@@ -672,12 +672,20 @@ hash_code llvm::hash_value(const APInt &Arg) {
return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
}
-/// HiBits - This function returns the high "numBits" bits of this APInt.
+bool APInt::isSplat(unsigned SplatSizeInBits) const {
+ assert(getBitWidth() % SplatSizeInBits == 0 &&
+ "SplatSizeInBits must divide width!");
+ // We can check that all parts of an integer are equal by making use of a
+ // little trick: rotate and check if it's still the same value.
+ return *this == rotl(SplatSizeInBits);
+}
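Why the rotate works: a value is a splat of a k-bit pattern exactly when
rotating it by k bits leaves it unchanged, so one comparison covers every
chunk at once. For example:

    APInt X(16, 0xABAB), Y(16, 0xAB01);
    X.isSplat(8); // true:  X.rotl(8) == 0xABAB
    Y.isSplat(8); // false: Y.rotl(8) == 0x01AB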
+
+/// This function returns the high "numBits" bits of this APInt.
APInt APInt::getHiBits(unsigned numBits) const {
return APIntOps::lshr(*this, BitWidth - numBits);
}
-/// LoBits - This function returns the low "numBits" bits of this APInt.
+/// This function returns the low "numBits" bits of this APInt.
APInt APInt::getLoBits(unsigned numBits) const {
return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
BitWidth - numBits);
@@ -713,7 +721,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
unsigned APInt::countLeadingOnes() const {
if (isSingleWord())
- return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
+ return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
unsigned shift;
@@ -724,13 +732,13 @@ unsigned APInt::countLeadingOnes() const {
shift = APINT_BITS_PER_WORD - highWordBits;
}
int i = getNumWords() - 1;
- unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
+ unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
if (pVal[i] == -1ULL)
Count += APINT_BITS_PER_WORD;
else {
- Count += CountLeadingOnes_64(pVal[i]);
+ Count += llvm::countLeadingOnes(pVal[i]);
break;
}
}
@@ -756,14 +764,14 @@ unsigned APInt::countTrailingOnesSlowCase() const {
for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
- Count += CountTrailingOnes_64(pVal[i]);
+ Count += llvm::countTrailingOnes(pVal[i]);
return std::min(Count, BitWidth);
}
unsigned APInt::countPopulationSlowCase() const {
unsigned Count = 0;
for (unsigned i = 0; i < getNumWords(); ++i)
- Count += CountPopulation_64(pVal[i]);
+ Count += llvm::countPopulation(pVal[i]);
return Count;
}
@@ -853,7 +861,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
return isNeg ? -Tmp : Tmp;
}
-/// RoundToDouble - This function converts this APInt to a double.
+/// This function converts this APInt to a double.
/// The layout for double is as following (IEEE Standard 754):
/// --------------------------------------
/// | Sign Exponent Fraction Bias |
@@ -1310,13 +1318,8 @@ APInt APInt::sqrt() const {
// libc sqrt function which will probably use a hardware sqrt computation.
// This should be faster than the algorithm below.
if (magnitude < 52) {
-#if HAVE_ROUND
return APInt(BitWidth,
uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
-#else
- return APInt(BitWidth,
- uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
-#endif
}
// Okay, all the short cuts are exhausted. We must compute it. The following
@@ -1508,21 +1511,18 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
assert(u && "Must provide dividend");
assert(v && "Must provide divisor");
assert(q && "Must provide quotient");
- assert(u != v && u != q && v != q && "Must us different memory");
+ assert(u != v && u != q && v != q && "Must use different memory");
assert(n>1 && "n must be > 1");
- // Knuth uses the value b as the base of the number system. In our case b
- // is 2^31 so we just set it to -1u.
- uint64_t b = uint64_t(1) << 32;
+ // b denotes the base of the number system. In our case b is 2^32.
+ LLVM_CONSTEXPR uint64_t b = uint64_t(1) << 32;
-#if 0
DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
DEBUG(dbgs() << "KnuthDiv: original:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << " by");
DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
DEBUG(dbgs() << '\n');
-#endif
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
@@ -1547,13 +1547,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
}
u[m+n] = u_carry;
-#if 0
+
DEBUG(dbgs() << "KnuthDiv: normal:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << " by");
DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
DEBUG(dbgs() << '\n');
-#endif
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
@@ -1583,44 +1582,23 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
// consists of a simple multiplication by a one-place number, combined with
// a subtraction.
- bool isNeg = false;
- for (unsigned i = 0; i < n; ++i) {
- uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
- uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
- bool borrow = subtrahend > u_tmp;
- DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
- << ", subtrahend == " << subtrahend
- << ", borrow = " << borrow << '\n');
-
- uint64_t result = u_tmp - subtrahend;
- unsigned k = j + i;
- u[k++] = (unsigned)(result & (b-1)); // subtract low word
- u[k++] = (unsigned)(result >> 32); // subtract high word
- while (borrow && k <= m+n) { // deal with borrow to the left
- borrow = u[k] == 0;
- u[k]--;
- k++;
- }
- isNeg |= borrow;
- DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
- u[j+i+1] << '\n');
- }
- DEBUG(dbgs() << "KnuthDiv: after subtraction:");
- DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
- DEBUG(dbgs() << '\n');
// The digits (u[j+n]...u[j]) should be kept positive; if the result of
// this step is actually negative, (u[j+n]...u[j]) should be left as the
// true value plus b**(n+1), namely as the b's complement of
// the true value, and a "borrow" to the left should be remembered.
- //
- if (isNeg) {
- bool carry = true; // true because b's complement is "complement + 1"
- for (unsigned i = 0; i <= m+n; ++i) {
- u[i] = ~u[i] + carry; // b's complement
- carry = carry && u[i] == 0;
- }
+ int64_t borrow = 0;
+ for (unsigned i = 0; i < n; ++i) {
+ uint64_t p = uint64_t(qp) * uint64_t(v[i]);
+ int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
+ u[j+i] = (unsigned)subres;
+ borrow = (p >> 32) - (subres >> 32);
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
+ << ", borrow = " << borrow << '\n');
}
- DEBUG(dbgs() << "KnuthDiv: after complement:");
+ bool isNeg = u[j+n] < borrow;
+ u[j+n] -= (unsigned)borrow;
+
+ DEBUG(dbgs() << "KnuthDiv: after subtraction:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << '\n');
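
A standalone sketch of the rewritten D4 step (names follow Knuth, TAOCP Vol. 2, 4.3.1; this mirrors the loop above rather than being callable LLVM API): subtract qp * v from the digit window u[j..j+n] using a signed 64-bit borrow. The arithmetic right shift of the signed partial result is 0 or -1, which folds the per-digit borrow into a single expression and removes the separate b's-complement pass.

#include <cstdint>

// Multiply-and-subtract for one quotient digit qp over n 32-bit digits.
// Returns true if the window went negative, i.e. qp was one too large and
// the caller must run the D6 add-back correction.
static bool mulSubD4(uint32_t *u, const uint32_t *v, unsigned n, unsigned j,
                     uint32_t qp) {
  int64_t borrow = 0;
  for (unsigned i = 0; i < n; ++i) {
    uint64_t p = uint64_t(qp) * uint64_t(v[i]);
    int64_t subres = int64_t(u[j + i]) - borrow - uint32_t(p);
    u[j + i] = uint32_t(subres);
    // subres >> 32 is 0 when subres >= 0 and -1 when the digit borrowed.
    borrow = int64_t(p >> 32) - (subres >> 32);
  }
  bool isNeg = u[j + n] < borrow;
  u[j + n] -= uint32_t(borrow);
  return isNeg;
}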
@@ -1644,7 +1622,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
u[j+n] += carry;
}
DEBUG(dbgs() << "KnuthDiv: after correction:");
- DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
@@ -1677,9 +1655,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
DEBUG(dbgs() << '\n');
}
-#if 0
DEBUG(dbgs() << '\n');
-#endif
}
void APInt::divide(const APInt LHS, unsigned lhsWords,
@@ -1803,6 +1779,8 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
// The quotient is in Q. Reconstitute the quotient into Quotient's low
// order words.
+ // This case is currently dead as all users of divide() handle trivial cases
+ // earlier.
if (lhsWords == 1) {
uint64_t tmp =
uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
@@ -2281,9 +2259,8 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
std::reverse(Str.begin()+StartDig, Str.end());
}
-/// toString - This returns the APInt as a std::string. Note that this is an
-/// inefficient method. It is better to pass in a SmallVector/SmallString
-/// to the methods above.
+/// Returns the APInt as a std::string. Note that this is an inefficient method.
+/// It is better to pass in a SmallVector/SmallString to the methods above.
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
SmallString<40> S;
toString(S, Radix, Signed, /* formatAsCLiteral = */false);
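
A hedged usage sketch of the point the doc comment makes (printHex is an illustrative name, not LLVM API): prefer the SmallString-taking overload so typical widths never touch the heap, and reserve the std::string form for convenience only.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Stack-allocated buffer; 40 characters covers a 128-bit value in decimal.
void printHex(const APInt &V, raw_ostream &OS) {
  SmallString<40> S;
  V.toString(S, /*Radix=*/16, /*Signed=*/false, /*formatAsCLiteral=*/false);
  OS << S;
}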
@@ -2296,13 +2273,13 @@ void APInt::dump() const {
this->toStringUnsigned(U);
this->toStringSigned(S);
dbgs() << "APInt(" << BitWidth << "b, "
- << U.str() << "u " << S.str() << "s)";
+ << U << "u " << S << "s)";
}
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
- OS << S.str();
+ OS << S;
}
// This implements a variety of operations on a representation of
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
index 7c306b2..f48edac 100644
--- a/contrib/llvm/lib/Support/Allocator.cpp
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -12,12 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Recycler.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstring>
namespace llvm {
diff --git a/contrib/llvm/lib/Support/COM.cpp b/contrib/llvm/lib/Support/COM.cpp
new file mode 100644
index 0000000..cf3a133
--- /dev/null
+++ b/contrib/llvm/lib/Support/COM.cpp
@@ -0,0 +1,23 @@
+//===-- COM.cpp - Implement COM utility classes -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utility classes related to COM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/COM.h"
+
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/COM.inc"
+#elif LLVM_ON_WIN32
+#include "Windows/COM.inc"
+#endif
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index 40570ca..3cabc54 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm-c/Support.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
@@ -32,10 +33,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <cerrno>
#include <cstdlib>
#include <map>
-#include <system_error>
using namespace llvm;
using namespace cl;
@@ -83,129 +82,172 @@ void StringSaver::anchor() {}
//===----------------------------------------------------------------------===//
-// Globals for name and overview of program. Program name is not a string to
-// avoid static ctor/dtor issues.
-static char ProgramName[80] = "<premain>";
-static const char *ProgramOverview = nullptr;
+namespace {
-// This collects additional help to be printed.
-static ManagedStatic<std::vector<const char *>> MoreHelp;
+class CommandLineParser {
+public:
+ // Globals for name and overview of program. Program name is not a string to
+ // avoid static ctor/dtor issues.
+ std::string ProgramName;
+ const char *ProgramOverview;
-extrahelp::extrahelp(const char *Help) : morehelp(Help) {
- MoreHelp->push_back(Help);
-}
+ // This collects additional help to be printed.
+ std::vector<const char *> MoreHelp;
-static bool OptionListChanged = false;
+ SmallVector<Option *, 4> PositionalOpts;
+ SmallVector<Option *, 4> SinkOpts;
+ StringMap<Option *> OptionsMap;
-// MarkOptionsChanged - Internal helper function.
-void cl::MarkOptionsChanged() { OptionListChanged = true; }
+ Option *ConsumeAfterOpt; // The ConsumeAfter option if it exists.
-/// RegisteredOptionList - This is the list of the command line options that
-/// have statically constructed themselves.
-static Option *RegisteredOptionList = nullptr;
+ // This collects the different option categories that have been registered.
+ SmallPtrSet<OptionCategory *, 16> RegisteredOptionCategories;
-void Option::addArgument() {
- assert(!NextRegistered && "argument multiply registered!");
+ CommandLineParser() : ProgramOverview(nullptr), ConsumeAfterOpt(nullptr) {}
- NextRegistered = RegisteredOptionList;
- RegisteredOptionList = this;
- MarkOptionsChanged();
-}
+ void ParseCommandLineOptions(int argc, const char *const *argv,
+ const char *Overview);
-void Option::removeArgument() {
- if (RegisteredOptionList == this) {
- RegisteredOptionList = NextRegistered;
- MarkOptionsChanged();
- return;
+ void addLiteralOption(Option &Opt, const char *Name) {
+ if (!Opt.hasArgStr()) {
+ if (!OptionsMap.insert(std::make_pair(Name, &Opt)).second) {
+ errs() << ProgramName << ": CommandLine Error: Option '" << Name
+ << "' registered more than once!\n";
+ report_fatal_error("inconsistency in registered CommandLine options");
+ }
+ }
}
- Option *O = RegisteredOptionList;
- for (; O->NextRegistered != this; O = O->NextRegistered)
- ;
- O->NextRegistered = NextRegistered;
- MarkOptionsChanged();
-}
-
-// This collects the different option categories that have been registered.
-typedef SmallPtrSet<OptionCategory *, 16> OptionCatSet;
-static ManagedStatic<OptionCatSet> RegisteredOptionCategories;
-// Initialise the general option category.
-OptionCategory llvm::cl::GeneralCategory("General options");
-
-void OptionCategory::registerCategory() {
- assert(std::count_if(RegisteredOptionCategories->begin(),
- RegisteredOptionCategories->end(),
- [this](const OptionCategory *Category) {
- return getName() == Category->getName();
- }) == 0 &&
- "Duplicate option categories");
-
- RegisteredOptionCategories->insert(this);
-}
-
-//===----------------------------------------------------------------------===//
-// Basic, shared command line option processing machinery.
-//
-
-/// GetOptionInfo - Scan the list of registered options, turning them into data
-/// structures that are easier to handle.
-static void GetOptionInfo(SmallVectorImpl<Option *> &PositionalOpts,
- SmallVectorImpl<Option *> &SinkOpts,
- StringMap<Option *> &OptionsMap) {
- bool HadErrors = false;
- SmallVector<const char *, 16> OptionNames;
- Option *CAOpt = nullptr; // The ConsumeAfter option if it exists.
- for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
- // If this option wants to handle multiple option names, get the full set.
- // This handles enum options like "-O1 -O2" etc.
- O->getExtraOptionNames(OptionNames);
- if (O->ArgStr[0])
- OptionNames.push_back(O->ArgStr);
-
- // Handle named options.
- for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ void addOption(Option *O) {
+ bool HadErrors = false;
+ if (O->ArgStr[0]) {
// Add argument to the argument map!
- if (!OptionsMap.insert(std::make_pair(OptionNames[i], O)).second) {
- errs() << ProgramName << ": CommandLine Error: Option '"
- << OptionNames[i] << "' registered more than once!\n";
+ if (!OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) {
+ errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
+ << "' registered more than once!\n";
HadErrors = true;
}
}
- OptionNames.clear();
-
// Remember information about positional options.
if (O->getFormattingFlag() == cl::Positional)
PositionalOpts.push_back(O);
else if (O->getMiscFlags() & cl::Sink) // Remember sink options
SinkOpts.push_back(O);
else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
- if (CAOpt) {
+ if (ConsumeAfterOpt) {
O->error("Cannot specify more than one option with cl::ConsumeAfter!");
HadErrors = true;
}
- CAOpt = O;
+ ConsumeAfterOpt = O;
}
+
+    // Fail hard if there were errors. These are strictly unrecoverable and
+    // indicate serious issues such as conflicting option names or an
+    // incorrectly linked LLVM distribution.
+ if (HadErrors)
+ report_fatal_error("inconsistency in registered CommandLine options");
}
- if (CAOpt)
- PositionalOpts.push_back(CAOpt);
+ void removeOption(Option *O) {
+ SmallVector<const char *, 16> OptionNames;
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+ for (auto Name : OptionNames)
+ OptionsMap.erase(StringRef(Name));
- // Make sure that they are in order of registration not backwards.
- std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+ if (O->getFormattingFlag() == cl::Positional)
+ for (auto Opt = PositionalOpts.begin(); Opt != PositionalOpts.end();
+ ++Opt) {
+ if (*Opt == O) {
+ PositionalOpts.erase(Opt);
+ break;
+ }
+ }
+ else if (O->getMiscFlags() & cl::Sink)
+ for (auto Opt = SinkOpts.begin(); Opt != SinkOpts.end(); ++Opt) {
+ if (*Opt == O) {
+ SinkOpts.erase(Opt);
+ break;
+ }
+ }
+ else if (O == ConsumeAfterOpt)
+ ConsumeAfterOpt = nullptr;
+ }
+
+ bool hasOptions() {
+ return (!OptionsMap.empty() || !PositionalOpts.empty() ||
+ nullptr != ConsumeAfterOpt);
+ }
+
+ void updateArgStr(Option *O, const char *NewName) {
+ if (!OptionsMap.insert(std::make_pair(NewName, O)).second) {
+ errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
+ << "' registered more than once!\n";
+ report_fatal_error("inconsistency in registered CommandLine options");
+ }
+ OptionsMap.erase(StringRef(O->ArgStr));
+ }
+
+ void printOptionValues();
+
+ void registerCategory(OptionCategory *cat) {
+ assert(std::count_if(RegisteredOptionCategories.begin(),
+ RegisteredOptionCategories.end(),
+ [cat](const OptionCategory *Category) {
+ return cat->getName() == Category->getName();
+ }) == 0 &&
+ "Duplicate option categories");
+
+ RegisteredOptionCategories.insert(cat);
+ }
- // Fail hard if there were errors. These are strictly unrecoverable and
- // indicate serious issues such as conflicting option names or an incorrectly
- // linked LLVM distribution.
- if (HadErrors)
- report_fatal_error("inconsistency in registered CommandLine options");
+private:
+ Option *LookupOption(StringRef &Arg, StringRef &Value);
+};
+
+} // namespace
+
+static ManagedStatic<CommandLineParser> GlobalParser;
+
+void cl::AddLiteralOption(Option &O, const char *Name) {
+ GlobalParser->addLiteralOption(O, Name);
}
+extrahelp::extrahelp(const char *Help) : morehelp(Help) {
+ GlobalParser->MoreHelp.push_back(Help);
+}
+
+void Option::addArgument() {
+ GlobalParser->addOption(this);
+ FullyInitialized = true;
+}
+
+void Option::removeArgument() { GlobalParser->removeOption(this); }
+
+void Option::setArgStr(const char *S) {
+ if (FullyInitialized)
+ GlobalParser->updateArgStr(this, S);
+ ArgStr = S;
+}
+
+// Initialise the general option category.
+OptionCategory llvm::cl::GeneralCategory("General options");
+
+void OptionCategory::registerCategory() {
+ GlobalParser->registerCategory(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
/// LookupOption - Lookup the option specified by the specified option on the
/// command line. If there is a value specified (after an equal sign) return
/// that as well. This assumes that leading dashes have already been stripped.
-static Option *LookupOption(StringRef &Arg, StringRef &Value,
- const StringMap<Option *> &OptionsMap) {
+Option *CommandLineParser::LookupOption(StringRef &Arg, StringRef &Value) {
// Reject all dashes.
if (Arg.empty())
return nullptr;
@@ -271,7 +313,7 @@ static Option *LookupNearestOption(StringRef Arg,
if (RHS.empty() || !PermitValue)
NearestString = OptionNames[i];
else
- NearestString = std::string(OptionNames[i]) + "=" + RHS.str();
+ NearestString = (Twine(OptionNames[i]) + "=" + RHS).str();
}
}
}
@@ -655,6 +697,12 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(nullptr);
}
+// Although it is called a byte order mark, the UTF-8 BOM is not affected
+// by the host system's endianness.
+static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
+ return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
+}
+
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv,
@@ -674,6 +722,11 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
return false;
Str = StringRef(UTF8Buf);
}
+ // If we see UTF-8 BOM sequence at the beginning of a file, we shall remove
+ // these bytes before parsing.
+ // Reference: http://en.wikipedia.org/wiki/UTF-8#Byte_order_mark
+ else if (hasUTF8ByteOrderMark(BufRef))
+ Str = StringRef(BufRef.data() + 3, BufRef.size() - 3);
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);
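
The BOM handling in this hunk, as a self-contained sketch (the helper names here are illustrative): detect the three-byte UTF-8 BOM and tokenize from an offset into the buffer rather than copying it.

#include <cstddef>
#include <cstring>

static bool hasUTF8BOM(const char *S, size_t N) {
  return N >= 3 && std::memcmp(S, "\xef\xbb\xbf", 3) == 0;
}

// Skip the BOM bytes before handing the buffer to a tokenizer, mirroring
// the StringRef slicing above.
static const char *skipUTF8BOM(const char *S, size_t &N) {
  if (hasUTF8BOM(S, N)) {
    N -= 3;
    return S + 3;
  }
  return S;
}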
@@ -731,7 +784,7 @@ class StrDupSaver : public StringSaver {
std::vector<char *> Dups;
public:
- ~StrDupSaver() {
+ ~StrDupSaver() override {
for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); I != E;
++I) {
char *Dup = *I;
@@ -777,28 +830,23 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
void cl::ParseCommandLineOptions(int argc, const char *const *argv,
const char *Overview) {
- // Process all registered options.
- SmallVector<Option *, 4> PositionalOpts;
- SmallVector<Option *, 4> SinkOpts;
- StringMap<Option *> Opts;
- GetOptionInfo(PositionalOpts, SinkOpts, Opts);
+ GlobalParser->ParseCommandLineOptions(argc, argv, Overview);
+}
- assert((!Opts.empty() || !PositionalOpts.empty()) && "No options specified!");
+void CommandLineParser::ParseCommandLineOptions(int argc,
+ const char *const *argv,
+ const char *Overview) {
+ assert(hasOptions() && "No options specified!");
// Expand response files.
- SmallVector<const char *, 20> newArgv;
- for (int i = 0; i != argc; ++i)
- newArgv.push_back(argv[i]);
+ SmallVector<const char *, 20> newArgv(argv, argv + argc);
StrDupSaver Saver;
ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);
argv = &newArgv[0];
argc = static_cast<int>(newArgv.size());
// Copy the program name into ProgName, making sure not to overflow it.
- StringRef ProgName = sys::path::filename(argv[0]);
- size_t Len = std::min(ProgName.size(), size_t(79));
- memcpy(ProgramName, ProgName.data(), Len);
- ProgramName[Len] = '\0';
+ ProgramName = sys::path::filename(argv[0]);
ProgramOverview = Overview;
bool ErrorParsing = false;
@@ -809,25 +857,22 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
// Determine whether or not there are an unlimited number of positionals
bool HasUnlimitedPositionals = false;
- Option *ConsumeAfterOpt = nullptr;
+ if (ConsumeAfterOpt) {
+ assert(PositionalOpts.size() > 0 &&
+ "Cannot specify cl::ConsumeAfter without a positional argument!");
+ }
if (!PositionalOpts.empty()) {
- if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
- assert(PositionalOpts.size() > 1 &&
- "Cannot specify cl::ConsumeAfter without a positional argument!");
- ConsumeAfterOpt = PositionalOpts[0];
- }
// Calculate how many positional values are _required_.
bool UnboundedFound = false;
- for (size_t i = ConsumeAfterOpt ? 1 : 0, e = PositionalOpts.size(); i != e;
- ++i) {
+ for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
Option *Opt = PositionalOpts[i];
if (RequiresValue(Opt))
++NumPositionalRequired;
else if (ConsumeAfterOpt) {
// ConsumeAfter cannot be combined with "optional" positional options
// unless there is only one positional argument...
- if (PositionalOpts.size() > 2)
+ if (PositionalOpts.size() > 1)
ErrorParsing |= Opt->error(
"error - this positional option will never be matched, "
"because it does not Require a value, and a "
@@ -841,6 +886,9 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
"another positional argument will match an "
"unbounded number of values, and this option"
" does not require a value!");
+        errs() << ProgramName << ": CommandLine Error: Option '" << Opt->ArgStr
+               << "' is all messed up!\n";
}
UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
}
@@ -866,17 +914,6 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Value;
StringRef ArgName = "";
- // If the option list changed, this means that some command line
- // option has just been registered or deregistered. This can occur in
- // response to things like -load, etc. If this happens, rescan the options.
- if (OptionListChanged) {
- PositionalOpts.clear();
- SinkOpts.clear();
- Opts.clear();
- GetOptionInfo(PositionalOpts, SinkOpts, Opts);
- OptionListChanged = false;
- }
-
// Check to see if this is a positional argument. This argument is
// considered to be positional if it doesn't start with '-', if it is "-"
// itself, or if we have seen "--" already.
@@ -917,7 +954,7 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
- Handler = LookupOption(ArgName, Value, Opts);
+ Handler = LookupOption(ArgName, Value);
if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
ProvidePositionalOption(ActivePositionalArg, argv[i], i);
continue; // We are done!
@@ -929,18 +966,18 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
- Handler = LookupOption(ArgName, Value, Opts);
+ Handler = LookupOption(ArgName, Value);
// Check to see if this "option" is really a prefixed or grouped argument.
if (!Handler)
- Handler =
- HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing, Opts);
+ Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing,
+ OptionsMap);
// Otherwise, look for the closest available option to report to the user
// in the upcoming error.
if (!Handler && SinkOpts.empty())
NearestHandler =
- LookupNearestOption(ArgName, Opts, NearestHandlerString);
+ LookupNearestOption(ArgName, OptionsMap, NearestHandlerString);
}
if (!Handler) {
@@ -1037,8 +1074,8 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
// positional option and keep the rest for the consume after. The above
// loop would have assigned no values to positional options in this case.
//
- if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) {
- ErrorParsing |= ProvidePositionalOption(PositionalOpts[1],
+ if (PositionalOpts.size() == 1 && ValNo == 0 && !PositionalVals.empty()) {
+ ErrorParsing |= ProvidePositionalOption(PositionalOpts[0],
PositionalVals[ValNo].first,
PositionalVals[ValNo].second);
ValNo++;
@@ -1053,7 +1090,7 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
}
// Loop over args and make sure all required args are specified!
- for (const auto &Opt : Opts) {
+ for (const auto &Opt : OptionsMap) {
switch (Opt.second->getNumOccurrencesFlag()) {
case Required:
case OneOrMore:
@@ -1076,9 +1113,7 @@ void cl::ParseCommandLineOptions(int argc, const char *const *argv,
// Free all of the memory allocated to the map. Command line options may only
// be processed once!
- Opts.clear();
- PositionalOpts.clear();
- MoreHelp->clear();
+ MoreHelp.clear();
// If we had an error processing our arguments, don't let the program execute
if (ErrorParsing)
@@ -1095,7 +1130,7 @@ bool Option::error(const Twine &Message, StringRef ArgName) {
if (ArgName.empty())
errs() << HelpStr; // Be nice for positional arguments
else
- errs() << ProgramName << ": for the -" << ArgName;
+ errs() << GlobalParser->ProgramName << ": for the -" << ArgName;
errs() << " option: " << Message << "\n";
return true;
@@ -1427,10 +1462,9 @@ void basic_parser_impl::printOptionNoValue(const Option &O,
// -help and -help-hidden option implementation
//
-static int OptNameCompare(const void *LHS, const void *RHS) {
- typedef std::pair<const char *, Option *> pair_ty;
-
- return strcmp(((const pair_ty *)LHS)->first, ((const pair_ty *)RHS)->first);
+static int OptNameCompare(const std::pair<const char *, Option *> *LHS,
+ const std::pair<const char *, Option *> *RHS) {
+ return strcmp(LHS->first, RHS->first);
}
// Copy Options into a vector so we can sort them as we like.
@@ -1458,7 +1492,7 @@ static void sortOpts(StringMap<Option *> &OptMap,
}
// Sort the options list alphabetically.
- qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+ array_pod_sort(Opts.begin(), Opts.end(), OptNameCompare);
}
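
Why OptNameCompare gained a typed signature: array_pod_sort forwards to qsort internally, but takes a comparator over typed element pointers so the const void* casts live in one audited place. A hedged sketch (sortNames is an illustrative name):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstring>
using namespace llvm;

// qsort-style contract (negative/zero/positive), but over typed pointers
// to the elements rather than const void*.
static int nameCompare(const char *const *LHS, const char *const *RHS) {
  return std::strcmp(*LHS, *RHS);
}

void sortNames(SmallVectorImpl<const char *> &Names) {
  array_pod_sort(Names.begin(), Names.end(), nameCompare);
}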
namespace {
@@ -1480,38 +1514,26 @@ public:
// Invoke the printer.
void operator=(bool Value) {
- if (Value == false)
+ if (!Value)
return;
- // Get all the options.
- SmallVector<Option *, 4> PositionalOpts;
- SmallVector<Option *, 4> SinkOpts;
- StringMap<Option *> OptMap;
- GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
-
StrOptionPairVector Opts;
- sortOpts(OptMap, Opts, ShowHidden);
-
- if (ProgramOverview)
- outs() << "OVERVIEW: " << ProgramOverview << "\n";
+ sortOpts(GlobalParser->OptionsMap, Opts, ShowHidden);
- outs() << "USAGE: " << ProgramName << " [options]";
+ if (GlobalParser->ProgramOverview)
+ outs() << "OVERVIEW: " << GlobalParser->ProgramOverview << "\n";
- // Print out the positional options.
- Option *CAOpt = nullptr; // The cl::ConsumeAfter option, if it exists...
- if (!PositionalOpts.empty() &&
- PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
- CAOpt = PositionalOpts[0];
+ outs() << "USAGE: " << GlobalParser->ProgramName << " [options]";
- for (size_t i = CAOpt != nullptr, e = PositionalOpts.size(); i != e; ++i) {
- if (PositionalOpts[i]->ArgStr[0])
- outs() << " --" << PositionalOpts[i]->ArgStr;
- outs() << " " << PositionalOpts[i]->HelpStr;
+ for (auto Opt : GlobalParser->PositionalOpts) {
+ if (Opt->ArgStr[0])
+ outs() << " --" << Opt->ArgStr;
+ outs() << " " << Opt->HelpStr;
}
// Print the consume after option info if it exists...
- if (CAOpt)
- outs() << " " << CAOpt->HelpStr;
+ if (GlobalParser->ConsumeAfterOpt)
+ outs() << " " << GlobalParser->ConsumeAfterOpt->HelpStr;
outs() << "\n\n";
@@ -1524,11 +1546,9 @@ public:
printOptions(Opts, MaxArgLen);
// Print any extra help the user has declared.
- for (std::vector<const char *>::iterator I = MoreHelp->begin(),
- E = MoreHelp->end();
- I != E; ++I)
- outs() << *I;
- MoreHelp->clear();
+ for (auto I : GlobalParser->MoreHelp)
+ outs() << I;
+ GlobalParser->MoreHelp.clear();
// Halt the program since help information was printed
exit(0);
@@ -1540,10 +1560,11 @@ public:
explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {}
// Helper function for printOptions().
- // It shall return true if A's name should be lexographically
- // ordered before B's name. It returns false otherwise.
- static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) {
- return strcmp(A->getName(), B->getName()) < 0;
+  // It shall return a negative value if A's name should be lexicographically
+  // ordered before B's name, and a value greater than or equal to zero otherwise.
+ static int OptionCategoryCompare(OptionCategory *const *A,
+ OptionCategory *const *B) {
+ return strcmp((*A)->getName(), (*B)->getName());
}
// Make sure we inherit our base class's operator=()
@@ -1556,16 +1577,16 @@ protected:
// Collect registered option categories into vector in preparation for
// sorting.
- for (OptionCatSet::const_iterator I = RegisteredOptionCategories->begin(),
- E = RegisteredOptionCategories->end();
+ for (auto I = GlobalParser->RegisteredOptionCategories.begin(),
+ E = GlobalParser->RegisteredOptionCategories.end();
I != E; ++I) {
SortedCategories.push_back(*I);
}
// Sort the different option categories alphabetically.
assert(SortedCategories.size() > 0 && "No option categories registered!");
- std::sort(SortedCategories.begin(), SortedCategories.end(),
- OptionCategoryCompare);
+ array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
+ OptionCategoryCompare);
// Create map to empty vectors.
for (std::vector<OptionCategory *>::const_iterator
@@ -1653,48 +1674,54 @@ static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter,
static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter,
CategorizedHiddenPrinter);
+// Define a category for generic options that all tools should have.
+static cl::OptionCategory GenericCategory("Generic Options");
+
// Define uncategorized help printers.
// -help-list is hidden by default because if Option categories are being used
// then -help behaves the same as -help-list.
static cl::opt<HelpPrinter, true, parser<bool>> HLOp(
"help-list",
cl::desc("Display list of available options (-help-list-hidden for more)"),
- cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed);
+ cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed,
+ cl::cat(GenericCategory));
static cl::opt<HelpPrinter, true, parser<bool>>
HLHOp("help-list-hidden", cl::desc("Display list of all available options"),
cl::location(UncategorizedHiddenPrinter), cl::Hidden,
- cl::ValueDisallowed);
+ cl::ValueDisallowed, cl::cat(GenericCategory));
// Define uncategorized/categorized help printers. These printers change their
// behaviour at runtime depending on whether one or more Option categories have
// been declared.
static cl::opt<HelpPrinterWrapper, true, parser<bool>>
HOp("help", cl::desc("Display available options (-help-hidden for more)"),
- cl::location(WrappedNormalPrinter), cl::ValueDisallowed);
+ cl::location(WrappedNormalPrinter), cl::ValueDisallowed,
+ cl::cat(GenericCategory));
static cl::opt<HelpPrinterWrapper, true, parser<bool>>
HHOp("help-hidden", cl::desc("Display all available options"),
- cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+ cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed,
+ cl::cat(GenericCategory));
static cl::opt<bool> PrintOptions(
"print-options",
cl::desc("Print non-default options after command line parsing"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(false), cl::cat(GenericCategory));
static cl::opt<bool> PrintAllOptions(
"print-all-options",
cl::desc("Print all option values after command line parsing"), cl::Hidden,
- cl::init(false));
+ cl::init(false), cl::cat(GenericCategory));
void HelpPrinterWrapper::operator=(bool Value) {
- if (Value == false)
+ if (!Value)
return;
// Decide which printer to invoke. If more than one option category is
// registered then it is useful to show the categorized help instead of
// uncategorized help.
- if (RegisteredOptionCategories->size() > 1) {
+ if (GlobalParser->RegisteredOptionCategories.size() > 1) {
// unhide -help-list option so user can have uncategorized output if they
// want it.
HLOp.setHiddenFlag(NotHidden);
@@ -1705,18 +1732,14 @@ void HelpPrinterWrapper::operator=(bool Value) {
}
// Print the value of each option.
-void cl::PrintOptionValues() {
+void cl::PrintOptionValues() { GlobalParser->printOptionValues(); }
+
+void CommandLineParser::printOptionValues() {
if (!PrintOptions && !PrintAllOptions)
return;
- // Get all the options.
- SmallVector<Option *, 4> PositionalOpts;
- SmallVector<Option *, 4> SinkOpts;
- StringMap<Option *> OptMap;
- GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
-
SmallVector<std::pair<const char *, Option *>, 128> Opts;
- sortOpts(OptMap, Opts, /*ShowHidden*/ true);
+ sortOpts(OptionsMap, Opts, /*ShowHidden*/ true);
// Compute the maximum argument length...
size_t MaxArgLen = 0;
@@ -1790,7 +1813,8 @@ static VersionPrinter VersionPrinterInstance;
static cl::opt<VersionPrinter, true, parser<bool>>
VersOp("version", cl::desc("Display the version of this program"),
- cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+ cl::location(VersionPrinterInstance), cl::ValueDisallowed,
+ cl::cat(GenericCategory));
// Utility function for printing the help message.
void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
@@ -1823,13 +1847,27 @@ void cl::AddExtraVersionPrinter(void (*func)()) {
ExtraVersionPrinters->push_back(func);
}
-void cl::getRegisteredOptions(StringMap<Option *> &Map) {
- // Get all the options.
- SmallVector<Option *, 4> PositionalOpts; // NOT USED
- SmallVector<Option *, 4> SinkOpts; // NOT USED
- assert(Map.size() == 0 && "StringMap must be empty");
- GetOptionInfo(PositionalOpts, SinkOpts, Map);
- return;
+StringMap<Option *> &cl::getRegisteredOptions() {
+ return GlobalParser->OptionsMap;
+}
+
+void cl::HideUnrelatedOptions(cl::OptionCategory &Category) {
+ for (auto &I : GlobalParser->OptionsMap) {
+ if (I.second->Category != &Category &&
+ I.second->Category != &GenericCategory)
+ I.second->setHiddenFlag(cl::ReallyHidden);
+ }
+}
+
+void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories) {
+ auto CategoriesBegin = Categories.begin();
+ auto CategoriesEnd = Categories.end();
+ for (auto &I : GlobalParser->OptionsMap) {
+ if (std::find(CategoriesBegin, CategoriesEnd, I.second->Category) ==
+ CategoriesEnd &&
+ I.second->Category != &GenericCategory)
+ I.second->setHiddenFlag(cl::ReallyHidden);
+ }
}
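
A hedged usage sketch of the new API (MyToolCategory and my-verbose are hypothetical): a tool registers its own category, and every option outside it, except the generic options defined above, disappears from -help as ReallyHidden.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::OptionCategory MyToolCategory("my-tool options"); // hypothetical
static cl::opt<bool> Verbose("my-verbose", cl::desc("Extra output"),
                             cl::cat(MyToolCategory)); // hypothetical option

int main(int argc, const char **argv) {
  // Options linked in from LLVM libraries stay registered but are hidden
  // from -help unless they belong to MyToolCategory or GenericCategory.
  cl::HideUnrelatedOptions(MyToolCategory);
  cl::ParseCommandLineOptions(argc, argv, "my-tool sample\n");
  return 0;
}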
void LLVMParseCommandLineOptions(int argc, const char *const *argv,
diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp
index 17ae295..b54613e 100644
--- a/contrib/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm/lib/Support/Compression.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Compression.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
diff --git a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
index e45335d..1bbef23 100644
--- a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
Src++;
- // Just allocate enough space up front. We'll shrink it later.
- Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+ // Just allocate enough space up front. We'll shrink it later. Allocate
+ // enough that we can fit a null terminator without reallocating.
+ Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
UTF8 *DstEnd = Dst + Out.size();
@@ -124,6 +125,46 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
}
Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
+ Out.push_back(0);
+ Out.pop_back();
+ return true;
+}
+
+bool convertUTF8ToUTF16String(StringRef SrcUTF8,
+ SmallVectorImpl<UTF16> &DstUTF16) {
+ assert(DstUTF16.empty());
+
+ // Avoid OOB by returning early on empty input.
+ if (SrcUTF8.empty()) {
+ DstUTF16.push_back(0);
+ DstUTF16.pop_back();
+ return true;
+ }
+
+ const UTF8 *Src = reinterpret_cast<const UTF8 *>(SrcUTF8.begin());
+ const UTF8 *SrcEnd = reinterpret_cast<const UTF8 *>(SrcUTF8.end());
+
+  // Allocate the same number of UTF-16 code units as UTF-8 code units.
+  // Encoding as UTF-16 should always require the same number of code units or
+  // fewer than UTF-8. Allocate one extra code unit for the null terminator
+  // though, so that someone calling DstUTF16.data() gets a null-terminated
+  // string. We resize down later so we don't have to worry about over-allocating.
+ DstUTF16.resize(SrcUTF8.size()+1);
+ UTF16 *Dst = &DstUTF16[0];
+ UTF16 *DstEnd = Dst + DstUTF16.size();
+
+ ConversionResult CR =
+ ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+ assert(CR != targetExhausted);
+
+ if (CR != conversionOK) {
+ DstUTF16.clear();
+ return false;
+ }
+
+ DstUTF16.resize(Dst - &DstUTF16[0]);
+ DstUTF16.push_back(0);
+ DstUTF16.pop_back();
return true;
}
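
The push_back(0)/pop_back() pair used twice above, isolated as a sketch (ensureNullTerminated is an illustrative name): it writes a terminator one element past the logical end without changing size(), so callers of data() see a null-terminated buffer.

#include "llvm/ADT/SmallVector.h"

// Works because pop_back() only adjusts the size; the terminator written
// by push_back() stays behind in the vector's storage.
template <typename T>
void ensureNullTerminated(llvm::SmallVectorImpl<T> &Buf) {
  Buf.push_back(T(0));
  Buf.pop_back();
}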
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
index 9b0e443..aba0f1d 100644
--- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -8,13 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CrashRecoveryContext.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
-#include <cstdio>
#include <setjmp.h>
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index 0d504ee..f1a334b 100644
--- a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -63,9 +63,6 @@ private:
DAGDeltaAlgorithm &DDA;
- const changeset_ty &Changes;
- const std::vector<edge_ty> &Dependencies;
-
std::vector<change_ty> Roots;
/// Cache of failed test results. Successful test results are never cached
@@ -139,9 +136,8 @@ private:
}
public:
- DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
- const changeset_ty &_Changes,
- const std::vector<edge_ty> &_Dependencies);
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies);
changeset_ty Run();
@@ -174,21 +170,17 @@ protected:
}
public:
- DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
- const changeset_ty &_Required)
- : DDAI(_DDAI), Required(_Required) {}
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &DDAI,
+ const changeset_ty &Required)
+ : DDAI(DDAI), Required(Required) {}
};
}
-DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
- const changeset_ty &_Changes,
- const std::vector<edge_ty>
- &_Dependencies)
- : DDA(_DDA),
- Changes(_Changes),
- Dependencies(_Dependencies)
-{
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
+ DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies)
+ : DDA(DDA) {
for (changeset_ty::const_iterator it = Changes.begin(),
ie = Changes.end(); it != ie; ++it) {
Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
diff --git a/contrib/llvm/lib/Support/DataStream.cpp b/contrib/llvm/lib/Support/DataStream.cpp
index dbf6465..c243155 100644
--- a/contrib/llvm/lib/Support/DataStream.cpp
+++ b/contrib/llvm/lib/Support/DataStream.cpp
@@ -18,8 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
-#include <cerrno>
-#include <cstdio>
#include <string>
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -56,9 +54,7 @@ class DataFileStreamer : public DataStreamer {
int Fd;
public:
DataFileStreamer() : Fd(0) {}
- virtual ~DataFileStreamer() {
- close(Fd);
- }
+ ~DataFileStreamer() override { close(Fd); }
size_t GetBytes(unsigned char *buf, size_t len) override {
NumStreamFetches++;
return read(Fd, buf, len);
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
index cb86ea6..eb99242 100644
--- a/contrib/llvm/lib/Support/Debug.cpp
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -28,12 +28,49 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#undef isCurrentDebugType
+#undef setCurrentDebugType
using namespace llvm;
+// Even though LLVM might be built with NDEBUG, define symbols that the code
+// built without NDEBUG can depend on via the llvm/Support/Debug.h header.
+namespace llvm {
+/// Exported boolean set by the -debug option.
+bool DebugFlag = false;
+
+static ManagedStatic<std::vector<std::string>> CurrentDebugType;
+
+/// Return true if the specified string is the debug type specified on the
+/// command line with the -debug-only=X option, or if no debug type was
+/// specified at all.
+bool isCurrentDebugType(const char *DebugType) {
+ if (CurrentDebugType->empty())
+ return true;
+  // See if DebugType is in the list. Note: do not use find() as that forces
+  // us to unnecessarily create an std::string instance.
+ for (auto d : *CurrentDebugType) {
+ if (d == DebugType)
+ return true;
+ }
+ return false;
+}
+
+/// Set the current debug type, as if the -debug-only=X
+/// option were specified. Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void setCurrentDebugType(const char *Type) {
+ CurrentDebugType->clear();
+ CurrentDebugType->push_back(Type);
+}
+
+} // namespace llvm
+
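
How these now always-exported symbols are reached in practice, as a small sketch ("mypass" is a hypothetical debug type): the DEBUG macro from llvm/Support/Debug.h checks DebugFlag and isCurrentDebugType() before printing.

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "mypass" // hypothetical type, enabled via -debug-only=mypass

void runMyPass() {
  // Compiled out entirely under NDEBUG; otherwise gated on -debug/-debug-only.
  DEBUG(llvm::dbgs() << "running mypass\n");
}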
// All Debug.h functionality is a no-op in NDEBUG mode.
#ifndef NDEBUG
-bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option
// -debug - Command line option to enable the DEBUG statements in the passes.
// This flag may only be enabled in debug builds.
@@ -51,8 +88,6 @@ DebugBufferSize("debug-buffer-size",
cl::Hidden,
cl::init(0));
-static ManagedStatic<std::vector<std::string> > CurrentDebugType;
-
namespace {
struct DebugOnlyOpt {
@@ -79,34 +114,9 @@ static void debug_user_sig_handler(void *Cookie) {
// know that debug mode is enabled and dbgs() really is a
// circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
// errs() but this will never be invoked.
- llvm::circular_raw_ostream *dbgout =
- static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs());
- dbgout->flushBufferWithBanner();
-}
-
-// isCurrentDebugType - Return true if the specified string is the debug type
-// specified on the command line, or if none was specified on the command line
-// with the -debug-only=X option.
-//
-bool llvm::isCurrentDebugType(const char *DebugType) {
- if (CurrentDebugType->empty())
- return true;
- // see if DebugType is in list. Note: do not use find() as that forces us to
- // unnecessarily create an std::string instance.
- for (auto d : *CurrentDebugType) {
- if (d == DebugType)
- return true;
- }
- return false;
-}
-
-/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X
-/// option were specified. Note that DebugFlag also needs to be set to true for
-/// debug output to be produced.
-///
-void llvm::setCurrentDebugType(const char *Type) {
- CurrentDebugType->clear();
- CurrentDebugType->push_back(Type);
+ llvm::circular_raw_ostream &dbgout =
+ static_cast<circular_raw_ostream &>(llvm::dbgs());
+ dbgout.flushBufferWithBanner();
}
/// dbgs - Return a circular-buffered debug stream.
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
index 4b6337e..6229825 100644
--- a/contrib/llvm/lib/Support/Dwarf.cpp
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Dwarf.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -19,87 +20,19 @@ using namespace dwarf;
const char *llvm::dwarf::TagString(unsigned Tag) {
switch (Tag) {
- case DW_TAG_array_type: return "DW_TAG_array_type";
- case DW_TAG_class_type: return "DW_TAG_class_type";
- case DW_TAG_entry_point: return "DW_TAG_entry_point";
- case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type";
- case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter";
- case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration";
- case DW_TAG_label: return "DW_TAG_label";
- case DW_TAG_lexical_block: return "DW_TAG_lexical_block";
- case DW_TAG_member: return "DW_TAG_member";
- case DW_TAG_pointer_type: return "DW_TAG_pointer_type";
- case DW_TAG_reference_type: return "DW_TAG_reference_type";
- case DW_TAG_compile_unit: return "DW_TAG_compile_unit";
- case DW_TAG_string_type: return "DW_TAG_string_type";
- case DW_TAG_structure_type: return "DW_TAG_structure_type";
- case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type";
- case DW_TAG_typedef: return "DW_TAG_typedef";
- case DW_TAG_union_type: return "DW_TAG_union_type";
- case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters";
- case DW_TAG_variant: return "DW_TAG_variant";
- case DW_TAG_common_block: return "DW_TAG_common_block";
- case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion";
- case DW_TAG_inheritance: return "DW_TAG_inheritance";
- case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine";
- case DW_TAG_module: return "DW_TAG_module";
- case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type";
- case DW_TAG_set_type: return "DW_TAG_set_type";
- case DW_TAG_subrange_type: return "DW_TAG_subrange_type";
- case DW_TAG_with_stmt: return "DW_TAG_with_stmt";
- case DW_TAG_access_declaration: return "DW_TAG_access_declaration";
- case DW_TAG_base_type: return "DW_TAG_base_type";
- case DW_TAG_catch_block: return "DW_TAG_catch_block";
- case DW_TAG_const_type: return "DW_TAG_const_type";
- case DW_TAG_constant: return "DW_TAG_constant";
- case DW_TAG_enumerator: return "DW_TAG_enumerator";
- case DW_TAG_file_type: return "DW_TAG_file_type";
- case DW_TAG_friend: return "DW_TAG_friend";
- case DW_TAG_namelist: return "DW_TAG_namelist";
- case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
- case DW_TAG_packed_type: return "DW_TAG_packed_type";
- case DW_TAG_subprogram: return "DW_TAG_subprogram";
- case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
- case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter";
- case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
- case DW_TAG_try_block: return "DW_TAG_try_block";
- case DW_TAG_variant_part: return "DW_TAG_variant_part";
- case DW_TAG_variable: return "DW_TAG_variable";
- case DW_TAG_volatile_type: return "DW_TAG_volatile_type";
- case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure";
- case DW_TAG_restrict_type: return "DW_TAG_restrict_type";
- case DW_TAG_interface_type: return "DW_TAG_interface_type";
- case DW_TAG_namespace: return "DW_TAG_namespace";
- case DW_TAG_imported_module: return "DW_TAG_imported_module";
- case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type";
- case DW_TAG_partial_unit: return "DW_TAG_partial_unit";
- case DW_TAG_imported_unit: return "DW_TAG_imported_unit";
- case DW_TAG_condition: return "DW_TAG_condition";
- case DW_TAG_shared_type: return "DW_TAG_shared_type";
- case DW_TAG_lo_user: return "DW_TAG_lo_user";
- case DW_TAG_hi_user: return "DW_TAG_hi_user";
- case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
- case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
- case DW_TAG_expression: return "DW_TAG_expression";
- case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type";
- case DW_TAG_template_alias: return "DW_TAG_template_alias";
- case DW_TAG_coarray_type: return "DW_TAG_coarray_type";
- case DW_TAG_generic_subrange: return "DW_TAG_generic_subrange";
- case DW_TAG_dynamic_type: return "DW_TAG_dynamic_type";
- case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop";
- case DW_TAG_type_unit: return "DW_TAG_type_unit";
- case DW_TAG_format_label: return "DW_TAG_format_label";
- case DW_TAG_function_template: return "DW_TAG_function_template";
- case DW_TAG_class_template: return "DW_TAG_class_template";
- case DW_TAG_GNU_template_template_param:
- return "DW_TAG_GNU_template_template_param";
- case DW_TAG_GNU_template_parameter_pack:
- return "DW_TAG_GNU_template_parameter_pack";
- case DW_TAG_GNU_formal_parameter_pack:
- return "DW_TAG_GNU_formal_parameter_pack";
- case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property";
+ default: return nullptr;
+#define HANDLE_DW_TAG(ID, NAME) \
+ case DW_TAG_##NAME: \
+ return "DW_TAG_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
+}
+
+unsigned llvm::dwarf::getTag(StringRef TagString) {
+ return StringSwitch<unsigned>(TagString)
+#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME)
+#include "llvm/Support/Dwarf.def"
+ .Default(DW_TAG_invalid);
}
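
The pattern these hunks adopt, sketched self-containedly (MY_ENUMS and its entries are illustrative; LLVM instead defines HANDLE_DW_TAG and then includes Dwarf.def): a single X-macro list generates both the string table and the reverse lookup, so the two can never drift apart.

// One list, expanded twice with different HANDLE definitions.
#define MY_ENUMS(HANDLE)                                                       \
  HANDLE(0x01, alpha)                                                          \
  HANDLE(0x02, beta)

enum MyEnum : unsigned {
#define HANDLE_ENUM(ID, NAME) my_##NAME = ID,
  MY_ENUMS(HANDLE_ENUM)
#undef HANDLE_ENUM
};

// Forward lookup: value -> name, mirroring TagString above.
inline const char *enumString(unsigned Val) {
  switch (Val) {
  default: return nullptr;
#define HANDLE_ENUM(ID, NAME)                                                  \
  case my_##NAME: return "my_" #NAME;
  MY_ENUMS(HANDLE_ENUM)
#undef HANDLE_ENUM
  }
}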
const char *llvm::dwarf::ChildrenString(unsigned Children) {
@@ -300,199 +233,46 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
// DWARF5 Fission Extension Forms
case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index";
case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index";
+
+ // Alternate debug sections proposal (output of "dwz" tool).
+ case DW_FORM_GNU_ref_alt: return "DW_FORM_GNU_ref_alt";
+ case DW_FORM_GNU_strp_alt: return "DW_FORM_GNU_strp_alt";
}
return nullptr;
}
const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_OP_addr: return "DW_OP_addr";
- case DW_OP_deref: return "DW_OP_deref";
- case DW_OP_const1u: return "DW_OP_const1u";
- case DW_OP_const1s: return "DW_OP_const1s";
- case DW_OP_const2u: return "DW_OP_const2u";
- case DW_OP_const2s: return "DW_OP_const2s";
- case DW_OP_const4u: return "DW_OP_const4u";
- case DW_OP_const4s: return "DW_OP_const4s";
- case DW_OP_const8u: return "DW_OP_const8u";
- case DW_OP_const8s: return "DW_OP_const8s";
- case DW_OP_constu: return "DW_OP_constu";
- case DW_OP_consts: return "DW_OP_consts";
- case DW_OP_dup: return "DW_OP_dup";
- case DW_OP_drop: return "DW_OP_drop";
- case DW_OP_over: return "DW_OP_over";
- case DW_OP_pick: return "DW_OP_pick";
- case DW_OP_swap: return "DW_OP_swap";
- case DW_OP_rot: return "DW_OP_rot";
- case DW_OP_xderef: return "DW_OP_xderef";
- case DW_OP_abs: return "DW_OP_abs";
- case DW_OP_and: return "DW_OP_and";
- case DW_OP_div: return "DW_OP_div";
- case DW_OP_minus: return "DW_OP_minus";
- case DW_OP_mod: return "DW_OP_mod";
- case DW_OP_mul: return "DW_OP_mul";
- case DW_OP_neg: return "DW_OP_neg";
- case DW_OP_not: return "DW_OP_not";
- case DW_OP_or: return "DW_OP_or";
- case DW_OP_plus: return "DW_OP_plus";
- case DW_OP_plus_uconst: return "DW_OP_plus_uconst";
- case DW_OP_shl: return "DW_OP_shl";
- case DW_OP_shr: return "DW_OP_shr";
- case DW_OP_shra: return "DW_OP_shra";
- case DW_OP_xor: return "DW_OP_xor";
- case DW_OP_skip: return "DW_OP_skip";
- case DW_OP_bra: return "DW_OP_bra";
- case DW_OP_eq: return "DW_OP_eq";
- case DW_OP_ge: return "DW_OP_ge";
- case DW_OP_gt: return "DW_OP_gt";
- case DW_OP_le: return "DW_OP_le";
- case DW_OP_lt: return "DW_OP_lt";
- case DW_OP_ne: return "DW_OP_ne";
- case DW_OP_lit0: return "DW_OP_lit0";
- case DW_OP_lit1: return "DW_OP_lit1";
- case DW_OP_lit2: return "DW_OP_lit2";
- case DW_OP_lit3: return "DW_OP_lit3";
- case DW_OP_lit4: return "DW_OP_lit4";
- case DW_OP_lit5: return "DW_OP_lit5";
- case DW_OP_lit6: return "DW_OP_lit6";
- case DW_OP_lit7: return "DW_OP_lit7";
- case DW_OP_lit8: return "DW_OP_lit8";
- case DW_OP_lit9: return "DW_OP_lit9";
- case DW_OP_lit10: return "DW_OP_lit10";
- case DW_OP_lit11: return "DW_OP_lit11";
- case DW_OP_lit12: return "DW_OP_lit12";
- case DW_OP_lit13: return "DW_OP_lit13";
- case DW_OP_lit14: return "DW_OP_lit14";
- case DW_OP_lit15: return "DW_OP_lit15";
- case DW_OP_lit16: return "DW_OP_lit16";
- case DW_OP_lit17: return "DW_OP_lit17";
- case DW_OP_lit18: return "DW_OP_lit18";
- case DW_OP_lit19: return "DW_OP_lit19";
- case DW_OP_lit20: return "DW_OP_lit20";
- case DW_OP_lit21: return "DW_OP_lit21";
- case DW_OP_lit22: return "DW_OP_lit22";
- case DW_OP_lit23: return "DW_OP_lit23";
- case DW_OP_lit24: return "DW_OP_lit24";
- case DW_OP_lit25: return "DW_OP_lit25";
- case DW_OP_lit26: return "DW_OP_lit26";
- case DW_OP_lit27: return "DW_OP_lit27";
- case DW_OP_lit28: return "DW_OP_lit28";
- case DW_OP_lit29: return "DW_OP_lit29";
- case DW_OP_lit30: return "DW_OP_lit30";
- case DW_OP_lit31: return "DW_OP_lit31";
- case DW_OP_reg0: return "DW_OP_reg0";
- case DW_OP_reg1: return "DW_OP_reg1";
- case DW_OP_reg2: return "DW_OP_reg2";
- case DW_OP_reg3: return "DW_OP_reg3";
- case DW_OP_reg4: return "DW_OP_reg4";
- case DW_OP_reg5: return "DW_OP_reg5";
- case DW_OP_reg6: return "DW_OP_reg6";
- case DW_OP_reg7: return "DW_OP_reg7";
- case DW_OP_reg8: return "DW_OP_reg8";
- case DW_OP_reg9: return "DW_OP_reg9";
- case DW_OP_reg10: return "DW_OP_reg10";
- case DW_OP_reg11: return "DW_OP_reg11";
- case DW_OP_reg12: return "DW_OP_reg12";
- case DW_OP_reg13: return "DW_OP_reg13";
- case DW_OP_reg14: return "DW_OP_reg14";
- case DW_OP_reg15: return "DW_OP_reg15";
- case DW_OP_reg16: return "DW_OP_reg16";
- case DW_OP_reg17: return "DW_OP_reg17";
- case DW_OP_reg18: return "DW_OP_reg18";
- case DW_OP_reg19: return "DW_OP_reg19";
- case DW_OP_reg20: return "DW_OP_reg20";
- case DW_OP_reg21: return "DW_OP_reg21";
- case DW_OP_reg22: return "DW_OP_reg22";
- case DW_OP_reg23: return "DW_OP_reg23";
- case DW_OP_reg24: return "DW_OP_reg24";
- case DW_OP_reg25: return "DW_OP_reg25";
- case DW_OP_reg26: return "DW_OP_reg26";
- case DW_OP_reg27: return "DW_OP_reg27";
- case DW_OP_reg28: return "DW_OP_reg28";
- case DW_OP_reg29: return "DW_OP_reg29";
- case DW_OP_reg30: return "DW_OP_reg30";
- case DW_OP_reg31: return "DW_OP_reg31";
- case DW_OP_breg0: return "DW_OP_breg0";
- case DW_OP_breg1: return "DW_OP_breg1";
- case DW_OP_breg2: return "DW_OP_breg2";
- case DW_OP_breg3: return "DW_OP_breg3";
- case DW_OP_breg4: return "DW_OP_breg4";
- case DW_OP_breg5: return "DW_OP_breg5";
- case DW_OP_breg6: return "DW_OP_breg6";
- case DW_OP_breg7: return "DW_OP_breg7";
- case DW_OP_breg8: return "DW_OP_breg8";
- case DW_OP_breg9: return "DW_OP_breg9";
- case DW_OP_breg10: return "DW_OP_breg10";
- case DW_OP_breg11: return "DW_OP_breg11";
- case DW_OP_breg12: return "DW_OP_breg12";
- case DW_OP_breg13: return "DW_OP_breg13";
- case DW_OP_breg14: return "DW_OP_breg14";
- case DW_OP_breg15: return "DW_OP_breg15";
- case DW_OP_breg16: return "DW_OP_breg16";
- case DW_OP_breg17: return "DW_OP_breg17";
- case DW_OP_breg18: return "DW_OP_breg18";
- case DW_OP_breg19: return "DW_OP_breg19";
- case DW_OP_breg20: return "DW_OP_breg20";
- case DW_OP_breg21: return "DW_OP_breg21";
- case DW_OP_breg22: return "DW_OP_breg22";
- case DW_OP_breg23: return "DW_OP_breg23";
- case DW_OP_breg24: return "DW_OP_breg24";
- case DW_OP_breg25: return "DW_OP_breg25";
- case DW_OP_breg26: return "DW_OP_breg26";
- case DW_OP_breg27: return "DW_OP_breg27";
- case DW_OP_breg28: return "DW_OP_breg28";
- case DW_OP_breg29: return "DW_OP_breg29";
- case DW_OP_breg30: return "DW_OP_breg30";
- case DW_OP_breg31: return "DW_OP_breg31";
- case DW_OP_regx: return "DW_OP_regx";
- case DW_OP_fbreg: return "DW_OP_fbreg";
- case DW_OP_bregx: return "DW_OP_bregx";
- case DW_OP_piece: return "DW_OP_piece";
- case DW_OP_deref_size: return "DW_OP_deref_size";
- case DW_OP_xderef_size: return "DW_OP_xderef_size";
- case DW_OP_nop: return "DW_OP_nop";
- case DW_OP_push_object_address: return "DW_OP_push_object_address";
- case DW_OP_call2: return "DW_OP_call2";
- case DW_OP_call4: return "DW_OP_call4";
- case DW_OP_call_ref: return "DW_OP_call_ref";
- case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
- case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
- case DW_OP_bit_piece: return "DW_OP_bit_piece";
- case DW_OP_implicit_value: return "DW_OP_implicit_value";
- case DW_OP_stack_value: return "DW_OP_stack_value";
-
- // GNU thread-local storage
- case DW_OP_GNU_push_tls_address: return "DW_OP_GNU_push_tls_address";
-
- // DWARF5 Fission Proposal Op Extensions
- case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index";
- case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index";
+ default: return nullptr;
+#define HANDLE_DW_OP(ID, NAME) \
+ case DW_OP_##NAME: \
+ return "DW_OP_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
+}
+
+unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
+ return StringSwitch<unsigned>(OperationEncodingString)
+#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME)
+#include "llvm/Support/Dwarf.def"
+ .Default(0);
}
const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_ATE_address: return "DW_ATE_address";
- case DW_ATE_boolean: return "DW_ATE_boolean";
- case DW_ATE_complex_float: return "DW_ATE_complex_float";
- case DW_ATE_float: return "DW_ATE_float";
- case DW_ATE_signed: return "DW_ATE_signed";
- case DW_ATE_signed_char: return "DW_ATE_signed_char";
- case DW_ATE_unsigned: return "DW_ATE_unsigned";
- case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
- case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
- case DW_ATE_UTF: return "DW_ATE_UTF";
- case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
- case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
- case DW_ATE_edited: return "DW_ATE_edited";
- case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed";
- case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed";
- case DW_ATE_decimal_float: return "DW_ATE_decimal_float";
- case DW_ATE_lo_user: return "DW_ATE_lo_user";
- case DW_ATE_hi_user: return "DW_ATE_hi_user";
+ default: return nullptr;
+#define HANDLE_DW_ATE(ID, NAME) \
+ case DW_ATE_##NAME: \
+ return "DW_ATE_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
+}
+
+unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) {
+ return StringSwitch<unsigned>(EncodingString)
+#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME)
+#include "llvm/Support/Dwarf.def"
+ .Default(0);
}
const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
@@ -538,47 +318,39 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
switch (Virtuality) {
- case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
- case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
- case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
+ default:
+ return nullptr;
+#define HANDLE_DW_VIRTUALITY(ID, NAME) \
+ case DW_VIRTUALITY_##NAME: \
+ return "DW_VIRTUALITY_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
+}
+
+unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) {
+ return StringSwitch<unsigned>(VirtualityString)
+#define HANDLE_DW_VIRTUALITY(ID, NAME) \
+ .Case("DW_VIRTUALITY_" #NAME, DW_VIRTUALITY_##NAME)
+#include "llvm/Support/Dwarf.def"
+ .Default(DW_VIRTUALITY_invalid);
}
const char *llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
- case DW_LANG_C89: return "DW_LANG_C89";
- case DW_LANG_C: return "DW_LANG_C";
- case DW_LANG_Ada83: return "DW_LANG_Ada83";
- case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus";
- case DW_LANG_Cobol74: return "DW_LANG_Cobol74";
- case DW_LANG_Cobol85: return "DW_LANG_Cobol85";
- case DW_LANG_Fortran77: return "DW_LANG_Fortran77";
- case DW_LANG_Fortran90: return "DW_LANG_Fortran90";
- case DW_LANG_Pascal83: return "DW_LANG_Pascal83";
- case DW_LANG_Modula2: return "DW_LANG_Modula2";
- case DW_LANG_Java: return "DW_LANG_Java";
- case DW_LANG_C99: return "DW_LANG_C99";
- case DW_LANG_Ada95: return "DW_LANG_Ada95";
- case DW_LANG_Fortran95: return "DW_LANG_Fortran95";
- case DW_LANG_PLI: return "DW_LANG_PLI";
- case DW_LANG_ObjC: return "DW_LANG_ObjC";
- case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus";
- case DW_LANG_UPC: return "DW_LANG_UPC";
- case DW_LANG_D: return "DW_LANG_D";
- case DW_LANG_Python: return "DW_LANG_Python";
- case DW_LANG_OpenCL: return "DW_LANG_OpenCL";
- case DW_LANG_Go: return "DW_LANG_Go";
- case DW_LANG_Modula3: return "DW_LANG_Modula3";
- case DW_LANG_Haskell: return "DW_LANG_Haskell";
- case DW_LANG_C_plus_plus_03: return "DW_LANG_C_plus_plus_03";
- case DW_LANG_C_plus_plus_11: return "DW_LANG_C_plus_plus_11";
- case DW_LANG_OCaml: return "DW_LANG_OCaml";
- case DW_LANG_lo_user: return "DW_LANG_lo_user";
- case DW_LANG_Mips_Assembler: return "DW_LANG_Mips_Assembler";
- case DW_LANG_hi_user: return "DW_LANG_hi_user";
+ default:
+ return nullptr;
+#define HANDLE_DW_LANG(ID, NAME) \
+ case DW_LANG_##NAME: \
+ return "DW_LANG_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
+}
+
+unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
+ return StringSwitch<unsigned>(LanguageString)
+#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME)
+#include "llvm/Support/Dwarf.def"
+ .Default(0);
}
const char *llvm::dwarf::CaseString(unsigned Case) {
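
The rewritten enum-to-string functions above all follow the X-macro pattern: llvm/Support/Dwarf.def holds the single list of entries, and each consumer re-defines HANDLE_DW_OP (or HANDLE_DW_ATE, etc.) before including it. A minimal, self-contained sketch of the pattern (the FOR_EACH list stands in for the .def file; the entries are illustrative):

    #include <cstdio>

    // Single source of truth, normally kept in a .def file.
    #define FOR_EACH_OP(X)                                                     \
      X(0x03, addr)                                                            \
      X(0x06, deref)

    // Consumer 1: the enum itself.
    enum Op {
    #define DECL(ID, NAME) OP_##NAME = ID,
      FOR_EACH_OP(DECL)
    #undef DECL
    };

    // Consumer 2: enum -> string, mirroring OperationEncodingString.
    static const char *opString(unsigned Val) {
      switch (Val) {
      default: return nullptr;
    #define CASE(ID, NAME) case OP_##NAME: return "OP_" #NAME;
      FOR_EACH_OP(CASE)
    #undef CASE
      }
    }

    int main() { std::printf("%s\n", opString(0x06)); } // prints OP_deref

The string-to-enum direction (getOperationEncoding above) is generated the same way, expanding each entry into a StringSwitch .Case instead of a case label, so the two mappings can never drift apart.
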
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index b176a8b..307ff09 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Errc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Errc.h"
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -77,9 +76,16 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size,
if (EC)
return EC;
+#ifndef LLVM_ON_WIN32
+  // On Windows, CreateFileMapping (the mmap equivalent on Windows)
+  // automatically extends the underlying file, so we don't need to
+  // extend it beforehand. _chsize (the ftruncate equivalent on Windows)
+  // is quite slow, since it actually writes out the requested number
+  // of bytes, so we should avoid calling it.
EC = sys::fs::resize_file(FD, Size);
if (EC)
return EC;
+#endif
auto MappedFile = llvm::make_unique<mapped_file_region>(
FD, mapped_file_region::readwrite, Size, 0, EC);
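
For reference, typical use of this class: create() maps the (possibly resized) file into memory and commit() atomically moves it into place. A hedged sketch, assuming the create()/commit() interface of this LLVM version; the helper and its arguments are illustrative:

    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>
    using namespace llvm;

    std::error_code writeBlob(StringRef Path, StringRef Data) {
      std::unique_ptr<FileOutputBuffer> Buf;
      if (std::error_code EC = FileOutputBuffer::create(Path, Data.size(), Buf))
        return EC;                          // memory-mapped region backing Path
      std::memcpy(Buf->getBufferStart(), Data.data(), Data.size());
      return Buf->commit();                 // rename the temp file into place
    }
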
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index 4635114..b8538ff 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -51,8 +51,8 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
///
void FoldingSetNodeID::AddPointer(const void *Ptr) {
// Note: this adds pointers to the hash using sizes and endianness that
- // depend on the host. It doesn't matter however, because hashing on
- // pointer values in inherently unstable. Nothing should depend on the
+ // depend on the host. It doesn't matter, however, because hashing on
+ // pointer values is inherently unstable. Nothing should depend on the
// ordering of nodes in the folding set.
Bits.append(reinterpret_cast<unsigned *>(&Ptr),
reinterpret_cast<unsigned *>(&Ptr+1));
@@ -101,6 +101,8 @@ void FoldingSetNodeID::AddString(StringRef String) {
// Otherwise do it the hard way.
// To be compatible with above bulk transfer, we need to take endianness
// into account.
+ static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
+ "Unexpected host endianness");
if (sys::IsBigEndianHost) {
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 4] << 24) |
@@ -109,8 +111,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
(unsigned char)String[Pos - 1];
Bits.push_back(V);
}
- } else {
- assert(sys::IsLittleEndianHost && "Unexpected host endianness");
+ } else { // Little-endian host
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 1] << 24) |
((unsigned char)String[Pos - 2] << 16) |
@@ -222,6 +223,8 @@ static void **AllocateBuckets(unsigned NumBuckets) {
//===----------------------------------------------------------------------===//
// FoldingSetImpl Implementation
+void FoldingSetImpl::anchor() {}
+
FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
assert(5 < Log2InitSize && Log2InitSize < 32 &&
"Initial hash table size out of range");
diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp
index 618ec26..2ed71c7 100644
--- a/contrib/llvm/lib/Support/FormattedStream.cpp
+++ b/contrib/llvm/lib/Support/FormattedStream.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
index 054df52..97aedc8 100644
--- a/contrib/llvm/lib/Support/GraphWriter.cpp
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -15,7 +15,6 @@
#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
using namespace llvm;
@@ -93,11 +92,12 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args,
errs() << " done. \n";
} else {
sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg);
- errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ errs() << "Remember to erase graph file: " << Filename << "\n";
}
return false;
}
+namespace {
struct GraphSession {
std::string LogBuffer;
bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
@@ -114,6 +114,7 @@ struct GraphSession {
return false;
}
};
+} // namespace
static const char *getProgramName(GraphProgram::Name program) {
switch (program) {
@@ -139,6 +140,29 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
std::string ViewerPath;
GraphSession S;
+#ifdef __APPLE__
+ if (S.TryFindProgram("open", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ if (wait)
+ args.push_back("-W");
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
+ errs() << "Trying 'open' program... ";
+ if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
+ return false;
+ }
+#endif
+ if (S.TryFindProgram("xdg-open", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
+ errs() << "Trying 'xdg-open' program... ";
+ if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg))
+ return false;
+ }
+
// Graphviz
if (S.TryFindProgram("Graphviz", ViewerPath)) {
std::vector<const char *> args;
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 2abbc14..1bd1fe2 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -137,18 +137,13 @@ static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
"c" (subleaf));
return false;
#elif defined(_MSC_VER)
- // __cpuidex was added in MSVC++ 9.0 SP1
- #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
#else
return true;
#endif
@@ -187,19 +182,21 @@ static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
#endif
}
-static bool OSHasAVXSupport() {
+static bool GetX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
// there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (*rEAX), "=d" (*rEDX) : "c" (0));
+ return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ *rEAX = Result;
+ *rEDX = Result >> 32;
+ return false;
#else
- int rEAX = 0; // Ensures we return false
+ return true;
#endif
- return (rEAX & 6) == 6;
}
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
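
The XCR0 masks tested in the next hunk, spelled out for orientation (bit positions per the Intel SDM; a sketch, not code from this patch):

    #include <cstdint>

    constexpr uint32_t XCR0_SSE      = 1u << 1; // XMM state saved by the OS
    constexpr uint32_t XCR0_AVX      = 1u << 2; // YMM state
    constexpr uint32_t XCR0_OPMASK   = 1u << 5; // AVX-512 mask registers
    constexpr uint32_t XCR0_ZMM_HI   = 1u << 6; // upper halves of ZMM0-15
    constexpr uint32_t XCR0_HI16_ZMM = 1u << 7; // ZMM16-31

    // (EAX & 0x6) == 0x6: the OS saves XMM and YMM, so AVX is usable.
    // (EAX & 0xe0) == 0xe0: the OS additionally saves all AVX-512 state.
    static_assert((XCR0_SSE | XCR0_AVX) == 0x6, "AVX save mask");
    static_assert((XCR0_OPMASK | XCR0_ZMM_HI | XCR0_HI16_ZMM) == 0xe0,
                  "AVX-512 save mask");
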
@@ -228,19 +225,30 @@ StringRef sys::getHostCPUName() {
char c[12];
} text;
- GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
-
- unsigned MaxLeaf = EAX;
- bool HasSSE3 = (ECX & 0x1);
- bool HasSSE41 = (ECX & 0x80000);
- // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ unsigned MaxLeaf;
+ GetX86CpuIDAndInfo(0, &MaxLeaf, text.u+0, text.u+2, text.u+1);
+
+ bool HasMMX = (EDX >> 23) & 1;
+ bool HasSSE = (EDX >> 25) & 1;
+ bool HasSSE2 = (EDX >> 26) & 1;
+ bool HasSSE3 = (ECX >> 0) & 1;
+ bool HasSSSE3 = (ECX >> 9) & 1;
+ bool HasSSE41 = (ECX >> 19) & 1;
+ bool HasSSE42 = (ECX >> 20) & 1;
+ bool HasMOVBE = (ECX >> 22) & 1;
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
- bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
- bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
- !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
- (EBX & 0x20);
+ bool HasAVX = ((ECX & AVXBits) == AVXBits) && !GetX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasLeaf7 = MaxLeaf >= 0x7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+ bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
+ bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
+ bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
+
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
bool HasTBM = (ECX >> 21) & 0x1;
@@ -303,6 +311,8 @@ StringRef sys::getHostCPUName() {
case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
case 13: // Intel Pentium M processor, Intel Celeron M processor, model
// 0Dh. All processors are manufactured using the 90 nm process.
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
return "pentium-m";
case 14: // Intel Core Duo processor, Intel Core Solo processor, model
@@ -318,68 +328,85 @@ StringRef sys::getHostCPUName() {
// manufactured using the 65 nm process
return "core2";
- case 21: // Intel EP80579 Integrated Processor and Intel EP80579
- // Integrated Processor with Intel QuickAssist Technology
- return "i686"; // FIXME: ???
-
case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
- // Not all Penryn processors support SSE 4.1 (such as the Pentium brand)
- return HasSSE41 ? "penryn" : "core2";
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ return "penryn";
case 26: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
- case 29: // Intel Xeon processor MP. All processors are manufactured using
- // the 45 nm process.
case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
+ case 46: // Nehalem EX
+ return "nehalem";
case 37: // Intel Core i7, laptop version.
case 44: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
- case 46: // Nehalem EX
case 47: // Westmere EX
- return "corei7";
+ return "westmere";
// SandyBridge:
case 42: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 45:
- // Not all Sandy Bridge processors support AVX (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX ? "corei7-avx" : "corei7";
+ return "sandybridge";
// Ivy Bridge:
case 58:
case 62: // Ivy Bridge EP
- // Not all Ivy Bridge processors support AVX (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX ? "core-avx-i" : "corei7";
+ return "ivybridge";
// Haswell:
case 60:
case 63:
case 69:
case 70:
- // Not all Haswell processors support AVX too (such as the Pentium
- // versions instead of the i7 versions).
- return HasAVX2 ? "core-avx2" : "corei7";
+ return "haswell";
+
+ // Broadwell:
+ case 61:
+ return "broadwell";
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield
case 53: // 32 nm Atom Midview
case 54: // 32 nm Atom Midview
- return "atom";
+ return "bonnell";
// Atom Silvermont codes from the Intel software optimization guide.
case 55:
case 74:
case 77:
- return "slm";
-
- default: return (Em64T) ? "x86-64" : "i686";
+ return "silvermont";
+
+ default: // Unknown family 6 CPU, try to guess.
+ if (HasAVX512)
+ return "knl";
+ if (HasADX)
+ return "broadwell";
+ if (HasAVX2)
+ return "haswell";
+ if (HasAVX)
+ return "sandybridge";
+ if (HasSSE42)
+ return HasMOVBE ? "silvermont" : "nehalem";
+ if (HasSSE41)
+ return "penryn";
+ if (HasSSSE3)
+ return HasMOVBE ? "bonnell" : "core2";
+ if (Em64T)
+ return "x86-64";
+ if (HasSSE2)
+ return "pentium-m";
+ if (HasSSE)
+ return "pentium3";
+ if (HasMMX)
+ return "pentium2";
+ return "pentiumpro";
}
case 15: {
switch (Model) {
@@ -655,6 +682,28 @@ StringRef sys::getHostCPUName() {
StringRef Str(buffer, CPUInfoSize);
SmallVector<StringRef, 32> Lines;
Str.split(Lines, "\n");
+
+ // Look for the CPU features.
+ SmallVector<StringRef, 32> CPUFeatures;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("features")) {
+ size_t Pos = Lines[I].find(":");
+ if (Pos != StringRef::npos) {
+ Lines[I].drop_front(Pos + 1).split(CPUFeatures, " ");
+ break;
+ }
+ }
+
+ // We need to check for the presence of vector support independently of
+ // the machine type, since we may only use the vector register set when
+ // supported by the kernel (and hypervisor).
+ bool HaveVectorSupport = false;
+ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
+ if (CPUFeatures[I] == "vx")
+ HaveVectorSupport = true;
+ }
+
+ // Now check the processor machine type.
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].startswith("processor ")) {
size_t Pos = Lines[I].find("machine = ");
@@ -662,6 +711,8 @@ StringRef sys::getHostCPUName() {
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 2964 && HaveVectorSupport)
+ return "z13";
if (Id >= 2827)
return "zEC12";
if (Id >= 2817)
@@ -680,7 +731,89 @@ StringRef sys::getHostCPUName() {
}
#endif
-#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ unsigned MaxLevel;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (GetX86CpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
+ MaxLevel < 1)
+ return false;
+
+ GetX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+
+ Features["cmov"] = (EDX >> 15) & 1;
+ Features["mmx"] = (EDX >> 23) & 1;
+ Features["sse"] = (EDX >> 25) & 1;
+ Features["sse2"] = (EDX >> 26) & 1;
+ Features["sse3"] = (ECX >> 0) & 1;
+ Features["ssse3"] = (ECX >> 9) & 1;
+ Features["sse4.1"] = (ECX >> 19) & 1;
+ Features["sse4.2"] = (ECX >> 20) & 1;
+
+ Features["pclmul"] = (ECX >> 1) & 1;
+ Features["cx16"] = (ECX >> 13) & 1;
+ Features["movbe"] = (ECX >> 22) & 1;
+ Features["popcnt"] = (ECX >> 23) & 1;
+ Features["aes"] = (ECX >> 25) & 1;
+ Features["rdrnd"] = (ECX >> 30) & 1;
+
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
+ !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
+ Features["avx"] = HasAVX;
+ Features["fma"] = HasAVX && (ECX >> 12) & 1;
+ Features["f16c"] = HasAVX && (ECX >> 29) & 1;
+
+ // AVX512 requires additional context to be saved by the OS.
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+
+ unsigned MaxExtLevel;
+ GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
+ Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
+ Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
+ Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1);
+ Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1);
+ Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
+
+ bool HasLeaf7 = MaxLevel >= 7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+ // AVX2 is only supported if we have the OS save support from AVX.
+ Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1;
+
+ Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
+ Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
+ Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
+ Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
+ Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
+ Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
+ Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
+ Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
+
+ // AVX512 is only supported if the OS supports the context save for it.
+ Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
+ Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
+ Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
+ Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
+ Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
+ Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
+ Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
+
+ return true;
+}
+#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
// in all cases.
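
For reference, how a client consumes the detection above through the public sys:: interface; a sketch (the feature keys, e.g. "avx2", are the ones populated by this patch):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/Host.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printHostFeatures() {
      outs() << "cpu: " << sys::getHostCPUName() << "\n";
      StringMap<bool> Features;
      if (!sys::getHostCPUFeatures(Features))
        return;                            // detection not supported here
      for (const auto &F : Features)
        outs() << (F.getValue() ? "+" : "-") << F.getKey() << "\n";
    }
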
diff --git a/contrib/llvm/lib/Support/IsInf.cpp b/contrib/llvm/lib/Support/IsInf.cpp
deleted file mode 100644
index d6da0c9..0000000
--- a/contrib/llvm/lib/Support/IsInf.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Platform-independent wrapper around C99 isinf()
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-
-#if HAVE_ISINF_IN_MATH_H
-# include <math.h>
-#elif HAVE_ISINF_IN_CMATH
-# include <cmath>
-#elif HAVE_STD_ISINF_IN_CMATH
-# include <cmath>
-using std::isinf;
-#elif HAVE_FINITE_IN_IEEEFP_H
-// A handy workaround I found at http://www.unixguide.net/sun/faq ...
-// apparently this has been a problem with Solaris for years.
-# include <ieeefp.h>
-static int isinf(double x) { return !finite(x) && x==x; }
-#elif defined(_MSC_VER)
-#include <float.h>
-#define isinf(X) (!_finite(X))
-#elif defined(_AIX) && defined(__GNUC__)
-// GCC's fixincludes seems to be removing the isinf() declaration from the
-// system header /usr/include/math.h
-# include <math.h>
-static int isinf(double x) { return !finite(x) && x==x; }
-#elif defined(__hpux)
-// HP-UX is "special"
-#include <math.h>
-static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
-#else
-# error "Don't know how to get isinf()"
-#endif
-
-namespace llvm {
-
-int IsInf(float f) { return isinf(f); }
-int IsInf(double d) { return isinf(d); }
-
-} // end namespace llvm;
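
With this wrapper (and the IsNAN.cpp one below) deleted, call sites presumably use the C++11 <cmath> classifiers directly; a minimal sketch of the replacement:

    #include <cmath>

    static bool anyNonFinite(double A, double B) {
      return std::isinf(A) || std::isnan(B); // was llvm::IsInf / llvm::IsNAN
    }
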
diff --git a/contrib/llvm/lib/Support/IsNAN.cpp b/contrib/llvm/lib/Support/IsNAN.cpp
deleted file mode 100644
index bdfdfbf..0000000
--- a/contrib/llvm/lib/Support/IsNAN.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- IsNAN.cpp ---------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Platform-independent wrapper around C99 isnan().
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-
-#if HAVE_ISNAN_IN_MATH_H
-# include <math.h>
-#elif HAVE_ISNAN_IN_CMATH
-# include <cmath>
-#elif HAVE_STD_ISNAN_IN_CMATH
-# include <cmath>
-using std::isnan;
-#elif defined(_MSC_VER)
-#include <float.h>
-#define isnan _isnan
-#else
-# error "Don't know how to get isnan()"
-#endif
-
-namespace llvm {
- int IsNAN(float f) { return isnan(f); }
- int IsNAN(double d) { return isnan(d); }
-} // end namespace llvm;
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
index 5b82c36..d07c5f0 100644
--- a/contrib/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -7,12 +7,10 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/LockFileManager.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <sys/stat.h>
#include <sys/types.h>
@@ -91,7 +89,7 @@ LockFileManager::LockFileManager(StringRef FileName)
UniqueLockFileName += "-%%%%%%%%";
int UniqueLockFileID;
if (std::error_code EC = sys::fs::createUniqueFile(
- UniqueLockFileName.str(), UniqueLockFileID, UniqueLockFileName)) {
+ UniqueLockFileName, UniqueLockFileID, UniqueLockFileName)) {
Error = EC;
return;
}
@@ -116,7 +114,7 @@ LockFileManager::LockFileManager(StringRef FileName)
// We failed to write out PID, so make up an excuse, remove the
// unique lock file, and fail.
Error = make_error_code(errc::no_space_on_device);
- sys::fs::remove(UniqueLockFileName.c_str());
+ sys::fs::remove(UniqueLockFileName);
return;
}
}
@@ -124,7 +122,7 @@ LockFileManager::LockFileManager(StringRef FileName)
while (1) {
// Create a link from the lock file name. If this succeeds, we're done.
std::error_code EC =
- sys::fs::create_link(UniqueLockFileName.str(), LockFileName.str());
+ sys::fs::create_link(UniqueLockFileName, LockFileName);
if (!EC)
return;
@@ -137,11 +135,11 @@ LockFileManager::LockFileManager(StringRef FileName)
// from the lock file.
if ((Owner = readLockFile(LockFileName))) {
// Wipe out our unique lock file (it's useless now)
- sys::fs::remove(UniqueLockFileName.str());
+ sys::fs::remove(UniqueLockFileName);
return;
}
- if (!sys::fs::exists(LockFileName.str())) {
+ if (!sys::fs::exists(LockFileName)) {
// The previous owner released the lock file before we could read it.
// Try to get ownership again.
continue;
@@ -149,7 +147,7 @@ LockFileManager::LockFileManager(StringRef FileName)
// There is a lock file that nobody owns; try to clean it up and get
// ownership.
- if ((EC = sys::fs::remove(LockFileName.str()))) {
+ if ((EC = sys::fs::remove(LockFileName))) {
Error = EC;
return;
}
@@ -171,8 +169,8 @@ LockFileManager::~LockFileManager() {
return;
// Since we own the lock, remove the lock file and our own unique lock file.
- sys::fs::remove(LockFileName.str());
- sys::fs::remove(UniqueLockFileName.str());
+ sys::fs::remove(LockFileName);
+ sys::fs::remove(UniqueLockFileName);
}
LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
@@ -186,9 +184,9 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
Interval.tv_sec = 0;
Interval.tv_nsec = 1000000;
#endif
- // Don't wait more than five minutes for the file to appear.
- unsigned MaxSeconds = 300;
- bool LockFileGone = false;
+  // Don't wait more than five minutes per iteration. The total timeout for
+  // the file to appear is ~8.5 minutes.
+ const unsigned MaxSeconds = 5*60;
do {
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
@@ -199,47 +197,18 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
#else
nanosleep(&Interval, nullptr);
#endif
- bool LockFileJustDisappeared = false;
- // If the lock file is still expected to be there, check whether it still
- // is.
- if (!LockFileGone) {
- if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) ==
- errc::no_such_file_or_directory) {
- LockFileGone = true;
- LockFileJustDisappeared = true;
- }
+ if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) ==
+ errc::no_such_file_or_directory) {
+      // If the original file wasn't created, someone thought the lock was dead.
+ if (!sys::fs::exists(FileName))
+ return Res_OwnerDied;
+ return Res_Success;
}
- // If the lock file is no longer there, check if the original file is
- // available now.
- if (LockFileGone) {
- if (sys::fs::exists(FileName.str())) {
- return Res_Success;
- }
-
- // The lock file is gone, so now we're waiting for the original file to
- // show up. If this just happened, reset our waiting intervals and keep
- // waiting.
- if (LockFileJustDisappeared) {
- MaxSeconds = 5;
-
-#if LLVM_ON_WIN32
- Interval = 1;
-#else
- Interval.tv_sec = 0;
- Interval.tv_nsec = 1000000;
-#endif
- continue;
- }
- }
-
- // If we're looking for the lock file to disappear, but the process
- // owning the lock died without cleaning up, just bail out.
- if (!LockFileGone &&
- !processStillExecuting((*Owner).first, (*Owner).second)) {
+ // If the process owning the lock died without cleaning up, just bail out.
+ if (!processStillExecuting((*Owner).first, (*Owner).second))
return Res_OwnerDied;
- }
// Exponentially increase the time we wait for the lock to be removed.
#if LLVM_ON_WIN32
@@ -263,3 +232,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
// Give up.
return Res_Timeout;
}
+
+std::error_code LockFileManager::unsafeRemoveLockFile() {
+ return sys::fs::remove(LockFileName);
+}
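
For reference, the usual pattern for driving this class (as clang does for module builds); a sketch, with the retry policy and file name illustrative:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/LockFileManager.h"
    using namespace llvm;

    bool buildWithLock(StringRef Output) {
      while (true) {
        LockFileManager Lock(Output);
        switch (Lock) {
        case LockFileManager::LFS_Error:
          return false;                  // could not create the lock file
        case LockFileManager::LFS_Owned:
          // We hold the lock: produce Output here; the destructor releases it.
          return true;
        case LockFileManager::LFS_Shared:
          break;                         // another process owns the lock
        }
        // Wait for the owner; on success Output exists. On timeout or a dead
        // owner, loop and try to take the lock ourselves.
        if (Lock.waitForUnlock() == LockFileManager::Res_Success)
          return true;
      }
    }
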
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
index 379db88..98862e9 100644
--- a/contrib/llvm/lib/Support/MemoryBuffer.cpp
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -23,7 +23,6 @@
#include "llvm/Support/Program.h"
#include <cassert>
#include <cerrno>
-#include <cstdio>
#include <cstring>
#include <new>
#include <sys/types.h>
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index fdb27e6..cf46738 100644
--- a/contrib/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -19,9 +19,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <cctype>
-#include <cstdio>
#include <cstring>
-#include <fcntl.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -30,6 +28,7 @@
#endif
using namespace llvm;
+using namespace llvm::support::endian;
namespace {
using llvm::StringRef;
@@ -48,7 +47,6 @@ namespace {
// * empty (in this case we return an empty string)
// * either C: or {//,\\}net.
// * {/,\}
- // * {.,..}
// * {file,directory}name
if (path.empty())
@@ -75,12 +73,6 @@ namespace {
if (is_separator(path[0]))
return path.substr(0, 1);
- if (path.startswith(".."))
- return path.substr(0, 2);
-
- if (path[0] == '.')
- return path.substr(0, 1);
-
// * {file,directory}name
size_t end = path.find_first_of(separators);
return path.substr(0, end);
@@ -917,7 +909,7 @@ file_magic identify_magic(StringRef Magic) {
if (Magic.size() < MinSize)
return file_magic::coff_import_library;
- int BigObjVersion = *reinterpret_cast<const support::ulittle16_t*>(
+ int BigObjVersion = read16le(
Magic.data() + offsetof(COFF::BigObjHeader, Version));
if (BigObjVersion < COFF::BigObjHeader::MinBigObjectVersion)
return file_magic::coff_import_library;
@@ -960,7 +952,7 @@ file_magic identify_magic(StringRef Magic) {
unsigned low = Data2MSB ? 17 : 16;
if (Magic[high] == 0)
switch (Magic[low]) {
- default: break;
+ default: return file_magic::elf;
case 1: return file_magic::elf_relocatable;
case 2: return file_magic::elf_executable;
case 3: return file_magic::elf_shared_object;
@@ -1012,6 +1004,7 @@ file_magic identify_magic(StringRef Magic) {
case 8: return file_magic::macho_bundle;
case 9: return file_magic::macho_dynamically_linked_shared_lib_stub;
case 10: return file_magic::macho_dsym_companion;
+ case 11: return file_magic::macho_kext_bundle;
}
break;
}
@@ -1033,8 +1026,7 @@ file_magic identify_magic(StringRef Magic) {
case 'M': // Possible MS-DOS stub on Windows PE file
if (Magic[1] == 'Z') {
- uint32_t off =
- *reinterpret_cast<const support::ulittle32_t*>(Magic.data() + 0x3c);
+ uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
if (off < Magic.size() &&
memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0)
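
read16le/read32le come from llvm/Support/Endian.h and replace the pointer casts with unaligned little-endian loads. What read32le does, written out by hand as an illustrative sketch:

    #include <cstdint>
    #include <cstring>

    static uint32_t read32le_sketch(const void *P) {
      unsigned char B[4];
      std::memcpy(B, P, 4);              // safe for unaligned Magic offsets
      return (uint32_t)B[0] | ((uint32_t)B[1] << 8) |
             ((uint32_t)B[2] << 16) | ((uint32_t)B[3] << 24);
    }
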
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
index 987778a..f9f8cab 100644
--- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -16,9 +16,8 @@
#include "llvm-c/Core.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
-#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/ThreadLocal.h"
#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,7 +27,17 @@
using namespace llvm;
-static ManagedStatic<sys::ThreadLocal<const PrettyStackTraceEntry> > PrettyStackTraceHead;
+// If backtrace support is not enabled, compile out support for pretty stack
+// traces. This has the secondary effect of not requiring thread local storage
+// when backtrace support is disabled.
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+
+// We need a thread-local pointer to manage the stack of our stack trace
+// objects, but we *really* cannot tolerate destructors running and do not
+// want to pay the overhead of synchronization. As a consequence, we use a
+// raw thread-local variable.
+static LLVM_THREAD_LOCAL const PrettyStackTraceEntry *PrettyStackTraceHead =
+ nullptr;
static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
unsigned NextID = 0;
@@ -46,12 +55,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
/// PrintCurStackTrace - Print the current stack trace to the specified stream.
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
- if (!PrettyStackTraceHead->get()) return;
+ if (!PrettyStackTraceHead) return;
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
- PrintStack(PrettyStackTraceHead->get(), OS);
+ PrintStack(PrettyStackTraceHead, OS);
OS.flush();
}
@@ -99,28 +108,23 @@ static void CrashHandler(void *) {
#endif
}
+// defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#endif
+
PrettyStackTraceEntry::PrettyStackTraceEntry() {
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
// Link ourselves.
- NextEntry = PrettyStackTraceHead->get();
- PrettyStackTraceHead->set(this);
+ NextEntry = PrettyStackTraceHead;
+ PrettyStackTraceHead = this;
+#endif
}
PrettyStackTraceEntry::~PrettyStackTraceEntry() {
- // Do nothing if PrettyStackTraceHead is uninitialized. This can only happen
- // if a shutdown occurred after we created the PrettyStackTraceEntry. That
- // does occur in the following idiom:
- //
- // PrettyStackTraceProgram X(...);
- // llvm_shutdown_obj Y;
- //
- // Without this check, we may end up removing ourselves from the stack trace
- // after PrettyStackTraceHead has already been destroyed.
- if (!PrettyStackTraceHead.isConstructed())
- return;
-
- assert(PrettyStackTraceHead->get() == this &&
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+ assert(PrettyStackTraceHead == this &&
"Pretty stack trace entry destruction is out of order");
- PrettyStackTraceHead->set(getNextEntry());
+ PrettyStackTraceHead = getNextEntry();
+#endif
}
void PrettyStackTraceString::print(raw_ostream &OS) const {
@@ -135,15 +139,19 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << '\n';
}
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static bool RegisterCrashPrinter() {
sys::AddSignalHandler(CrashHandler, nullptr);
return false;
}
+#endif
void llvm::EnablePrettyStackTrace() {
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
// The first time this is called, we register the crash printer.
static bool HandlerRegistered = RegisterCrashPrinter();
(void)HandlerRegistered;
+#endif
}
void LLVMEnablePrettyStackTrace() {
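
For reference, how the entries maintained by this file are used in a tool; a sketch with an illustrative work string:

    #include "llvm/Support/PrettyStackTrace.h"
    using namespace llvm;

    int main(int argc, char **argv) {
      PrettyStackTraceProgram X(argc, argv); // frame 0 of any crash dump
      EnablePrettyStackTrace();
      {
        // Nested frames pop in destructor order, matching the
        // PrettyStackTraceHead list manipulated above.
        PrettyStackTraceString Y("performing illustrative work");
        // ... work that might crash ...
      }
      return 0;
    }
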
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
index ad67e1b..6dcbb47 100644
--- a/contrib/llvm/lib/Support/Process.cpp
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -13,8 +13,8 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
@@ -26,27 +26,10 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-/// \brief A helper function to compute the elapsed wall-time since the program
-/// started.
-///
-/// Note that this routine actually computes the elapsed wall time since the
-/// first time it was called. However, we arrange to have it called during the
-/// startup of the process to get approximately correct results.
-static TimeValue getElapsedWallTime() {
- static TimeValue &StartTime = *new TimeValue(TimeValue::now());
- return TimeValue::now() - StartTime;
-}
-
-/// \brief A special global variable to ensure we call \c getElapsedWallTime
-/// during global initialization of the program.
-///
-/// Note that this variable is never referenced elsewhere. Doing so could
-/// create race conditions during program startup or shutdown.
-static volatile TimeValue DummyTimeValue = getElapsedWallTime();
-
Optional<std::string> Process::FindInEnvPath(const std::string& EnvName,
const std::string& FileName)
{
+ assert(!path::is_absolute(FileName));
Optional<std::string> FoundPath;
Optional<std::string> OptPath = Process::GetEnv(EnvName);
if (!OptPath.hasValue())
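
FindInEnvPath splits a PATH-style environment variable and probes each directory for the (now asserted to be relative) file name; a sketch with a hypothetical variable and file:

    #include "llvm/ADT/Optional.h"
    #include "llvm/Support/Process.h"
    using namespace llvm;

    void findConfig() {
      if (Optional<std::string> P =
              sys::Process::FindInEnvPath("MYTOOL_PATH", "config.txt")) {
        // *P holds "<dir>/config.txt" for the first matching directory.
        (void)*P;
      }
    }
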
diff --git a/contrib/llvm/lib/Support/Program.cpp b/contrib/llvm/lib/Support/Program.cpp
index b84b82b..34e336b 100644
--- a/contrib/llvm/lib/Support/Program.cpp
+++ b/contrib/llvm/lib/Support/Program.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Program.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include <system_error>
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
index 2943137..81d0411 100644
--- a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
+++ b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
@@ -13,13 +13,15 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "rng"
+#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "rng"
+
// Tracking BUG: 19665
// http://llvm.org/bugs/show_bug.cgi?id=19665
//
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
index f7fe1e4..e8344ef 100644
--- a/contrib/llvm/lib/Support/Regex.cpp
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -14,8 +14,8 @@
#include "llvm/Support/Regex.h"
#include "regex_impl.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include <string>
using namespace llvm;
@@ -159,7 +159,7 @@ std::string Regex::sub(StringRef Repl, StringRef String,
RefValue < Matches.size())
Res += Matches[RefValue];
else if (Error && Error->empty())
- *Error = "invalid backreference string '" + Ref.str() + "'";
+ *Error = ("invalid backreference string '" + Twine(Ref) + "'").str();
break;
}
}
diff --git a/contrib/llvm/lib/Support/ScaledNumber.cpp b/contrib/llvm/lib/Support/ScaledNumber.cpp
index 6f6699c..987c2d8 100644
--- a/contrib/llvm/lib/Support/ScaledNumber.cpp
+++ b/contrib/llvm/lib/Support/ScaledNumber.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/ScaledNumber.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ScaledNumbers;
diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp
index c87ee7d..358c8e8 100644
--- a/contrib/llvm/lib/Support/SmallPtrSet.cpp
+++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp
@@ -50,11 +50,12 @@ SmallPtrSetImplBase::insert_imp(const void *Ptr) {
}
// Otherwise, hit the big set case, which will call grow.
}
-
- if (NumElements*4 >= CurArraySize*3) {
+
+ if (LLVM_UNLIKELY(NumElements * 4 >= CurArraySize * 3)) {
// If more than 3/4 of the array is full, grow.
Grow(CurArraySize < 64 ? 128 : CurArraySize*2);
- } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) {
+ } else if (LLVM_UNLIKELY(CurArraySize - (NumElements + NumTombstones) <
+ CurArraySize / 8)) {
     // If fewer than 1/8 of the array is empty (meaning that many are filled with
// tombstones), rehash.
Grow(CurArraySize);
@@ -107,16 +108,16 @@ const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
const void *const *Array = CurArray;
const void *const *Tombstone = nullptr;
while (1) {
- // Found Ptr's bucket?
- if (Array[Bucket] == Ptr)
- return Array+Bucket;
-
// If we found an empty bucket, the pointer doesn't exist in the set.
// Return a tombstone if we've seen one so far, or the empty bucket if
// not.
- if (Array[Bucket] == getEmptyMarker())
+ if (LLVM_LIKELY(Array[Bucket] == getEmptyMarker()))
return Tombstone ? Tombstone : Array+Bucket;
-
+
+ // Found Ptr's bucket?
+ if (LLVM_LIKELY(Array[Bucket] == Ptr))
+ return Array+Bucket;
+
// If this is a tombstone, remember it. If Ptr ends up not in the set, we
// prefer to return it than something that would require more probing.
if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
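
LLVM_LIKELY/LLVM_UNLIKELY annotate the expected path for the branch predictor; roughly how llvm/Support/Compiler.h defines them on the GCC/Clang branch (a sketch, renamed to avoid clashing with the real macros):

    #if defined(__GNUC__) || defined(__clang__)
    #define MY_LIKELY(EXPR)   __builtin_expect((bool)(EXPR), true)
    #define MY_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
    #else
    #define MY_LIKELY(EXPR)   (EXPR)
    #define MY_UNLIKELY(EXPR) (EXPR)
    #endif

Reordering the probe above so the empty-bucket test comes first pairs with these hints: a lookup miss (an empty bucket) becomes the predicted, fall-through case.
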
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index b50a66b..d5e3157 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -14,13 +14,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <system_error>
using namespace llvm;
static const size_t TabStop = 8;
diff --git a/contrib/llvm/lib/Support/SpecialCaseList.cpp b/contrib/llvm/lib/Support/SpecialCaseList.cpp
index 785cc60..ea417c4 100644
--- a/contrib/llvm/lib/Support/SpecialCaseList.cpp
+++ b/contrib/llvm/lib/Support/SpecialCaseList.cpp
@@ -15,13 +15,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SpecialCaseList.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include <string>
#include <system_error>
#include <utility>
@@ -46,19 +44,27 @@ struct SpecialCaseList::Entry {
}
};
-SpecialCaseList::SpecialCaseList() : Entries() {}
+SpecialCaseList::SpecialCaseList() : Entries(), Regexps(), IsCompiled(false) {}
-std::unique_ptr<SpecialCaseList> SpecialCaseList::create(StringRef Path,
- std::string &Error) {
- if (Path.empty())
- return std::unique_ptr<SpecialCaseList>(new SpecialCaseList());
- ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
- MemoryBuffer::getFile(Path);
- if (std::error_code EC = FileOrErr.getError()) {
- Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
- return nullptr;
+std::unique_ptr<SpecialCaseList>
+SpecialCaseList::create(const std::vector<std::string> &Paths,
+ std::string &Error) {
+ std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
+ for (auto Path : Paths) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFile(Path);
+ if (std::error_code EC = FileOrErr.getError()) {
+ Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
+ return nullptr;
+ }
+ std::string ParseError;
+ if (!SCL->parse(FileOrErr.get().get(), ParseError)) {
+ Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
+ return nullptr;
+ }
}
- return create(FileOrErr.get().get(), Error);
+ SCL->compile();
+ return SCL;
}
std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
@@ -66,12 +72,14 @@ std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
return nullptr;
+ SCL->compile();
return SCL;
}
-std::unique_ptr<SpecialCaseList> SpecialCaseList::createOrDie(StringRef Path) {
+std::unique_ptr<SpecialCaseList>
+SpecialCaseList::createOrDie(const std::vector<std::string> &Paths) {
std::string Error;
- if (auto SCL = create(Path, Error))
+ if (auto SCL = create(Paths, Error))
return SCL;
report_fatal_error(Error);
}
@@ -80,12 +88,8 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
// Iterate through each line in the blacklist file.
SmallVector<StringRef, 16> Lines;
SplitString(MB->getBuffer(), Lines, "\n\r");
- StringMap<StringMap<std::string> > Regexps;
- assert(Entries.empty() &&
- "parse() should be called on an empty SpecialCaseList");
int LineNo = 1;
- for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I, ++LineNo) {
+ for (auto I = Lines.begin(), E = Lines.end(); I != E; ++I, ++LineNo) {
// Ignore empty lines and lines starting with "#"
if (I->empty() || I->startswith("#"))
continue;
@@ -94,7 +98,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
StringRef Prefix = SplitLine.first;
if (SplitLine.second.empty()) {
// Missing ':' in the line.
- Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
+ Error = (Twine("malformed line ") + Twine(LineNo) + ": '" +
SplitLine.first + "'").str();
return false;
}
@@ -119,7 +123,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
Regex CheckRE(Regexp);
std::string REError;
if (!CheckRE.isValid(REError)) {
- Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
+ Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" +
SplitLine.second + "': " + REError).str();
return false;
}
@@ -129,10 +133,14 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
Regexps[Prefix][Category] += "|";
Regexps[Prefix][Category] += "^" + Regexp + "$";
}
+ return true;
+}
+void SpecialCaseList::compile() {
+ assert(!IsCompiled && "compile() should only be called once");
// Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
- E = Regexps.end();
+ for (StringMap<StringMap<std::string>>::const_iterator I = Regexps.begin(),
+ E = Regexps.end();
I != E; ++I) {
for (StringMap<std::string>::const_iterator II = I->second.begin(),
IE = I->second.end();
@@ -140,13 +148,15 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
Entries[I->getKey()][II->getKey()].RegEx.reset(new Regex(II->getValue()));
}
}
- return true;
+ Regexps.clear();
+ IsCompiled = true;
}
SpecialCaseList::~SpecialCaseList() {}
bool SpecialCaseList::inSection(StringRef Section, StringRef Query,
StringRef Category) const {
+ assert(IsCompiled && "SpecialCaseList::compile() was not called!");
StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
StringMap<Entry>::const_iterator II = I->second.find(Category);
diff --git a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp b/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
index 68beeef..6c5652a 100644
--- a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
+++ b/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
@@ -8,12 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/StreamingMemoryObject.h"
-#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstring>
-
-
using namespace llvm;
namespace {
@@ -45,8 +42,8 @@ private:
return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar;
}
- RawMemoryObject(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
- void operator=(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
+ RawMemoryObject(const RawMemoryObject&) = delete;
+ void operator=(const RawMemoryObject&) = delete;
};
uint64_t RawMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
@@ -76,7 +73,7 @@ namespace llvm {
// block until we actually want to read it.
bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
if (ObjectSize && address < ObjectSize) return true;
- return fetchToPos(address);
+ return fetchToPos(address);
}
uint64_t StreamingMemoryObject::getExtent() const {
@@ -90,13 +87,18 @@ uint64_t StreamingMemoryObject::getExtent() const {
uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
uint64_t Address) const {
fetchToPos(Address + Size - 1);
- if (Address >= BytesRead)
+  // Note: For wrapped bitcode files, ObjectSize is set after the
+  // first call to fetchToPos. In such cases, ObjectSize can be
+  // smaller than BytesRead.
+ size_t MaxAddress =
+ (ObjectSize && ObjectSize < BytesRead) ? ObjectSize : BytesRead;
+ if (Address >= MaxAddress)
return 0;
uint64_t End = Address + Size;
- if (End > BytesRead)
- End = BytesRead;
- assert(static_cast<int64_t>(End - Address) >= 0);
+ if (End > MaxAddress)
+ End = MaxAddress;
+ assert(End >= Address);
Size = End - Address;
memcpy(Buf, &Bytes[Address + BytesSkipped], Size);
return Size;
@@ -112,6 +114,8 @@ bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
void StreamingMemoryObject::setKnownObjectSize(size_t size) {
ObjectSize = size;
Bytes.reserve(size);
+ if (ObjectSize <= BytesRead)
+ EOFReached = true;
}
MemoryObject *getNonStreamedMemoryObject(const unsigned char *Start,
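
A worked example of the clamping above, with illustrative numbers: suppose fetchToPos leaves BytesRead = 200 but the wrapped-bitcode header already fixed ObjectSize = 120. For a read at Address = 100 with Size = 50, MaxAddress = min(120, 200) = 120 and End = min(150, 120) = 120, so readBytes copies and returns 120 - 100 = 20 bytes.
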
diff --git a/contrib/llvm/lib/Support/StringExtras.cpp b/contrib/llvm/lib/Support/StringExtras.cpp
index d77ad7f..3e2420f 100644
--- a/contrib/llvm/lib/Support/StringExtras.cpp
+++ b/contrib/llvm/lib/Support/StringExtras.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
index ddb7349..7be9466 100644
--- a/contrib/llvm/lib/Support/StringMap.cpp
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -188,9 +188,10 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
// grow/rehash the table.
- if (NumItems*4 > NumBuckets*3) {
+ if (LLVM_UNLIKELY(NumItems * 4 > NumBuckets * 3)) {
NewSize = NumBuckets*2;
- } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) {
+ } else if (LLVM_UNLIKELY(NumBuckets - (NumItems + NumTombstones) <=
+ NumBuckets / 8)) {
NewSize = NumBuckets;
} else {
return BucketNo;
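
The two triggers above with concrete numbers (illustrative): at NumBuckets = 64 the table grows to 128 once NumItems * 4 > 192, i.e. on inserting the 49th item (load factor above 3/4); and with NumItems = 20 plus NumTombstones = 37, only 64 - 57 = 7 <= 64 / 8 = 8 buckets are empty, so the table rehashes in place at NewSize = 64 purely to sweep out the tombstones.
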
diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp
index 2036364..7fa6ae3 100644
--- a/contrib/llvm/lib/Support/SystemUtils.cpp
+++ b/contrib/llvm/lib/Support/SystemUtils.cpp
@@ -13,8 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
new file mode 100644
index 0000000..a3998d2
--- /dev/null
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -0,0 +1,467 @@
+//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise hardware features such
+// as FPU/CPU/ARCH names, as well as specific feature support such as the
+// hardware divide extension (HDIV).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+
+// List of canonical FPU names (use getFPUSynonym)
+// FIXME: TableGen this.
+struct {
+  const char *Name;
+ ARM::FPUKind ID;
+} FPUNames[] = {
+ { "invalid", ARM::FK_INVALID },
+ { "vfp", ARM::FK_VFP },
+ { "vfpv2", ARM::FK_VFPV2 },
+ { "vfpv3", ARM::FK_VFPV3 },
+ { "vfpv3-d16", ARM::FK_VFPV3_D16 },
+ { "vfpv4", ARM::FK_VFPV4 },
+ { "vfpv4-d16", ARM::FK_VFPV4_D16 },
+ { "fpv5-d16", ARM::FK_FPV5_D16 },
+ { "fp-armv8", ARM::FK_FP_ARMV8 },
+ { "neon", ARM::FK_NEON },
+ { "neon-vfpv4", ARM::FK_NEON_VFPV4 },
+ { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8 },
+ { "crypto-neon-fp-armv8", ARM::FK_CRYPTO_NEON_FP_ARMV8 },
+ { "softvfp", ARM::FK_SOFTVFP }
+};
+// List of canonical arch names (use getArchSynonym)
+// FIXME: TableGen this.
+struct {
+ const char *Name;
+ ARM::ArchKind ID;
+ const char *DefaultCPU;
+ ARMBuildAttrs::CPUArch DefaultArch;
+} ARCHNames[] = {
+ { "invalid", ARM::AK_INVALID, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 },
+ { "armv2", ARM::AK_ARMV2, "2", ARMBuildAttrs::CPUArch::v4 },
+ { "armv2a", ARM::AK_ARMV2A, "2A", ARMBuildAttrs::CPUArch::v4 },
+ { "armv3", ARM::AK_ARMV3, "3", ARMBuildAttrs::CPUArch::v4 },
+ { "armv3m", ARM::AK_ARMV3M, "3M", ARMBuildAttrs::CPUArch::v4 },
+ { "armv4", ARM::AK_ARMV4, "4", ARMBuildAttrs::CPUArch::v4 },
+ { "armv4t", ARM::AK_ARMV4T, "4T", ARMBuildAttrs::CPUArch::v4T },
+ { "armv5", ARM::AK_ARMV5, "5", ARMBuildAttrs::CPUArch::v5T },
+ { "armv5t", ARM::AK_ARMV5T, "5T", ARMBuildAttrs::CPUArch::v5T },
+ { "armv5te", ARM::AK_ARMV5TE, "5TE", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv6", ARM::AK_ARMV6, "6", ARMBuildAttrs::CPUArch::v6 },
+ { "armv6j", ARM::AK_ARMV6J, "6J", ARMBuildAttrs::CPUArch::v6 },
+ { "armv6k", ARM::AK_ARMV6K, "6K", ARMBuildAttrs::CPUArch::v6K },
+ { "armv6t2", ARM::AK_ARMV6T2, "6T2", ARMBuildAttrs::CPUArch::v6T2 },
+ { "armv6z", ARM::AK_ARMV6Z, "6Z", ARMBuildAttrs::CPUArch::v6KZ },
+ { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", ARMBuildAttrs::CPUArch::v6KZ },
+ { "armv6-m", ARM::AK_ARMV6M, "6-M", ARMBuildAttrs::CPUArch::v6_M },
+ { "armv7", ARM::AK_ARMV7, "7", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7-a", ARM::AK_ARMV7A, "7-A", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7-r", ARM::AK_ARMV7R, "7-R", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7-m", ARM::AK_ARMV7M, "7-M", ARMBuildAttrs::CPUArch::v7 },
+ { "armv8-a", ARM::AK_ARMV8A, "8-A", ARMBuildAttrs::CPUArch::v8 },
+ { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", ARMBuildAttrs::CPUArch::v8 },
+ // Non-standard Arch names.
+ { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", ARMBuildAttrs::CPUArch::v5TE },
+ { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", ARMBuildAttrs::CPUArch::v5TE },
+ { "xscale", ARM::AK_XSCALE, "xscale", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv5e", ARM::AK_ARMV5E, "5E", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv5tej", ARM::AK_ARMV5TEJ, "5TE", ARMBuildAttrs::CPUArch::v5TE },
+ { "armv6sm", ARM::AK_ARMV6SM, "6-M", ARMBuildAttrs::CPUArch::v6_M },
+ { "armv6hl", ARM::AK_ARMV6HL, "6-M", ARMBuildAttrs::CPUArch::v6_M },
+ { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", ARMBuildAttrs::CPUArch::v7E_M },
+ { "armv7l", ARM::AK_ARMV7L, "7-L", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7hl", ARM::AK_ARMV7HL, "7H-L", ARMBuildAttrs::CPUArch::v7 },
+ { "armv7s", ARM::AK_ARMV7S, "7-S", ARMBuildAttrs::CPUArch::v7 }
+};
+// List of canonical arch extension names
+// FIXME: TableGen this.
+struct {
+ const char *Name;
+ ARM::ArchExtKind ID;
+} ARCHExtNames[] = {
+ { "invalid", ARM::AEK_INVALID },
+ { "crc", ARM::AEK_CRC },
+ { "crypto", ARM::AEK_CRYPTO },
+ { "fp", ARM::AEK_FP },
+ { "idiv", ARM::AEK_HWDIV },
+ { "mp", ARM::AEK_MP },
+ { "sec", ARM::AEK_SEC },
+ { "virt", ARM::AEK_VIRT }
+};
+// List of CPU names and their arches.
+// The same CPU can have multiple arches and can be default on multiple arches.
+// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
+// FIXME: TableGen this.
+struct {
+ const char *Name;
+ ARM::ArchKind ArchID;
+ bool Default;
+} CPUNames[] = {
+ { "arm2", ARM::AK_ARMV2, true },
+ { "arm6", ARM::AK_ARMV3, true },
+ { "arm7m", ARM::AK_ARMV3M, true },
+ { "strongarm", ARM::AK_ARMV4, true },
+ { "arm7tdmi", ARM::AK_ARMV4T, true },
+ { "arm7tdmi-s", ARM::AK_ARMV4T, false },
+ { "arm710t", ARM::AK_ARMV4T, false },
+ { "arm720t", ARM::AK_ARMV4T, false },
+ { "arm9", ARM::AK_ARMV4T, false },
+ { "arm9tdmi", ARM::AK_ARMV4T, false },
+ { "arm920", ARM::AK_ARMV4T, false },
+ { "arm920t", ARM::AK_ARMV4T, false },
+ { "arm922t", ARM::AK_ARMV4T, false },
+ { "arm9312", ARM::AK_ARMV4T, false },
+ { "arm940t", ARM::AK_ARMV4T, false },
+ { "ep9312", ARM::AK_ARMV4T, false },
+ { "arm10tdmi", ARM::AK_ARMV5, true },
+ { "arm10tdmi", ARM::AK_ARMV5T, true },
+ { "arm1020t", ARM::AK_ARMV5T, false },
+ { "xscale", ARM::AK_XSCALE, true },
+ { "xscale", ARM::AK_ARMV5TE, false },
+ { "arm9e", ARM::AK_ARMV5TE, false },
+ { "arm926ej-s", ARM::AK_ARMV5TE, false },
+ { "arm946ej-s", ARM::AK_ARMV5TE, false },
+ { "arm966e-s", ARM::AK_ARMV5TE, false },
+ { "arm968e-s", ARM::AK_ARMV5TE, false },
+ { "arm1020e", ARM::AK_ARMV5TE, false },
+ { "arm1022e", ARM::AK_ARMV5TE, true },
+ { "iwmmxt", ARM::AK_ARMV5TE, false },
+ { "iwmmxt", ARM::AK_IWMMXT, true },
+ { "arm1136jf-s", ARM::AK_ARMV6, true },
+ { "arm1136j-s", ARM::AK_ARMV6J, true },
+ { "arm1136jz-s", ARM::AK_ARMV6J, false },
+ { "arm1176j-s", ARM::AK_ARMV6K, false },
+ { "mpcore", ARM::AK_ARMV6K, false },
+ { "mpcorenovfp", ARM::AK_ARMV6K, false },
+ { "arm1176jzf-s", ARM::AK_ARMV6K, true },
+ { "arm1176jzf-s", ARM::AK_ARMV6Z, true },
+ { "arm1176jzf-s", ARM::AK_ARMV6ZK, true },
+ { "arm1156t2-s", ARM::AK_ARMV6T2, true },
+ { "arm1156t2f-s", ARM::AK_ARMV6T2, false },
+ { "cortex-m0", ARM::AK_ARMV6M, true },
+ { "cortex-m0plus", ARM::AK_ARMV6M, false },
+ { "cortex-m1", ARM::AK_ARMV6M, false },
+ { "sc000", ARM::AK_ARMV6M, false },
+ { "cortex-a8", ARM::AK_ARMV7, true },
+ { "cortex-a5", ARM::AK_ARMV7A, false },
+ { "cortex-a7", ARM::AK_ARMV7A, false },
+ { "cortex-a8", ARM::AK_ARMV7A, true },
+ { "cortex-a9", ARM::AK_ARMV7A, false },
+ { "cortex-a12", ARM::AK_ARMV7A, false },
+ { "cortex-a15", ARM::AK_ARMV7A, false },
+ { "cortex-a17", ARM::AK_ARMV7A, false },
+ { "krait", ARM::AK_ARMV7A, false },
+ { "cortex-r4", ARM::AK_ARMV7R, true },
+ { "cortex-r4f", ARM::AK_ARMV7R, false },
+ { "cortex-r5", ARM::AK_ARMV7R, false },
+ { "cortex-r7", ARM::AK_ARMV7R, false },
+ { "sc300", ARM::AK_ARMV7M, false },
+ { "cortex-m3", ARM::AK_ARMV7M, true },
+ { "cortex-m4", ARM::AK_ARMV7M, false },
+ { "cortex-m7", ARM::AK_ARMV7M, false },
+ { "cortex-a53", ARM::AK_ARMV8A, true },
+ { "cortex-a57", ARM::AK_ARMV8A, false },
+ { "cortex-a72", ARM::AK_ARMV8A, false },
+ { "cyclone", ARM::AK_ARMV8A, false },
+ { "generic", ARM::AK_ARMV8_1A, true },
+ // Non-standard Arch names.
+ { "arm1022e", ARM::AK_ARMV5E, true },
+ { "arm926ej-s", ARM::AK_ARMV5TEJ, true },
+ { "cortex-m0", ARM::AK_ARMV6SM, true },
+ { "arm1176jzf-s", ARM::AK_ARMV6HL, true },
+ { "cortex-a8", ARM::AK_ARMV7L, true },
+ { "cortex-a8", ARM::AK_ARMV7HL, true },
+ { "cortex-m4", ARM::AK_ARMV7EM, true },
+ { "swift", ARM::AK_ARMV7S, true },
+ // Invalid CPU
+ { "invalid", ARM::AK_INVALID, true }
+};
+
+} // namespace
+
+namespace llvm {
+
+// ======================================================= //
+// Information by ID
+// ======================================================= //
+
+const char *ARMTargetParser::getFPUName(unsigned FPUKind) {
+ if (FPUKind >= ARM::FK_LAST)
+ return nullptr;
+ return FPUNames[FPUKind].Name;
+}
+
+const char *ARMTargetParser::getArchName(unsigned ArchKind) {
+ if (ArchKind >= ARM::AK_LAST)
+ return nullptr;
+ return ARCHNames[ArchKind].Name;
+}
+
+const char *ARMTargetParser::getArchDefaultCPUName(unsigned ArchKind) {
+ if (ArchKind >= ARM::AK_LAST)
+ return nullptr;
+ return ARCHNames[ArchKind].DefaultCPU;
+}
+
+unsigned ARMTargetParser::getArchDefaultCPUArch(unsigned ArchKind) {
+ if (ArchKind >= ARM::AK_LAST)
+ return ARMBuildAttrs::CPUArch::Pre_v4;
+ return ARCHNames[ArchKind].DefaultArch;
+}
+
+const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) {
+ if (ArchExtKind >= ARM::AEK_LAST)
+ return nullptr;
+ return ARCHExtNames[ArchExtKind].Name;
+}
+
+const char *ARMTargetParser::getDefaultCPU(StringRef Arch) {
+ unsigned AK = parseArch(Arch);
+ if (AK == ARM::AK_INVALID)
+ return nullptr;
+
+ // Several CPUs can share an AK; return the entry marked as its default.
+ for (const auto CPU : CPUNames) {
+ if (CPU.ArchID == AK && CPU.Default)
+ return CPU.Name;
+ }
+ return nullptr;
+}
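+// For example, getDefaultCPU("armv8-a") returns "cortex-a53", the CPUNames
+// entry marked Default for AK_ARMV8A.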
+
+// ======================================================= //
+// Parsers
+// ======================================================= //
+
+StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) {
+ return StringSwitch<StringRef>(FPU)
+ .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Case("vfp2", "vfpv2")
+ .Case("vfp3", "vfpv3")
+ .Case("vfp4", "vfpv4")
+ .Case("vfp3-d16", "vfpv3-d16")
+ .Case("vfp4-d16", "vfpv4-d16")
+ // FIXME: sp-16 is NOT the same as d16
+ .Cases("fp4-sp-d16", "fpv4-sp-d16", "vfpv4-d16")
+ .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Cases("fp5-sp-d16", "fpv5-sp-d16", "fpv5-d16")
+ .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
+ .Case("neon-vfpv3", "neon")
+ .Default(FPU);
+}
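+// For example, getFPUSynonym("vfp3") returns "vfpv3", while unsupported FPUs
+// such as "fpa" map to "invalid".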
+
+StringRef ARMTargetParser::getArchSynonym(StringRef Arch) {
+ return StringSwitch<StringRef>(Arch)
+ .Cases("armv6m", "v6m", "armv6-m")
+ .Cases("armv7a", "v7a", "armv7-a")
+ .Cases("armv7r", "v7r", "armv7-r")
+ .Cases("armv7m", "v7m", "armv7-m")
+ .Cases("armv7em", "v7em", "armv7e-m")
+ .Cases("armv8", "v8", "armv8-a")
+ .Cases("armv8a", "v8a", "armv8-a")
+ .Cases("armv8.1a", "v8.1a", "armv8.1-a")
+ .Cases("aarch64", "arm64", "armv8-a")
+ .Default(Arch);
+}
+
+// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, though
+// (iwmmxt|xscale)(eb)? is also permitted. For the former, return the "v.+"
+// part; for the latter, return the string unmodified, minus any 'eb'.
+// If invalid, return the empty string.
+StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) {
+ size_t offset = StringRef::npos;
+ StringRef A = Arch;
+ StringRef Error = "";
+
+ // Begins with "arm" / "thumb", move past it.
+ if (A.startswith("arm64"))
+ offset = 5;
+ else if (A.startswith("arm"))
+ offset = 3;
+ else if (A.startswith("thumb"))
+ offset = 5;
+ else if (A.startswith("aarch64")) {
+ offset = 7;
+ // AArch64 uses the "_be" suffix, not "eb".
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ if (A.substr(offset,3) == "_be")
+ offset += 3;
+ }
+
+ // Ex. "armebv7", move past the "eb".
+ if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
+ offset += 2;
+ // Or, if it ends with eb ("armv7eb"), chop it off.
+ else if (A.endswith("eb"))
+ A = A.substr(0, A.size() - 2);
+ // Trim the head
+ if (offset != StringRef::npos)
+ A = A.substr(offset);
+
+ // Empty string means offset reached the end, which means it's valid.
+ if (A.empty())
+ return Arch;
+
+ // Only match non-marketing names
+ if (offset != StringRef::npos) {
+ // Must start with 'vN'.
+ if (A.size() < 2 || A[0] != 'v' || !std::isdigit(A[1]))
+ return Error;
+ // Can't have an extra 'eb'.
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ }
+
+ // Arch will either be a 'v' name (v7a) or a marketing name (xscale).
+ return A;
+}
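+// Examples of the rules above: "armv7eb" and "armebv7" both canonicalize to
+// "v7"; "xscale" is a marketing name and is returned unmodified; "armfoo" is
+// invalid and yields the empty string.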
+
+unsigned ARMTargetParser::parseFPU(StringRef FPU) {
+ StringRef Syn = getFPUSynonym(FPU);
+ for (const auto F : FPUNames) {
+ if (Syn == F.Name)
+ return F.ID;
+ }
+ return ARM::FK_INVALID;
+}
+
+// Allows partial match, ex. "v7a" matches "armv7a".
+unsigned ARMTargetParser::parseArch(StringRef Arch) {
+ StringRef Syn = getArchSynonym(Arch);
+ for (const auto A : ARCHNames) {
+ if (StringRef(A.Name).endswith(Syn))
+ return A.ID;
+ }
+ return ARM::AK_INVALID;
+}
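+// e.g. "v7a", "armv7a" and "armv7-a" all parse to ARM::AK_ARMV7A.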
+
+unsigned ARMTargetParser::parseArchExt(StringRef ArchExt) {
+ for (const auto A : ARCHExtNames) {
+ if (ArchExt == A.Name)
+ return A.ID;
+ }
+ return ARM::AEK_INVALID;
+}
+
+unsigned ARMTargetParser::parseCPUArch(StringRef CPU) {
+ for (const auto C : CPUNames) {
+ if (CPU == C.Name)
+ return C.ArchID;
+ }
+ return ARM::AK_INVALID;
+}
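+// e.g. parseCPUArch("cortex-m3") returns ARM::AK_ARMV7M.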
+
+// ARM, Thumb, AArch64
+unsigned ARMTargetParser::parseArchISA(StringRef Arch) {
+ return StringSwitch<unsigned>(Arch)
+ .StartsWith("aarch64", ARM::IK_AARCH64)
+ .StartsWith("arm64", ARM::IK_AARCH64)
+ .StartsWith("thumb", ARM::IK_THUMB)
+ .StartsWith("arm", ARM::IK_ARM)
+ .Default(ARM::IK_INVALID);
+}
+
+// Little/Big endian
+unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
+ if (Arch.startswith("armeb") ||
+ Arch.startswith("thumbeb") ||
+ Arch.startswith("aarch64_be"))
+ return ARM::EK_BIG;
+
+ if (Arch.startswith("arm") || Arch.startswith("thumb")) {
+ if (Arch.endswith("eb"))
+ return ARM::EK_BIG;
+ else
+ return ARM::EK_LITTLE;
+ }
+
+ if (Arch.startswith("aarch64"))
+ return ARM::EK_LITTLE;
+
+ return ARM::EK_INVALID;
+}
+
+// Profile A/R/M
+unsigned ARMTargetParser::parseArchProfile(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch(parseArch(Arch)) {
+ case ARM::AK_ARMV6M:
+ case ARM::AK_ARMV7M:
+ case ARM::AK_ARMV6SM:
+ case ARM::AK_ARMV7EM:
+ return ARM::PK_M;
+ case ARM::AK_ARMV7R:
+ return ARM::PK_R;
+ case ARM::AK_ARMV7:
+ case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV8A:
+ case ARM::AK_ARMV8_1A:
+ return ARM::PK_A;
+ }
+ return ARM::PK_INVALID;
+}
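+// e.g. parseArchProfile("armv7em") returns ARM::PK_M.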
+
+// Version number (ex. v7 = 7).
+unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch(parseArch(Arch)) {
+ case ARM::AK_ARMV2:
+ case ARM::AK_ARMV2A:
+ return 2;
+ case ARM::AK_ARMV3:
+ case ARM::AK_ARMV3M:
+ return 3;
+ case ARM::AK_ARMV4:
+ case ARM::AK_ARMV4T:
+ return 4;
+ case ARM::AK_ARMV5:
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_IWMMXT:
+ case ARM::AK_IWMMXT2:
+ case ARM::AK_XSCALE:
+ case ARM::AK_ARMV5E:
+ case ARM::AK_ARMV5TEJ:
+ return 5;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6J:
+ case ARM::AK_ARMV6K:
+ case ARM::AK_ARMV6T2:
+ case ARM::AK_ARMV6Z:
+ case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6M:
+ case ARM::AK_ARMV6SM:
+ case ARM::AK_ARMV6HL:
+ return 6;
+ case ARM::AK_ARMV7:
+ case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV7R:
+ case ARM::AK_ARMV7M:
+ case ARM::AK_ARMV7L:
+ case ARM::AK_ARMV7HL:
+ case ARM::AK_ARMV7S:
+ case ARM::AK_ARMV7EM:
+ return 7;
+ case ARM::AK_ARMV8A:
+ case ARM::AK_ARMV8_1A:
+ return 8;
+ }
+ return 0;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
index f691883..eefef8a 100644
--- a/contrib/llvm/lib/Support/TargetRegistry.cpp
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -10,7 +10,6 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <vector>
@@ -19,8 +18,8 @@ using namespace llvm;
// Clients are responsible for avoiding race conditions in registration.
static Target *FirstTarget = nullptr;
-TargetRegistry::iterator TargetRegistry::begin() {
- return iterator(FirstTarget);
+iterator_range<TargetRegistry::iterator> TargetRegistry::targets() {
+ return make_range(iterator(FirstTarget), iterator());
}
const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
@@ -31,19 +30,17 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
// name, because it might be a backend that has no mapping to a target triple.
const Target *TheTarget = nullptr;
if (!ArchName.empty()) {
- for (TargetRegistry::iterator it = TargetRegistry::begin(),
- ie = TargetRegistry::end(); it != ie; ++it) {
- if (ArchName == it->getName()) {
- TheTarget = &*it;
- break;
- }
- }
+ auto I =
+ std::find_if(targets().begin(), targets().end(),
+ [&](const Target &T) { return ArchName == T.getName(); });
- if (!TheTarget) {
+ if (I == targets().end()) {
Error = "error: invalid target '" + ArchName + "'.\n";
return nullptr;
}
+ TheTarget = &*I;
+
// Adjust the triple to match (if known), otherwise stick with the
// given triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(ArchName);
@@ -67,30 +64,28 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
const Target *TargetRegistry::lookupTarget(const std::string &TT,
std::string &Error) {
// Provide special warning when no targets are initialized.
- if (begin() == end()) {
+ if (targets().begin() == targets().end()) {
Error = "Unable to find target for this triple (no targets are registered)";
return nullptr;
}
- const Target *Matching = nullptr;
- Triple::ArchType Arch = Triple(TT).getArch();
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- if (it->ArchMatchFn(Arch)) {
- if (Matching) {
- Error = std::string("Cannot choose between targets \"") +
- Matching->Name + "\" and \"" + it->Name + "\"";
- return nullptr;
- }
- Matching = &*it;
- }
- }
+ Triple::ArchType Arch = Triple(TT).getArch();
+ auto ArchMatch = [&](const Target &T) { return T.ArchMatchFn(Arch); };
+ auto I = std::find_if(targets().begin(), targets().end(), ArchMatch);
- if (!Matching) {
+ if (I == targets().end()) {
Error = "No available targets are compatible with this triple, "
"see -version for the available targets.";
return nullptr;
}
- return Matching;
+ auto J = std::find_if(std::next(I), targets().end(), ArchMatch);
+ if (J != targets().end()) {
+ Error = std::string("Cannot choose between targets \"") + I->Name +
+ "\" and \"" + J->Name + "\"";
+ return nullptr;
+ }
+
+ return &*I;
}
void TargetRegistry::RegisterTarget(Target &T,
@@ -124,10 +119,8 @@ static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
void TargetRegistry::printRegisteredTargetsForVersion() {
std::vector<std::pair<StringRef, const Target*> > Targets;
size_t Width = 0;
- for (TargetRegistry::iterator I = TargetRegistry::begin(),
- E = TargetRegistry::end();
- I != E; ++I) {
- Targets.push_back(std::make_pair(I->getName(), &*I));
+ for (const auto &T : TargetRegistry::targets()) {
+ Targets.push_back(std::make_pair(T.getName(), &T));
Width = std::max(Width, Targets.back().first.size());
}
array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index e1a531a..d7b6515 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -14,12 +14,10 @@
#include "llvm/Support/Timer.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index 0838e90..a63426f 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include <cstring>
using namespace llvm;
@@ -23,6 +24,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case aarch64_be: return "aarch64_be";
case arm: return "arm";
case armeb: return "armeb";
+ case bpf: return "bpf";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -36,6 +38,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case amdgcn: return "amdgcn";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
+ case sparcel: return "sparcel";
case systemz: return "s390x";
case tce: return "tce";
case thumb: return "thumb";
@@ -86,10 +89,13 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case amdgcn:
case r600: return "amdgpu";
+ case bpf: return "bpf";
+
case sparcv9:
+ case sparcel:
case sparc: return "sparc";
- case systemz: return "systemz";
+ case systemz: return "s390";
case x86:
case x86_64: return "x86";
@@ -138,6 +144,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case CloudABI: return "cloudabi";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
@@ -160,6 +167,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case CUDA: return "cuda";
case NVCL: return "nvcl";
case AMDHSA: return "amdhsa";
+ case PS4: return "ps4";
}
llvm_unreachable("Invalid OSType");
@@ -191,6 +199,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
.Case("arm", arm)
.Case("armeb", armeb)
+ .Case("bpf", bpf)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -204,6 +213,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("amdgcn", amdgcn)
.Case("hexagon", hexagon)
.Case("sparc", sparc)
+ .Case("sparcel", sparcel)
.Case("sparcv9", sparcv9)
.Case("systemz", systemz)
.Case("tce", tce)
@@ -227,55 +237,61 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
}
static Triple::ArchType parseARMArch(StringRef ArchName) {
- size_t offset = StringRef::npos;
+ unsigned ISA = ARMTargetParser::parseArchISA(ArchName);
+ unsigned ENDIAN = ARMTargetParser::parseArchEndian(ArchName);
+
Triple::ArchType arch = Triple::UnknownArch;
- bool isThumb = ArchName.startswith("thumb");
-
- if (ArchName.equals("arm"))
- return Triple::arm;
- if (ArchName.equals("armeb"))
- return Triple::armeb;
- if (ArchName.equals("thumb"))
- return Triple::thumb;
- if (ArchName.equals("thumbeb"))
- return Triple::thumbeb;
- if (ArchName.equals("arm64") || ArchName.equals("aarch64"))
- return Triple::aarch64;
- if (ArchName.equals("aarch64_be"))
- return Triple::aarch64_be;
-
- if (ArchName.startswith("armv")) {
- offset = 3;
- if (ArchName.endswith("eb")) {
- arch = Triple::armeb;
- ArchName = ArchName.substr(0, ArchName.size() - 2);
- } else
+ switch (ENDIAN) {
+ case ARM::EK_LITTLE: {
+ switch (ISA) {
+ case ARM::IK_ARM:
arch = Triple::arm;
- } else if (ArchName.startswith("armebv")) {
- offset = 5;
- arch = Triple::armeb;
- } else if (ArchName.startswith("thumbv")) {
- offset = 5;
- if (ArchName.endswith("eb")) {
- arch = Triple::thumbeb;
- ArchName = ArchName.substr(0, ArchName.size() - 2);
- } else
+ break;
+ case ARM::IK_THUMB:
arch = Triple::thumb;
- } else if (ArchName.startswith("thumbebv")) {
- offset = 7;
- arch = Triple::thumbeb;
+ break;
+ case ARM::IK_AARCH64:
+ arch = Triple::aarch64;
+ break;
+ }
+ break;
}
- return StringSwitch<Triple::ArchType>(ArchName.substr(offset))
- .Cases("v2", "v2a", isThumb ? Triple::UnknownArch : arch)
- .Cases("v3", "v3m", isThumb ? Triple::UnknownArch : arch)
- .Cases("v4", "v4t", arch)
- .Cases("v5", "v5e", "v5t", "v5te", "v5tej", arch)
- .Cases("v6", "v6j", "v6k", "v6m", arch)
- .Cases("v6t2", "v6z", "v6zk", arch)
- .Cases("v7", "v7a", "v7em", "v7l", arch)
- .Cases("v7m", "v7r", "v7s", arch)
- .Cases("v8", "v8a", arch)
- .Default(Triple::UnknownArch);
+ case ARM::EK_BIG: {
+ switch (ISA) {
+ case ARM::IK_ARM:
+ arch = Triple::armeb;
+ break;
+ case ARM::IK_THUMB:
+ arch = Triple::thumbeb;
+ break;
+ case ARM::IK_AARCH64:
+ arch = Triple::aarch64_be;
+ break;
+ }
+ break;
+ }
+ }
+
+ ArchName = ARMTargetParser::getCanonicalArchName(ArchName);
+ if (ArchName.empty())
+ return Triple::UnknownArch;
+
+ // Thumb only exists in v4+
+ if (ISA == ARM::IK_THUMB &&
+ (ArchName.startswith("v2") || ArchName.startswith("v3")))
+ return Triple::UnknownArch;
+
+ // The v6-M profile is Thumb-only; return a Thumb arch for it.
+ unsigned Profile = ARMTargetParser::parseArchProfile(ArchName);
+ unsigned Version = ARMTargetParser::parseArchVersion(ArchName);
+ if (Profile == ARM::PK_M && Version == 6) {
+ if (ENDIAN == ARM::EK_BIG)
+ return Triple::thumbeb;
+ else
+ return Triple::thumb;
+ }
+
+ return arch;
}
static Triple::ArchType parseArch(StringRef ArchName) {
@@ -301,9 +317,11 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("mips64el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
+ .Case("bpf", Triple::bpf)
.Case("hexagon", Triple::hexagon)
.Case("s390x", Triple::systemz)
.Case("sparc", Triple::sparc)
+ .Case("sparcel", Triple::sparcel)
.Cases("sparcv9", "sparc64", Triple::sparcv9)
.Case("tce", Triple::tce)
.Case("xcore", Triple::xcore)
@@ -339,6 +357,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
static Triple::OSType parseOS(StringRef OSName) {
return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("cloudabi", Triple::CloudABI)
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
.StartsWith("freebsd", Triple::FreeBSD)
@@ -362,6 +381,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("cuda", Triple::CUDA)
.StartsWith("nvcl", Triple::NVCL)
.StartsWith("amdhsa", Triple::AMDHSA)
+ .StartsWith("ps4", Triple::PS4)
.Default(Triple::UnknownOS);
}
@@ -390,31 +410,64 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
- if (SubArchName.endswith("eb"))
- SubArchName = SubArchName.substr(0, SubArchName.size() - 2);
-
- return StringSwitch<Triple::SubArchType>(SubArchName)
- .EndsWith("v8", Triple::ARMSubArch_v8)
- .EndsWith("v8a", Triple::ARMSubArch_v8)
- .EndsWith("v7", Triple::ARMSubArch_v7)
- .EndsWith("v7a", Triple::ARMSubArch_v7)
- .EndsWith("v7em", Triple::ARMSubArch_v7em)
- .EndsWith("v7l", Triple::ARMSubArch_v7)
- .EndsWith("v7m", Triple::ARMSubArch_v7m)
- .EndsWith("v7r", Triple::ARMSubArch_v7)
- .EndsWith("v7s", Triple::ARMSubArch_v7s)
- .EndsWith("v6", Triple::ARMSubArch_v6)
- .EndsWith("v6m", Triple::ARMSubArch_v6m)
- .EndsWith("v6t2", Triple::ARMSubArch_v6t2)
- .EndsWith("v5", Triple::ARMSubArch_v5)
- .EndsWith("v5e", Triple::ARMSubArch_v5)
- .EndsWith("v5t", Triple::ARMSubArch_v5)
- .EndsWith("v5te", Triple::ARMSubArch_v5te)
- .EndsWith("v4t", Triple::ARMSubArch_v4t)
- .EndsWith("kalimba3", Triple::KalimbaSubArch_v3)
- .EndsWith("kalimba4", Triple::KalimbaSubArch_v4)
- .EndsWith("kalimba5", Triple::KalimbaSubArch_v5)
- .Default(Triple::NoSubArch);
+ StringRef ARMSubArch = ARMTargetParser::getCanonicalArchName(SubArchName);
+
+ // Not a valid ARM arch name; handle the few non-ARM sub-arches here and
+ // return early.
+ if (ARMSubArch.empty())
+ return StringSwitch<Triple::SubArchType>(SubArchName)
+ .EndsWith("kalimba3", Triple::KalimbaSubArch_v3)
+ .EndsWith("kalimba4", Triple::KalimbaSubArch_v4)
+ .EndsWith("kalimba5", Triple::KalimbaSubArch_v5)
+ .Default(Triple::NoSubArch);
+
+ // ARM sub arch.
+ switch(ARMTargetParser::parseArch(ARMSubArch)) {
+ case ARM::AK_ARMV4:
+ return Triple::NoSubArch;
+ case ARM::AK_ARMV4T:
+ return Triple::ARMSubArch_v4t;
+ case ARM::AK_ARMV5:
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5E:
+ return Triple::ARMSubArch_v5;
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_IWMMXT:
+ case ARM::AK_IWMMXT2:
+ case ARM::AK_XSCALE:
+ case ARM::AK_ARMV5TEJ:
+ return Triple::ARMSubArch_v5te;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6J:
+ case ARM::AK_ARMV6Z:
+ return Triple::ARMSubArch_v6;
+ case ARM::AK_ARMV6K:
+ case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6HL:
+ return Triple::ARMSubArch_v6k;
+ case ARM::AK_ARMV6T2:
+ return Triple::ARMSubArch_v6t2;
+ case ARM::AK_ARMV6M:
+ case ARM::AK_ARMV6SM:
+ return Triple::ARMSubArch_v6m;
+ case ARM::AK_ARMV7:
+ case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV7R:
+ case ARM::AK_ARMV7L:
+ case ARM::AK_ARMV7HL:
+ return Triple::ARMSubArch_v7;
+ case ARM::AK_ARMV7M:
+ return Triple::ARMSubArch_v7m;
+ case ARM::AK_ARMV7S:
+ return Triple::ARMSubArch_v7s;
+ case ARM::AK_ARMV7EM:
+ return Triple::ARMSubArch_v7em;
+ case ARM::AK_ARMV8A:
+ return Triple::ARMSubArch_v8;
+ case ARM::AK_ARMV8_1A:
+ return Triple::ARMSubArch_v8_1a;
+ default:
+ return Triple::NoSubArch;
+ }
}
static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
@@ -428,6 +481,30 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
}
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
+ switch (T.getArch()) {
+ default:
+ break;
+ case Triple::hexagon:
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ case Triple::r600:
+ case Triple::amdgcn:
+ case Triple::sparc:
+ case Triple::sparcv9:
+ case Triple::systemz:
+ case Triple::xcore:
+ case Triple::ppc64le:
+ return Triple::ELF;
+
+ case Triple::ppc:
+ case Triple::ppc64:
+ if (T.isOSDarwin())
+ return Triple::MachO;
+ return Triple::ELF;
+ }
+
if (T.isOSDarwin())
return Triple::MachO;
else if (T.isOSWindows())
@@ -706,6 +783,14 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const {
StringRef OSName = getOSName();
+ // For Android, we care about the Android version rather than the Linux
+ // version.
+ if (getEnvironment() == Android) {
+ OSName = getEnvironmentName().substr(strlen("android"));
+ if (OSName.startswith("eabi"))
+ OSName = OSName.substr(strlen("eabi"));
+ }
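+ // e.g. for an environment name of "androideabi21", OSName is now "21".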
+
// Assume that the OS portion of the triple starts with the canonical name.
StringRef OSTypeName = getOSTypeName(getOS());
if (OSName.startswith(OSTypeName))
@@ -808,7 +893,11 @@ void Triple::setOS(OSType Kind) {
}
void Triple::setEnvironment(EnvironmentType Kind) {
- setEnvironmentName(getEnvironmentTypeName(Kind));
+ if (ObjectFormat == getDefaultFormat(*this))
+ return setEnvironmentName(getEnvironmentTypeName(Kind));
+
+ setEnvironmentName((getEnvironmentTypeName(Kind) + Twine("-") +
+ getObjectFormatTypeName(ObjectFormat)).str());
}
void Triple::setObjectFormat(ObjectFormatType Kind) {
@@ -827,7 +916,7 @@ void Triple::setArchName(StringRef Str) {
Triple += getVendorName();
Triple += "-";
Triple += getOSAndEnvironmentName();
- setTriple(Triple.str());
+ setTriple(Triple);
}
void Triple::setVendorName(StringRef Str) {
@@ -869,6 +958,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::ppc:
case llvm::Triple::r600:
case llvm::Triple::sparc:
+ case llvm::Triple::sparcel:
case llvm::Triple::tce:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
@@ -883,6 +973,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::amdgcn:
+ case llvm::Triple::bpf:
case llvm::Triple::le64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
@@ -919,6 +1010,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::amdgcn:
+ case Triple::bpf:
case Triple::msp430:
case Triple::systemz:
case Triple::ppc64le:
@@ -939,6 +1031,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::ppc:
case Triple::r600:
case Triple::sparc:
+ case Triple::sparcel:
case Triple::tce:
case Triple::thumb:
case Triple::thumbeb:
@@ -975,11 +1068,13 @@ Triple Triple::get64BitArchVariant() const {
case Triple::thumb:
case Triple::thumbeb:
case Triple::xcore:
+ case Triple::sparcel:
T.setArch(UnknownArch);
break;
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::bpf:
case Triple::le64:
case Triple::amdil64:
case Triple::amdgcn:
@@ -1010,14 +1105,15 @@ Triple Triple::get64BitArchVariant() const {
return T;
}
-// FIXME: tblgen this.
const char *Triple::getARMCPUForArch(StringRef MArch) const {
if (MArch.empty())
MArch = getArchName();
+ // Some defaults are forced.
switch (getOS()) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
+ // FIXME: This doesn't work on BE/thumb variants.
if (MArch == "armv6")
return "arm1176jzf-s";
break;
@@ -1028,51 +1124,16 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
break;
}
- const char *result = nullptr;
- size_t offset = StringRef::npos;
- if (MArch.startswith("arm"))
- offset = 3;
- if (MArch.startswith("thumb"))
- offset = 5;
- if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb")
- offset += 2;
- if (MArch.endswith("eb"))
- MArch = MArch.substr(0, MArch.size() - 2);
- if (offset != StringRef::npos)
- result = llvm::StringSwitch<const char *>(MArch.substr(offset))
- .Cases("v2", "v2a", "arm2")
- .Case("v3", "arm6")
- .Case("v3m", "arm7m")
- .Case("v4", "strongarm")
- .Case("v4t", "arm7tdmi")
- .Cases("v5", "v5t", "arm10tdmi")
- .Cases("v5e", "v5te", "arm1022e")
- .Case("v5tej", "arm926ej-s")
- .Cases("v6", "v6k", "arm1136jf-s")
- .Case("v6j", "arm1136j-s")
- .Cases("v6z", "v6zk", "arm1176jzf-s")
- .Case("v6t2", "arm1156t2-s")
- .Cases("v6m", "v6-m", "cortex-m0")
- .Cases("v7", "v7a", "v7-a", "v7l", "v7-l", "cortex-a8")
- .Cases("v7s", "v7-s", "swift")
- .Cases("v7r", "v7-r", "cortex-r4")
- .Cases("v7m", "v7-m", "cortex-m3")
- .Cases("v7em", "v7e-m", "cortex-m4")
- .Cases("v8", "v8a", "v8-a", "cortex-a53")
- .Default(nullptr);
- else
- result = llvm::StringSwitch<const char *>(MArch)
- .Case("ep9312", "ep9312")
- .Case("iwmmxt", "iwmmxt")
- .Case("xscale", "xscale")
- .Default(nullptr);
-
- if (result)
- return result;
-
- // If all else failed, return the most base CPU with thumb interworking
- // supported by LLVM.
- // FIXME: Should warn once that we're falling back.
+ MArch = ARMTargetParser::getCanonicalArchName(MArch);
+ if (MArch.empty())
+ return nullptr;
+
+ const char *CPU = ARMTargetParser::getDefaultCPU(MArch);
+ if (CPU)
+ return CPU;
+
+ // If no specific architecture version is requested, return the minimum CPU
+ // required by the OS and environment.
switch (getOS()) {
case llvm::Triple::NetBSD:
switch (getEnvironment()) {
@@ -1084,6 +1145,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
default:
return "strongarm";
}
+ case llvm::Triple::NaCl:
+ return "cortex-a8";
default:
switch (getEnvironment()) {
case llvm::Triple::EABIHF:
@@ -1093,4 +1156,6 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
return "arm7tdmi";
}
}
+
+ llvm_unreachable("invalid arch name");
}
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
index 56ed964..020dd95 100644
--- a/contrib/llvm/lib/Support/Twine.cpp
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -28,13 +28,6 @@ void Twine::toVector(SmallVectorImpl<char> &Out) const {
print(OS);
}
-StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
- if (isSingleStringRef())
- return getSingleStringRef();
- toVector(Out);
- return StringRef(Out.data(), Out.size());
-}
-
StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
if (isUnary()) {
switch (getLHSKind()) {
@@ -72,6 +65,9 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr,
case Twine::StringRefKind:
OS << *Ptr.stringRef;
break;
+ case Twine::SmallStringKind:
+ OS << *Ptr.smallString;
+ break;
case Twine::CharKind:
OS << Ptr.character;
break;
@@ -122,6 +118,9 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
OS << "stringref:\""
<< Ptr.stringRef << "\"";
break;
+ case Twine::SmallStringKind:
+ OS << "smallstring:\"" << *Ptr.smallString << "\"";
+ break;
case Twine::CharKind:
OS << "char:\"" << Ptr.character << "\"";
break;
diff --git a/contrib/llvm/lib/Support/Unix/COM.inc b/contrib/llvm/lib/Support/Unix/COM.inc
new file mode 100644
index 0000000..5b71de7
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/COM.inc
@@ -0,0 +1,27 @@
+//===- llvm/Support/Unix/COM.inc - Unix COM Implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix portion of COM support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading,
+ bool SpeedOverMemory) {}
+
+InitializeCOMRAII::~InitializeCOMRAII() {}
+}
+}
diff --git a/contrib/llvm/lib/Support/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc
index 519f2a1..4572171 100644
--- a/contrib/llvm/lib/Support/Unix/Host.inc
+++ b/contrib/llvm/lib/Support/Unix/Host.inc
@@ -35,29 +35,15 @@ static std::string getOSVersion() {
}
std::string sys::getDefaultTargetTriple() {
- StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
- std::pair<StringRef, StringRef> ArchSplit = TargetTripleString.split('-');
-
- // Normalize the arch, since the target triple may not actually match the
- // target.
- std::string Arch = ArchSplit.first;
-
- std::string Triple(Arch);
- Triple += '-';
- Triple += ArchSplit.second;
-
- // Force i<N>86 to i386.
- if (Triple[0] == 'i' && isdigit(Triple[1]) &&
- Triple[2] == '8' && Triple[3] == '6')
- Triple[1] = '3';
+ std::string TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
// On darwin, we want to update the version to match that of the
// target.
- std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+ std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin");
if (DarwinDashIdx != std::string::npos) {
- Triple.resize(DarwinDashIdx + strlen("-darwin"));
- Triple += getOSVersion();
+ TargetTripleString.resize(DarwinDashIdx + strlen("-darwin"));
+ TargetTripleString += getOSVersion();
}
- return Triple::normalize(Triple);
+ return Triple::normalize(TargetTripleString);
}
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
index 530f86c..df13bd2 100644
--- a/contrib/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -39,6 +39,9 @@
!defined(__OpenBSD__) && !defined(__Bitrig__)
#include <malloc.h>
#endif
+#if defined(HAVE_MALLCTL)
+#include <malloc_np.h>
+#endif
#ifdef HAVE_MALLOC_MALLOC_H
#include <malloc/malloc.h>
#endif
@@ -98,6 +101,12 @@ size_t Process::GetMallocUsage() {
malloc_statistics_t Stats;
malloc_zone_statistics(malloc_default_zone(), &Stats);
return Stats.size_in_use; // darwin
+#elif defined(HAVE_MALLCTL)
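+ // With jemalloc (e.g. on FreeBSD), the "stats.allocated" mallctl reports
+ // the total number of bytes currently allocated by the application.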
+ size_t alloc, sz;
+ sz = sizeof(size_t);
+ if (mallctl("stats.allocated", &alloc, &sz, NULL, 0) == 0)
+ return alloc;
+ return 0;
#elif defined(HAVE_SBRK)
// Note this is only an approximation and more closely resembles
// the value returned by mallinfo in the arena field.
@@ -105,8 +114,7 @@ size_t Process::GetMallocUsage() {
char *EndOfMemory = (char*)sbrk(0);
if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
return EndOfMemory - StartOfMemory;
- else
- return 0;
+ return 0;
#else
#warning Cannot get malloc info on this platform
return 0;
@@ -191,8 +199,8 @@ public:
}
private:
- FDCloser(const FDCloser &) LLVM_DELETED_FUNCTION;
- void operator=(const FDCloser &) LLVM_DELETED_FUNCTION;
+ FDCloser(const FDCloser &) = delete;
+ void operator=(const FDCloser &) = delete;
int &FD;
bool KeepOpen;
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index 0f45df1..5816fb8 100644
--- a/contrib/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -18,10 +18,11 @@
#include "Unix.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <llvm/Config/config.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -42,7 +43,18 @@
#define _RESTRICT_KYWD
#endif
#include <spawn.h>
-#if !defined(__APPLE__)
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if defined(__APPLE__) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE)
+#define USE_NSGETENVIRON 1
+#else
+#define USE_NSGETENVIRON 0
+#endif
+
+#if !USE_NSGETENVIRON
extern char **environ;
#else
#include <crt_externs.h> // _NSGetEnviron
@@ -217,7 +229,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
}
if (!envp)
-#if !defined(__APPLE__)
+#if !USE_NSGETENVIRON
envp = const_cast<const char **>(environ);
#else
// environ is missing in dylibs.
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index e8f4643..bfe2a3a 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -14,6 +14,7 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/ManagedStatic.h"
@@ -111,6 +112,12 @@ static void RegisterHandler(int Signal) {
}
static void RegisterHandlers() {
+ // Dereference the signals mutex during handler registration to force its
+ // construction: you can't safely call new in a signal handler, so its
+ // first use must not happen while an actual signal is being handled.
+ *SignalsMutex;
+
// If the handlers are already registered, we're done.
if (NumRegisteredSignals != 0) return;
@@ -132,6 +139,11 @@ static void UnregisterHandlers() {
/// NB: This must be an async signal safe function. It cannot allocate or free
/// memory, even in debug builds.
static void RemoveFilesToRemove() {
+ // Avoid constructing ManagedStatic in the signal handler.
+ // If FilesToRemove is not constructed, there are no files to remove.
+ if (!FilesToRemove.isConstructed())
+ return;
+
// We avoid iterators here because debug-mode iterators may allocate or
// release memory.
std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
@@ -193,10 +205,11 @@ static RETSIGTYPE SignalHandler(int Sig) {
}
// Otherwise if it is a fault (like SEGV) run any handler.
- std::vector<std::pair<void (*)(void *), void *>>& CallBacksToRunRef =
- *CallBacksToRun;
- for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
- CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
+ if (CallBacksToRun.isConstructed()) {
+ auto &CallBacksToRunRef = *CallBacksToRun;
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
+ }
#ifdef __s390__
// On S/390, certain signals are delivered with PSW Address pointing to
@@ -324,7 +337,8 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
}
#endif
-static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
+static bool printSymbolizedStackTrace(void **StackTrace, int Depth,
+ llvm::raw_ostream &OS) {
// FIXME: Subtract the necessary number from StackTrace entries to turn return addresses
// into actual instruction addresses.
// Use llvm-symbolizer tool to symbolize the stack traces.
@@ -382,7 +396,7 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
int frame_no = 0;
for (int i = 0; i < Depth; i++) {
if (!Modules[i]) {
- fprintf(FD, "#%d %p\n", frame_no++, StackTrace[i]);
+ OS << format("#%d %p\n", frame_no++, StackTrace[i]);
continue;
}
// Read pairs of lines (function name and file/line info) until we
@@ -393,17 +407,17 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
- fprintf(FD, "#%d %p ", frame_no++, StackTrace[i]);
+ OS << format("#%d %p ", frame_no++, StackTrace[i]);
if (!FunctionName.startswith("??"))
- fprintf(FD, "%s ", FunctionName.str().c_str());
+ OS << format("%s ", FunctionName.str().c_str());
if (CurLine == Lines.end())
return false;
StringRef FileLineInfo = *CurLine++;
if (!FileLineInfo.startswith("??"))
- fprintf(FD, "%s", FileLineInfo.str().c_str());
+ OS << format("%s", FileLineInfo.str().c_str());
else
- fprintf(FD, "(%s+%p)", Modules[i], (void *)Offsets[i]);
- fprintf(FD, "\n");
+ OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]);
+ OS << "\n";
}
}
return true;
@@ -415,13 +429,13 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
-void llvm::sys::PrintStackTrace(FILE *FD) {
+void llvm::sys::PrintStackTrace(raw_ostream &OS) {
#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static void* StackTrace[256];
// Use backtrace() to output a backtrace on Linux systems with glibc.
int depth = backtrace(StackTrace,
static_cast<int>(array_lengthof(StackTrace)));
- if (printSymbolizedStackTrace(StackTrace, depth, FD))
+ if (printSymbolizedStackTrace(StackTrace, depth, OS))
return;
#if HAVE_DLFCN_H && __GNUG__
int width = 0;
@@ -441,34 +455,34 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
- fprintf(FD, "%-2d", i);
+ OS << format("%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
- if (!name) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
- else fprintf(FD, " %-*s", width, name+1);
+ if (!name) OS << format(" %-*s", width, dlinfo.dli_fname);
+ else OS << format(" %-*s", width, name+1);
- fprintf(FD, " %#0*lx",
- (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+ OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2,
+ (unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != nullptr) {
- fputc(' ', FD);
+ OS << ' ';
# if HAVE_CXXABI_H
int res;
char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res);
# else
char* d = NULL;
# endif
- if (!d) fputs(dlinfo.dli_sname, FD);
- else fputs(d, FD);
+ if (!d) OS << dlinfo.dli_sname;
+ else OS << d;
free(d);
// FIXME: When we move to C++11, use %t length modifier. It's not in
// C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
// the stack offset for a stack dump isn't likely to cause any problems.
- fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]-
- (char*)dlinfo.dli_saddr));
+ OS << format(" + %u",(unsigned)((char*)StackTrace[i]-
+ (char*)dlinfo.dli_saddr));
}
- fputc('\n', FD);
+ OS << '\n';
}
#else
backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
@@ -477,17 +491,19 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
}
static void PrintStackTraceSignalHandler(void *) {
- PrintStackTrace(stderr);
+ PrintStackTrace(llvm::errs());
}
+void llvm::sys::DisableSystemDialogsOnCrash() {}
+
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void llvm::sys::PrintStackTraceOnErrorSignal() {
+void llvm::sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) {
AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
// Environment variable to disable any kind of crash dialog.
- if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+ if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
exception_mask_t mask = EXC_MASK_CRASH;
diff --git a/contrib/llvm/lib/Support/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
index 2c6d6aa..facf8d9 100644
--- a/contrib/llvm/lib/Support/Valgrind.cpp
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -53,7 +53,6 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
#endif // !HAVE_VALGRIND_VALGRIND_H
-#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
// These functions require no implementation; tsan just looks at the arguments
// they're called with. However, they are required to be weak as some other
// application or library may already be providing these definitions for the
@@ -72,4 +71,4 @@ void AnnotateIgnoreWritesBegin(const char *file, int line) {}
LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line);
void AnnotateIgnoreWritesEnd(const char *file, int line) {}
}
-#endif
+
diff --git a/contrib/llvm/lib/Support/Windows/COM.inc b/contrib/llvm/lib/Support/Windows/COM.inc
new file mode 100644
index 0000000..0c50d6f
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/COM.inc
@@ -0,0 +1,37 @@
+//===- llvm/Support/Windows/COM.inc - Windows COM Implementation *- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows portion of COM support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Windows code.
+//===----------------------------------------------------------------------===//
+
+#include <objbase.h>
+
+namespace llvm {
+namespace sys {
+
+InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading,
+ bool SpeedOverMemory) {
+ DWORD Coinit = 0;
+ if (Threading == COMThreadingMode::SingleThreaded)
+ Coinit |= COINIT_APARTMENTTHREADED;
+ else
+ Coinit |= COINIT_MULTITHREADED;
+ if (SpeedOverMemory)
+ Coinit |= COINIT_SPEED_OVER_MEMORY;
+ ::CoInitializeEx(nullptr, Coinit);
+}
+
+InitializeCOMRAII::~InitializeCOMRAII() { ::CoUninitialize(); }
+}
+}
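+// Typical use is scoped, RAII-style initialization; assuming the enumerators
+// declared in llvm/Support/COM.h, for example:
+//   llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);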
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index d8b5702..72da7c5 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -46,10 +46,6 @@ using llvm::sys::windows::UTF8ToUTF16;
using llvm::sys::windows::UTF16ToUTF8;
using llvm::sys::path::widenPath;
-static std::error_code windows_error(DWORD E) {
- return mapWindowsError(E);
-}
-
static bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -83,7 +79,7 @@ std::error_code widenPath(const Twine &Path8,
else {
CurPathLen = ::GetCurrentDirectoryW(0, NULL);
if (CurPathLen == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
}
// Would the absolute path be longer than our limit?
@@ -174,7 +170,7 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
// A zero return value indicates a failure other than insufficient space.
if (len == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
// If there's insufficient space, the len returned is larger than the len
// given.
@@ -195,7 +191,7 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
DWORD LastError = ::GetLastError();
if (LastError != ERROR_ALREADY_EXISTS || !IgnoreExisting)
- return windows_error(LastError);
+ return mapWindowsError(LastError);
}
return std::error_code();
@@ -212,7 +208,7 @@ std::error_code create_link(const Twine &to, const Twine &from) {
return ec;
if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
return std::error_code();
}
@@ -232,14 +228,14 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
if (ST.type() == file_type::directory_file) {
if (!::RemoveDirectoryW(c_str(path_utf16))) {
- std::error_code EC = windows_error(::GetLastError());
+ std::error_code EC = mapWindowsError(::GetLastError());
if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting)
return EC;
}
return std::error_code();
}
if (!::DeleteFileW(c_str(path_utf16))) {
- std::error_code EC = windows_error(::GetLastError());
+ std::error_code EC = mapWindowsError(::GetLastError());
if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting)
return EC;
}
@@ -261,6 +257,7 @@ std::error_code rename(const Twine &from, const Twine &to) {
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
return std::error_code();
DWORD LastError = ::GetLastError();
+ ec = mapWindowsError(LastError);
if (LastError != ERROR_ACCESS_DENIED)
break;
// Retry MoveFile() at ACCESS_DENIED.
@@ -294,7 +291,7 @@ std::error_code access(const Twine &Path, AccessMode Mode) {
DWORD LastError = ::GetLastError();
if (LastError != ERROR_FILE_NOT_FOUND &&
LastError != ERROR_PATH_NOT_FOUND)
- return windows_error(LastError);
+ return mapWindowsError(LastError);
return errc::no_such_file_or_directory;
}
@@ -358,7 +355,7 @@ static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
case FILE_TYPE_UNKNOWN: {
DWORD Err = ::GetLastError();
if (Err != NO_ERROR)
- return windows_error(Err);
+ return mapWindowsError(Err);
Result = file_status(file_type::type_unknown);
return std::error_code();
}
@@ -397,7 +394,7 @@ handle_status_error:
Result = file_status(file_type::type_unknown);
else
Result = file_status(file_type::status_error);
- return windows_error(LastError);
+ return mapWindowsError(LastError);
}
std::error_code status(const Twine &path, file_status &result) {
@@ -454,7 +451,7 @@ std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
FT.dwHighDateTime = UI.HighPart;
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (!SetFileTime(FileHandle, NULL, &FT, &FT))
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
return std::error_code();
}
@@ -481,7 +478,7 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
(Offset + Size) & 0xffffffff,
0);
if (FileMappingHandle == NULL) {
- std::error_code ec = windows_error(GetLastError());
+ std::error_code ec = mapWindowsError(GetLastError());
return ec;
}
@@ -497,7 +494,7 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
Offset & 0xffffffff,
Size);
if (Mapping == NULL) {
- std::error_code ec = windows_error(GetLastError());
+ std::error_code ec = mapWindowsError(GetLastError());
::CloseHandle(FileMappingHandle);
return ec;
}
@@ -506,7 +503,7 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
MEMORY_BASIC_INFORMATION mbi;
SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
if (Result == 0) {
- std::error_code ec = windows_error(GetLastError());
+ std::error_code ec = mapWindowsError(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
return ec;
@@ -575,7 +572,7 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
WIN32_FIND_DATAW FirstFind;
ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
if (!FindHandle)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
size_t FilenameLen = ::wcslen(FirstFind.cFileName);
while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
@@ -586,7 +583,7 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
return detail::directory_iterator_destruct(it);
- return windows_error(LastError);
+ return mapWindowsError(LastError);
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
@@ -599,8 +596,8 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
it.IterationHandle = intptr_t(FindHandle.take());
SmallString<128> directory_entry_path(path);
- path::append(directory_entry_path, directory_entry_name_utf8.str());
- it.CurrentEntry = directory_entry(directory_entry_path.str());
+ path::append(directory_entry_path, directory_entry_name_utf8);
+ it.CurrentEntry = directory_entry(directory_entry_path);
return std::error_code();
}
@@ -621,7 +618,7 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
return detail::directory_iterator_destruct(it);
- return windows_error(LastError);
+ return mapWindowsError(LastError);
}
size_t FilenameLen = ::wcslen(FindData.cFileName);
@@ -651,7 +648,7 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
- std::error_code EC = windows_error(LastError);
+ std::error_code EC = mapWindowsError(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there is probably
// no performance issues.
@@ -665,7 +662,7 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
int FD = ::_open_osfhandle(intptr_t(H), 0);
if (FD == -1) {
::CloseHandle(H);
- return windows_error(ERROR_INVALID_HANDLE);
+ return mapWindowsError(ERROR_INVALID_HANDLE);
}
ResultFD = FD;
@@ -701,7 +698,7 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
if (H == INVALID_HANDLE_VALUE) {
DWORD LastError = ::GetLastError();
- std::error_code EC = windows_error(LastError);
+ std::error_code EC = mapWindowsError(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there is probably
// no performance issues.
@@ -722,7 +719,7 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
int FD = ::_open_osfhandle(intptr_t(H), OpenFlags);
if (FD == -1) {
::CloseHandle(H);
- return windows_error(ERROR_INVALID_HANDLE);
+ return mapWindowsError(ERROR_INVALID_HANDLE);
}
ResultFD = FD;
@@ -799,7 +796,7 @@ std::error_code UTF8ToUTF16(llvm::StringRef utf8,
utf8.size(), utf16.begin(), 0);
if (len == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
utf16.reserve(len + 1);
utf16.set_size(len);
@@ -808,7 +805,7 @@ std::error_code UTF8ToUTF16(llvm::StringRef utf8,
utf8.size(), utf16.begin(), utf16.size());
if (len == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
}
// Make utf16 null terminated.
@@ -828,7 +825,7 @@ std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
0, NULL, NULL);
if (len == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
utf8.reserve(len);
utf8.set_size(len);
@@ -838,7 +835,7 @@ std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
utf8.size(), NULL, NULL);
if (len == 0)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
}
// Make utf8 null terminated.
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index 854eac7..8164956 100644
--- a/contrib/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -156,10 +156,6 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Res.data());
}
-static std::error_code windows_error(DWORD E) {
- return mapWindowsError(E);
-}
-
static void AllocateAndPush(const SmallVectorImpl<char> &S,
SmallVectorImpl<const char *> &Vector,
SpecificBumpPtrAllocator<char> &Allocator) {
@@ -235,7 +231,7 @@ Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
wchar_t **UnicodeCommandLine =
CommandLineToArgvW(GetCommandLineW(), &ArgCount);
if (!UnicodeCommandLine)
- return windows_error(::GetLastError());
+ return mapWindowsError(::GetLastError());
Args.reserve(ArgCount);
std::error_code ec;
@@ -329,6 +325,16 @@ class DefaultColors
};
DefaultColors defaultColors;
+
+WORD fg_color(WORD color) {
+ return color & (FOREGROUND_BLUE | FOREGROUND_GREEN |
+ FOREGROUND_INTENSITY | FOREGROUND_RED);
+}
+
+WORD bg_color(WORD color) {
+ return color & (BACKGROUND_BLUE | BACKGROUND_GREEN |
+ BACKGROUND_INTENSITY | BACKGROUND_RED);
+}
}
bool Process::ColorNeedsFlush() {
@@ -350,6 +356,7 @@ const char *Process::OutputBold(bool bg) {
const char *Process::OutputColor(char code, bool bold, bool bg) {
if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7];
+ WORD current = DefaultColors::GetCurrentColor();
WORD colors;
if (bg) {
colors = ((code&1) ? BACKGROUND_RED : 0) |
@@ -357,12 +364,14 @@ const char *Process::OutputColor(char code, bool bold, bool bg) {
((code&4) ? BACKGROUND_BLUE : 0);
if (bold)
colors |= BACKGROUND_INTENSITY;
+ colors |= fg_color(current);
} else {
colors = ((code&1) ? FOREGROUND_RED : 0) |
((code&2) ? FOREGROUND_GREEN : 0 ) |
((code&4) ? FOREGROUND_BLUE : 0);
if (bold)
colors |= FOREGROUND_INTENSITY;
+ colors |= bg_color(current);
}
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
return 0;
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index c370077..75685de 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -434,7 +434,8 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
DWORD status;
BOOL rc = GetExitCodeProcess(PI.ProcessHandle, &status);
DWORD err = GetLastError();
- CloseHandle(PI.ProcessHandle);
+ if (err != ERROR_INVALID_HANDLE)
+ CloseHandle(PI.ProcessHandle);
if (!rc) {
SetLastError(err);
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index 35ba6f8..6006499 100644
--- a/contrib/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -10,12 +10,15 @@
// This file provides the Win32 specific implementation of the Signals class.
//
//===----------------------------------------------------------------------===//
-
#include "llvm/Support/FileSystem.h"
#include <algorithm>
+#include <signal.h>
#include <stdio.h>
#include <vector>
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
// The Windows.h header must be after LLVM and standard headers.
#include "WindowsSupport.h"
@@ -165,13 +168,97 @@ static std::vector<std::string> *FilesToRemove = NULL;
static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
static bool RegisteredUnhandledExceptionFilter = false;
static bool CleanupExecuted = false;
-static bool ExitOnUnhandledExceptions = false;
static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
// Windows creates a new thread to execute the console handler when an event
// (such as CTRL/C) occurs. This causes concurrency issues with the above
// globals which this critical section addresses.
static CRITICAL_SECTION CriticalSection;
+static bool CriticalSectionInitialized = false;
+
+static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
+ HANDLE hThread, STACKFRAME64 &StackFrame,
+ CONTEXT *Context) {
+ DWORD machineType;
+#if defined(_M_X64)
+ machineType = IMAGE_FILE_MACHINE_AMD64;
+#else
+ machineType = IMAGE_FILE_MACHINE_I386;
+#endif
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, Context, NULL,
+ SymFunctionTableAccess64, SymGetModuleBase64, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ using namespace llvm;
+ // Print the PC in hexadecimal.
+ DWORD64 PC = StackFrame.AddrPC.Offset;
+#if defined(_M_X64)
+ OS << format("0x%016llX", PC);
+#elif defined(_M_IX86)
+ OS << format("0x%08lX", static_cast<DWORD>(PC));
+#endif
+
+// Print the parameters. Assume there are four.
+#if defined(_M_X64)
+ OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
+ StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2],
+ StackFrame.Params[3]);
+#elif defined(_M_IX86)
+ OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ static_cast<DWORD>(StackFrame.Params[0]),
+ static_cast<DWORD>(StackFrame.Params[1]),
+ static_cast<DWORD>(StackFrame.Params[2]),
+ static_cast<DWORD>(StackFrame.Params[3]));
+#endif
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase64(hProcess, PC)) {
+ OS << " <unknown module>\n";
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
+
+ DWORD64 dwDisp;
+ if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
+ OS << '\n';
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name,
+ dwDisp);
+ else
+ OS << format(", %s", (const char*)symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE64 line = {};
+ DWORD dwLineDisp;
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
+ OS << format(", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwLineDisp > 0)
+ OS << format(" + 0x%lX byte(s)", dwLineDisp);
+ }
+
+ OS << '\n';
+ }
+}
namespace llvm {
@@ -184,7 +271,8 @@ namespace llvm {
/// AvoidMessageBoxHook - Emulates hitting "retry" from an "abort, retry,
/// ignore" CRT debug report dialog. "retry" raises an exception which
/// ultimately triggers our stack dumper.
-static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+static LLVM_ATTRIBUTE_UNUSED int
+AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
// Set *Return to the retry code for the return value of _CrtDbgReport:
// http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
// This may also trigger just-in-time debugging via DebugBreak().
@@ -196,6 +284,22 @@ static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
#endif
+extern "C" void HandleAbort(int Sig) {
+ if (Sig == SIGABRT) {
+ LLVM_BUILTIN_TRAP;
+ }
+}
+
+static void InitializeThreading() {
+ if (CriticalSectionInitialized)
+ return;
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+ CriticalSectionInitialized = true;
+}
+
static void RegisterHandler() {
#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR)
// On MinGW.org, we need to load up the symbols explicitly, because the
@@ -214,9 +318,7 @@ static void RegisterHandler() {
return;
}
- // Now's the time to create the critical section. This is the first time
- // through here, and there's only one thread.
- InitializeCriticalSection(&CriticalSection);
+ InitializeThreading();
// Enter it immediately. Now if someone hits CTRL/C, the console handler
// can't proceed until the globals are updated.
@@ -226,17 +328,6 @@ static void RegisterHandler() {
OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
- // Environment variable to disable any kind of crash dialog.
- if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
-#ifdef _MSC_VER
- _CrtSetReportHook(AvoidMessageBoxHook);
-#endif
- SetErrorMode(SEM_FAILCRITICALERRORS |
- SEM_NOGPFAULTERRORBOX |
- SEM_NOOPENFILEERRORBOX);
- ExitOnUnhandledExceptions = true;
- }
-
// IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
// else multi-threading problems will ensue.
}
@@ -267,7 +358,6 @@ void sys::DontRemoveFileOnSignal(StringRef Filename) {
RegisterHandler();
- FilesToRemove->push_back(Filename);
std::vector<std::string>::reverse_iterator I =
std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
if (I != FilesToRemove->rend())
@@ -276,19 +366,63 @@ void sys::DontRemoveFileOnSignal(StringRef Filename) {
LeaveCriticalSection(&CriticalSection);
}
+void sys::DisableSystemDialogsOnCrash() {
+ // Crash to stack trace handler on abort.
+ signal(SIGABRT, HandleAbort);
+
+ // The following functions are not reliably accessible on MinGW.
+#ifdef _MSC_VER
+ // We're already handling writing a "something went wrong" message.
+ _set_abort_behavior(0, _WRITE_ABORT_MSG);
+ // Disable Dr. Watson.
+ _set_abort_behavior(0, _CALL_REPORTFAULT);
+ _CrtSetReportHook(AvoidMessageBoxHook);
+#endif
+
+ // Disable standard error dialog box.
+ SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
+ _set_error_mode(_OUT_TO_STDERR);
+}
+
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void sys::PrintStackTraceOnErrorSignal() {
+void sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) {
+ DisableSystemDialogsOnCrash();
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
}
+}
+
+#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR)
+// Provide a prototype for RtlCaptureContext; mingw32 from mingw.org is
+// missing it, but mingw-w64 has it.
+extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
+#endif
-void llvm::sys::PrintStackTrace(FILE *) {
- // FIXME: Implement.
+void llvm::sys::PrintStackTrace(raw_ostream &OS) {
+
+ STACKFRAME64 StackFrame = {};
+ CONTEXT Context = {};
+ ::RtlCaptureContext(&Context);
+#if defined(_M_X64)
+ StackFrame.AddrPC.Offset = Context.Rip;
+ StackFrame.AddrStack.Offset = Context.Rsp;
+ StackFrame.AddrFrame.Offset = Context.Rbp;
+#else
+ StackFrame.AddrPC.Offset = Context.Eip;
+ StackFrame.AddrStack.Offset = Context.Esp;
+ StackFrame.AddrFrame.Offset = Context.Ebp;
+#endif
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+ PrintStackTraceForThread(OS, GetCurrentProcess(), GetCurrentThread(),
+ StackFrame, &Context);
}
-void sys::SetInterruptFunction(void (*IF)()) {
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
RegisterHandler();
InterruptFunction = IF;
LeaveCriticalSection(&CriticalSection);
@@ -298,16 +432,18 @@ void sys::SetInterruptFunction(void (*IF)()) {
/// AddSignalHandler - Add a function to be called when a signal is delivered
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
-void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
if (CallBacksToRun == 0)
CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
}
-}
static void Cleanup() {
+ if (CleanupExecuted)
+ return;
+
EnterCriticalSection(&CriticalSection);
// Prevent other thread from registering new files and directories for
@@ -323,13 +459,18 @@ static void Cleanup() {
}
if (CallBacksToRun)
- for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
- (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+ for (auto &I : *CallBacksToRun)
+ I.first(I.second);
LeaveCriticalSection(&CriticalSection);
}
void llvm::sys::RunInterruptHandlers() {
+ // The interrupt handler may be called from an interrupt, but it may also be
+ // called manually (such as the case of report_fatal_error with no registered
+ // error handler). We must ensure that the critical section is properly
+ // initialized.
+ InitializeThreading();
Cleanup();
}
@@ -337,12 +478,9 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
Cleanup();
// Initialize the STACKFRAME structure.
- STACKFRAME64 StackFrame;
- memset(&StackFrame, 0, sizeof(StackFrame));
+ STACKFRAME64 StackFrame = {};
- DWORD machineType;
#if defined(_M_X64)
- machineType = IMAGE_FILE_MACHINE_AMD64;
StackFrame.AddrPC.Offset = ep->ContextRecord->Rip;
StackFrame.AddrPC.Mode = AddrModeFlat;
StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp;
@@ -350,7 +488,6 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp;
StackFrame.AddrFrame.Mode = AddrModeFlat;
#elif defined(_M_IX86)
- machineType = IMAGE_FILE_MACHINE_I386;
StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
StackFrame.AddrPC.Mode = AddrModeFlat;
StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
@@ -361,90 +498,10 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
HANDLE hProcess = GetCurrentProcess();
HANDLE hThread = GetCurrentThread();
+ PrintStackTraceForThread(llvm::errs(), hProcess, hThread, StackFrame,
+ ep->ContextRecord);
- // Initialize the symbol handler.
- SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
- SymInitialize(hProcess, NULL, TRUE);
-
- while (true) {
- if (!StackWalk64(machineType, hProcess, hThread, &StackFrame,
- ep->ContextRecord, NULL, SymFunctionTableAccess64,
- SymGetModuleBase64, NULL)) {
- break;
- }
-
- if (StackFrame.AddrFrame.Offset == 0)
- break;
-
- // Print the PC in hexadecimal.
- DWORD64 PC = StackFrame.AddrPC.Offset;
-#if defined(_M_X64)
- fprintf(stderr, "0x%016llX", PC);
-#elif defined(_M_IX86)
- fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC));
-#endif
-
- // Print the parameters. Assume there are four.
-#if defined(_M_X64)
- fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
- StackFrame.Params[0],
- StackFrame.Params[1],
- StackFrame.Params[2],
- StackFrame.Params[3]);
-#elif defined(_M_IX86)
- fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
- static_cast<DWORD>(StackFrame.Params[0]),
- static_cast<DWORD>(StackFrame.Params[1]),
- static_cast<DWORD>(StackFrame.Params[2]),
- static_cast<DWORD>(StackFrame.Params[3]));
-#endif
- // Verify the PC belongs to a module in this process.
- if (!SymGetModuleBase64(hProcess, PC)) {
- fputs(" <unknown module>\n", stderr);
- continue;
- }
-
- // Print the symbol name.
- char buffer[512];
- IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
- memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
- symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
- symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
-
- DWORD64 dwDisp;
- if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
- fputc('\n', stderr);
- continue;
- }
-
- buffer[511] = 0;
- if (dwDisp > 0)
- fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp);
- else
- fprintf(stderr, ", %s", symbol->Name);
-
- // Print the source file and line number information.
- IMAGEHLP_LINE64 line;
- DWORD dwLineDisp;
- memset(&line, 0, sizeof(line));
- line.SizeOfStruct = sizeof(line);
- if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
- fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
- if (dwLineDisp > 0)
- fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp);
- }
-
- fputc('\n', stderr);
- }
-
- if (ExitOnUnhandledExceptions)
- _exit(ep->ExceptionRecord->ExceptionCode);
-
- // Allow dialog box to pop up allowing choice to start debugger.
- if (OldFilter)
- return (*OldFilter)(ep);
- else
- return EXCEPTION_CONTINUE_SEARCH;
+ _exit(ep->ExceptionRecord->ExceptionCode);
}
static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
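
Taken together, the Signals.inc changes give Windows a working stack printer and split the dialog suppression out into DisableSystemDialogsOnCrash. A sketch of how a tool's main might drive the new entry points, assuming the signatures shown in the hunks above:

#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"

int main(int argc, char **argv) {
  // Installs the handlers and, via DisableSystemDialogsOnCrash(), routes
  // SIGABRT and the CRT report dialogs into the stack dumper.
  llvm::sys::PrintStackTraceOnErrorSignal(false);

  // ... tool work ...

  // Walk and print the current thread's stack on demand.
  llvm::sys::PrintStackTrace(llvm::errs());
  return 0;
}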
diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
index 0223ab4..b90b4f1 100644
--- a/contrib/llvm/lib/Support/Windows/TimeValue.inc
+++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc
@@ -47,6 +47,7 @@ std::string TimeValue::str() const {
__time64_t OurTime = this->toEpochTime();
int Error = ::_localtime64_s(&Storage, &OurTime);
assert(!Error);
+ (void)Error;
LT = &Storage;
#endif
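
The added (void)Error keeps release builds warning-free: assert() compiles away under NDEBUG, which would otherwise leave Error unused. The pattern in isolation (illustrative, not part of the patch):

#include <cassert>

void check(int Error) {
  assert(!Error && "call failed");
  (void)Error; // still referenced when NDEBUG strips the assert
}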
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index 1d1cedc..5bb0b8d 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -19,6 +19,9 @@
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_WINDOWSSUPPORT_H
+#define LLVM_SUPPORT_WINDOWSSUPPORT_H
+
// mingw-w64 tends to define it as 0x0502 in its headers.
#undef _WIN32_WINNT
#undef _WIN32_IE
@@ -89,7 +92,7 @@ public:
}
// True if Handle is valid.
- LLVM_EXPLICIT operator bool() const {
+ explicit operator bool() const {
return HandleTraits::IsValid(Handle) ? true : false;
}
@@ -178,3 +181,5 @@ std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
index 4688ff1..d55da5e 100644
--- a/contrib/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -100,6 +101,7 @@ namespace yaml {
void Node::anchor() {}
void NullNode::anchor() {}
void ScalarNode::anchor() {}
+void BlockScalarNode::anchor() {}
void KeyValueNode::anchor() {}
void MappingNode::anchor() {}
void SequenceNode::anchor() {}
@@ -127,6 +129,7 @@ struct Token : ilist_node<Token> {
TK_Key,
TK_Value,
TK_Scalar,
+ TK_BlockScalar,
TK_Alias,
TK_Anchor,
TK_Tag
@@ -136,6 +139,9 @@ struct Token : ilist_node<Token> {
/// of the token in the input.
StringRef Range;
+ /// The value of a block scalar node.
+ std::string Value;
+
Token() : Kind(TK_Error) {}
};
}
@@ -162,7 +168,7 @@ struct ilist_node_traits<Token> {
Token *createNode(const Token &V) {
return new (Alloc.Allocate<Token>()) Token(V);
}
- static void deleteNode(Token *V) {}
+ static void deleteNode(Token *V) { V->~Token(); }
void addNodeToList(Token *) {}
void removeNodeFromList(Token *) {}
@@ -259,8 +265,8 @@ namespace yaml {
/// @brief Scans YAML tokens from a MemoryBuffer.
class Scanner {
public:
- Scanner(StringRef Input, SourceMgr &SM);
- Scanner(MemoryBufferRef Buffer, SourceMgr &SM_);
+ Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true);
+ Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
@@ -270,7 +276,7 @@ public:
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
ArrayRef<SMRange> Ranges = None) {
- SM.PrintMessage(Loc, Kind, Message, Ranges);
+ SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors);
}
void setError(const Twine &Message, StringRef::iterator Position) {
@@ -347,6 +353,14 @@ private:
/// b-break.
StringRef::iterator skip_b_break(StringRef::iterator Position);
+ /// Skip a single s-space[31] starting at Position.
+ ///
+ /// An s-space is 0x20
+ ///
+ /// @returns The code unit after the s-space, or Position if it's not a
+ /// s-space.
+ StringRef::iterator skip_s_space(StringRef::iterator Position);
+
/// @brief Skip a single s-white[33] starting at Position.
///
/// An s-white is 0x20 | 0x9
@@ -372,6 +386,10 @@ private:
StringRef::iterator skip_while( SkipWhileFunc Func
, StringRef::iterator Position);
+ /// Skip minimal well-formed code unit subsequences until Func returns its
+ /// input.
+ void advanceWhile(SkipWhileFunc Func);
+
/// @brief Scan ns-uri-char[39]s starting at Cur.
///
/// This updates Cur and Column while scanning.
@@ -392,6 +410,11 @@ private:
/// Pos is whitespace or a new line
bool isBlankOrBreak(StringRef::iterator Position);
+ /// Consume a single b-break[28] if it's present at the current position.
+ ///
+ /// Return false if the code unit at the current position isn't a line break.
+ bool consumeLineBreakIfPresent();
+
/// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
, unsigned AtColumn
@@ -416,6 +439,10 @@ private:
, Token::TokenKind Kind
, TokenQueueT::iterator InsertPoint);
+ /// @brief Skip a single-line comment when the comment starts at the current
+ /// position of the scanner.
+ void skipComment();
+
/// @brief Skip whitespace and comments until the start of the next token.
void scanToNextToken();
@@ -461,6 +488,30 @@ private:
/// @brief Scan a block scalar starting with | or >.
bool scanBlockScalar(bool IsLiteral);
+ /// Scan a chomping indicator in a block scalar header.
+ char scanBlockChompingIndicator();
+
+ /// Scan an indentation indicator in a block scalar header.
+ unsigned scanBlockIndentationIndicator();
+
+ /// Scan a block scalar header.
+ ///
+ /// Return false if an error occurred.
+ bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
+ bool &IsDone);
+
+ /// Look for the indentation level of a block scalar.
+ ///
+ /// Return false if an error occurred.
+ bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
+ unsigned &LineBreaks, bool &IsDone);
+
+ /// Scan the indentation of a text line in a block scalar.
+ ///
+ /// Return false if an error occurred.
+ bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
+ bool &IsDone);
+
/// @brief Scan a tag of the form !stuff.
bool scanTag();
@@ -500,6 +551,9 @@ private:
/// @brief True if an error has occurred.
bool Failed;
+ /// @brief Should colors be used when printing out the diagnostic messages?
+ bool ShowColors;
+
/// @brief Queue of tokens. This is required to queue up tokens while looking
/// for the end of a simple key. And for cases where a single character
/// can produce multiple tokens (e.g. BlockEnd).
@@ -604,6 +658,9 @@ bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
case Token::TK_Scalar:
OS << "Scalar: ";
break;
+ case Token::TK_BlockScalar:
+ OS << "Block Scalar: ";
+ break;
case Token::TK_Alias:
OS << "Alias: ";
break;
@@ -701,11 +758,13 @@ std::string yaml::escape(StringRef Input) {
return EscapedInput;
}
-Scanner::Scanner(StringRef Input, SourceMgr &sm) : SM(sm) {
+Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors)
+ : SM(sm), ShowColors(ShowColors) {
init(MemoryBufferRef(Input, "YAML"));
}
-Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_) : SM(SM_) {
+Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors)
+ : SM(SM_), ShowColors(ShowColors) {
init(Buffer);
}
@@ -806,6 +865,13 @@ StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
return Position;
}
+StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ')
+ return Position + 1;
+ return Position;
+}
StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
if (Position == End)
@@ -834,6 +900,12 @@ StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
return Position;
}
+void Scanner::advanceWhile(SkipWhileFunc Func) {
+ auto Final = skip_while(Func, Current);
+ Column += Final - Current;
+ Current = Final;
+}
+
static bool is_ns_hex_digit(const char C) {
return (C >= '0' && C <= '9')
|| (C >= 'a' && C <= 'z')
@@ -896,6 +968,16 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
return false;
}
+bool Scanner::consumeLineBreakIfPresent() {
+ auto Next = skip_b_break(Current);
+ if (Next == Current)
+ return false;
+ Column = 0;
+ ++Line;
+ Current = Next;
+ return true;
+}
+
void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
, unsigned AtColumn
, bool IsRequired) {
@@ -961,24 +1043,27 @@ bool Scanner::rollIndent( int ToColumn
return true;
}
+void Scanner::skipComment() {
+ if (*Current != '#')
+ return;
+ while (true) {
+ // This may skip more than one byte, thus Column is only incremented
+ // for code points.
+ StringRef::iterator I = skip_nb_char(Current);
+ if (I == Current)
+ break;
+ Current = I;
+ ++Column;
+ }
+}
+
void Scanner::scanToNextToken() {
while (true) {
while (*Current == ' ' || *Current == '\t') {
skip(1);
}
- // Skip comment.
- if (*Current == '#') {
- while (true) {
- // This may skip more than one byte, thus Column is only incremented
- // for code points.
- StringRef::iterator i = skip_nb_char(Current);
- if (i == Current)
- break;
- Current = i;
- ++Column;
- }
- }
+ skipComment();
// Skip EOL.
StringRef::iterator i = skip_b_break(Current);
@@ -1361,38 +1446,204 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
return true;
}
-bool Scanner::scanBlockScalar(bool IsLiteral) {
- StringRef::iterator Start = Current;
- skip(1); // Eat | or >
- while(true) {
- StringRef::iterator i = skip_nb_char(Current);
- if (i == Current) {
- if (Column == 0)
- break;
- i = skip_b_break(Current);
- if (i != Current) {
- // We got a line break.
- Column = 0;
- ++Line;
- Current = i;
- continue;
- } else {
- // There was an error, which should already have been printed out.
+char Scanner::scanBlockChompingIndicator() {
+ char Indicator = ' ';
+ if (Current != End && (*Current == '+' || *Current == '-')) {
+ Indicator = *Current;
+ skip(1);
+ }
+ return Indicator;
+}
+
+/// Get the number of line breaks after chomping.
+///
+/// Return the number of trailing line breaks to emit, depending on
+/// \p ChompingIndicator.
+static unsigned getChompedLineBreaks(char ChompingIndicator,
+ unsigned LineBreaks, StringRef Str) {
+ if (ChompingIndicator == '-') // Strip all line breaks.
+ return 0;
+ if (ChompingIndicator == '+') // Keep all line breaks.
+ return LineBreaks;
+ // Clip trailing lines.
+ return Str.empty() ? 0 : 1;
+}
+
+unsigned Scanner::scanBlockIndentationIndicator() {
+ unsigned Indent = 0;
+ if (Current != End && (*Current >= '1' && *Current <= '9')) {
+ Indent = unsigned(*Current - '0');
+ skip(1);
+ }
+ return Indent;
+}
+
+bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
+ unsigned &IndentIndicator, bool &IsDone) {
+ auto Start = Current;
+
+ ChompingIndicator = scanBlockChompingIndicator();
+ IndentIndicator = scanBlockIndentationIndicator();
+ // Check for the chomping indicator once again.
+ if (ChompingIndicator == ' ')
+ ChompingIndicator = scanBlockChompingIndicator();
+ Current = skip_while(&Scanner::skip_s_white, Current);
+ skipComment();
+
+ if (Current == End) { // EOF, we have an empty scalar.
+ Token T;
+ T.Kind = Token::TK_BlockScalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ IsDone = true;
+ return true;
+ }
+
+ if (!consumeLineBreakIfPresent()) {
+ setError("Expected a line break after block scalar header", Current);
+ return false;
+ }
+ return true;
+}
+
+bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
+ unsigned BlockExitIndent,
+ unsigned &LineBreaks, bool &IsDone) {
+ unsigned MaxAllSpaceLineCharacters = 0;
+ StringRef::iterator LongestAllSpaceLine;
+
+ while (true) {
+ advanceWhile(&Scanner::skip_s_space);
+ if (skip_nb_char(Current) != Current) {
+ // This line isn't empty, so try and find the indentation.
+ if (Column <= BlockExitIndent) { // End of the block literal.
+ IsDone = true;
+ return true;
+ }
+ // We found the block's indentation.
+ BlockIndent = Column;
+ if (MaxAllSpaceLineCharacters > BlockIndent) {
+ setError(
+ "Leading all-spaces line must be smaller than the block indent",
+ LongestAllSpaceLine);
return false;
}
+ return true;
}
- Current = i;
+ if (skip_b_break(Current) != Current &&
+ Column > MaxAllSpaceLineCharacters) {
+ // Record the longest all-space line in case it's longer than the
+ // discovered block indent.
+ MaxAllSpaceLineCharacters = Column;
+ LongestAllSpaceLine = Current;
+ }
+
+ // Check for EOF.
+ if (Current == End) {
+ IsDone = true;
+ return true;
+ }
+
+ if (!consumeLineBreakIfPresent()) {
+ IsDone = true;
+ return true;
+ }
+ ++LineBreaks;
+ }
+ return true;
+}
+
+bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
+ unsigned BlockExitIndent, bool &IsDone) {
+ // Skip the indentation.
+ while (Column < BlockIndent) {
+ auto I = skip_s_space(Current);
+ if (I == Current)
+ break;
+ Current = I;
++Column;
}
- if (Start == Current) {
- setError("Got empty block scalar", Start);
+ if (skip_nb_char(Current) == Current)
+ return true;
+
+ if (Column <= BlockExitIndent) { // End of the block literal.
+ IsDone = true;
+ return true;
+ }
+
+ if (Column < BlockIndent) {
+ if (Current != End && *Current == '#') { // Trailing comment.
+ IsDone = true;
+ return true;
+ }
+ setError("A text line is less indented than the block scalar", Current);
+ return false;
+ }
+ return true; // A normal text line.
+}
+
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+ // Eat '|' or '>'
+ assert(*Current == '|' || *Current == '>');
+ skip(1);
+
+ char ChompingIndicator;
+ unsigned BlockIndent;
+ bool IsDone = false;
+ if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
return false;
+ if (IsDone)
+ return true;
+
+ auto Start = Current;
+ unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
+ unsigned LineBreaks = 0;
+ if (BlockIndent == 0) {
+ if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
+ IsDone))
+ return false;
}
+ // Scan the block scalar's body.
+ SmallString<256> Str;
+ while (!IsDone) {
+ if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
+ return false;
+ if (IsDone)
+ break;
+
+ // Parse the current line.
+ auto LineStart = Current;
+ advanceWhile(&Scanner::skip_nb_char);
+ if (LineStart != Current) {
+ Str.append(LineBreaks, '\n');
+ Str.append(StringRef(LineStart, Current - LineStart));
+ LineBreaks = 0;
+ }
+
+ // Check for EOF.
+ if (Current == End)
+ break;
+
+ if (!consumeLineBreakIfPresent())
+ break;
+ ++LineBreaks;
+ }
+
+ if (Current == End && !LineBreaks)
+ // Ensure that there is at least one line break before the end of file.
+ LineBreaks = 1;
+ Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
+
+ // New lines may start a simple key.
+ if (!FlowLevel)
+ IsSimpleKeyAllowed = true;
+
Token T;
- T.Kind = Token::TK_Scalar;
+ T.Kind = Token::TK_BlockScalar;
T.Range = StringRef(Start, Current - Start);
+ T.Value = Str.str().str();
TokenQueue.push_back(T);
return true;
}
@@ -1517,23 +1768,21 @@ bool Scanner::fetchMoreTokens() {
return false;
}
-Stream::Stream(StringRef Input, SourceMgr &SM)
- : scanner(new Scanner(Input, SM)), CurrentDoc() {}
+Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors)
+ : scanner(new Scanner(Input, SM, ShowColors)), CurrentDoc() {}
-Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM)
- : scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {}
+Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors)
+ : scanner(new Scanner(InputBuffer, SM, ShowColors)), CurrentDoc() {}
Stream::~Stream() {}
bool Stream::failed() { return scanner->failed(); }
void Stream::printError(Node *N, const Twine &Msg) {
- SmallVector<SMRange, 1> Ranges;
- Ranges.push_back(N->getSourceRange());
scanner->printError( N->getSourceRange().Start
, SourceMgr::DK_Error
, Msg
- , Ranges);
+ , N->getSourceRange());
}
document_iterator Stream::begin() {
@@ -1570,11 +1819,11 @@ std::string Node::getVerbatimTag() const {
if (Raw.find_last_of('!') == 0) {
Ret = Doc->getTagMap().find("!")->second;
Ret += Raw.substr(1);
- return std::move(Ret);
+ return Ret;
} else if (Raw.startswith("!!")) {
Ret = Doc->getTagMap().find("!!")->second;
Ret += Raw.substr(2);
- return std::move(Ret);
+ return Ret;
} else {
StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
std::map<StringRef, StringRef>::const_iterator It =
@@ -1588,7 +1837,7 @@ std::string Node::getVerbatimTag() const {
setError(Twine("Unknown tag handle ") + TagHandle, T);
}
Ret += Raw.substr(Raw.find_last_of('!') + 1);
- return std::move(Ret);
+ return Ret;
}
}
@@ -1596,6 +1845,7 @@ std::string Node::getVerbatimTag() const {
case NK_Null:
return "tag:yaml.org,2002:null";
case NK_Scalar:
+ case NK_BlockScalar:
// TODO: Tag resolution.
return "tag:yaml.org,2002:str";
case NK_Mapping:
@@ -2127,6 +2377,14 @@ parse_property:
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, T.Range);
+ case Token::TK_BlockScalar: {
+ getNext();
+ StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
+ StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
+ return new (NodeAllocator)
+ BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
+ TagInfo.Range, StrCopy, T.Range);
+ }
case Token::TK_Key:
// Don't eat the TK_Key, KeyValueNode expects it.
return new (NodeAllocator)
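
With the scanner and parser pieces above, a literal block scalar now surfaces as a BlockScalarNode whose value already reflects the chomping indicator. A sketch of reading one through the Stream API, assuming the constructors and accessors introduced in this file; the input string is made-up test data:

#include "llvm/Support/Casting.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  SourceMgr SM;
  // ShowColors now defaults to true; pass false for plain diagnostics.
  yaml::Stream Strm("key: |-\n  hello\n  world\n", SM, /*ShowColors=*/false);
  yaml::document_iterator DI = Strm.begin();
  auto *Map = dyn_cast<yaml::MappingNode>(DI->getRoot());
  if (!Map)
    return 1;
  for (yaml::KeyValueNode &KV : *Map)
    if (auto *BS = dyn_cast<yaml::BlockScalarNode>(KV.getValue()))
      // '|-' strips trailing breaks: prints "hello\nworld", no final newline.
      errs() << BS->getValue();
  return 0;
}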
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
index c87790e..90f34f6 100644
--- a/contrib/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -7,13 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Errc.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/LineIterator.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <cstring>
@@ -167,10 +169,22 @@ void Input::endMapping() {
}
}
+void Input::beginFlowMapping() { beginMapping(); }
+
+void Input::endFlowMapping() { endMapping(); }
+
unsigned Input::beginSequence() {
- if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode))
return SQ->Entries.size();
+ if (isa<EmptyHNode>(CurrentNode))
+ return 0;
+ // Treat the case where there's a scalar "null" value as an empty sequence.
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ if (isNull(SN->value()))
+ return 0;
}
+ // Any other type of HNode is an error.
+ setError(CurrentNode, "not a sequence");
return 0;
}
@@ -192,12 +206,7 @@ void Input::postflightElement(void *SaveInfo) {
CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
}
-unsigned Input::beginFlowSequence() {
- if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
- return SQ->Entries.size();
- }
- return 0;
-}
+unsigned Input::beginFlowSequence() { return beginSequence(); }
bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
if (EC)
@@ -233,6 +242,13 @@ bool Input::matchEnumScalar(const char *Str, bool) {
return false;
}
+bool Input::matchEnumFallback() {
+ if (ScalarMatchFound)
+ return false;
+ ScalarMatchFound = true;
+ return true;
+}
+
void Input::endEnumScalar() {
if (!ScalarMatchFound) {
setError(CurrentNode, "unknown enumerated scalar");
@@ -294,6 +310,8 @@ void Input::scalarString(StringRef &S, bool) {
}
}
+void Input::blockScalarString(StringRef &S) { scalarString(S, false); }
+
void Input::setError(HNode *hnode, const Twine &message) {
assert(hnode && "HNode must not be NULL");
this->setError(hnode->_node, message);
@@ -316,6 +334,11 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
KeyStr = StringRef(Buf, Len);
}
return llvm::make_unique<ScalarHNode>(N, KeyStr);
+ } else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) {
+ StringRef Value = BSN->getValue();
+ char *Buf = StringAllocator.Allocate<char>(Value.size());
+ memcpy(Buf, Value.data(), Value.size());
+ return llvm::make_unique<ScalarHNode>(N, StringRef(Buf, Value.size()));
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
auto SQHNode = llvm::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
@@ -382,6 +405,7 @@ Output::Output(raw_ostream &yout, void *context)
Out(yout),
Column(0),
ColumnAtFlowStart(0),
+ ColumnAtMapFlowStart(0),
NeedBitValueComma(false),
NeedFlowSequenceComma(false),
EnumerationMatchFound(false),
@@ -416,8 +440,13 @@ bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&) {
UseDefault = false;
if (Required || !SameAsDefault) {
- this->newLineCheck();
- this->paddedKey(Key);
+ auto State = StateStack.back();
+ if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) {
+ flowKey(Key);
+ } else {
+ this->newLineCheck();
+ this->paddedKey(Key);
+ }
return true;
}
return false;
@@ -427,9 +456,24 @@ void Output::postflightKey(void *) {
if (StateStack.back() == inMapFirstKey) {
StateStack.pop_back();
StateStack.push_back(inMapOtherKey);
+ } else if (StateStack.back() == inFlowMapFirstKey) {
+ StateStack.pop_back();
+ StateStack.push_back(inFlowMapOtherKey);
}
}
+void Output::beginFlowMapping() {
+ StateStack.push_back(inFlowMapFirstKey);
+ this->newLineCheck();
+ ColumnAtMapFlowStart = Column;
+ output("{ ");
+}
+
+void Output::endFlowMapping() {
+ StateStack.pop_back();
+ this->outputUpToEndOfLine(" }");
+}
+
void Output::beginDocuments() {
this->outputUpToEndOfLine("---");
}
@@ -508,6 +552,13 @@ bool Output::matchEnumScalar(const char *Str, bool Match) {
return false;
}
+bool Output::matchEnumFallback() {
+ if (EnumerationMatchFound)
+ return false;
+ EnumerationMatchFound = true;
+ return true;
+}
+
void Output::endEnumScalar() {
if (!EnumerationMatchFound)
llvm_unreachable("bad runtime enum value");
@@ -566,6 +617,24 @@ void Output::scalarString(StringRef &S, bool MustQuote) {
this->outputUpToEndOfLine("'"); // Ending single quote.
}
+void Output::blockScalarString(StringRef &S) {
+ if (!StateStack.empty())
+ newLineCheck();
+ output(" |");
+ outputNewLine();
+
+ unsigned Indent = StateStack.empty() ? 1 : StateStack.size();
+
+ auto Buffer = MemoryBuffer::getMemBuffer(S, "", false);
+ for (line_iterator Lines(*Buffer, false); !Lines.is_at_end(); ++Lines) {
+ for (unsigned I = 0; I < Indent; ++I) {
+ output(" ");
+ }
+ output(*Lines);
+ outputNewLine();
+ }
+}
+
void Output::setError(const Twine &message) {
}
@@ -589,7 +658,9 @@ void Output::output(StringRef s) {
void Output::outputUpToEndOfLine(StringRef s) {
this->output(s);
- if (StateStack.empty() || StateStack.back() != inFlowSeq)
+ if (StateStack.empty() || (StateStack.back() != inFlowSeq &&
+ StateStack.back() != inFlowMapFirstKey &&
+ StateStack.back() != inFlowMapOtherKey))
NeedsNewLine = true;
}
@@ -615,7 +686,9 @@ void Output::newLineCheck() {
if (StateStack.back() == inSeq) {
OutputDash = true;
- } else if ((StateStack.size() > 1) && (StateStack.back() == inMapFirstKey) &&
+ } else if ((StateStack.size() > 1) && ((StateStack.back() == inMapFirstKey) ||
+ (StateStack.back() == inFlowSeq) ||
+ (StateStack.back() == inFlowMapFirstKey)) &&
(StateStack[StateStack.size() - 2] == inSeq)) {
--Indent;
OutputDash = true;
@@ -640,6 +713,20 @@ void Output::paddedKey(StringRef key) {
output(" ");
}
+void Output::flowKey(StringRef Key) {
+ if (StateStack.back() == inFlowMapOtherKey)
+ output(", ");
+ if (Column > 70) {
+ output("\n");
+ for (int I = 0; I < ColumnAtMapFlowStart; ++I)
+ output(" ");
+ Column = ColumnAtMapFlowStart;
+ output(" ");
+ }
+ output(Key);
+ output(": ");
+}
+
//===----------------------------------------------------------------------===//
// traits for built-in types
//===----------------------------------------------------------------------===//
@@ -669,7 +756,7 @@ StringRef ScalarTraits<StringRef>::input(StringRef Scalar, void *,
Val = Scalar;
return StringRef();
}
-
+
void ScalarTraits<std::string>::output(const std::string &Val, void *,
raw_ostream &Out) {
Out << Val;
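
The Output-side additions (beginFlowMapping, flowKey, endFlowMapping) let a mapping render on one line as { k1: v1, k2: v2 }, wrapping once a line passes column 70. A sketch of opting in through MappingTraits, assuming the flow flag is what routes preflightKey to flowKey; Point is an illustrative type, not from the patch:

#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"

struct Point { int X; int Y; };

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Point> {
  static const bool flow = true; // emit as a flow mapping
  static void mapping(IO &Io, Point &P) {
    Io.mapRequired("x", P.X);
    Io.mapRequired("y", P.Y);
  }
};
} // namespace yaml
} // namespace llvm

int main() {
  Point P = {1, 2};
  llvm::yaml::Output Yout(llvm::outs());
  Yout << P; // ---
             // { x: 1, y: 2 }
             // ...
  return 0;
}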
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 1bcc31b..4c0b6c7 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -242,7 +242,7 @@ raw_ostream &raw_ostream::operator<<(double N) {
char buf[16];
unsigned len;
- len = snprintf(buf, sizeof(buf), "%e", N);
+ len = format("%e", N).snprint(buf, sizeof(buf));
if (len <= sizeof(buf) - 2) {
if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
int cs = buf[len - 4];
@@ -410,9 +410,12 @@ raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
if (FN.Hex) {
unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
- unsigned Width = (FN.Width > Nibbles+2) ? FN.Width : Nibbles+2;
-
+ unsigned PrefixChars = FN.HexPrefix ? 2 : 0;
+ unsigned Width = std::max(FN.Width, Nibbles + PrefixChars);
+
char NumberBuffer[20] = "0x0000000000000000";
+ if (!FN.HexPrefix)
+ NumberBuffer[1] = '0';
char *EndPtr = NumberBuffer+Width;
char *CurPtr = EndPtr;
const char A = FN.Upper ? 'A' : 'a';
@@ -484,51 +487,53 @@ void format_object_base::home() {
// raw_fd_ostream
//===----------------------------------------------------------------------===//
-raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
- sys::fs::OpenFlags Flags)
- : Error(false), UseAtomicWrites(false), pos(0) {
- EC = std::error_code();
+static int getFD(StringRef Filename, std::error_code &EC,
+ sys::fs::OpenFlags Flags) {
// Handle "-" as stdout. Note that when we do this, we consider ourself
// the owner of stdout. This means that we can do things like close the
// file descriptor when we're done and set the "binary" flag globally.
if (Filename == "-") {
- FD = STDOUT_FILENO;
+ EC = std::error_code();
// If user requested binary then put stdout into binary mode if
// possible.
if (!(Flags & sys::fs::F_Text))
sys::ChangeStdoutToBinary();
- // Close stdout when we're done, to detect any output errors.
- ShouldClose = true;
- return;
+ return STDOUT_FILENO;
}
+ int FD;
EC = sys::fs::openFileForWrite(Filename, FD, Flags);
+ if (EC)
+ return -1;
- if (EC) {
- ShouldClose = false;
- return;
- }
-
- // Ok, we successfully opened the file, so it'll need to be closed.
- ShouldClose = true;
+ return FD;
}
-/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
-/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
+ sys::fs::OpenFlags Flags)
+ : raw_fd_ostream(getFD(Filename, EC, Flags), true) {}
+
+/// FD is the file descriptor that this writes to. If ShouldClose is true, this
+/// closes the file when the stream is destroyed.
raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
- : raw_ostream(unbuffered), FD(fd),
- ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
-#ifdef O_BINARY
- // Setting STDOUT to binary mode is necessary in Win32
- // to avoid undesirable linefeed conversion.
- // Don't touch STDERR, or w*printf() (in assert()) would barf wide chars.
- if (fd == STDOUT_FILENO)
- setmode(fd, O_BINARY);
-#endif
+ : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose),
+ Error(false), UseAtomicWrites(false) {
+ if (FD < 0) {
+ ShouldClose = false;
+ return;
+ }
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
- if (loc == (off_t)-1)
+#ifdef LLVM_ON_WIN32
+ // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
+ sys::fs::file_status Status;
+ std::error_code EC = status(FD, Status);
+ SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file;
+#else
+ SupportsSeeking = loc != (off_t)-1;
+#endif
+ if (!SupportsSeeking)
pos = 0;
else
pos = static_cast<uint64_t>(loc);
@@ -620,11 +625,19 @@ void raw_fd_ostream::close() {
uint64_t raw_fd_ostream::seek(uint64_t off) {
flush();
pos = ::lseek(FD, off, SEEK_SET);
- if (pos != off)
+ if (pos == (uint64_t)-1)
error_detected();
return pos;
}
+void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
+ uint64_t Offset) {
+ uint64_t Pos = tell();
+ seek(Offset);
+ write(Ptr, Size);
+ seek(Pos);
+}
+
size_t raw_fd_ostream::preferred_buffer_size() const {
#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
// Windows and Minix have no st_blksize.
@@ -705,7 +718,9 @@ raw_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
// Delete the file descriptor when the program exits, forcing error
// detection. If you don't want this behavior, don't use outs().
- static raw_fd_ostream S(STDOUT_FILENO, true);
+ std::error_code EC;
+ static raw_fd_ostream S("-", EC, sys::fs::F_None);
+ assert(!EC);
return S;
}
@@ -746,7 +761,14 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
// capacity. This allows raw_ostream to write directly into the correct place,
// and we only need to set the vector size when the data is flushed.
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O, unsigned)
+ : OS(O) {}
+
raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+ init();
+}
+
+void raw_svector_ostream::init() {
// Set up the initial external buffer. We make sure that the buffer has at
// least 128 bytes free; raw_ostream itself only requires 64, but we want to
// make sure that we don't grow the buffer unnecessarily on destruction (when
@@ -760,6 +782,12 @@ raw_svector_ostream::~raw_svector_ostream() {
flush();
}
+void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
+ uint64_t Offset) {
+ flush();
+ memcpy(OS.begin() + Offset, Ptr, Size);
+}
+
/// resync - This is called when the SmallVector we're appending to is changed
/// outside of the raw_svector_ostream's control. It is only safe to do this
/// if the raw_svector_ostream has previously been flushed.
@@ -814,3 +842,6 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
+
+void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
+ uint64_t Offset) {}
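
The new pwrite_impl overrides back the raw_pwrite_stream interface: write at an absolute offset while preserving the current position. A sketch of back-patching a placeholder through it, assuming the public pwrite entry point forwards to pwrite_impl; the buffer contents are illustrative:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  SmallString<64> Buf;
  raw_svector_ostream OS(Buf);
  OS << "size: ????\n";    // reserve four placeholder bytes at offset 6
  OS.pwrite("1234", 4, 6); // patch them in place; position is unchanged
  OS.flush();
  outs() << Buf;           // prints "size: 1234\n"
  return 0;
}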
diff --git a/contrib/llvm/lib/Support/regcomp.c b/contrib/llvm/lib/Support/regcomp.c
index b796929..ebde64f 100644
--- a/contrib/llvm/lib/Support/regcomp.c
+++ b/contrib/llvm/lib/Support/regcomp.c
@@ -1422,7 +1422,7 @@ enlarge(struct parse *p, sopno size)
if (p->ssize >= size)
return;
- if ((unsigned long)size > SIZE_MAX / sizeof(sop)) {
+ if ((uintptr_t)size > SIZE_MAX / sizeof(sop)) {
SETERROR(REG_ESPACE);
return;
}
@@ -1443,7 +1443,7 @@ static void
stripsnug(struct parse *p, struct re_guts *g)
{
g->nstates = p->slen;
- if ((unsigned long)p->slen > SIZE_MAX / sizeof(sop)) {
+ if ((uintptr_t)p->slen > SIZE_MAX / sizeof(sop)) {
g->strip = p->strip;
SETERROR(REG_ESPACE);
return;
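
Casting through uintptr_t instead of unsigned long matters on LLP64 Windows, where unsigned long is 32 bits and the old cast could truncate the count before the overflow comparison. The guard pattern in isolation (illustrative helper, not part of the patch):

#include <cstdint>
#include <cstdlib>

void *allocArray(std::size_t Count, std::size_t ElemSize) {
  if (Count > SIZE_MAX / ElemSize) // compare at full width, no narrowing
    return nullptr;                // Count * ElemSize would overflow
  return std::malloc(Count * ElemSize);
}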
diff --git a/contrib/llvm/lib/TableGen/Error.cpp b/contrib/llvm/lib/TableGen/Error.cpp
index 8d9ae20..fd08935 100644
--- a/contrib/llvm/lib/TableGen/Error.cpp
+++ b/contrib/llvm/lib/TableGen/Error.cpp
@@ -14,6 +14,7 @@
#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
@@ -64,11 +65,15 @@ void PrintError(const Twine &Msg) {
void PrintFatalError(const Twine &Msg) {
PrintError(Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
std::exit(1);
}
void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintError(ErrorLoc, Msg);
+ // The following call runs the file cleanup handlers.
+ sys::RunInterruptHandlers();
std::exit(1);
}
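
Running the interrupt handlers before std::exit means files registered for signal-time cleanup are also removed on the fatal-error path. A sketch of the interaction, assuming a temp output registered via sys::RemoveFileOnSignal; the path and condition are illustrative:

#include "llvm/Support/Signals.h"
#include "llvm/TableGen/Error.h"
using namespace llvm;

void emitTo(StringRef TmpPath, bool Broken) {
  sys::RemoveFileOnSignal(TmpPath); // registered with the cleanup handlers
  if (Broken)
    PrintFatalError("bad record");  // now unlinks TmpPath before exit(1)
}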
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index c440451..bb590c7 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -15,37 +15,35 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/Main.h"
#include "TGParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cstdio>
#include <system_error>
using namespace llvm;
-namespace {
- cl::opt<std::string>
- OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
- cl::init("-"));
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
+ cl::init("-"));
- cl::opt<std::string>
- DependFilename("d",
- cl::desc("Dependency filename"),
- cl::value_desc("filename"),
- cl::init(""));
+static cl::opt<std::string>
+DependFilename("d",
+ cl::desc("Dependency filename"),
+ cl::value_desc("filename"),
+ cl::init(""));
- cl::opt<std::string>
- InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
- cl::list<std::string>
- IncludeDirs("I", cl::desc("Directory of include files"),
- cl::value_desc("directory"), cl::Prefix);
-}
+static cl::list<std::string>
+IncludeDirs("I", cl::desc("Directory of include files"),
+ cl::value_desc("directory"), cl::Prefix);
/// \brief Create a dependency file for `-d` option.
///
@@ -72,9 +70,7 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
return 0;
}
-namespace llvm {
-
-int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
+int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
RecordKeeper Records;
// Parse the input file.
@@ -122,5 +118,3 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
Out.keep();
return 0;
}
-
-}
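
The options move from an anonymous namespace to file-scope statics, the usual LLVM convention for file-local variables. The shape in isolation (option name is illustrative):

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool>
Verbose("verbose", llvm::cl::desc("Print extra output"), llvm::cl::init(false));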
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index 3a2ae96..9783922 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -91,11 +91,11 @@ void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
if (!ListTy)
- ListTy = new ListRecTy(this);
- return ListTy;
+ ListTy.reset(new ListRecTy(this));
+ return ListTy.get();
}
-bool RecTy::baseClassOf(const RecTy *RHS) const{
+bool RecTy::baseClassOf(const RecTy *RHS) const {
assert (RHS && "NULL pointer");
return Kind == RHS->getRecTyKind();
}
@@ -112,16 +112,16 @@ Init *BitRecTy::convertValue(IntInit *II) {
return BitInit::get(Val != 0);
}
-Init *BitRecTy::convertValue(TypedInit *VI) {
- RecTy *Ty = VI->getType();
+Init *BitRecTy::convertValue(TypedInit *TI) {
+ RecTy *Ty = TI->getType();
if (isa<BitRecTy>(Ty))
- return VI; // Accept variable if it is already of bit type!
+ return TI; // Accept variable if it is already of bit type!
if (auto *BitsTy = dyn_cast<BitsRecTy>(Ty))
// Accept only bits<1> expression.
- return BitsTy->getNumBits() == 1 ? VI : nullptr;
+ return BitsTy->getNumBits() == 1 ? TI : nullptr;
// Ternary !if can be converted to bit, but only if both sides are
// convertible to a bit.
- if (TernOpInit *TOI = dyn_cast<TernOpInit>(VI)) {
+ if (TernOpInit *TOI = dyn_cast<TernOpInit>(TI)) {
if (TOI->getOpcode() != TernOpInit::TernaryOp::IF)
return nullptr;
if (!TOI->getMHS()->convertInitializerTo(BitRecTy::get()) ||
@@ -133,7 +133,7 @@ Init *BitRecTy::convertValue(TypedInit *VI) {
}
bool BitRecTy::baseClassOf(const RecTy *RHS) const{
- if(RecTy::baseClassOf(RHS) || getRecTyKind() == IntRecTyKind)
+ if(RecTy::baseClassOf(RHS) || RHS->getRecTyKind() == IntRecTyKind)
return true;
if(const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS))
return BitsTy->getNumBits() == 1;
@@ -141,13 +141,13 @@ bool BitRecTy::baseClassOf(const RecTy *RHS) const{
}
BitsRecTy *BitsRecTy::get(unsigned Sz) {
- static std::vector<BitsRecTy*> Shared;
+ static std::vector<std::unique_ptr<BitsRecTy>> Shared;
if (Sz >= Shared.size())
Shared.resize(Sz + 1);
- BitsRecTy *&Ty = Shared[Sz];
+ std::unique_ptr<BitsRecTy> &Ty = Shared[Sz];
if (!Ty)
- Ty = new BitsRecTy(Sz);
- return Ty;
+ Ty.reset(new BitsRecTy(Sz));
+ return Ty.get();
}
std::string BitsRecTy::getAsString() const {
@@ -163,9 +163,9 @@ Init *BitsRecTy::convertValue(UnsetInit *UI) {
return BitsInit::get(NewBits);
}
-Init *BitsRecTy::convertValue(BitInit *UI) {
+Init *BitsRecTy::convertValue(BitInit *BI) {
if (Size != 1) return nullptr; // Can only convert single bit.
- return BitsInit::get(UI);
+ return BitsInit::get(BI);
}
/// canFitInBitfield - Return true if the number of bits is large enough to hold
@@ -200,15 +200,15 @@ Init *BitsRecTy::convertValue(BitsInit *BI) {
return nullptr;
}
-Init *BitsRecTy::convertValue(TypedInit *VI) {
- if (Size == 1 && isa<BitRecTy>(VI->getType()))
- return BitsInit::get(VI);
+Init *BitsRecTy::convertValue(TypedInit *TI) {
+ if (Size == 1 && isa<BitRecTy>(TI->getType()))
+ return BitsInit::get(TI);
- if (VI->getType()->typeIsConvertibleTo(this)) {
+ if (TI->getType()->typeIsConvertibleTo(this)) {
SmallVector<Init *, 16> NewBits(Size);
for (unsigned i = 0; i != Size; ++i)
- NewBits[i] = VarBitInit::get(VI, i);
+ NewBits[i] = VarBitInit::get(TI, i);
return BitsInit::get(NewBits);
}
@@ -229,11 +229,10 @@ Init *IntRecTy::convertValue(BitInit *BI) {
Init *IntRecTy::convertValue(BitsInit *BI) {
int64_t Result = 0;
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
- if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) {
- Result |= Bit->getValue() << i;
- } else {
+ if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i)))
+ Result |= static_cast<int64_t>(Bit->getValue()) << i;
+ else
return nullptr;
- }
return IntInit::get(Result);
}
@@ -248,16 +247,16 @@ bool IntRecTy::baseClassOf(const RecTy *RHS) const{
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
}
-Init *StringRecTy::convertValue(UnOpInit *BO) {
- if (BO->getOpcode() == UnOpInit::CAST) {
- Init *L = BO->getOperand()->convertInitializerTo(this);
+Init *StringRecTy::convertValue(UnOpInit *UO) {
+ if (UO->getOpcode() == UnOpInit::CAST) {
+ Init *L = UO->getOperand()->convertInitializerTo(this);
if (!L) return nullptr;
- if (L != BO->getOperand())
- return UnOpInit::get(UnOpInit::CAST, L, new StringRecTy);
- return BO;
+ if (L != UO->getOperand())
+ return UnOpInit::get(UnOpInit::CAST, L, StringRecTy::get());
+ return UO;
}
- return convertValue((TypedInit*)BO);
+ return convertValue((TypedInit*)UO);
}
Init *StringRecTy::convertValue(BinOpInit *BO) {
@@ -266,7 +265,7 @@ Init *StringRecTy::convertValue(BinOpInit *BO) {
Init *R = BO->getRHS()->convertInitializerTo(this);
if (!L || !R) return nullptr;
if (L != BO->getLHS() || R != BO->getRHS())
- return BinOpInit::get(BinOpInit::STRCONCAT, L, R, new StringRecTy);
+ return BinOpInit::get(BinOpInit::STRCONCAT, L, R, StringRecTy::get());
return BO;
}
@@ -321,13 +320,13 @@ Init *DagRecTy::convertValue(TypedInit *TI) {
return nullptr;
}
-Init *DagRecTy::convertValue(UnOpInit *BO) {
- if (BO->getOpcode() == UnOpInit::CAST) {
- Init *L = BO->getOperand()->convertInitializerTo(this);
+Init *DagRecTy::convertValue(UnOpInit *UO) {
+ if (UO->getOpcode() == UnOpInit::CAST) {
+ Init *L = UO->getOperand()->convertInitializerTo(this);
if (!L) return nullptr;
- if (L != BO->getOperand())
- return UnOpInit::get(UnOpInit::CAST, L, new DagRecTy);
- return BO;
+ if (L != UO->getOperand())
+ return UnOpInit::get(UnOpInit::CAST, L, DagRecTy::get());
+ return UO;
}
return nullptr;
}
@@ -338,7 +337,7 @@ Init *DagRecTy::convertValue(BinOpInit *BO) {
Init *R = BO->getRHS()->convertInitializerTo(this);
if (!L || !R) return nullptr;
if (L != BO->getLHS() || R != BO->getRHS())
- return BinOpInit::get(BinOpInit::CONCAT, L, R, new DagRecTy);
+ return BinOpInit::get(BinOpInit::CONCAT, L, R, DagRecTy::get());
return BO;
}
return nullptr;
@@ -385,7 +384,7 @@ bool RecordRecTy::baseClassOf(const RecTy *RHS) const{
}
/// resolveTypes - Find a common type that T1 and T2 convert to.
-/// Return 0 if no such type exists.
+/// Return null if no such type exists.
///
RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
if (T1->typeIsConvertibleTo(T2))
@@ -396,38 +395,20 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
// If one is a Record type, check superclasses
if (RecordRecTy *RecTy1 = dyn_cast<RecordRecTy>(T1)) {
// See if T2 inherits from a type T1 also inherits from
- const std::vector<Record *> &T1SuperClasses =
- RecTy1->getRecord()->getSuperClasses();
- for(std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
- iend = T1SuperClasses.end();
- i != iend;
- ++i) {
- RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
+ for (Record *SuperRec1 : RecTy1->getRecord()->getSuperClasses()) {
+ RecordRecTy *SuperRecTy1 = RecordRecTy::get(SuperRec1);
RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
- if (NewType1) {
- if (NewType1 != SuperRecTy1) {
- delete SuperRecTy1;
- }
+ if (NewType1)
return NewType1;
- }
}
}
if (RecordRecTy *RecTy2 = dyn_cast<RecordRecTy>(T2)) {
// See if T1 inherits from a type T2 also inherits from
- const std::vector<Record *> &T2SuperClasses =
- RecTy2->getRecord()->getSuperClasses();
- for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
- iend = T2SuperClasses.end();
- i != iend;
- ++i) {
- RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
+ for (Record *SuperRec2 : RecTy2->getRecord()->getSuperClasses()) {
+ RecordRecTy *SuperRecTy2 = RecordRecTy::get(SuperRec2);
RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
- if (NewType2) {
- if (NewType2 != SuperRecTy2) {
- delete SuperRecTy2;
- }
+ if (NewType2)
return NewType2;
- }
}
}
return nullptr;
@@ -461,16 +442,13 @@ static void
ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
ID.AddInteger(Range.size());
- for (ArrayRef<Init *>::iterator i = Range.begin(),
- iend = Range.end();
- i != iend;
- ++i)
- ID.AddPointer(*i);
+ for (Init *I : Range)
+ ID.AddPointer(I);
}
BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
- typedef FoldingSet<BitsInit> Pool;
- static Pool ThePool;
+ static FoldingSet<BitsInit> ThePool;
+ static std::vector<std::unique_ptr<BitsInit>> TheActualPool;
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
@@ -481,7 +459,7 @@ BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
BitsInit *I = new BitsInit(Range);
ThePool.InsertNode(I, IP);
-
+ TheActualPool.push_back(std::unique_ptr<BitsInit>(I));
return I;
}
@@ -517,7 +495,7 @@ std::string BitsInit::getAsString() const {
// bits initializer will resolve into VarBitInit to keep the field name and bit
// number used in targets with fixed insn length.
static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) {
- if (RV || After != UnsetInit::get())
+ if (RV || !isa<UnsetInit>(After))
return After;
return Before;
}
@@ -570,27 +548,12 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
return const_cast<BitsInit *>(this);
}
-namespace {
- template<typename T>
- class Pool : public T {
- public:
- ~Pool();
- };
- template<typename T>
- Pool<T>::~Pool() {
- for (typename T::iterator I = this->begin(), E = this->end(); I != E; ++I) {
- typename T::value_type &Item = *I;
- delete Item.second;
- }
- }
-}
-
IntInit *IntInit::get(int64_t V) {
- static Pool<DenseMap<int64_t, IntInit *> > ThePool;
+ static DenseMap<int64_t, std::unique_ptr<IntInit>> ThePool;
- IntInit *&I = ThePool[V];
- if (!I) I = new IntInit(V);
- return I;
+ std::unique_ptr<IntInit> &I = ThePool[V];
+ if (!I) I.reset(new IntInit(V));
+ return I.get();
}
std::string IntInit::getAsString() const {
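
The hunks above and below converge on one leak fix: each interning pool becomes a static map that owns its nodes through unique_ptr, so the pool is destroyed at program exit instead of leaking. The pattern in isolation, with illustrative names:

#include "llvm/ADT/DenseMap.h"
#include <cstdint>
#include <memory>

struct IntNode {
  int64_t V;
  explicit IntNode(int64_t V) : V(V) {}
};

IntNode *getIntNode(int64_t V) {
  static llvm::DenseMap<int64_t, std::unique_ptr<IntNode>> Pool;
  std::unique_ptr<IntNode> &Slot = Pool[V];
  if (!Slot)
    Slot.reset(new IntNode(V)); // first request: create and take ownership
  return Slot.get();            // later requests: the same interned pointer
}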
@@ -613,11 +576,11 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
void StringInit::anchor() { }
StringInit *StringInit::get(StringRef V) {
- static Pool<StringMap<StringInit *> > ThePool;
+ static StringMap<std::unique_ptr<StringInit>> ThePool;
- StringInit *&I = ThePool[V];
- if (!I) I = new StringInit(V);
- return I;
+ std::unique_ptr<StringInit> &I = ThePool[V];
+ if (!I) I.reset(new StringInit(V));
+ return I.get();
}
static void ProfileListInit(FoldingSetNodeID &ID,
@@ -626,16 +589,12 @@ static void ProfileListInit(FoldingSetNodeID &ID,
ID.AddInteger(Range.size());
ID.AddPointer(EltTy);
- for (ArrayRef<Init *>::iterator i = Range.begin(),
- iend = Range.end();
- i != iend;
- ++i)
- ID.AddPointer(*i);
+ for (Init *I : Range)
+ ID.AddPointer(I);
}
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
- typedef FoldingSet<ListInit> Pool;
- static Pool ThePool;
+ static FoldingSet<ListInit> ThePool;
static std::vector<std::unique_ptr<ListInit>> TheActualPool;
FoldingSetNodeID ID;
@@ -652,9 +611,7 @@ ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
}
void ListInit::Profile(FoldingSetNodeID &ID) const {
- ListRecTy *ListType = dyn_cast<ListRecTy>(getType());
- assert(ListType && "Bad type for ListInit!");
- RecTy *EltTy = ListType->getElementType();
+ RecTy *EltTy = cast<ListRecTy>(getType())->getElementType();
ProfileListInit(ID, Values, EltTy);
}
@@ -731,14 +688,10 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
}
if (Resolved != this) {
- TypedInit *Typed = dyn_cast<TypedInit>(Resolved);
- assert(Typed && "Expected typed init for list reference");
- if (Typed) {
- Init *New = Typed->resolveListElementReference(R, IRV, Elt);
- if (New)
- return New;
- return VarListElementInit::get(Typed, Elt);
- }
+ TypedInit *Typed = cast<TypedInit>(Resolved);
+ if (Init *New = Typed->resolveListElementReference(R, IRV, Elt))
+ return New;
+ return VarListElementInit::get(Typed, Elt);
}
return nullptr;
@@ -752,13 +705,13 @@ Init *OpInit::getBit(unsigned Bit) const {
UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key;
- static Pool<DenseMap<Key, UnOpInit *> > ThePool;
+ static DenseMap<Key, std::unique_ptr<UnOpInit>> ThePool;
Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type));
- UnOpInit *&I = ThePool[TheKey];
- if (!I) I = new UnOpInit(opc, lhs, Type);
- return I;
+ std::unique_ptr<UnOpInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new UnOpInit(opc, lhs, Type));
+ return I.get();
}
Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
@@ -787,7 +740,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name,
":");
-
+
if (CurRec->isTemplateArg(TemplateArgName)) {
const RecordVal *RV = CurRec->getValue(TemplateArgName);
assert(RV && "Template arg doesn't exist??");
@@ -800,7 +753,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
if (CurMultiClass) {
- Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
@@ -824,40 +778,25 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
case HEAD: {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
- assert(LHSl->getSize() != 0 && "Empty list in car");
+ assert(!LHSl->empty() && "Empty list in head");
return LHSl->getElement(0);
}
break;
}
case TAIL: {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
- assert(LHSl->getSize() != 0 && "Empty list in cdr");
+ assert(!LHSl->empty() && "Empty list in tail");
// Note the +1. We can't just pass the result of getValues()
// directly.
- ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1;
- ArrayRef<Init *>::iterator end = LHSl->getValues().end();
- ListInit *Result =
- ListInit::get(ArrayRef<Init *>(begin, end - begin),
- LHSl->getType());
- return Result;
+ return ListInit::get(LHSl->getValues().slice(1), LHSl->getType());
}
break;
}
case EMPTY: {
- if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
- if (LHSl->getSize() == 0) {
- return IntInit::get(1);
- } else {
- return IntInit::get(0);
- }
- }
- if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
- if (LHSs->getValue().empty()) {
- return IntInit::get(1);
- } else {
- return IntInit::get(0);
- }
- }
+ if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
+ return IntInit::get(LHSl->empty());
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
+ return IntInit::get(LHSs->getValue().empty());
break;
}
@@ -891,14 +830,14 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
RecTy *
> Key;
- static Pool<DenseMap<Key, BinOpInit *> > ThePool;
+ static DenseMap<Key, std::unique_ptr<BinOpInit>> ThePool;
Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs),
Type));
- BinOpInit *&I = ThePool[TheKey];
- if (!I) I = new BinOpInit(opc, lhs, rhs, Type);
- return I;
+ std::unique_ptr<BinOpInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new BinOpInit(opc, lhs, rhs, Type));
+ return I.get();
}
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
@@ -933,7 +872,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
Args.insert(Args.end(), LHSs->begin(), LHSs->end());
Args.insert(Args.end(), RHSs->begin(), RHSs->end());
return ListInit::get(
- Args, static_cast<ListRecTy *>(LHSs->getType())->getElementType());
+ Args, cast<ListRecTy>(LHSs->getType())->getElementType());
}
break;
}
@@ -1017,9 +956,8 @@ std::string BinOpInit::getAsString() const {
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
}
-TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs,
- Init *mhs, Init *rhs,
- RecTy *Type) {
+TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
+ RecTy *Type) {
typedef std::pair<
std::pair<
std::pair<std::pair<unsigned, RecTy *>, Init *>,
@@ -1028,8 +966,7 @@ TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs,
Init *
> Key;
- typedef DenseMap<Key, TernOpInit *> Pool;
- static Pool ThePool;
+ static DenseMap<Key, std::unique_ptr<TernOpInit>> ThePool;
Key TheKey(std::make_pair(std::make_pair(std::make_pair(std::make_pair(opc,
Type),
@@ -1037,9 +974,9 @@ TernOpInit *TernOpInit::get(TernaryOp opc, Init *lhs,
mhs),
rhs));
- TernOpInit *&I = ThePool[TheKey];
- if (!I) I = new TernOpInit(opc, lhs, mhs, rhs, Type);
- return I;
+ std::unique_ptr<TernOpInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new TernOpInit(opc, lhs, mhs, rhs, Type));
+ return I.get();
}
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
@@ -1048,28 +985,19 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
RecTy *Type, Record *CurRec,
MultiClass *CurMultiClass) {
- std::vector<Init *> NewOperands;
-
- TypedInit *TArg = dyn_cast<TypedInit>(Arg);
-
// If this is a dag, recurse
- if (TArg && TArg->getType()->getAsString() == "dag") {
- Init *Result = ForeachHelper(LHS, Arg, RHSo, Type,
- CurRec, CurMultiClass);
- return Result;
- }
+ if (auto *TArg = dyn_cast<TypedInit>(Arg))
+ if (TArg->getType()->getAsString() == "dag")
+ return ForeachHelper(LHS, Arg, RHSo, Type, CurRec, CurMultiClass);
+ std::vector<Init *> NewOperands;
for (int i = 0; i < RHSo->getNumOperands(); ++i) {
- OpInit *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i));
-
- if (RHSoo) {
- Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
- Type, CurRec, CurMultiClass);
- if (Result) {
+ if (auto *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i))) {
+ if (Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
+ Type, CurRec, CurMultiClass))
NewOperands.push_back(Result);
- } else {
+ else
NewOperands.push_back(Arg);
- }
} else if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
NewOperands.push_back(Arg);
} else {
@@ -1090,9 +1018,8 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
OpInit *RHSo = dyn_cast<OpInit>(RHS);
- if (!RHSo) {
+ if (!RHSo)
PrintFatalError(CurRec->getLoc(), "!foreach requires an operator\n");
- }
TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
@@ -1104,9 +1031,8 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *Val = MHSd->getOperator();
Init *Result = EvaluateOperation(RHSo, LHS, Val,
Type, CurRec, CurMultiClass);
- if (Result) {
+ if (Result)
Val = Result;
- }
std::vector<std::pair<Init *, std::string> > args;
for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
@@ -1118,9 +1044,8 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
// Process args
Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
CurRec, CurMultiClass);
- if (Result) {
+ if (Result)
Arg = Result;
- }
// TODO: Process arg names
args.push_back(std::make_pair(Arg, ArgName));
@@ -1132,26 +1057,21 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
std::vector<Init *> NewOperands;
std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
- for (std::vector<Init *>::iterator li = NewList.begin(),
- liend = NewList.end();
- li != liend;
- ++li) {
- Init *Item = *li;
+ for (Init *&Item : NewList) {
NewOperands.clear();
for(int i = 0; i < RHSo->getNumOperands(); ++i) {
// First, replace the foreach variable with the list item
- if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) {
+ if (LHS->getAsString() == RHSo->getOperand(i)->getAsString())
NewOperands.push_back(Item);
- } else {
+ else
NewOperands.push_back(RHSo->getOperand(i));
- }
}
// Now run the operator and use its result as the new list item
const OpInit *NewOp = RHSo->clone(NewOperands);
Init *NewItem = NewOp->Fold(CurRec, CurMultiClass);
if (NewItem != NewOp)
- *li = NewItem;
+ Item = NewItem;
}
return ListInit::get(NewList, MHSl->getType());
}
@@ -1174,48 +1094,40 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
VarInit *RHSv = dyn_cast<VarInit>(RHS);
StringInit *RHSs = dyn_cast<StringInit>(RHS);
- if ((LHSd && MHSd && RHSd)
- || (LHSv && MHSv && RHSv)
- || (LHSs && MHSs && RHSs)) {
- if (RHSd) {
- Record *Val = RHSd->getDef();
- if (LHSd->getAsString() == RHSd->getAsString()) {
- Val = MHSd->getDef();
- }
- return DefInit::get(Val);
- }
- if (RHSv) {
- std::string Val = RHSv->getName();
- if (LHSv->getAsString() == RHSv->getAsString()) {
- Val = MHSv->getName();
- }
- return VarInit::get(Val, getType());
+ if (LHSd && MHSd && RHSd) {
+ Record *Val = RHSd->getDef();
+ if (LHSd->getAsString() == RHSd->getAsString())
+ Val = MHSd->getDef();
+ return DefInit::get(Val);
+ }
+ if (LHSv && MHSv && RHSv) {
+ std::string Val = RHSv->getName();
+ if (LHSv->getAsString() == RHSv->getAsString())
+ Val = MHSv->getName();
+ return VarInit::get(Val, getType());
+ }
+ if (LHSs && MHSs && RHSs) {
+ std::string Val = RHSs->getValue();
+
+ std::string::size_type found;
+ std::string::size_type idx = 0;
+ while (true) {
+ found = Val.find(LHSs->getValue(), idx);
+ if (found == std::string::npos)
+ break;
+ Val.replace(found, LHSs->getValue().size(), MHSs->getValue());
+ idx = found + MHSs->getValue().size();
}
- if (RHSs) {
- std::string Val = RHSs->getValue();
-
- std::string::size_type found;
- std::string::size_type idx = 0;
- do {
- found = Val.find(LHSs->getValue(), idx);
- if (found != std::string::npos) {
- Val.replace(found, LHSs->getValue().size(), MHSs->getValue());
- }
- idx = found + MHSs->getValue().size();
- } while (found != std::string::npos);
- return StringInit::get(Val);
- }
+ return StringInit::get(Val);
}
break;
}
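
The rewritten !subst case is the classic replace-all loop; a standalone sketch over plain std::string with a hypothetical replaceAll helper, assuming a non-empty search string. Resuming the scan just past the inserted text is what keeps a replacement whose output contains the search string from looping forever:

    #include <string>

    // Replace every occurrence of From in Str with To.
    static std::string replaceAll(std::string Str, const std::string &From,
                                  const std::string &To) {
      std::string::size_type Idx = 0;
      while (true) {
        std::string::size_type Found = Str.find(From, Idx);
        if (Found == std::string::npos)
          break;
        Str.replace(Found, From.size(), To);
        Idx = Found + To.size(); // resume after the inserted text
      }
      return Str;
    }

The old do/while form also computed idx from npos on the final pass; harmless, since the loop then exited, but the while (true) form makes the exit explicit.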
case FOREACH: {
- Init *Result = ForeachHelper(LHS, MHS, RHS, getType(),
- CurRec, CurMultiClass);
- if (Result) {
+ if (Init *Result = ForeachHelper(LHS, MHS, RHS, getType(),
+ CurRec, CurMultiClass))
return Result;
- }
break;
}
@@ -1224,11 +1136,9 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (Init *I = LHS->convertInitializerTo(IntRecTy::get()))
LHSi = dyn_cast<IntInit>(I);
if (LHSi) {
- if (LHSi->getValue()) {
+ if (LHSi->getValue())
return MHS;
- } else {
- return RHS;
- }
+ return RHS;
}
break;
}
@@ -1251,11 +1161,10 @@ Init *TernOpInit::resolveReferences(Record &R,
Init *mhs = MHS->resolveReferences(R, RV);
return (TernOpInit::get(getOpcode(), lhs, mhs,
RHS, getType()))->Fold(&R, nullptr);
- } else {
- Init *rhs = RHS->resolveReferences(R, RV);
- return (TernOpInit::get(getOpcode(), lhs, MHS,
- rhs, getType()))->Fold(&R, nullptr);
}
+ Init *rhs = RHS->resolveReferences(R, RV);
+ return (TernOpInit::get(getOpcode(), lhs, MHS,
+ rhs, getType()))->Fold(&R, nullptr);
}
}
@@ -1274,9 +1183,9 @@ std::string TernOpInit::getAsString() const {
case SUBST: Result = "!subst"; break;
case FOREACH: Result = "!foreach"; break;
case IF: Result = "!if"; break;
- }
- return Result + "(" + LHS->getAsString() + ", " + MHS->getAsString() + ", "
- + RHS->getAsString() + ")";
+ }
+ return Result + "(" + LHS->getAsString() + ", " + MHS->getAsString() + ", " +
+ RHS->getAsString() + ")";
}
RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
@@ -1326,18 +1235,17 @@ VarInit *VarInit::get(const std::string &VN, RecTy *T) {
VarInit *VarInit::get(Init *VN, RecTy *T) {
typedef std::pair<RecTy *, Init *> Key;
- static Pool<DenseMap<Key, VarInit *> > ThePool;
+ static DenseMap<Key, std::unique_ptr<VarInit>> ThePool;
Key TheKey(std::make_pair(T, VN));
- VarInit *&I = ThePool[TheKey];
- if (!I) I = new VarInit(VN, T);
- return I;
+ std::unique_ptr<VarInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new VarInit(VN, T));
+ return I.get();
}
const std::string &VarInit::getName() const {
- StringInit *NameString = dyn_cast<StringInit>(getNameInit());
- assert(NameString && "VarInit name is not a string!");
+ StringInit *NameString = cast<StringInit>(getNameInit());
return NameString->getValue();
}
@@ -1356,11 +1264,8 @@ Init *VarInit::resolveListElementReference(Record &R,
RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
ListInit *LI = dyn_cast<ListInit>(RV->getValue());
- if (!LI) {
- TypedInit *VI = dyn_cast<TypedInit>(RV->getValue());
- assert(VI && "Invalid list element!");
- return VarListElementInit::get(VI, Elt);
- }
+ if (!LI)
+ return VarListElementInit::get(cast<TypedInit>(RV->getValue()), Elt);
if (Elt >= LI->getSize())
return nullptr; // Out of range reference.
@@ -1391,8 +1296,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
assert(TheInit != this && "Infinite loop detected!");
if (Init *I = TheInit->getFieldInit(R, RV, FieldName))
return I;
- else
- return nullptr;
+ return nullptr;
}
return nullptr;
}
@@ -1411,19 +1315,17 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
typedef std::pair<TypedInit *, unsigned> Key;
- typedef DenseMap<Key, VarBitInit *> Pool;
-
- static Pool ThePool;
+ static DenseMap<Key, std::unique_ptr<VarBitInit>> ThePool;
Key TheKey(std::make_pair(T, B));
- VarBitInit *&I = ThePool[TheKey];
- if (!I) I = new VarBitInit(T, B);
- return I;
+ std::unique_ptr<VarBitInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new VarBitInit(T, B));
+ return I.get();
}
std::string VarBitInit::getAsString() const {
- return TI->getAsString() + "{" + utostr(Bit) + "}";
+ return TI->getAsString() + "{" + utostr(Bit) + "}";
}
Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
@@ -1437,15 +1339,13 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
VarListElementInit *VarListElementInit::get(TypedInit *T,
unsigned E) {
typedef std::pair<TypedInit *, unsigned> Key;
- typedef DenseMap<Key, VarListElementInit *> Pool;
-
- static Pool ThePool;
+ static DenseMap<Key, std::unique_ptr<VarListElementInit>> ThePool;
Key TheKey(std::make_pair(T, E));
- VarListElementInit *&I = ThePool[TheKey];
- if (!I) I = new VarListElementInit(T, E);
- return I;
+ std::unique_ptr<VarListElementInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new VarListElementInit(T, E));
+ return I.get();
}
std::string VarListElementInit::getAsString() const {
@@ -1469,17 +1369,15 @@ Init *VarListElementInit::getBit(unsigned Bit) const {
Init *VarListElementInit:: resolveListElementReference(Record &R,
const RecordVal *RV,
unsigned Elt) const {
- Init *Result = TI->resolveListElementReference(R, RV, Element);
-
- if (Result) {
+ if (Init *Result = TI->resolveListElementReference(R, RV, Element)) {
if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) {
Init *Result2 = TInit->resolveListElementReference(R, RV, Elt);
if (Result2) return Result2;
- return new VarListElementInit(TInit, Elt);
+ return VarListElementInit::get(TInit, Elt);
}
return Result;
}
-
+
return nullptr;
}
@@ -1505,14 +1403,13 @@ std::string DefInit::getAsString() const {
FieldInit *FieldInit::get(Init *R, const std::string &FN) {
typedef std::pair<Init *, TableGenStringKey> Key;
- typedef DenseMap<Key, FieldInit *> Pool;
- static Pool ThePool;
+ static DenseMap<Key, std::unique_ptr<FieldInit>> ThePool;
Key TheKey(std::make_pair(R, FN));
- FieldInit *&I = ThePool[TheKey];
- if (!I) I = new FieldInit(R, FN);
- return I;
+ std::unique_ptr<FieldInit> &I = ThePool[TheKey];
+ if (!I) I.reset(new FieldInit(R, FN));
+ return I.get();
}
Init *FieldInit::getBit(unsigned Bit) const {
@@ -1540,15 +1437,13 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *NewRec = RV ? Rec->resolveReferences(R, RV) : Rec;
- Init *BitsVal = NewRec->getFieldInit(R, RV, FieldName);
- if (BitsVal) {
+ if (Init *BitsVal = NewRec->getFieldInit(R, RV, FieldName)) {
Init *BVR = BitsVal->resolveReferences(R, RV);
return BVR->isComplete() ? BVR : const_cast<FieldInit *>(this);
}
- if (NewRec != Rec) {
+ if (NewRec != Rec)
return FieldInit::get(NewRec, FieldName);
- }
return const_cast<FieldInit *>(this);
}
@@ -1572,8 +1467,8 @@ DagInit *
DagInit::get(Init *V, const std::string &VN,
ArrayRef<Init *> ArgRange,
ArrayRef<std::string> NameRange) {
- typedef FoldingSet<DagInit> Pool;
- static Pool ThePool;
+ static FoldingSet<DagInit> ThePool;
+ static std::vector<std::unique_ptr<DagInit>> TheActualPool;
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
@@ -1584,24 +1479,19 @@ DagInit::get(Init *V, const std::string &VN,
DagInit *I = new DagInit(V, VN, ArgRange, NameRange);
ThePool.InsertNode(I, IP);
-
+ TheActualPool.push_back(std::unique_ptr<DagInit>(I));
return I;
}
DagInit *
DagInit::get(Init *V, const std::string &VN,
const std::vector<std::pair<Init*, std::string> > &args) {
- typedef std::pair<Init*, std::string> PairType;
-
std::vector<Init *> Args;
std::vector<std::string> Names;
- for (std::vector<PairType>::const_iterator i = args.begin(),
- iend = args.end();
- i != iend;
- ++i) {
- Args.push_back(i->first);
- Names.push_back(i->second);
+ for (const auto &Arg : args) {
+ Args.push_back(Arg.first);
+ Names.push_back(Arg.second);
}
return DagInit::get(V, VN, Args, Names);
@@ -1629,7 +1519,7 @@ std::string DagInit::getAsString() const {
std::string Result = "(" + Val->getAsString();
if (!ValName.empty())
Result += ":" + ValName;
- if (Args.size()) {
+ if (!Args.empty()) {
Result += " " + Args[0]->getAsString();
if (!ArgNames[0].empty()) Result += ":$" + ArgNames[0];
for (unsigned i = 1, e = Args.size(); i != e; ++i) {
@@ -1658,9 +1548,7 @@ RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P)
}
const std::string &RecordVal::getName() const {
- StringInit *NameString = dyn_cast<StringInit>(Name);
- assert(NameString && "RecordVal name is not a string!");
- return NameString->getValue();
+ return cast<StringInit>(Name)->getValue();
}
void RecordVal::dump() const { errs() << *this; }
@@ -1688,23 +1576,24 @@ void Record::init() {
void Record::checkName() {
// Ensure the record name has string type.
- const TypedInit *TypedName = dyn_cast<const TypedInit>(Name);
- assert(TypedName && "Record name is not typed!");
+ const TypedInit *TypedName = cast<const TypedInit>(Name);
RecTy *Type = TypedName->getType();
if (!isa<StringRecTy>(Type))
PrintFatalError(getLoc(), "Record name is not a string!");
}
DefInit *Record::getDefInit() {
- if (!TheInit)
- TheInit = new DefInit(this, new RecordRecTy(this));
- return TheInit;
+ static DenseMap<Record *, std::unique_ptr<DefInit>> ThePool;
+ if (TheInit)
+ return TheInit;
+
+ std::unique_ptr<DefInit> &I = ThePool[this];
+ if (!I) I.reset(new DefInit(this, new RecordRecTy(this)));
+ return I.get();
}
const std::string &Record::getName() const {
- const StringInit *NameString = dyn_cast<StringInit>(Name);
- assert(NameString && "Record name is not a string!");
- return NameString->getValue();
+ return cast<StringInit>(Name)->getValue();
}
void Record::setName(Init *NewName) {
@@ -1736,14 +1625,13 @@ void Record::resolveReferencesTo(const RecordVal *RV) {
continue;
if (Init *V = Values[i].getValue())
if (Values[i].setValue(V->resolveReferences(*this, RV)))
- PrintFatalError(getLoc(), "Invalid value is found when setting '"
- + Values[i].getNameInitAsString()
- + "' after resolving references"
- + (RV ? " against '" + RV->getNameInitAsString()
- + "' of ("
- + RV->getValue()->getAsUnquotedString() + ")"
- : "")
- + "\n");
+ PrintFatalError(getLoc(), "Invalid value is found when setting '" +
+ Values[i].getNameInitAsString() +
+ "' after resolving references" +
+ (RV ? " against '" + RV->getNameInitAsString() +
+ "' of (" + RV->getValue()->getAsUnquotedString() +
+ ")"
+ : "") + "\n");
}
Init *OldName = getNameInit();
Init *NewName = Name->resolveReferences(*this, RV);
@@ -1859,12 +1747,11 @@ Record::getValueAsListOfDefs(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<Record*> Defs;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) {
+ if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i)))
Defs.push_back(DI->getDef());
- } else {
+ else
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
FieldName + "' list is not entirely DefInit!");
- }
}
return Defs;
}
@@ -1894,12 +1781,11 @@ Record::getValueAsListOfInts(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<int64_t> Ints;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) {
+ if (IntInit *II = dyn_cast<IntInit>(List->getElement(i)))
Ints.push_back(II->getValue());
- } else {
+ else
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
FieldName + "' does not have a list of ints initializer!");
- }
}
return Ints;
}
@@ -1913,12 +1799,11 @@ Record::getValueAsListOfStrings(StringRef FieldName) const {
ListInit *List = getValueAsListInit(FieldName);
std::vector<std::string> Strings;
for (unsigned i = 0; i < List->getSize(); i++) {
- if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) {
+ if (StringInit *II = dyn_cast<StringInit>(List->getElement(i)))
Strings.push_back(II->getValue());
- } else {
+ else
PrintFatalError(getLoc(), "Record `" + getName() + "', field `" +
FieldName + "' does not have a list of strings initializer!");
- }
}
return Strings;
}
@@ -1961,7 +1846,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName.str() + "'!\n");
- if (R->getValue() == UnsetInit::get()) {
+ if (isa<UnsetInit>(R->getValue())) {
Unset = true;
return false;
}
@@ -1994,12 +1879,8 @@ void MultiClass::dump() const {
Rec.dump();
errs() << "Defs:\n";
- for (RecordVector::const_iterator r = DefPrototypes.begin(),
- rend = DefPrototypes.end();
- r != rend;
- ++r) {
- (*r)->dump();
- }
+ for (const auto &Proto : DefPrototypes)
+ Proto->dump();
}
@@ -2007,13 +1888,11 @@ void RecordKeeper::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
OS << "------------- Classes -----------------\n";
- const auto &Classes = RK.getClasses();
- for (const auto &C : Classes)
+ for (const auto &C : RK.getClasses())
OS << "class " << *C.second;
OS << "------------- Defs -----------------\n";
- const auto &Defs = RK.getDefs();
- for (const auto &D : Defs)
+ for (const auto &D : RK.getDefs())
OS << "def " << *D.second;
return OS;
}
@@ -2040,7 +1919,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
/// to CurRec's name.
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
Init *Name, const std::string &Scoper) {
- RecTy *Type = dyn_cast<TypedInit>(Name)->getType();
+ RecTy *Type = cast<TypedInit>(Name)->getType();
BinOpInit *NewName =
BinOpInit::get(BinOpInit::STRCONCAT,
diff --git a/contrib/llvm/lib/TableGen/SetTheory.cpp b/contrib/llvm/lib/TableGen/SetTheory.cpp
index c99c2ba..92f5b2d 100644
--- a/contrib/llvm/lib/TableGen/SetTheory.cpp
+++ b/contrib/llvm/lib/TableGen/SetTheory.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/SetTheory.h"
#include "llvm/Support/Format.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/SetTheory.h"
using namespace llvm;
@@ -245,28 +245,28 @@ void SetTheory::Expander::anchor() {}
SetTheory::SetTheory() {
- addOperator("add", new AddOp);
- addOperator("sub", new SubOp);
- addOperator("and", new AndOp);
- addOperator("shl", new ShlOp);
- addOperator("trunc", new TruncOp);
- addOperator("rotl", new RotOp(false));
- addOperator("rotr", new RotOp(true));
- addOperator("decimate", new DecimateOp);
- addOperator("interleave", new InterleaveOp);
- addOperator("sequence", new SequenceOp);
+ addOperator("add", llvm::make_unique<AddOp>());
+ addOperator("sub", llvm::make_unique<SubOp>());
+ addOperator("and", llvm::make_unique<AndOp>());
+ addOperator("shl", llvm::make_unique<ShlOp>());
+ addOperator("trunc", llvm::make_unique<TruncOp>());
+ addOperator("rotl", llvm::make_unique<RotOp>(false));
+ addOperator("rotr", llvm::make_unique<RotOp>(true));
+ addOperator("decimate", llvm::make_unique<DecimateOp>());
+ addOperator("interleave", llvm::make_unique<InterleaveOp>());
+ addOperator("sequence", llvm::make_unique<SequenceOp>());
}
-void SetTheory::addOperator(StringRef Name, Operator *Op) {
- Operators[Name] = Op;
+void SetTheory::addOperator(StringRef Name, std::unique_ptr<Operator> Op) {
+ Operators[Name] = std::move(Op);
}
-void SetTheory::addExpander(StringRef ClassName, Expander *E) {
- Expanders[ClassName] = E;
+void SetTheory::addExpander(StringRef ClassName, std::unique_ptr<Expander> E) {
+ Expanders[ClassName] = std::move(E);
}
void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) {
- addExpander(ClassName, new FieldExpander(FieldName));
+ addExpander(ClassName, llvm::make_unique<FieldExpander>(FieldName));
}
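
A condensed sketch of the ownership handoff these hunks introduce, with hypothetical names; a StringMap of unique_ptr means registering an operator transfers the allocation into the map, so the operators created in the constructor are no longer leaked:

    #include "llvm/ADT/STLExtras.h" // llvm::make_unique, LLVM's pre-C++14 shim
    #include "llvm/ADT/StringMap.h"
    #include <memory>
    #include <utility>

    namespace {
    struct Operator {
      virtual ~Operator() {}
      virtual void apply() = 0;
    };
    struct AddOp : Operator {
      void apply() override {}
    };
    } // namespace

    static llvm::StringMap<std::unique_ptr<Operator>> Operators;

    static void addOperator(llvm::StringRef Name,
                            std::unique_ptr<Operator> Op) {
      Operators[Name] = std::move(Op); // the map now owns the operator
    }

    static void registerAll() {
      addOperator("add", llvm::make_unique<AddOp>());
    }

The lookup side has to change with it: StringMap::lookup returns the mapped value by copy, which unique_ptr forbids, hence the switch to find() plus iterator in evaluate() and expand() below.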
void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
@@ -289,10 +289,10 @@ void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
if (!OpInit)
PrintFatalError(Loc, "Bad set expression: " + Expr->getAsString());
- Operator *Op = Operators.lookup(OpInit->getDef()->getName());
- if (!Op)
+ auto I = Operators.find(OpInit->getDef()->getName());
+ if (I == Operators.end())
PrintFatalError(Loc, "Unknown set operator: " + Expr->getAsString());
- Op->apply(*this, DagExpr, Elts, Loc);
+ I->second->apply(*this, DagExpr, Elts, Loc);
}
const RecVec *SetTheory::expand(Record *Set) {
@@ -307,11 +307,12 @@ const RecVec *SetTheory::expand(Record *Set) {
// Skip unnamed superclasses.
if (!dyn_cast<StringInit>(SC[i]->getNameInit()))
continue;
- if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
+ auto I = Expanders.find(SC[i]->getName());
+ if (I != Expanders.end()) {
// This breaks recursive definitions.
RecVec &EltVec = Expansions[Set];
RecSet Elts;
- Exp->expand(*this, Set, Elts);
+ I->second->expand(*this, Set, Elts);
EltVec.assign(Elts.begin(), Elts.end());
return &EltVec;
}
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
index 1f750fc..cbc30be 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -87,8 +87,7 @@ private:
public:
TGLexer(SourceMgr &SrcMgr);
- ~TGLexer() {}
-
+
tgtok::TokKind Lex() {
return CurCode = LexToken();
}
diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp
index a438cb6..0e654f9 100644
--- a/contrib/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm/lib/TableGen/TGParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
@@ -50,12 +51,8 @@ void SubMultiClassReference::dump() const {
MC->dump();
errs() << "Template args:\n";
- for (std::vector<Init *>::const_iterator i = TemplateArgs.begin(),
- iend = TemplateArgs.end();
- i != iend;
- ++i) {
- (*i)->dump();
- }
+ for (Init *TA : TemplateArgs)
+ TA->dump();
}
} // end namespace llvm
@@ -87,8 +84,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
RecordVal *RV = CurRec->getValue(ValName);
if (!RV)
- return Error(Loc, "Value '" + ValName->getAsUnquotedString()
- + "' unknown!");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() +
+ "' unknown!");
// Do not allow assignments like 'X = X'. This will just cause infinite loops
// in the resolution machinery.
@@ -104,18 +101,16 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
if (!BitList.empty()) {
BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue());
if (!CurVal)
- return Error(Loc, "Value '" + ValName->getAsUnquotedString()
- + "' is not a bits type");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() +
+ "' is not a bits type");
// Convert the incoming value to a bits type of the appropriate size...
Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
- if (!BI) {
+ if (!BI)
return Error(Loc, "Initializer is not compatible with bit range");
- }
// We should have a BitsInit type now.
- BitsInit *BInit = dyn_cast<BitsInit>(BI);
- assert(BInit != nullptr);
+ BitsInit *BInit = cast<BitsInit>(BI);
SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
@@ -137,15 +132,13 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
if (RV->setValue(V)) {
std::string InitType = "";
- if (BitsInit *BI = dyn_cast<BitsInit>(V)) {
+ if (BitsInit *BI = dyn_cast<BitsInit>(V))
InitType = (Twine("' of type bit initializer with length ") +
Twine(BI->getNumBits())).str();
- }
- return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' of type '"
- + RV->getType()->getAsString() +
- "' is incompatible with initializer '" + V->getAsString()
- + InitType
- + "'");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() +
+ "' of type '" + RV->getType()->getAsString() +
+ "' is incompatible with initializer '" + V->getAsString() +
+ InitType + "'");
}
return false;
}
@@ -184,9 +177,9 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClass.RefRange.Start,
- "Value not specified for template argument #"
- + utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
- + ") of subclass '" + SC->getNameInitAsString() + "'!");
+ "Value not specified for template argument #" +
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString() +
+ ") of subclass '" + SC->getNameInitAsString() + "'!");
}
}
@@ -216,27 +209,21 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
MultiClass *SMC = SubMultiClass.MC;
Record *CurRec = &CurMC->Rec;
- const std::vector<RecordVal> &MCVals = CurRec->getValues();
-
// Add all of the values in the subclass into the current class.
- const std::vector<RecordVal> &SMCVals = SMC->Rec.getValues();
- for (unsigned i = 0, e = SMCVals.size(); i != e; ++i)
- if (AddValue(CurRec, SubMultiClass.RefRange.Start, SMCVals[i]))
+ for (const auto &SMCVal : SMC->Rec.getValues())
+ if (AddValue(CurRec, SubMultiClass.RefRange.Start, SMCVal))
return true;
unsigned newDefStart = CurMC->DefPrototypes.size();
// Add all of the defs in the subclass into the current multiclass.
- for (MultiClass::RecordVector::const_iterator i = SMC->DefPrototypes.begin(),
- iend = SMC->DefPrototypes.end();
- i != iend;
- ++i) {
+ for (const std::unique_ptr<Record> &R : SMC->DefPrototypes) {
// Clone the def and add it to the current multiclass
- auto NewDef = make_unique<Record>(**i);
+ auto NewDef = make_unique<Record>(*R);
// Add all of the values in the superclass into the current def.
- for (unsigned i = 0, e = MCVals.size(); i != e; ++i)
- if (AddValue(NewDef.get(), SubMultiClass.RefRange.Start, MCVals[i]))
+ for (const auto &MCVal : CurRec->getValues())
+ if (AddValue(NewDef.get(), SubMultiClass.RefRange.Start, MCVal))
return true;
CurMC->DefPrototypes.push_back(std::move(NewDef));
@@ -284,9 +271,9 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
}
} else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
return Error(SubMultiClass.RefRange.Start,
- "Value not specified for template argument #"
- + utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString()
- + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
+ "Value not specified for template argument #" +
+ utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString() +
+ ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
}
}
@@ -385,8 +372,7 @@ static bool isObjectStart(tgtok::TokKind K) {
/// GetNewAnonymousName - Generate a unique anonymous name that can be used as
/// an identifier.
std::string TGParser::GetNewAnonymousName() {
- unsigned Tmp = AnonCounter++; // MSVC2012 ICEs without this.
- return "anonymous_" + utostr(Tmp);
+ return "anonymous_" + utostr(AnonCounter++);
}
/// ParseObjectName - If an object name is specified, return it. Otherwise,
@@ -591,13 +577,12 @@ bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) {
Lex.Lex();
// Add to the range.
- if (Start < End) {
+ if (Start < End)
for (; Start <= End; ++Start)
Ranges.push_back(Start);
- } else {
+ else
for (; Start >= End; --Start)
Ranges.push_back(Start);
- }
return false;
}
@@ -758,10 +743,8 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
// If this is in a foreach loop, make sure it's not a loop iterator
- for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
- i != iend;
- ++i) {
- VarInit *IterVar = dyn_cast<VarInit>(i->IterVar);
+ for (const auto &L : Loops) {
+ VarInit *IterVar = dyn_cast<VarInit>(L.IterVar);
if (IterVar && IterVar->getName() == Name)
return IterVar;
}
@@ -776,7 +759,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
Error(NameLoc, "Variable not defined: '" + Name + "'");
return nullptr;
}
-
+
return StringInit::get(Name);
}
@@ -833,9 +816,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Init *LHS = ParseValue(CurRec);
if (!LHS) return nullptr;
- if (Code == UnOpInit::HEAD
- || Code == UnOpInit::TAIL
- || Code == UnOpInit::EMPTY) {
+ if (Code == UnOpInit::HEAD ||
+ Code == UnOpInit::TAIL ||
+ Code == UnOpInit::EMPTY) {
ListInit *LHSl = dyn_cast<ListInit>(LHS);
StringInit *LHSs = dyn_cast<StringInit>(LHS);
TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
@@ -852,14 +835,13 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
}
- if (Code == UnOpInit::HEAD
- || Code == UnOpInit::TAIL) {
+ if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL) {
if (!LHSl && !LHSt) {
TokError("expected list type argument in unary operator");
return nullptr;
}
- if (LHSl && LHSl->getSize() == 0) {
+ if (LHSl && LHSl->empty()) {
TokError("empty list argument in unary operator");
return nullptr;
}
@@ -870,11 +852,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
TokError("untyped list element in unary operator");
return nullptr;
}
- if (Code == UnOpInit::HEAD) {
- Type = Itemt->getType();
- } else {
- Type = ListRecTy::get(Itemt->getType());
- }
+ Type = (Code == UnOpInit::HEAD) ? Itemt->getType()
+ : ListRecTy::get(Itemt->getType());
} else {
assert(LHSt && "expected list type argument in unary operator");
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
@@ -882,11 +861,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
TokError("expected list type argument in unary operator");
return nullptr;
}
- if (Code == UnOpInit::HEAD) {
- Type = LType->getElementType();
- } else {
- Type = LType;
- }
+ Type = (Code == UnOpInit::HEAD) ? LType->getElementType() : LType;
}
}
}
@@ -1256,11 +1231,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
CurMultiClass->DefPrototypes.push_back(std::move(NewRecOwner));
// Copy the template arguments for the multiclass into the def.
- const std::vector<Init *> &TArgs =
- CurMultiClass->Rec.getTemplateArgs();
-
- for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
- const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]);
+ for (Init *TArg : CurMultiClass->Rec.getTemplateArgs()) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TArg);
assert(RV && "Template arg doesn't exist?");
NewRec->addValue(*RV);
}
@@ -1384,10 +1356,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
// Check elements
RecTy *EltTy = nullptr;
- for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
- i != ie;
- ++i) {
- TypedInit *TArg = dyn_cast<TypedInit>(*i);
+ for (Init *V : Vals) {
+ TypedInit *TArg = dyn_cast<TypedInit>(V);
if (!TArg) {
TokError("Untyped list element");
return nullptr;
@@ -1578,7 +1548,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
Error(PasteLoc, "LHS of paste is not typed!");
return nullptr;
}
-
+
if (LHS->getType() != StringRecTy::get()) {
LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get());
}
@@ -1593,7 +1563,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
// These are all of the tokens that can begin an object body.
// Some of these can also begin values but we disallow those cases
// because they are unlikely to be useful.
-
+
// Trailing paste, concat with an empty string.
RHS = StringInit::get("");
break;
@@ -1609,7 +1579,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
if (RHS->getType() != StringRecTy::get()) {
RHS = UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get());
}
-
+
break;
}
@@ -1676,7 +1646,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
unsigned int ArgN = 0;
if (ArgsRec && !EltTy) {
const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
- if (!TArgs.size()) {
+ if (TArgs.empty()) {
TokError("template argument provided to non-template class");
return std::vector<Init*>();
}
@@ -1743,11 +1713,10 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
Lex.Lex();
if (ParsingTemplateArgs) {
- if (CurRec) {
+ if (CurRec)
DeclName = QualifyName(*CurRec, CurMultiClass, DeclName, ":");
- } else {
+ else
assert(CurMultiClass);
- }
if (CurMultiClass)
DeclName = QualifyName(CurMultiClass->Rec, CurMultiClass, DeclName,
"::");
@@ -2053,9 +2022,8 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
return true;
// Otherwise, a def inside a multiclass, add it to the multiclass.
- for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getNameInit()
- == CurRec->getNameInit())
+ for (const auto &Proto : CurMultiClass->DefPrototypes)
+ if (Proto->getNameInit() == CurRec->getNameInit())
return Error(DefLoc, "def '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
CurMultiClass->DefPrototypes.push_back(std::move(CurRecOwner));
@@ -2074,20 +2042,16 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
if (CurMultiClass) {
// Copy the template arguments for the multiclass into the def.
- const std::vector<Init *> &TArgs =
- CurMultiClass->Rec.getTemplateArgs();
-
- for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
- const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]);
+ for (Init *TArg : CurMultiClass->Rec.getTemplateArgs()) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TArg);
assert(RV && "Template arg doesn't exist?");
CurRec->addValue(*RV);
}
}
- if (ProcessForeachDefs(CurRec, DefLoc)) {
+ if (ProcessForeachDefs(CurRec, DefLoc))
return Error(DefLoc, "Could not process loops for def" +
CurRec->getNameInitAsString());
- }
return false;
}
@@ -2120,8 +2084,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
// FOREACH Declaration IN Object
if (ParseObject(CurMultiClass))
return true;
- }
- else {
+ } else {
SMLoc BraceLoc = Lex.getLoc();
// Otherwise, this is a group foreach.
Lex.Lex(); // eat the '{'.
@@ -2160,8 +2123,8 @@ bool TGParser::ParseClass() {
if (CurRec->getValues().size() > 1 || // Account for NAME.
!CurRec->getSuperClasses().empty() ||
!CurRec->getTemplateArgs().empty())
- return TokError("Class '" + CurRec->getNameInitAsString()
- + "' already defined");
+ return TokError("Class '" + CurRec->getNameInitAsString() +
+ "' already defined");
} else {
// If this is the first reference to this class, create and add it.
auto NewRec =
@@ -2359,7 +2322,9 @@ Record *TGParser::
InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
Init *&DefmPrefix,
- SMRange DefmPrefixRange) {
+ SMRange DefmPrefixRange,
+ const std::vector<Init *> &TArgs,
+ std::vector<Init *> &TemplateVals) {
// We need to preserve DefProto so it can be reused for later
// instantiations, so create a new Record to inherit from it.
@@ -2375,7 +2340,6 @@ InstantiateMulticlassDef(MultiClass &MC,
}
Init *DefName = DefProto->getNameInit();
-
StringInit *DefNameString = dyn_cast<StringInit>(DefName);
if (DefNameString) {
@@ -2403,9 +2367,9 @@ InstantiateMulticlassDef(MultiClass &MC,
// confused.
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME",
std::vector<unsigned>(), DefmPrefix)) {
- Error(DefmPrefixRange.Start, "Could not resolve "
- + CurRec->getNameInitAsString() + ":NAME to '"
- + DefmPrefix->getAsUnquotedString() + "'");
+ Error(DefmPrefixRange.Start, "Could not resolve " +
+ CurRec->getNameInitAsString() + ":NAME to '" +
+ DefmPrefix->getAsUnquotedString() + "'");
return nullptr;
}
@@ -2423,12 +2387,40 @@ InstantiateMulticlassDef(MultiClass &MC,
RecordVal *DefNameRV = CurRec->getValue("NAME");
CurRec->resolveReferencesTo(DefNameRV);
+ // Check if the name is a complex pattern.
+ // If so, resolve it.
+ DefName = CurRec->getNameInit();
+ DefNameString = dyn_cast<StringInit>(DefName);
+
+ // OK, the pattern is more complex than simply using NAME.
+ // Let's use the heavy weaponry.
+ if (!DefNameString) {
+ ResolveMulticlassDefArgs(MC, CurRec.get(), DefmPrefixRange.Start,
+ Lex.getLoc(), TArgs, TemplateVals,
+ false/*Delete args*/);
+ DefName = CurRec->getNameInit();
+ DefNameString = dyn_cast<StringInit>(DefName);
+
+ if (!DefNameString)
+ DefName = DefName->convertInitializerTo(StringRecTy::get());
+
+ // We ran out of options here...
+ DefNameString = dyn_cast<StringInit>(DefName);
+ if (!DefNameString) {
+ PrintFatalError(CurRec->getLoc()[CurRec->getLoc().size() - 1],
+ DefName->getAsUnquotedString() + " is not a string.");
+ return nullptr;
+ }
+
+ CurRec->setName(DefName);
+ }
+
// Now that NAME references are resolved and we're at the top level of
// any multiclass expansions, add the record to the RecordKeeper. If we are
// currently in a multiclass, it means this defm appears inside a
// multiclass and its name won't be fully resolvable until we see
- // the top-level defm. Therefore, we don't add this to the
- // RecordKeeper at this point. If we did we could get duplicate
+ // the top-level defm. Therefore, we don't add this to the
+ // RecordKeeper at this point. If we did we could get duplicate
// defs as more than one probably refers to NAME or some other
// common internal placeholder.
@@ -2466,19 +2458,19 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
TemplateVals[i]))
return true;
-
+
// Resolve it next.
CurRec->resolveReferencesTo(CurRec->getValue(TArgs[i]));
if (DeleteArgs)
// Now remove it.
CurRec->removeValue(TArgs[i]);
-
+
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
- return Error(SubClassLoc, "value not specified for template argument #"+
- utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
- + ") of multiclassclass '" + MC.Rec.getNameInitAsString()
- + "'");
+ return Error(SubClassLoc, "value not specified for template argument #" +
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString() +
+ ") of multiclassclass '" + MC.Rec.getNameInitAsString() +
+ "'");
}
}
return false;
@@ -2497,20 +2489,15 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
// with the new created definition.
if (!CurMultiClass)
return false;
- for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
- i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getNameInit()
- == CurRec->getNameInit())
+ for (const auto &Proto : CurMultiClass->DefPrototypes)
+ if (Proto->getNameInit() == CurRec->getNameInit())
return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
CurMultiClass->DefPrototypes.push_back(std::unique_ptr<Record>(CurRec));
// Copy the template arguments for the multiclass into the new def.
- const std::vector<Init *> &TA =
- CurMultiClass->Rec.getTemplateArgs();
-
- for (unsigned i = 0, e = TA.size(); i != e; ++i) {
- const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]);
+ for (Init *TA : CurMultiClass->Rec.getTemplateArgs()) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TA);
assert(RV && "Template arg doesn't exist?");
CurRec->addValue(*RV);
}
@@ -2564,20 +2551,25 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
"more template args specified than multiclass expects");
// Loop over all the def's in the multiclass, instantiating each one.
- for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) {
- Record *DefProto = MC->DefPrototypes[i].get();
-
- Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix,
+ for (const std::unique_ptr<Record> &DefProto : MC->DefPrototypes) {
+ // The record name construction goes as follows:
+ // - If the def name is a string, prepend the prefix.
+ // - If the def name is a more complex pattern, use that pattern.
+ // As a result, the record is instantiated before resolving
+ // arguments, as resolving them first would make its name a string.
+ Record *CurRec = InstantiateMulticlassDef(*MC, DefProto.get(), DefmPrefix,
SMRange(DefmLoc,
- DefmPrefixEndLoc));
+ DefmPrefixEndLoc),
+ TArgs, TemplateVals);
if (!CurRec)
return true;
+ // Now that the record is instantiated, we can resolve arguments.
if (ResolveMulticlassDefArgs(*MC, CurRec, DefmLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
return Error(SubClassLoc, "could not instantiate def");
- if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmLoc))
+ if (ResolveMulticlassDef(*MC, CurRec, DefProto.get(), DefmLoc))
return Error(SubClassLoc, "could not instantiate def");
// Defs that can be used by other definitions should be fully resolved
@@ -2618,9 +2610,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
// Get the expanded definition prototypes and teach them about
// the record values the current class to inherit has
- for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i) {
- Record *CurRec = NewRecDefs[i];
-
+ for (Record *CurRec : NewRecDefs) {
// Add it.
if (AddSubClass(CurRec, SubClass))
return true;
@@ -2636,11 +2626,11 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
}
if (!CurMultiClass)
- for (unsigned i = 0, e = NewRecDefs.size(); i != e; ++i)
+ for (Record *CurRec : NewRecDefs)
// See Record::setName(). This resolve step will see any new
// name for the def that might have been created when resolving
// inheritance, values and arguments above.
- NewRecDefs[i]->resolveReferences();
+ CurRec->resolveReferences();
if (Lex.getCode() != tgtok::semi)
return TokError("expected ';' at end of defm");
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index 22a00e5..d69d1f4 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -138,7 +138,9 @@ private: // Parser methods.
Record *InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
Init *&DefmPrefix,
- SMRange DefmPrefixRange);
+ SMRange DefmPrefixRange,
+ const std::vector<Init *> &TArgs,
+ std::vector<Init *> &TemplateVals);
bool ResolveMulticlassDefArgs(MultiClass &MC,
Record *DefProto,
SMLoc DefmPrefixLoc,
diff --git a/contrib/llvm/lib/TableGen/TableGenBackend.cpp b/contrib/llvm/lib/TableGen/TableGenBackend.cpp
index 79d5677..77ed841 100644
--- a/contrib/llvm/lib/TableGen/TableGenBackend.cpp
+++ b/contrib/llvm/lib/TableGen/TableGenBackend.cpp
@@ -11,10 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
using namespace llvm;
@@ -23,32 +22,30 @@ const size_t MAX_LINE_LEN = 80U;
static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
StringRef Suffix) {
size_t Pos = (size_t)OS.tell();
- assert((MAX_LINE_LEN - Prefix.str().size() - Suffix.size() > 0) &&
- "header line exceeds max limit");
+ assert((Prefix.str().size() + Suffix.size() <= MAX_LINE_LEN) &&
+ "header line exceeds max limit");
OS << Prefix;
- const size_t e = MAX_LINE_LEN - Suffix.size();
- for (size_t i = (size_t)OS.tell() - Pos; i < e; ++i)
+ for (size_t i = (size_t)OS.tell() - Pos, e = MAX_LINE_LEN - Suffix.size();
+ i < e; ++i)
OS << Fill;
OS << Suffix << '\n';
}
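
The assert rewrite above dodges an unsigned-arithmetic trap worth spelling out: with size_t operands, A - B - C wraps around whenever B + C exceeds A, so the old form evaluated to a huge positive number exactly when it should have fired. A minimal sketch, with a hypothetical checkFits standing in for the inline assert:

    #include <cassert>
    #include <cstddef>

    static void checkFits(size_t Max, size_t PrefixLen, size_t SuffixLen) {
      // Buggy form -- wrap-around makes this vacuously true on overflow:
      //   assert(Max - PrefixLen - SuffixLen > 0);
      // Safe form -- keep the arithmetic on the non-subtracting side:
      assert(PrefixLen + SuffixLen <= Max && "header line exceeds max limit");
      (void)Max; (void)PrefixLen; (void)SuffixLen; // quiet NDEBUG builds
    }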
void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\");
- printLine(OS, "|*", ' ', "*|");
- size_t Pos = 0U;
- size_t PosE;
- StringRef Prefix("|*");
+ StringRef Prefix("|* ");
StringRef Suffix(" *|");
- do{
- size_t PSLen = Suffix.size() + Prefix.size();
- PosE = Pos + ((MAX_LINE_LEN > (Desc.size() - PSLen)) ?
- Desc.size() :
- MAX_LINE_LEN - PSLen);
- printLine(OS, Prefix + Desc.slice(Pos, PosE), ' ', Suffix);
- Pos = PosE;
- } while(Pos < Desc.size());
printLine(OS, Prefix, ' ', Suffix);
- printLine(OS, Prefix + " Automatically generated file, do not edit!", ' ',
+ size_t PSLen = Prefix.size() + Suffix.size();
+ assert(PSLen < MAX_LINE_LEN);
+ size_t Pos = 0U;
+ do {
+ size_t Length = std::min(Desc.size() - Pos, MAX_LINE_LEN - PSLen);
+ printLine(OS, Prefix + Desc.substr(Pos, Length), ' ', Suffix);
+ Pos += Length;
+ } while (Pos < Desc.size());
+ printLine(OS, Prefix, ' ', Suffix);
+ printLine(OS, Prefix + "Automatically generated file, do not edit!", ' ',
Suffix);
printLine(OS, Prefix, ' ', Suffix);
printLine(OS, "\\*===", '-', "===*/");
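
The new wrapping loop in emitSourceFileHeader is also easier to reason about standalone. A sketch with a hypothetical wrapDesc and a fixed 80-column budget (3-character banner prefix and suffix): each pass emits at most MaxLine - PSLen characters and advances by exactly what it printed, so a 100-character description becomes a 74-character line followed by a 26-character one:

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <vector>

    static std::vector<std::string> wrapDesc(const std::string &Desc,
                                             size_t MaxLine = 80,
                                             size_t PSLen = 6) {
      std::vector<std::string> Lines;
      size_t Pos = 0;
      do {
        size_t Length = std::min(Desc.size() - Pos, MaxLine - PSLen);
        Lines.push_back(Desc.substr(Pos, Length));
        Pos += Length;
      } while (Pos < Desc.size());
      // Note: an empty Desc still yields one blank banner line, matching
      // the do/while in the real function.
      return Lines;
    }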
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index e96d18b..21106c9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -40,9 +40,6 @@ FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *
-createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index e6a27c3..9a7d6c8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -41,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
//===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions", [FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -91,6 +98,8 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
@@ -114,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant {
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
+ int PassSubtarget = 1;
int Variant = 1;
int isMCAsmWriter = 1;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 852a635..d7ef3f4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -16,8 +16,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,6 +24,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -48,7 +48,7 @@ static bool isFirstInstructionInSequence(MachineInstr *MI) {
case AArch64::PRFUMi:
return true;
default:
- return (MI->mayLoad() || MI->mayStore());
+ return MI->mayLoadOrStore();
}
}
@@ -79,7 +79,7 @@ static bool isSecondInstructionInSequence(MachineInstr *MI) {
namespace {
class AArch64A53Fix835769 : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
public:
static char ID;
@@ -107,17 +107,13 @@ char AArch64A53Fix835769::ID = 0;
bool
AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
- const TargetMachine &TM = F.getTarget();
-
- bool Changed = false;
DEBUG(dbgs() << "***** AArch64A53Fix835769 *****\n");
-
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ bool Changed = false;
+ TII = F.getSubtarget().getInstrInfo();
for (auto &MBB : F) {
Changed |= runOnBasicBlock(MBB);
}
-
return Changed;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index dd1a1ea..bffd9e6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -96,6 +96,10 @@ static bool isMla(MachineInstr *MI) {
}
}
+namespace llvm {
+static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
+}
+
//===----------------------------------------------------------------------===//
namespace {
@@ -109,14 +113,15 @@ static const char *ColorNames[2] = { "Even", "Odd" };
class Chain;
class AArch64A57FPLoadBalancing : public MachineFunctionPass {
- const AArch64InstrInfo *TII;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
RegisterClassInfo RCI;
public:
static char ID;
- explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {}
+ explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
+ initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
@@ -137,14 +142,22 @@ private:
int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
void scanInstruction(MachineInstr *MI, unsigned Idx,
std::map<unsigned, Chain*> &Active,
- std::set<std::unique_ptr<Chain>> &AllChains);
+ std::vector<std::unique_ptr<Chain>> &AllChains);
void maybeKillChain(MachineOperand &MO, unsigned Idx,
std::map<unsigned, Chain*> &RegChains);
Color getColor(unsigned Register);
Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
};
+}
+
char AArch64A57FPLoadBalancing::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
+ "AArch64 A57 FP Load-Balancing", false, false)
+
+namespace {
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
@@ -259,7 +272,7 @@ public:
}
/// Return true if this chain starts before Other.
- bool startsBefore(Chain *Other) {
+ bool startsBefore(const Chain *Other) const {
return StartInstIdx < Other->StartInstIdx;
}
@@ -274,12 +287,12 @@ public:
raw_string_ostream OS(S);
OS << "{";
- StartInst->print(OS, NULL, true);
+ StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
- LastInst->print(OS, NULL, true);
+ LastInst->print(OS, /* SkipOpers= */true);
if (KillInst) {
OS << " (kill @ ";
- KillInst->print(OS, NULL, true);
+ KillInst->print(OS, /* SkipOpers= */true);
OS << ")";
}
OS << "}";
@@ -294,13 +307,16 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ // Don't do anything if this isn't an A53 or A57.
+ if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ return false;
+
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
- const TargetMachine &TM = F.getTarget();
MRI = &F.getRegInfo();
TRI = F.getRegInfo().getTargetRegisterInfo();
- TII = TM.getSubtarget<AArch64Subtarget>().getInstrInfo();
RCI.runOnMachineFunction(F);
for (auto &MBB : F) {
@@ -320,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// been killed yet. This is keyed by register - all chains can only have one
// "link" register between each inst in the chain.
std::map<unsigned, Chain*> ActiveChains;
- std::set<std::unique_ptr<Chain>> AllChains;
+ std::vector<std::unique_ptr<Chain>> AllChains;
unsigned Idx = 0;
for (auto &MI : MBB)
scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -431,10 +447,17 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// chains that we cannot change before we look at those we can,
// so the parity counter is updated and we know what color we should
// change them to!
+ // Final tie-break with instruction order so pass output is stable (i.e. not
+ // dependent on malloc'd pointer values).
std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
if (G1->size() != G2->size())
return G1->size() > G2->size();
- return G1->requiresFixup() > G2->requiresFixup();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
});
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
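The tie-break added here matters because the pass previously kept chains in a
std::set<std::unique_ptr<Chain>>, which orders elements by the owned pointer
value, so iteration order (and therefore the sort's input) varied with the
allocator. A minimal sketch of the resulting comparator shape, using a
hypothetical Item struct in place of Chain:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for Chain: just the keys the comparator needs.
    struct Item {
      unsigned Size;      // primary key: bigger chains first
      bool RequiresFixup; // secondary key: fixup-required chains first
      unsigned StartIdx;  // final key: instruction order, a total order
    };

    int main() {
      std::vector<Item> V = {{4, false, 7}, {4, false, 2}, {4, true, 5}};
      std::sort(V.begin(), V.end(), [](const Item &A, const Item &B) {
        if (A.Size != B.Size)
          return A.Size > B.Size;
        if (A.RequiresFixup != B.RequiresFixup)
          return A.RequiresFixup > B.RequiresFixup;
        // Without this, the two {4, false, ...} items compare equal and
        // their relative order is unspecified.
        return A.StartIdx < B.StartIdx;
      });
      for (const Item &I : V)
        std::printf("size=%u fixup=%d start=%u\n", I.Size,
                    (int)I.RequiresFixup, I.StartIdx);
    }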
@@ -580,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
return Changed;
}
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
- std::map<unsigned, Chain*> &ActiveChains,
- std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+ MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+ std::vector<std::unique_ptr<Chain>> &AllChains) {
// Inspect "MI", updating ActiveChains and AllChains.
if (isMul(MI)) {
@@ -602,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else if (isMla(MI)) {
@@ -646,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
<< TRI->getName(DestReg) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 5afe0f4..18d21fd 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -64,7 +64,7 @@ STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
- const AArch64InstrInfo *TII;
+ const TargetInstrInfo *TII;
private:
// isProfitableToTransform - Predicate function to determine whether an
@@ -158,7 +158,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
// that we're considering transforming to, return that AdvSIMD opcode. For all
// others, return the original opcode.
-static int getTransformOpcode(unsigned Opc) {
+static unsigned getTransformOpcode(unsigned Opc) {
switch (Opc) {
default:
break;
@@ -179,7 +179,7 @@ static int getTransformOpcode(unsigned Opc) {
}
static bool isTransformable(const MachineInstr *MI) {
- int Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
return Opc != getTransformOpcode(Opc);
}
@@ -268,7 +268,7 @@ AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
return TransformAll;
}
-static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
+static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr *MI,
unsigned Dst, unsigned Src, bool IsKill) {
MachineInstrBuilder MIB =
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
@@ -286,8 +286,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
DEBUG(dbgs() << "Scalar transform: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
- int OldOpc = MI->getOpcode();
- int NewOpc = getTransformOpcode(OldOpc);
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = getTransformOpcode(OldOpc);
assert(OldOpc != NewOpc && "transform an instruction to itself?!");
// Check if we need a copy for the source registers.
@@ -376,10 +376,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
- const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ TII = mf.getSubtarget().getInstrInfo();
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5159dbf..a0a09e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -36,8 +36,10 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -45,19 +47,13 @@ using namespace llvm;
namespace {
class AArch64AsmPrinter : public AsmPrinter {
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const AArch64Subtarget *Subtarget;
-
AArch64MCInstLower MCInstLowering;
StackMaps SM;
public:
- AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
- MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
- LOHLabelCounter(0) {}
+ AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
+ SM(*this), AArch64FI(nullptr) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -118,7 +114,6 @@ private:
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
};
} // end of anonymous namespace
@@ -126,38 +121,16 @@ private:
//===----------------------------------------------------------------------===//
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-
}
MachineLocation
@@ -183,7 +156,7 @@ void AArch64AsmPrinter::EmitLOHs() {
"Label hasn't been inserted for LOH related instruction");
MCArgs.push_back(LabelIt->second);
}
- OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
+ OutStreamer->EmitLOHDirective(D.getKind(), MCArgs);
MCArgs.clear();
}
}
@@ -199,11 +172,11 @@ MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
// avoid addends on the relocation?), ELF has no such concept and
// uses a normal private symbol.
if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
- return OutContext.GetOrCreateSymbol(
+ return OutContext.getOrCreateSymbol(
Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
- return OutContext.GetOrCreateSymbol(
+ return OutContext.getOrCreateSymbol(
Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
}
@@ -226,6 +199,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
O << '#' << Imm;
break;
}
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = getSymbol(GV);
+
+ // FIXME: Can we get anything other than a plain symbol here?
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+ O << *Sym;
+ printOffset(MO.getOffset(), O);
+ break;
+ }
}
}
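With the new MO_GlobalAddress case, an operand naming global foo at byte
offset 16 renders as "foo+16" (printOffset emits nothing for a zero offset).
A hypothetical mirror of that formatting, not the AsmPrinter API:

    #include <cstdio>
    #include <string>

    // Assumed printOffset behavior: "+N" for positive offsets, "-N" for
    // negative ones, nothing for zero.
    static std::string withOffset(const std::string &Sym, long Offset) {
      if (Offset == 0)
        return Sym;
      return Sym + (Offset > 0 ? "+" : "") + std::to_string(Offset);
    }

    int main() {
      std::printf("%s\n", withOffset("foo", 16).c_str()); // foo+16
      std::printf("%s\n", withOffset("bar", 0).c_str());  // bar
      std::printf("%s\n", withOffset("baz", -8).c_str()); // baz-8
      return 0;
    }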
@@ -254,8 +238,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AArch64RegisterInfo *RI =
+ MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -364,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
- OS << V.getName();
+ OS << cast<DILocalVariable>(MI->getOperand(NOps - 2).getMetadata())
+ ->getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
@@ -452,15 +436,15 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
- OutStreamer.EmitLabel(LOHLabel);
+ OutStreamer->EmitLabel(LOHLabel);
}
// Do any manual lowerings.
@@ -468,11 +452,11 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
default:
break;
case AArch64::DBG_VALUE: {
- if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ if (isVerbose() && OutStreamer->hasRawTextSupport()) {
SmallString<128> TmpStr;
raw_svector_ostream OS(TmpStr);
PrintDebugValueComment(MI, OS);
- OutStreamer.EmitRawText(StringRef(OS.str()));
+ OutStreamer->EmitRawText(StringRef(OS.str()));
}
return;
}
@@ -483,8 +467,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case AArch64::TCRETURNri: {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::BR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case AArch64::TCRETURNdi: {
@@ -493,7 +477,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::B);
TmpInst.addOperand(Dest);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case AArch64::TLSDESC_CALLSEQ: {
@@ -516,52 +500,52 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst Adrp;
Adrp.setOpcode(AArch64::ADRP);
- Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(MCOperand::createReg(AArch64::X0));
Adrp.addOperand(SymTLSDesc);
- EmitToStreamer(OutStreamer, Adrp);
+ EmitToStreamer(*OutStreamer, Adrp);
MCInst Ldr;
Ldr.setOpcode(AArch64::LDRXui);
- Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
- Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::createReg(AArch64::X0));
Ldr.addOperand(SymTLSDescLo12);
- Ldr.addOperand(MCOperand::CreateImm(0));
- EmitToStreamer(OutStreamer, Ldr);
+ Ldr.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, Ldr);
MCInst Add;
Add.setOpcode(AArch64::ADDXri);
- Add.addOperand(MCOperand::CreateReg(AArch64::X0));
- Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
Add.addOperand(SymTLSDescLo12);
- Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
- EmitToStreamer(OutStreamer, Add);
+ Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(*OutStreamer, Add);
// Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
- EmitToStreamer(OutStreamer, TLSDescCall);
+ EmitToStreamer(*OutStreamer, TLSDescCall);
MCInst Blr;
Blr.setOpcode(AArch64::BLR);
- Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
- EmitToStreamer(OutStreamer, Blr);
+ Blr.addOperand(MCOperand::createReg(AArch64::X1));
+ EmitToStreamer(*OutStreamer, Blr);
return;
}
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI);
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
}
// Finally, do the automated lowerings for everything else.
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index e2b6367..d973234 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -476,9 +476,7 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const AArch64InstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index baf80bc..1e2d1c3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -46,7 +46,7 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
CCState &State, unsigned SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
unsigned StackAlign = State.getMachineFunction()
- .getSubtarget()
+ .getTarget()
.getDataLayout()
->getStackAlignment();
unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index a391e76..4691e94 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index ba4fc3b..06ff9af 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -92,9 +92,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
@@ -112,9 +110,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 87b545b..efdb2e3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,18 +279,16 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also considers definitions of ADRP instructions as uses and
/// ignores other uses. The ADRPMode is used to collect the information for LOHs
/// that involve ADRP operations only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
auto &BBGen = Gen[&MBB];
BBGen = make_unique<const MachineInstr *[]>(NbReg);
std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -330,7 +328,7 @@ static void initReachingDef(MachineFunction &MF,
const uint32_t *PreservedRegs = MO.getRegMask();
// Set generated regs.
- for (const auto Entry : RegToId) {
+ for (const auto &Entry : RegToId) {
unsigned Reg = Entry.second;
// Use the global register ID when querying APIs external to this
// pass.
@@ -384,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -394,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -403,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
@@ -435,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least number of registers of
/// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
const MachineInstr *DummyOp = nullptr) {
@@ -985,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
/// Look for every register defined by potential LOHs candidates.
/// Map these registers with dense id in @p RegToId and vice-versa in
/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
MapIdToReg &IdToReg,
const TargetRegisterInfo *TRI) {
unsigned CurRegId = 0;
@@ -1026,8 +1024,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
}
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
@@ -1043,8 +1040,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
// For local analysis, create a dummy operation to record uses that are not
// local.
DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 0fbd3c6..b9e41c6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -67,6 +67,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -86,11 +87,12 @@ namespace {
class AArch64ConditionOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineDominatorTree *DomTree;
+ const MachineRegisterInfo *MRI;
public:
// Stores immediate, compare instruction opcode and branch condition (in this
// order) of adjusted comparison.
- typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+ typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo;
static char ID;
AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
@@ -116,7 +118,6 @@ void initializeAArch64ConditionOptimizerPass(PassRegistry &);
INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
"AArch64 CondOpt Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
"AArch64 CondOpt Pass", false, false)
@@ -127,8 +128,6 @@ FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -155,7 +154,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
case AArch64::ADDSXri:
- if (I->getOperand(0).isDead())
+ if (MRI->use_empty(I->getOperand(0).getReg()))
return I;
DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
@@ -216,7 +215,7 @@ static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
// operator and condition code.
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
- int Opc = CmpMI->getOpcode();
+ unsigned Opc = CmpMI->getOpcode();
// CMN (compare with negative immediate) is an alias to ADDS (as
// "operand - negative" == "operand + positive")
@@ -245,7 +244,7 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
const CmpInfo &Info) {
int Imm;
- int Opc;
+ unsigned Opc;
AArch64CC::CondCode Cmp;
std::tie(Imm, Opc, Cmp) = Info;
@@ -304,8 +303,9 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
+ MRI = &MF.getRegInfo();
bool Changed = false;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 54f53dc..2b0c92f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -416,7 +416,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+ if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
DEBUG(dbgs() << "Can't speculate: " << I);
return false;
}
@@ -893,15 +893,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
- SchedModel =
- MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+ SchedModel = MF.getSubtarget().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c850680..c2470f7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -229,7 +229,7 @@ static bool isStartChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
+ return isMask_64(~Chunk);
}
/// \brief Check whether this chunk matches the pattern '0...1...' This pattern
@@ -239,7 +239,7 @@ static bool isEndChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
- return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
+ return isMask_64(Chunk);
}
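The two rewrites above rely on a property of isMask_64: it is true exactly
when its argument has the shape 0...01...1 (a non-empty run of low set bits).
So isMask_64(Chunk) matches the end-chunk pattern 0...1... directly, and
isMask_64(~Chunk) matches the start-chunk pattern 1...0... after
complementing. A small standalone check of the equivalence (with a local copy
of the predicate so it compiles without LLVM headers):

    #include <cassert>
    #include <cstdint>

    // Same test as llvm::isMask_64: non-zero and of the form 0...01...1.
    static bool isMask64(uint64_t V) { return V && ((V + 1) & V) == 0; }

    int main() {
      assert(isMask64(0x000000000000FFFFULL));  // end chunk: low ones
      assert(isMask64(~0xFFFF000000000000ULL)); // start chunk, inverted
      assert(!isMask64(0));                     // excluded by the callers
      assert(!isMask64(0x0000000000000006ULL)); // ones not anchored at bit 0
      return 0;
    }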
/// \brief Clear or set all bits in the chunk at the given index.
@@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandMOVImm(MBB, MBBI, 32);
case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
- case AArch64::RET_ReallyLR:
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
- .addReg(AArch64::LR);
+ case AArch64::RET_ReallyLR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+ .addReg(AArch64::LR);
+ transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
+ }
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index ca4e97b..9977e2b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -245,9 +245,10 @@ public:
unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
- const TargetLibraryInfo *LibInfo)
+ const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ Subtarget =
+ &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
Context = &FuncInfo.Fn->getContext();
}
@@ -663,20 +664,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = U->getOperand(0);
- if (const auto *I = dyn_cast<Instruction>(Src))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- Src = I;
-
- // Fold the zext or sext when it won't become a noop.
- if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::UXTW);
- Src = ZE->getOperand(0);
- }
- } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::SXTW);
- Src = SE->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src)) {
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+ if (!isIntExtFree(ZE) &&
+ ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+ if (!isIntExtFree(SE) &&
+ SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
}
}
@@ -745,21 +748,22 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = LHS;
- if (const auto *I = dyn_cast<Instruction>(Src))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- Src = I;
-
-
- // Fold the zext or sext when it won't become a noop.
- if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
- if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::UXTW);
- Src = ZE->getOperand(0);
- }
- } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
- if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
- Addr.setExtendType(AArch64_AM::SXTW);
- Src = SE->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src)) {
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
+ if (!isIntExtFree(ZE) &&
+ ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
+ if (!isIntExtFree(SE) &&
+ SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
}
}
@@ -1916,7 +1920,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
unsigned Reg = lookUpRegForValue(IntExtVal);
- if (!Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1934,10 +1939,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
- while (Reg) {
- auto *MI = MRI.getUniqueVRegDef(Reg);
- if (!MI)
- break;
+ while (MI) {
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
@@ -1946,6 +1948,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
}
}
MI->eraseFromParent();
+ MI = nullptr;
+ if (Reg)
+ MI = MRI.getUniqueVRegDef(Reg);
}
updateValueMap(IntExtVal, ResultReg);
return true;
@@ -2571,7 +2576,7 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
Src1IsKill = true;
}
- unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+ unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
Src1IsKill, Src2Reg, Src2IsKill);
updateValueMap(SI, ResultReg);
return true;
@@ -2677,8 +2682,11 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
return false;
bool CondIsKill = hasTrivialKill(Cond);
+ const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
+ CondReg = constrainOperandRegClass(II, CondReg, 1);
+
// Emit a TST instruction (ANDS wzr, reg, #imm).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
AArch64::WZR)
.addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
@@ -3033,6 +3041,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
// Copy all of the result registers out of their specified physreg.
MVT CopyVT = RVLocs[0].getValVT();
+
+ // TODO: Handle big-endian results
+ if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+ return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
@@ -3157,7 +3170,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -3256,7 +3269,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
std::swap(LHS, RHS);
// Simplify multiplies.
- unsigned IID = II->getIntrinsicID();
+ Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
default:
break;
@@ -3324,8 +3337,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MFI->setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3525,7 +3537,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
std::swap(LHS, RHS);
// Simplify multiplies.
- unsigned IID = II->getIntrinsicID();
+ Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
default:
break;
@@ -3589,7 +3601,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
AArch64_AM::ASR, 31, /*WantResult=*/false);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
- MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ // LHSReg and RHSReg cannot be killed by this Mul, since they are
+ // reused in the next instruction.
+ MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+ /*IsKill=*/false);
unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
@@ -3618,7 +3633,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
AArch64::sub_32);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
- MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ // LHSReg and RHSReg cannot be killed by this Mul, since they are
+ // reused in the next instruction.
+ MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
+ /*IsKill=*/false);
unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
@@ -4563,7 +4581,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
- bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+ bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d8e9156..3ba7e70 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until in the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------|
+// | |
+// | prev_fp, prev_lr |
+// | (a.k.a. "frame record") |
+// |-----------------------------------| <- fp(=x29)
+// | |
+// | other callee-saved registers |
+// | |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....| compile time; if present)
+// |-----------------------------------|
+// | |
+// | local variables of fixed size |
+// | including spill slots |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....| LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................| compile time)
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// To access data in a frame, a constant offset from one of the pointers
+// (fp, bp, sp) must be computable at compile time. The size of the areas
+// with a dotted background cannot be computed at compile time if they are
+// present, so all three of fp, bp and sp must be set up for all contents
+// of the frame areas to be accessible, assuming all of the frame areas
+// are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -39,33 +115,12 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoRedZone))
+ if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
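The local estimateStackSize deleted above is superseded by the generic
MachineFrameInfo::estimateStackSize used later in this file (see the CFSize
hunk below); both hinge on the same round-up-to-alignment step that appeared
in the deleted loop. A short refresher on that idiom, since it recurs
throughout frame lowering:

    #include <cassert>

    // Round Offset up to the next multiple of Align (Align > 0), as in
    // "Offset = (Offset + Align - 1) / Align * Align" above.
    static unsigned alignTo(unsigned Offset, unsigned Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      assert(alignTo(20, 8) == 24); // bump to the next 8-byte boundary
      assert(alignTo(24, 8) == 24); // already aligned: unchanged
      assert(alignTo(1, 16) == 16);
      return 0;
    }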
@@ -84,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on AArch64!");
-#endif
-
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint());
+ MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -112,7 +161,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc DL = I->getDebugLoc();
- int Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
@@ -167,7 +216,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
@@ -201,8 +250,33 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
}
}
-void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+/// Get FPOffset by analyzing the first instruction.
+static int getFPOffsetInPrologue(MachineInstr *MBBI) {
+ // First instruction must a) allocate the stack and b) have an immediate
+ // that is a multiple of -2.
+ assert(((MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre) &&
+ MBBI->getOperand(3).getReg() == AArch64::SP &&
+ MBBI->getOperand(4).getImm() < 0 &&
+ (MBBI->getOperand(4).getImm() & 1) == 0));
+
+ // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
+ // required for the callee saved register area we get the frame pointer
+ // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
+ int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
+ assert(FPOffset >= 0 && "Bad Framepointer Offset");
+ return FPOffset;
+}
+
+static bool isCSSave(MachineInstr *MBBI) {
+ return MBBI->getOpcode() == AArch64::STPXi ||
+ MBBI->getOpcode() == AArch64::STPDi ||
+ MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre;
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
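A worked instance of the arithmetic in getFPOffsetInPrologue, assuming the
usual prologue shapes (the STPXpre immediate is scaled in 8-byte units and
negative): "stp x29, x30, [sp, #-16]!" has getImm() == -2, so the frame
record sits at the new sp and FPOffset is 0; a 32-byte callee-save block
("stp ..., [sp, #-32]!") has getImm() == -4 and leaves the frame record 16
bytes above the new sp:

    #include <cassert>

    // Mirrors FPOffset = -(getImm() + 2) * 8 from getFPOffsetInPrologue;
    // fp = sp_after_stp + FPOffset.
    static int fpOffsetFromImm(int Imm) { return -(Imm + 2) * 8; }

    int main() {
      assert(fpOffsetFromImm(-2) == 0);  // stp x29, x30, [sp, #-16]!
      assert(fpOffsetFromImm(-4) == 16); // frame record tops a 32-byte area
      return 0;
    }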
@@ -228,7 +302,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
@@ -251,27 +325,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// Only set up FP if we actually need to.
int FPOffset = 0;
- if (HasFP) {
- // First instruction must a) allocate the stack and b) have an immediate
- // that is a multiple of -2.
- assert((MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre) &&
- MBBI->getOperand(3).getReg() == AArch64::SP &&
- MBBI->getOperand(4).getImm() < 0 &&
- (MBBI->getOperand(4).getImm() & 1) == 0);
-
- // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
- // required for the callee saved register area we get the frame pointer
- // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
- assert(FPOffset >= 0 && "Bad Framepointer Offset");
- }
+ if (HasFP)
+ FPOffset = getFPOffsetInPrologue(MBBI);
// Move past the saves of the callee-saved registers.
- while (MBBI->getOpcode() == AArch64::STPXi ||
- MBBI->getOpcode() == AArch64::STPDi ||
- MBBI->getOpcode() == AArch64::STPXpre ||
- MBBI->getOpcode() == AArch64::STPDpre) {
+ while (isCSSave(MBBI)) {
++MBBI;
NumBytes -= 16;
}
@@ -289,11 +347,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const bool NeedsRealignment = (Alignment > 16);
+ unsigned scratchSPReg = AArch64::SP;
+ if (NeedsRealignment) {
+ // Use the first callee-saved register as a scratch register
+ assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+ "No scratch register to align SP!");
+ scratchSPReg = AArch64::X9;
+ }
+
+ // If we're a leaf function, try using the red zone.
+ if (NumBytes && !canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ assert(!(NeedsRealignment && NumBytes==0) &&
+ "NumBytes should never be 0 when realignment is needed");
+
+ if (NumBytes && NeedsRealignment) {
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is a temporary register, so it shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded =
+ (1 <<12) // = N
+ | ((64-NrBitsToZero) << 6) // immr
+ | ((64-NrBitsToZero-1) << 0) // imms
+ ;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
}
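A concrete instance of the immr/imms packing above, assuming Alignment == 32
(so NrBitsToZero == 5): N = 1, immr = 64 - 5 = 59, imms = 64 - 5 - 1 = 58,
giving (1 << 12) | (59 << 6) | 58 == 0x1EFA. That logical immediate decodes
to a run of 59 ones rotated into bits 5..63, i.e. ~0x1F, so the AND clears
the low five bits of X9 into SP as intended. A quick check of the packing
arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned NrBitsToZero = 5; // Alignment == 32 == 1 << 5
      const uint32_t Encoded = (1u << 12)                   // N
                               | ((64 - NrBitsToZero) << 6) // immr = 59
                               | (64 - NrBitsToZero - 1);   // imms = 58
      assert(Encoded == 0x1EFA);
      return 0;
    }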
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -303,15 +398,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+ if (RegInfo->hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+ false);
+ }
if (needsFrameMoves) {
- const DataLayout *TD = MF.getSubtarget().getDataLayout();
+ const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
// An example of the prologue:
//
// .globl __foo
@@ -444,15 +539,19 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- DebugLoc DL = MBBI->getDebugLoc();
- unsigned RetOpcode = MBBI->getOpcode();
-
+ DebugLoc DL;
+ bool IsTailCallReturn = false;
+ if (MBB.end() != MBBI) {
+ DL = MBBI->getDebugLoc();
+ unsigned RetOpcode = MBBI->getOpcode();
+ IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
+ RetOpcode == AArch64::TCRETURNri;
+ }
int NumBytes = MFI->getStackSize();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -461,10 +560,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consitency with the prologue. Note that
+ // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
- if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
+ if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
// For a tail-call in a callee-pops-arguments environment, some or all of
@@ -509,7 +608,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned NumRestores = 0;
// Move past the restores of the callee-saved registers.
- MachineBasicBlock::iterator LastPopI = MBBI;
+ MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (LastPopI != MBB.begin()) {
do {
@@ -572,9 +671,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
+ // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+ // reliable as a base). Make sure useFPForScavengingIndex() does the
+ // right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@@ -583,7 +682,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+ !RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@@ -599,6 +699,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
+ assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument objects cannot be accessed through the frame pointer");
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -696,6 +800,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
MIB.addReg(AArch64::SP, RegState::Define);
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -795,6 +901,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(AArch64::X9);
+
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@@ -868,7 +977,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize =
+ MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index df3875f..b496fcc 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/) {}
+ true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -34,7 +34,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index bb2e1e2..78a2021 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -53,12 +53,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -67,7 +65,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -134,8 +132,8 @@ public:
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
- unsigned SubRegs[]);
+ SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
+ const unsigned SubRegs[]);
SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
@@ -213,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
@@ -247,8 +252,9 @@ bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
return false;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- Val = CurDAG->getTargetConstant(Immed, MVT::i32);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ SDLoc dl(N);
+ Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
return true;
}
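
SelectArithImmed accepts exactly the AArch64 ADD/SUB immediate form: a 12-bit unsigned value, optionally shifted left by 12. A standalone sketch of that legality check (simplified to plain integers; the patched code wraps the result in target constants as shown above):

    #include <cstdint>
    #include <cstdio>

    static bool selectArithImmed(uint64_t Immed, unsigned &Val,
                                 unsigned &ShiftAmt) {
      if (Immed >> 12 == 0) {
        ShiftAmt = 0;                 // fits in the low 12 bits
      } else if ((Immed & 0xfffULL) == 0 && Immed >> 24 == 0) {
        ShiftAmt = 12;                // 12-bit value shifted left by 12
        Immed >>= 12;
      } else {
        return false;
      }
      Val = (unsigned)Immed;
      return true;
    }

    int main() {
      unsigned V, S;
      std::printf("%d\n", selectArithImmed(0xabc, V, S));    // 1: imm=0xabc, lsl #0
      std::printf("%d\n", selectArithImmed(0xabc000, V, S)); // 1: imm=0xabc, lsl #12
      std::printf("%d\n", selectArithImmed(0xabc001, V, S)); // 0: not encodable
    }
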
@@ -281,7 +287,8 @@ bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
return false;
Immed &= 0xFFFFFFULL;
- return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
+ return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
+ Shift);
}
/// getShiftTypeForNode - Translate a shift node to the corresponding
@@ -301,7 +308,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// It hurts if the value is used at least twice, unless we are optimizing
// for code size.
@@ -329,7 +336,7 @@ bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
return isWorthFolding(N);
}
@@ -430,6 +437,7 @@ static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
+ SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
@@ -446,7 +454,7 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
return nullptr;
}
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
@@ -469,10 +477,11 @@ SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
break;
}
- return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);
}
SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
+ SDLoc dl(N);
SDValue SMULLOp0;
SDValue SMULLOp1;
int LaneIdx;
@@ -481,7 +490,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
LaneIdx))
return nullptr;
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
@@ -512,7 +521,7 @@ SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
} else
llvm_unreachable("Unrecognized intrinsic.");
- return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
}
/// Instructions that accept extend modifiers like UXTW expect the register
@@ -523,9 +532,10 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
if (N.getValueType() == MVT::i32)
return N;
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- SDLoc(N), MVT::i32, N, SubReg);
+ dl, MVT::i32, N, SubReg);
return SDValue(Node, 0);
}
@@ -565,7 +575,8 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
// (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
Reg = narrowIfNeeded(CurDAG, Reg);
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
+ Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
+ MVT::i32);
return isWorthFolding(N);
}
@@ -595,11 +606,12 @@ static bool isWorthFoldingADDlow(SDValue N) {
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
SDValue &Base, SDValue &OffImm) {
+ SDLoc dl(N);
const TargetLowering *TLI = getTargetLowering();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
@@ -632,7 +644,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
return true;
}
}
@@ -648,7 +660,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
// add x0, Xbase, #offset
// ldr x0, [x0]
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
@@ -675,7 +687,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
const TargetLowering *TLI = getTargetLowering();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
return true;
}
}
@@ -683,12 +695,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
SDValue ImpDef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
- 0);
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
+ TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
return SDValue(Node, 0);
}
@@ -702,6 +714,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
return false;
+ SDLoc dl(N);
if (WantExtend) {
AArch64_AM::ShiftExtendType Ext =
getExtendTypeForNode(N.getOperand(0), true);
@@ -709,10 +722,11 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
return false;
Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
} else {
Offset = N.getOperand(0);
- SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
}
unsigned LegalShiftVal = Log2_32(Size);
@@ -735,6 +749,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
+ SDLoc dl(N);
// We don't want to match immediate adds here, because they are better lowered
// to the register-immediate addressing modes.
@@ -757,7 +772,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
@@ -765,12 +780,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
// There was no shift, whatever else we find.
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
// Try to match an unshifted extend on the LHS.
@@ -779,7 +794,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
AArch64_AM::InvalidShiftExtend) {
Base = RHS;
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(LHS))
return true;
}
@@ -790,7 +806,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
AArch64_AM::InvalidShiftExtend) {
Base = LHS;
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(RHS))
return true;
}
@@ -821,6 +838,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
+ SDLoc DL(N);
// Check if this particular node is reused in any non-memory related
// operation. If yes, do not try to fold this node into the address
@@ -843,7 +861,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// MOV X0, WideImmediate
// LDR X2, [BaseReg, X0]
if (isa<ConstantSDNode>(RHS)) {
- int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+ int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
// Skip if the immediate can be selected by load/store addressing mode.
// Also skip if the immediate can be encoded by a single ADD (SUB is also
@@ -852,7 +870,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
return false;
- SDLoc DL(N.getNode());
SDValue Ops[] = { RHS };
SDNode *MOVI =
CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
@@ -868,7 +885,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
@@ -876,40 +893,40 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
// Match any non-shifted, non-extend, non-immediate add expression.
Base = LHS;
Offset = RHS;
- SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
// Reg1 + Reg2 is free: no check needed.
return true;
}
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3 };
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- unsigned RegClassIDs[],
- unsigned SubRegs[]) {
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[]) {
// There's no special register-class for a vector-list of 1 element: it's just
// a vector.
if (Regs.size() == 1)
@@ -917,18 +934,18 @@ SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
assert(Regs.size() >= 2 && Regs.size() <= 4);
- SDLoc DL(Regs[0].getNode());
+ SDLoc DL(Regs[0]);
SmallVector<SDValue, 4> Ops;
// First operand of REG_SEQUENCE is the desired RegClass.
Ops.push_back(
- CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
+ CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
// Then we get pairs of source & subregister-position for the components.
for (unsigned i = 0; i < Regs.size(); ++i) {
Ops.push_back(Regs[i]);
- Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
}
SDNode *N =
@@ -1025,19 +1042,21 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
SDValue Base = LD->getBasePtr();
ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
int OffsetVal = (int)OffsetOp->getZExtValue();
- SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
+ SDLoc dl(N);
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
SDValue Ops[] = { Base, Offset, Chain };
- SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
+ SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
MVT::Other, Ops);
// Either way, we're replacing the node, so tell the caller that.
Done = true;
SDValue LoadedVal = SDValue(Res, 1);
if (InsertTo64) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
LoadedVal =
SDValue(CurDAG->getMachineNode(
- AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
+ AArch64::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
+ SubReg),
0);
}
@@ -1054,13 +1073,10 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Mem operand;
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(2), // Mem operand;
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
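
The SmallVector-to-array rewrites in this function and the ones below work because getMachineNode takes an ArrayRef-style view that binds to a plain C array just as well, and these operand lists have a fixed, known shape, so a growable vector bought nothing. A tiny standalone stand-in for the idea (this Ref type is an illustration, not LLVM's ArrayRef):

    #include <cstddef>
    #include <cstdio>

    template <typename T> struct Ref {
      const T *Data;
      size_t Size;
      // Binds directly to a fixed-size C array; no heap, no copies.
      template <size_t N> Ref(const T (&A)[N]) : Data(A), Size(N) {}
    };

    static int sum(Ref<int> Ops) {
      int S = 0;
      for (size_t I = 0; I != Ops.Size; ++I)
        S += Ops.Data[I];
      return S;
    }

    int main() {
      int Ops[] = {1, 2, 3};         // shape known at compile time
      std::printf("%d\n", sum(Ops)); // 6
    }
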
@@ -1078,15 +1094,12 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Mem operand
- Ops.push_back(N->getOperand(2)); // Incremental
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(1), // Mem operand
+ N->getOperand(2), // Incremental
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1117,10 +1130,7 @@ SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 2));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
return St;
@@ -1130,25 +1140,24 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other); // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 1)); // base register
- Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
- Ops.push_back(N->getOperand(0)); // Chain
+ SDValue Ops[] = {RegSeq,
+ N->getOperand(NumVecs + 1), // base register
+ N->getOperand(NumVecs + 2), // Incremental
+ N->getOperand(0)}; // Chain
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
@@ -1169,6 +1178,7 @@ public:
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
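
Wrapping WidenVector in an anonymous namespace gives the helper internal linkage, so its definition cannot collide with an identically named symbol in another translation unit. A minimal illustration of the idiom:

    // Everything in an unnamed namespace has internal linkage: each
    // translation unit gets its own private copy of the symbol.
    namespace {
    struct Helper {
      int run() const { return 42; }
    };
    } // namespace

    int main() { return Helper().run() == 42 ? 0 : 1; }
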
@@ -1197,18 +1207,13 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1242,20 +1247,18 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ RegSeq->getValueType(0), MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq,
+ CurDAG->getTargetConstant(LaneNo, dl,
+ MVT::i64), // Lane Number
+ N->getOperand(NumVecs + 2), // Base register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Update uses of the write back register
@@ -1303,11 +1306,8 @@ SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
@@ -1333,19 +1333,16 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 2), // Base Register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
@@ -1424,12 +1421,17 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
} else
return false;
- assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
- "bad amount in shift node!");
+ // Bail out on large immediates. This happens when no proper
+ // combining/constant folding was performed.
+ if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
LSB = Srl_imm;
- MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
- : CountTrailingOnes_64(And_imm)) -
+ MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
+ : countTrailingOnes<uint64_t>(And_imm)) -
1;
if (ClampMSB)
// Since we're moving the extend before the right shift operation, we need
@@ -1473,7 +1475,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
return false;
// Check whether we really have several bits extract here.
- unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm));
+ unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
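
The BitWide computation above measures how many contiguous low bits survive once the AND mask is shifted right; if the shifted mask is a contiguous low-bit run, the whole (and (srl x, imm), mask) collapses into a single UBFM. A standalone sketch with portable stand-ins for countLeadingOnes/isMask_64:

    #include <cstdint>
    #include <cstdio>

    static unsigned countLeadingOnes64(uint64_t X) {
      unsigned N = 0;
      while (X & (1ull << 63)) { ++N; X <<= 1; }
      return N;
    }
    // A low-bit mask is nonzero and one less than a power of two.
    static bool isMask64(uint64_t X) { return X && ((X + 1) & X) == 0; }

    int main() {
      uint64_t And_mask = 0xff0, Srl_imm = 4; // models (x >> 4) & 0xff0... >> 4
      uint64_t M = And_mask >> Srl_imm;       // 0xff: contiguous low bits
      unsigned BitWide = 64 - countLeadingOnes64(~M);
      if (BitWide && isMask64(M))
        std::printf("UBFM lsb=%llu msb=%llu\n", // lsb=4 msb=11
                    (unsigned long long)Srl_imm,
                    (unsigned long long)(Srl_imm + BitWide - 1));
    }
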
@@ -1529,7 +1531,14 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
} else
return false;
- assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
+ // Missing combines/constant folding may have left us with strange
+ // constants.
+ if (Shl_imm >= VT.getSizeInBits()) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
+
uint64_t Srl_imm = 0;
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
@@ -1596,23 +1605,24 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
return nullptr;
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
// If the bit extract operation is 64bit but the original type is 32bit, we
// need to add one EXTRACT_SUBREG.
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
- SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
- CurDAG->getTargetConstant(MSB, MVT::i64)};
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64),
+ CurDAG->getTargetConstant(MSB, dl, MVT::i64)};
- SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node =
- CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
SDValue(BFM, 0), SubReg);
return Node;
}
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT)};
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT),
+ CurDAG->getTargetConstant(MSB, dl, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
@@ -1816,6 +1826,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
return Op;
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
@@ -1823,16 +1834,16 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
if (ShlAmount > 0) {
// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op,
- CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
+ UBFMOpc, dl, VT, Op,
+ CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
} else {
// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
assert(ShlAmount < 0 && "expected right shift");
int ShrAmount = -ShlAmount;
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1, VT));
+ UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
}
return SDValue(ShiftNode, 0);
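
The UBFM identities quoted in the comments above (LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt; LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #31) can be checked numerically. A standalone 32-bit model of UBFM's two cases — extract when the source position is at or below the width, insert otherwise — following the ARMv8 definition:

    #include <cassert>
    #include <cstdint>

    static uint32_t mask(unsigned Bits) {
      return Bits >= 32 ? ~0u : (1u << Bits) - 1;
    }
    static uint32_t ubfm32(uint32_t Wn, unsigned R, unsigned S) {
      if (S >= R) // extract bits R..S into the low bits (LSR-like)
        return (Wn >> R) & mask(S - R + 1);
      return (Wn & mask(S + 1)) << (32 - R); // insert low S+1 bits at 32-R
    }

    int main() {
      uint32_t X = 0xdeadbeef;
      for (unsigned Amt = 1; Amt < 32; ++Amt) {
        assert(ubfm32(X, 32 - Amt, 31 - Amt) == X << Amt); // LSL identity
        assert(ubfm32(X, Amt, 31) == X >> Amt);            // LSR identity
      }
    }
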
@@ -1872,7 +1883,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
return false;
ShiftAmount = countTrailingZeros(NonZeroBits);
- MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
+ MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
@@ -1997,10 +2008,11 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
return nullptr;
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
SDValue Ops[] = { Opd0,
Opd1,
- CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
+ CurDAG->getTargetConstant(LSB, dl, VT),
+ CurDAG->getTargetConstant(MSB, dl, VT) };
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
@@ -2098,7 +2110,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// finding FBits, but it must still be in range.
if (FBits == 0 || FBits > RegWidth) return false;
- FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+ FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
return true;
}
@@ -2213,8 +2225,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
const TargetLowering *TLI = getTargetLowering();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(Shifter, MVT::i32) };
+ SDLoc DL(Node);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
+ CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -2250,11 +2263,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
SDValue MemAddr = Node->getOperand(4);
// Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(ValLo);
- Ops.push_back(ValHi);
- Ops.push_back(MemAddr);
- Ops.push_back(Chain);
+ SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6458d56..e6108c3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
@@ -75,10 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
-
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
@@ -120,7 +120,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
}
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -282,14 +282,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- // f16 is storage-only, so we promote operations to f32 if we know this is
- // valid, and ignore them otherwise. The operations not mentioned here will
- // fail to select, but this is not a major problem as no source language
- // should be emitting native f16 operations yet.
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ // f16 is a storage-only type, always promote it to f32.
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
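
"Storage-only" with Promote means f16 values only live in memory and registers in half format; arithmetic is done by widening to f32, computing, and narrowing back. A minimal sketch of that contract, assuming a compiler with _Float16 support (e.g. recent clang/gcc targeting AArch64) — not portable C++:

    // What "Promote" lowers an f16 add to: widen, add in f32, narrow back.
    static _Float16 fadd_f16(_Float16 A, _Float16 B) {
      return (_Float16)((float)A + (float)B);
    }

    int main() {
      // 1.5 + 2.5 = 4.0 is exactly representable in half precision.
      return fadd_f16((_Float16)1.5f, (_Float16)2.5f) == 4.0f ? 0 : 1;
    }
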
@@ -371,9 +396,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -469,6 +492,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -484,6 +508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
// Enable TBZ/TBNZ
MaskAndBranchFoldingIsLegal = true;
+ EnableExtLdPromotion = true;
setMinFunctionAlignment(2);
@@ -534,11 +559,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
+ // -> v8f16 conversions.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+ // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
+ // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -570,9 +605,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -647,6 +680,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ if (!VT.isFloatingPoint() &&
+ VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -739,13 +778,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::i64;
}
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
- // FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
- // and the offset has to be a multiple of the related size in bytes.
- return 4095;
-}
-
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -753,9 +785,8 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- return nullptr;
+ switch ((AArch64ISD::NodeType)Opcode) {
+ case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
@@ -827,6 +858,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -834,6 +871,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
@@ -869,6 +907,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
}
+ return nullptr;
}
MachineBasicBlock *
@@ -886,9 +925,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
MachineFunction::iterator It = MBB;
@@ -1151,7 +1189,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
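
This hunk and the three below it only thread the SDLoc through; the surrounding transform is the interesting part: when C is not a legal arithmetic immediate but C-1 (or C+1) is, the condition code is flipped so the comparison still costs a single instruction. The identity being relied on, checked numerically:

    #include <cassert>

    int main() {
      long C = 0x1001; // 4097: not encodable as an ADD/SUB immediate
      for (long X : {-1L, 0xfffL, 0x1000L, 0x1001L, 0x1002L})
        assert((X < C) == (X <= C - 1)); // 4096 = 1 << 12 is encodable
    }
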
case ISD::SETULT:
@@ -1161,7 +1199,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
@@ -1172,7 +1210,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
@@ -1183,7 +1221,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
}
@@ -1217,10 +1255,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, RHS.getValueType()),
+ DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
}
@@ -1228,7 +1267,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
@@ -1264,7 +1303,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
@@ -1275,7 +1314,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
// On AArch64 the upper 32 bits are always zero extended for a 32 bit
// operation. We need to clear out the upper 32 bits, because we used a
// widening multiply that wrote all 64 bits. In the end this should be a
@@ -1288,10 +1327,10 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
// check we have to arithmetic shift right the 32nd bit of the result by
// 31 bits. Then we compare the result to the upper 32 bits.
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
- DAG.getConstant(31, MVT::i64));
+ DAG.getConstant(31, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
@@ -1304,10 +1343,11 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
// pattern:
// (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
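
The 32-bit overflow checks above are easier to follow outside SelectionDAG: do the multiply in 64 bits, then overflow holds exactly when the top half is not the sign-extension of the low half (or, in the UMULO case, not zero). A standalone model of the signed case:

    #include <cstdint>
    #include <cstdio>

    static bool smulo32(int32_t A, int32_t B, int32_t &Result) {
      int64_t Mul = (int64_t)A * (int64_t)B;
      Result = (int32_t)Mul; // wraps modulo 2^32 on mainstream compilers
      // Compare the top 32 bits against the replicated sign bit of the
      // truncated result; mismatch means the product did not fit.
      return (Mul >> 32) != (int64_t)(Result >> 31);
    }

    int main() {
      int32_t R;
      std::printf("%d\n", smulo32(0x10000, 0x10000, R)); // 1: 2^32 overflows
      std::printf("%d\n", smulo32(-46340, 46340, R));    // 0: fits in i32
    }
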
@@ -1318,7 +1358,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
- DAG.getConstant(63, MVT::i64));
+ DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
@@ -1328,7 +1368,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
@@ -1347,10 +1388,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1405,7 +1443,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
- DAG.getConstant(-1ULL, Other.getValueType()));
+ DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
@@ -1455,24 +1493,25 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
+ SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, MVT::i32);
- SDValue FVal = DAG.getConstant(0, MVT::i32);
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
- Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal,
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
@@ -1503,7 +1542,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
+ DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
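
The prefetch lowering packs the hint operands into one immediate whose fields the shifts above reveal (write bit, cache-level bits, stream bit). A hedged sketch of just those fields — the real PRFM encoding carries more bits, e.g. instruction-vs-data prefetch; consult the ARM ARM for the full layout:

    #include <cstdio>

    // Field positions taken from the shifts visible in the diff.
    static unsigned packPrfOp(bool IsWrite, unsigned Level, bool IsStream) {
      return ((unsigned)IsWrite << 4) | (Level << 1) | (unsigned)IsStream;
    }

    int main() {
      unsigned Op = packPrfOp(/*IsWrite=*/false, /*Level=*/0, /*IsStream=*/false);
      std::printf("prfop=%u\n", Op); // 0: a read, keep, level-1 prefetch
    }
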
@@ -1567,6 +1606,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getOperand(0).getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ }
+
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
@@ -1578,10 +1625,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
@@ -1600,7 +1644,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
- return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
@@ -1619,6 +1663,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0, dl));
+ }
+
// i128 conversions are libcalls.
if (Op.getOperand(0).getValueType() == MVT::i128)
return SDValue();
@@ -1679,7 +1732,7 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
@@ -1753,6 +1806,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
@@ -1762,9 +1816,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
- Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(TruncVT, NumElts), Ops);
}
@@ -2219,8 +2273,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
- unsigned FirstVariadicGPR =
- CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
@@ -2237,7 +2290,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
@@ -2248,8 +2301,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
- unsigned FirstVariadicFPR =
- CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
@@ -2267,7 +2319,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
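
getFirstUnallocated now infers the register-array length itself; the sizing logic is unchanged. For reference, the save-area arithmetic in this function as a standalone calculation — 8 GPRs (x0-x7) at 8 bytes and 8 SIMD registers (q0-q7) at 16 bytes, saving only those not consumed by named arguments (the argument counts below are made up):

    #include <cstdio>

    int main() {
      unsigned FirstVariadicGPR = 3; // e.g. three named integer args
      unsigned FirstVariadicFPR = 1; // e.g. one named FP arg
      unsigned GPRSaveSize = 8 * (8 - FirstVariadicGPR);  // 40 bytes
      unsigned FPRSaveSize = 16 * (8 - FirstVariadicFPR); // 112 bytes
      std::printf("GPR save %u, FPR save %u\n", GPRSaveSize, FPRSaveSize);
    }
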
@@ -2619,8 +2671,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
+ true),
+ DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
@@ -2690,7 +2743,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
if (IsTailCall) {
@@ -2705,7 +2758,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
@@ -2713,11 +2766,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
- DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
+ DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false,
- /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false, /*AlwaysInline = */ false,
+ /*isTailCall = */ false,
+ DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -2782,8 +2836,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
@@ -2795,7 +2849,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
- Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
@@ -2806,19 +2860,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -2830,8 +2881,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
- if (IsTailCall)
+ if (IsTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+ }
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
@@ -2841,8 +2894,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
? RoundUpToAlignment(NumBytes, 16)
: 0;
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(CalleePopBytes, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -2958,7 +3011,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
/*isInvariant=*/ true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
- DAG.getConstant(GN->getOffset(), PtrVT));
+ DAG.getConstant(GN->getOffset(), DL, PtrVT));
return GlobalAddr;
}
@@ -3038,11 +3091,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+ const uint32_t *Mask =
+ Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -3125,11 +3175,13 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SDValue TPWithOff_lo =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
- HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ HiVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
SDValue TPWithOff =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
- LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
@@ -3165,10 +3217,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
- DAG.getTargetConstant(0, MVT::i32)),
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
+ DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
@@ -3211,7 +3263,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3236,10 +3288,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+ SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
- return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
- CCVal, Overflow);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
if (LHS.getValueType().isInteger()) {
@@ -3261,7 +3313,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3276,7 +3329,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3286,7 +3340,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -3296,7 +3350,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
SDValue CCVal;
@@ -3312,11 +3366,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
@@ -3336,7 +3390,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
if (SrcVT == MVT::f32 && VT == MVT::f64)
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
+ DAG.getIntPtrConstant(0, DL));
else
// FIXME: Src type is different, bail out for now. Can VT really be a
// vector type?
@@ -3345,11 +3400,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3367,7 +3423,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3382,11 +3438,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
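
A brief aside on the hunk above (ours, not part of the patch): the reason "materialize zero and then negate it" produces the wanted mask is that floating-point negation of +0.0 flips only the sign bit, yielding 0x8000000000000000 for f64 — exactly the copysign mask the AdvSIMD modified-immediate encodings cannot express for 64-bit elements. A minimal standalone C++ check:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double NegZero = -0.0;   // what the emitted FNEG of the zero vector computes per lane
      uint64_t Bits;
      std::memcpy(&Bits, &NegZero, sizeof(Bits));
      // Prints 8000000000000000: only the sign bit is set.
      std::printf("%016llx\n", (unsigned long long)Bits);
      return 0;
    }
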
@@ -3408,8 +3460,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
@@ -3426,21 +3478,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
- VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
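
For context on the CTPOP hunk above, an illustrative sketch of ours (not LLVM code, and assuming the GCC/Clang popcount builtin): the new lowering zero-extends an i32 input to i64, bitcasts it to v8i8, counts bits per byte (CNT), then sums the eight lane counts (UADDLV). The scalar equivalent:

    #include <cstdint>

    static unsigned popcount64_bytewise(uint64_t Val) {
      unsigned Sum = 0;                                     // plays the role of UADDLV
      for (int I = 0; I < 8; ++I)
        Sum += __builtin_popcount((Val >> (8 * I)) & 0xff); // CNT on one v8i8 lane
      return Sum;                                           // e.g. popcount64_bytewise(0xff00ff) == 16
    }
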
@@ -3459,8 +3505,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
- SDValue TVal = DAG.getConstant(1, VT);
- SDValue FVal = DAG.getConstant(0, VT);
+ SDValue TVal = DAG.getConstant(1, dl, VT);
+ SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
@@ -3497,7 +3543,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
changeFPCCToAArch64CC(CC, CC1, CC2);
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
@@ -3510,11 +3556,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
}
@@ -3529,7 +3575,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
/// operations would *not* be semantically equivalent.
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
if (Cmp == Result)
- return true;
+ return (Cmp.getValueType() == MVT::f32 ||
+ Cmp.getValueType() == MVT::f64);
ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
@@ -3544,49 +3591,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- AArch64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
- CCVal, Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+ SDValue RHS, SDValue TVal,
+ SDValue FVal, SDLoc dl,
SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
@@ -3595,7 +3603,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3694,67 +3702,27 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
-
- // Try to match this select into a max/min operation, which have dedicated
- // opcode in the instruction set.
- // FIXME: This is not correct in the presence of NaNs, so we only enable this
- // in no-NaNs mode.
- if (getTargetMachine().Options.NoNaNsFPMath) {
- SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(MinMaxLHS, MinMaxRHS);
- }
-
- if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
- selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
- switch (CC) {
- default:
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETOGT:
- case ISD::SETOGE:
- return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
- break;
- }
- }
- }
-
- // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
- // and do the comparison.
+ EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
@@ -3762,6 +3730,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
return CS1;
}
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TVal = Op.getOperand(2);
+ SDValue FVal = Op.getOperand(3);
+ SDLoc DL(Op);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CCVal = Op->getOperand(0);
+ SDValue TVal = Op->getOperand(1);
+ SDValue FVal = Op->getOperand(2);
+ SDLoc DL(Op);
+
+ unsigned Opc = CCVal.getOpcode();
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+ // instruction.
+ if (CCVal.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+ return SDValue();
+
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
+
+ // Lower it the same way as we would lower a SELECT_CC node.
+ ISD::CondCode CC;
+ SDValue LHS, RHS;
+ if (CCVal.getOpcode() == ISD::SETCC) {
+ LHS = CCVal.getOperand(0);
+ RHS = CCVal.getOperand(1);
+ CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+ } else {
+ LHS = CCVal;
+ RHS = DAG.getConstant(0, DL, CCVal.getValueType());
+ CC = ISD::SETNE;
+ }
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
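A small aside on the rewritten LowerSELECT (ours, hypothetical source-level illustration): when the condition is not already a SETCC, it is canonicalized to the comparison (cond != 0) with SETNE, so both select forms funnel into the shared SELECT_CC path:

    // Both returns lower the same way, the second via the implicit
    // (X != 0) comparison that LowerSELECT synthesizes.
    int pick(bool C, int T, int F) {
      return C ? T : F;            // select i1 C, T, F
    }
    int pick_cmp(int X, int T, int F) {
      return (X != 0) ? T : F;     // select_cc X, 0, T, F, setne
    }
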
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
@@ -3892,11 +3912,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, getPointerTy()));
+ DAG.getConstant(GPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
@@ -3907,11 +3927,11 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, getPointerTy()));
+ DAG.getConstant(FPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
@@ -3919,15 +3939,17 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
// int __gr_offs at offset 24
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ DAG.getConstant(24, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ DAG.getConstant(28, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
false, 4));
@@ -3944,13 +3966,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
// pointer.
+ SDLoc DL(Op);
unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
- Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
+ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getConstant(VaListSize, DL, MVT::i32),
+ 8, false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -3972,9 +3996,9 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, getPointerTy()));
+ DAG.getConstant(Align - 1, DL, getPointerTy()));
VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, getPointerTy()));
+ DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
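
The ADD/AND pair emitted above is the usual power-of-two align-up; a one-line sketch of ours, with hypothetical names:

    #include <cstdint>
    // AlignUp(13, 8) == 16; already-aligned values are unchanged.
    static uint64_t AlignUp(uint64_t P, uint64_t Align) { // Align: power of two
      return (P + Align - 1) & -(int64_t)Align;           // same add-then-mask as the DAG nodes
    }
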
@@ -3994,7 +4018,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, getPointerTy()));
+ DAG.getConstant(ArgSize, DL, getPointerTy()));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
@@ -4006,7 +4030,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(), false, false, false, 0);
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
@@ -4055,7 +4079,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
@@ -4083,15 +4107,15 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
@@ -4103,8 +4127,9 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue TrueValHi = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i64))
- : DAG.getConstant(0, VT);
+ DAG.getConstant(VTBits - 1, dl,
+ MVT::i64))
+ : DAG.getConstant(0, dl, VT);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
@@ -4127,24 +4152,24 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
// AArch64 shifts larger than the register size are wrapped rather than
// clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, VT);
+ SDValue TrueValLo = DAG.getConstant(0, dl, VT);
SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
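
To make the CSEL structure above concrete, here is a scalar sketch of ours for SHL_PARTS on a 128-bit value split into 64-bit halves; the Amt >= 64 arm corresponds to the ExtraShAmt >= 0 case selected by the GE condition:

    #include <cstdint>

    static void Shl128(uint64_t Lo, uint64_t Hi, unsigned Amt,
                       uint64_t &OutLo, uint64_t &OutHi) {
      if (Amt == 0) {                 // avoid the undefined Lo >> 64
        OutLo = Lo;
        OutHi = Hi;
      } else if (Amt < 64) {          // hi = (hi << a) | (lo >> (64 - a))
        OutHi = (Hi << Amt) | (Lo >> (64 - Amt));
        OutLo = Lo << Amt;
      } else {                        // wrapped shift: the low half becomes zero
        OutHi = Lo << (Amt - 64);
        OutLo = 0;
      }
    }
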
@@ -4258,7 +4283,8 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const std::string &Constraint, MVT VT) const {
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -4287,7 +4313,7 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
@@ -4436,7 +4462,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
}
// All assembler immediates are 64-bit integers.
- Result = DAG.getTargetConstant(CVal, MVT::i64);
+ Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
@@ -4462,7 +4488,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
- V64Reg, DAG.getConstant(0, MVT::i32));
+ V64Reg, DAG.getConstant(0, DL, MVT::i32));
}
/// getExtFactor - Determine the adjustment factor for the position when
@@ -4594,25 +4620,26 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
- VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+ VEXTSrc2,
+ DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
@@ -4947,11 +4974,11 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
VT.getVectorNumElements() / 2);
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueType().getSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
@@ -5018,7 +5045,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
@@ -5029,7 +5056,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
+ SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
@@ -5037,7 +5064,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
@@ -5074,7 +5101,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
- TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
+ TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
@@ -5094,7 +5121,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
@@ -5102,7 +5129,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
@@ -5114,7 +5141,8 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
// &TBLMask[0], IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32),
+ V1Cst, V2Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
}
@@ -5183,7 +5211,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
} else if (VT.getSizeInBits() == 64)
V1 = WidenVector(V1, DAG);
- return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+ return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
}
if (isREVMask(ShuffleMask, VT, 64))
@@ -5200,12 +5228,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->getOpcode() == ISD::UNDEF &&
isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
@@ -5244,7 +5272,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
- SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+ SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
@@ -5252,7 +5280,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
- SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+ SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
@@ -5342,8 +5370,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5351,8 +5379,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5360,8 +5388,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5369,8 +5397,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5378,8 +5406,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5387,8 +5415,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5492,7 +5520,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
SDValue ResultSLI =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
+ DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
+ Shift.getOperand(1));
DEBUG(dbgs() << "aarch64-lower: transformed: \n");
DEBUG(N->dump(&DAG));
@@ -5542,8 +5571,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5551,8 +5580,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5560,8 +5589,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5569,8 +5598,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5578,8 +5607,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5587,8 +5616,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5623,7 +5652,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
if (Lane.getOpcode() == ISD::Constant) {
APInt LowBits(EltTy.getSizeInBits(),
cast<ConstantSDNode>(Lane)->getZExtValue());
- Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
}
Ops.push_back(Lane);
}
@@ -5661,13 +5690,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5675,8 +5704,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5684,8 +5713,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5693,8 +5722,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5702,8 +5731,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5711,8 +5740,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5720,8 +5749,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5729,8 +5758,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5738,8 +5767,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5747,7 +5776,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5756,7 +5785,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5764,7 +5793,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
VT.getSizeInBits() == 128) {
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5774,8 +5803,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5783,8 +5812,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5792,8 +5821,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5801,8 +5830,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5810,8 +5839,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5819,8 +5848,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5828,8 +5857,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
@@ -5837,8 +5866,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5921,8 +5950,10 @@ FailedModImm:
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+      assert((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+             "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -5942,7 +5973,7 @@ FailedModImm:
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
@@ -5984,7 +6015,7 @@ FailedModImm:
unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
MachineSDNode *N =
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, MVT::i32));
+ DAG.getTargetConstant(SubIdx, dl, MVT::i32));
Vec = SDValue(N, 0);
++i;
}
@@ -5992,7 +6023,7 @@ FailedModImm:
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
@@ -6215,10 +6246,11 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SHL:
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
- return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32),
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
+ MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
@@ -6227,8 +6259,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
- return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
@@ -6240,7 +6272,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+ DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
+ NegShift);
return NegShiftLeft;
}
@@ -6521,6 +6554,34 @@ bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return NumBits1 > NumBits2;
}
+/// Check whether it is profitable to hoist an instruction in a then/else
+/// block up into the if block. It is not profitable if I and its user can
+/// form an FMA instruction, because we prefer FMSUB/FMADD.
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (I->getNumUses() != 1)
+ return true;
+
+ Instruction *User = I->user_back();
+
+ if (User &&
+ !(User->getOpcode() == Instruction::FSub ||
+ User->getOpcode() == Instruction::FAdd))
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ EVT VT = getValueType(User->getOperand(0)->getType());
+
+ if (isFMAFasterThanFMulAndFAdd(VT) &&
+ isOperationLegalOrCustom(ISD::FMA, VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
+ return false;
+
+ return true;
+}
+
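A hypothetical example of ours for the pattern the new hook protects: if the multiply below were hoisted out of the branch, the multiply and the subtract would land in different blocks and could no longer be selected as a single fused instruction.

    // With FP contraction enabled, the branch body selects to one FMSUB
    // (Acc - A * B); hoisting the fmul would split the pair across blocks.
    double Step(bool Cond, double A, double B, double Acc) {
      if (Cond)
        return Acc - A * B;
      return Acc;
    }
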
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -6553,6 +6614,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
VT1.getSizeInBits() <= 32);
}
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+ if (isa<FPExtInst>(Ext))
+ return false;
+
+  // Vector types are not free.
+ if (Ext->getType()->isVectorTy())
+ return false;
+
+ for (const Use &U : Ext->uses()) {
+ // The extension is free if we can fold it with a left shift in an
+ // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+ // Is there a shift?
+ const Instruction *Instr = cast<Instruction>(U.getUser());
+
+ // Is this a constant shift?
+ switch (Instr->getOpcode()) {
+ case Instruction::Shl:
+ if (!isa<ConstantInt>(Instr->getOperand(1)))
+ return false;
+ break;
+ case Instruction::GetElementPtr: {
+ gep_type_iterator GTI = gep_type_begin(Instr);
+ std::advance(GTI, U.getOperandNo());
+ Type *IdxTy = *GTI;
+ // This extension will end up with a shift because of the scaling factor.
+ // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+ // Get the shift amount based on the scaling factor:
+ // log2(sizeof(IdxTy)) - log2(8).
+ uint64_t ShiftAmt =
+ countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ // Is the constant foldable in the shift of the addressing mode?
+ // I.e., shift amount is between 1 and 4 inclusive.
+ if (ShiftAmt == 0 || ShiftAmt > 4)
+ return false;
+ break;
+ }
+ case Instruction::Trunc:
+ // Check if this is a noop.
+ // trunc(sext ty1 to ty2) to ty1.
+ if (Instr->getType() == Ext->getOperand(0)->getType())
+ continue;
+ // FALL THROUGH.
+ default:
+ return false;
+ }
+
+ // At this point we can use the bfm family, so this extension is free
+ // for that use.
+ }
+ return true;
+}
+
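An illustrative example of ours for the "free" extension case the loop above recognizes: an i32 index sign-extended for a scaled load folds into a single AArch64 addressing mode, so the sext costs nothing.

    #include <cstdint>
    // Compiles to:  ldr x0, [x0, w1, sxtw #3]
    // The sxtw and the #3 scale (log2 of the 8-byte element) fold into
    // the load, which is why ShiftAmt must be between 1 and 4 above.
    int64_t LoadAt(const int64_t *Arr, int32_t Idx) {
      return Arr[Idx];
    }
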
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6591,13 +6705,22 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool Fast;
const Function *F = MF.getFunction();
if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ return MVT::i64;
+
+ if (Size >= 4 &&
+ (memOpAlign(SrcAlign, DstAlign, 4) ||
+ (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ return MVT::i32;
+
+ return MVT::Other;
}
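The new fallback cascade is worth spelling out: rather than unconditionally returning i64 or i32, each width is now used only when the copy is large enough and either sufficiently aligned or known fast when misaligned, with MVT::Other deferring to generic lowering. A sketch under those assumptions, with a simplified alignment test standing in for memOpAlign:

#include <cstdint>

enum class MemType { F128, I64, I32, Other };

// Simplified stand-in for memOpAlign: 0 means "alignment unknown but
// unaligned access permitted", otherwise both sides must be aligned.
static bool alignedFor(unsigned SrcAlign, unsigned DstAlign, unsigned Bytes) {
  return (SrcAlign == 0 || SrcAlign >= Bytes) &&
         (DstAlign == 0 || DstAlign >= Bytes);
}

MemType pickMemOpType(uint64_t Size, unsigned SrcAlign, unsigned DstAlign,
                      bool HasFP, bool FastMisaligned) {
  if (HasFP && Size >= 16 &&
      (alignedFor(SrcAlign, DstAlign, 16) || FastMisaligned))
    return MemType::F128;  // one q-register load/store per 16 bytes
  if (Size >= 8 && (alignedFor(SrcAlign, DstAlign, 8) || FastMisaligned))
    return MemType::I64;
  if (Size >= 4 && (alignedFor(SrcAlign, DstAlign, 4) || FastMisaligned))
    return MemType::I32;
  return MemType::Other;   // let generic lowering pick something smaller
}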
// 12-bit optionally shifted immediates are legal for adds.
@@ -6748,7 +6871,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
// Generate SUBS and CSEL for integer abs.
@@ -6766,14 +6889,14 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- N0.getOperand(0), DAG.getConstant(0, VT));
+ N0.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, MVT::i32),
+ DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
SDValue(Cmp.getNode(), 1));
}
return SDValue();
@@ -6802,8 +6925,8 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
@@ -6819,7 +6942,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// Divide by pow2.
SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+ DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
@@ -6828,7 +6951,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Created)
Created->push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
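The SUBS/CSEL sequence built above implements C-style round-toward-zero division by a power of two: a bare arithmetic shift rounds toward minus infinity for negative dividends, so 2^Lg2 - 1 is conditionally added first, and a negative divisor only flips the sign at the end. In scalar form:

#include <cstdint>

// Round-toward-zero N / (+/- 2^Lg2) without a divide instruction.
int64_t sdivPow2(int64_t N, unsigned Lg2, bool DivisorIsNegative) {
  int64_t Bias = (INT64_C(1) << Lg2) - 1;
  int64_t Adjusted = N < 0 ? N + Bias : N; // the CSEL on the sign of N
  int64_t Quot = Adjusted >> Lg2;          // the SRA
  return DivisorIsNegative ? -Quot : Quot; // the trailing SUB from zero
}
// sdivPow2(-7, 1, false) == -3, matching -7 / 2 in C;
// a plain -7 >> 1 would give -4 instead.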
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
@@ -6845,45 +6968,46 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (Value.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
APInt VM1 = Value - 1;
if (VM1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
N->getOperand(0));
}
// (mul x, 2^N - 1) => (sub (shl x, N), x)
APInt VP1 = Value + 1;
if (VP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
N->getOperand(0));
}
} else {
- // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
- APInt VNM1 = -Value - 1;
- if (VNM1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNM1.logBase2(), MVT::i64));
- SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
- }
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
APInt VNP1 = -Value + 1;
if (VNP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
ShiftedVal);
}
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
+ }
}
}
return SDValue();
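These rewrites trade a multiply for a shift plus one add/sub (plus a negate in the worst case). Note that the diff also swaps the order of the two negative-constant checks: for a value such as -3, both -(2^2 - 1) and -(2^1 + 1) match, and trying -(2^N - 1) first now picks the two-instruction form instead of the three-instruction negated add. The four rewrites in scalar form:

#include <cstdint>

static int64_t shl(int64_t X, unsigned N) { // shift without signed-overflow UB
  return static_cast<int64_t>(static_cast<uint64_t>(X) << N);
}

int64_t mulPow2Plus1(int64_t X, unsigned N)     { return shl(X, N) + X; }    // x * (2^N + 1)
int64_t mulPow2Minus1(int64_t X, unsigned N)    { return shl(X, N) - X; }    // x * (2^N - 1)
int64_t mulNegPow2Minus1(int64_t X, unsigned N) { return X - shl(X, N); }    // x * -(2^N - 1)
int64_t mulNegPow2Plus1(int64_t X, unsigned N)  { return -(shl(X, N) + X); } // x * -(2^N + 1)
// x * -3: mulNegPow2Minus1(x, 2) == mulNegPow2Plus1(x, 1), but the former
// needs only one shift and one sub, so it is the better match to try first.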
@@ -7037,7 +7161,7 @@ static SDValue tryCombineToEXTR(SDNode *N,
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, MVT::i64));
+ DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
@@ -7165,10 +7289,10 @@ static SDValue performBitcastCombine(SDNode *N,
SDLoc dl(N);
unsigned NumElements = VT.getVectorNumElements();
if (idx) {
- SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
+ SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
} else {
- SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32);
+ SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
Source, SubReg),
0);
@@ -7178,22 +7302,55 @@ static SDValue performBitcastCombine(SDNode *N,
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
- DAG.getConstant(0, MVT::i64));
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
+ DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
@@ -7205,10 +7362,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
@@ -7218,10 +7374,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7310,15 +7466,16 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ SDLoc dl(N);
SDValue NewDUP;
if (IsDUPLANE)
- NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
+ NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0),
N.getOperand(1));
else
- NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
+ NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
- NewDUP, DAG.getConstant(NumElems, MVT::i64));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP,
+ DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractSubvector(SDValue N) {
@@ -7443,7 +7600,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
- AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+ AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
+ MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
@@ -7452,7 +7610,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
CCVal, DAG, dl);
EVT VT = Op->getValueType(0);
- LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+ LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
@@ -7592,12 +7750,15 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
break;
}
- if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(ShiftAmount, MVT::i32));
+ if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+ } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(ShiftAmount, dl, MVT::i32));
+ }
return SDValue();
}
@@ -7618,6 +7779,16 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc dl(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
+ DAG.getNode(Opc, dl,
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, dl, MVT::i64));
+}
+
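The helper above maps each across-lanes intrinsic to its target node and then reads lane 0 of the result, consistent with the node comments added in the header below ("Only the lower result lane is defined"). The scalar semantics of, say, the unsigned add reduction are simply:

#include <array>
#include <cstdint>

// What UADDV computes: the lanewise sum, delivered in lane 0.
uint32_t uaddv(const std::array<uint32_t, 4> &V) {
  uint32_t Lane0 = 0;
  for (uint32_t Lane : V)
    Lane0 += Lane; // modulo-2^32 addition across all lanes
  return Lane0;    // the combine then extracts this single lane
}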
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -7630,6 +7801,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -7744,9 +7927,9 @@ static SDValue performExtendCombine(SDNode *N,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
+ DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -7806,7 +7989,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
unsigned Offset = EltOffset;
while (--NumVecElts) {
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
+ DAG.getConstant(Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), Alignment);
@@ -7827,14 +8010,13 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
// Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundries. We want to split such stores.
+ // page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
// Don't split at Oz.
MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (IsMinSize)
return SDValue();
@@ -7868,15 +8050,15 @@ static SDValue performSTORECombine(SDNode *N,
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(NumElts, MVT::i64));
+ DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->isVolatile(), S->isNonTemporal(), S->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(8, MVT::i64));
+ DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
S->getAlignment());
@@ -7944,6 +8126,13 @@ static SDValue performPostLD1Combine(SDNode *N,
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
+ // Finally, check that the vector doesn't depend on the load.
+ // Again, this would create a cycle.
+ // The load depending on the vector is fine, as that's the case for the
+ // LD1*post we'll eventually generate anyway.
+ if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ continue;
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
@@ -7961,7 +8150,7 @@ static SDValue performPostLD1Combine(SDNode *N,
LoadSDN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
+ SmallVector<SDValue, 2> NewResults;
NewResults.push_back(SDValue(LD, 0)); // The result of load
NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
DCI.CombineTo(LD, NewResults);
@@ -8455,13 +8644,21 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+ if (N0.getOpcode() != ISD::SETCC)
return SDValue();
+ // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+ // scalar SetCCResultType. We also don't expect vectors, because we assume
+ // that selects fed by vector SETCCs are canonicalized to VSELECT.
+ assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+ "Scalar-SETCC feeding SELECT has unexpected result type!");
+
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
@@ -8479,6 +8676,16 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
+ // Also bail out if the vector CCVT isn't the same size as ResVT.
+ // This can happen if the SETCC operand size doesn't divide the ResVT size
+ // (e.g., f64 vs v3f32).
+ if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
+ return SDValue();
+
+ // Make sure we didn't create illegal types, if we're not supposed to.
+ assert(DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
@@ -8497,6 +8704,75 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
+/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match FMIN/FMAX patterns.
+static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
+ // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
+ // Unless the NoNaNsFPMath option is set, be careful about NaNs:
+ // vmax/vmin return NaN if either operand is a NaN;
+ // only do the transformation when it matches that behavior.
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode;
+ bool IsReversed;
+ if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
+ selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
+ IsReversed = true; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered = false, IsOrEqual;
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETULT:
+ case ISD::SETULE:
+ IsUnordered = true;
+ // FALL THROUGH.
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
+ Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
+ break;
+
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ IsUnordered = true;
+ // FALL THROUGH.
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
+ Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
+ break;
+ }
+
+ // If LHS is NaN, an ordered comparison will be false and the result will be
+ // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
+ // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ return SDValue();
+
+ // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
+ // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
+ // used for unsafe math or if one of the operands is known to be nonzero.
+ if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ return SDValue();
+
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+}
+
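The two bail-outs at the end are where a select and a min/max instruction genuinely disagree, and a couple of concrete values make the comments precise. A small runnable check, with plain C++ doubles standing in for the DAG nodes:

#include <cassert>
#include <cmath>

int main() {
  double NaN = std::nan("");
  // Ordered "x < y ? x : y" with a NaN LHS: the compare is false, so the
  // select yields y, but AArch64 FMIN(NaN, y) would yield NaN; hence the
  // isKnownNeverNaN guard above.
  double Sel = (NaN < 1.0) ? NaN : 1.0;
  assert(Sel == 1.0);

  // For the "or equal" cases, "+0 <= -0" is true, so the select returns +0,
  // while FMIN(+0, -0) returns -0; hence the extra guard unless unsafe math
  // or a known-nonzero operand makes the distinction irrelevant.
  double Z = (0.0 <= -0.0) ? 0.0 : -0.0;
  assert(!std::signbit(Z));
  return 0;
}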
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -8526,9 +8802,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
- return performSelectCombine(N, DAG);
+ return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
+ case ISD::SELECT_CC:
+ return performSelectCCCombine(N, DCI.DAG);
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
@@ -8699,7 +8977,7 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
@@ -8760,9 +9038,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
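Returning AtomicRMWExpansionKind::LLSC asks the IR-level atomic expansion pass to lower atomicrmw of up to 128 bits into a load-exclusive/store-exclusive retry loop rather than a cmpxchg loop or a libcall. Roughly, for a 64-bit add, using Clang's exclusive-access builtins (target-specific, shown only as a sketch of the expanded IR):

#include <cstdint>

// What the expansion boils down to: LDXR, compute, STXR, retry on failure.
uint64_t atomicAddLLSC(volatile uint64_t *Ptr, uint64_t V) {
  uint64_t Old;
  do {
    Old = __builtin_arm_ldrex(Ptr);            // LDXR: load-exclusive
  } while (__builtin_arm_strex(Old + V, Ptr)); // STXR: nonzero on failure
  return Old;
}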
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
@@ -8822,7 +9102,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
+ return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
@@ -8830,10 +9110,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateCall2(
- Stxr, Builder.CreateZExtOrBitCast(
- Val, Stxr->getFunctionType()->getParamType(0)),
- Addr);
+ return Builder.CreateCall(Stxr,
+ {Builder.CreateZExtOrBitCast(
+ Val, Stxr->getFunctionType()->getParamType(0)),
+ Addr});
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 5a19322..0d9b8b7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -18,13 +18,14 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace AArch64ISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
@@ -140,6 +141,18 @@ enum {
FCMLEz,
FCMLTz,
+ // Vector across-lanes addition
+ // Only the lower result lane is defined.
+ SADDV,
+ UADDV,
+
+ // Vector across-lanes min/max
+ // Only the lower result lane is defined.
+ SMINV,
+ UMINV,
+ SMAXV,
+ UMAXV,
+
// Vector bitwise negation
NOT,
@@ -207,7 +220,8 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(const TargetMachine &TM);
+ explicit AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI);
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
@@ -222,7 +236,7 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override;
/// allowsMisalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
+ /// unaligned memory accesses of the specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
unsigned Align = 1,
bool *Fast = nullptr) const override {
@@ -244,10 +258,6 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
@@ -285,6 +295,8 @@ public:
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -335,13 +347,16 @@ public:
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
private:
+ bool isExtFreeImpl(const Instruction *Ext) const override;
+
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -405,6 +420,9 @@ private:
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
+ SDValue TVal, SDValue FVal, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -453,12 +471,23 @@ private:
const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
+ // followed by llvm_unreachable so we'll leave them unimplemented in
+ // the backend for now.
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d295c02..3b8b668 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -441,11 +441,11 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
// instructions for splatting repeating bit patterns across the immediate.
def logical_imm32_XFORM : SDNodeXForm<imm, [{
uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
def logical_imm64_XFORM : SDNodeXForm<imm, [{
uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
let DiagnosticType = "LogicalSecondSource" in {
@@ -682,7 +682,7 @@ def fpimm32 : Operand<f32>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP32Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = FPImmOperand;
let PrintMethod = "printFPImmOperand";
@@ -693,7 +693,7 @@ def fpimm64 : Operand<f64>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP64Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = FPImmOperand;
let PrintMethod = "printFPImmOperand";
@@ -768,7 +768,7 @@ def simdimmtype10 : Operand<i32>,
uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
.bitcastToAPInt()
.getZExtValue());
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = SIMDImmType10Operand;
let PrintMethod = "printSIMDType10Operand";
@@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic,
SDPatternOperator OpNode = null_frag> {
let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
+ // Increase the weight of the immediate variant to try to match it before
+ // the extended register variant.
+ // We used to match the register variant before the immediate when the
+ // register argument could be implicitly zero-extended.
+ let AddedComplexity = 6 in
def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
+ let AddedComplexity = 6 in
def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
@@ -2186,7 +2192,8 @@ class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
def inv_cond_XFORM : SDNodeXForm<imm, [{
AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue());
- return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32);
+ return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N),
+ MVT::i32);
}]>;
multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> {
@@ -3282,6 +3289,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
: BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
bits<5> Rt;
bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
@@ -5298,6 +5309,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
+ dag oops, dag iops, string asm,
+ list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = R;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
@@ -5325,6 +5357,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
}
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, []>;
+ def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
+ (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, []>;
+}
+
multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -5885,7 +5927,7 @@ multiclass SIMDIns {
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
let Inst{14-12} = idx2;
- let Inst{11} = 0;
+ let Inst{11} = {?};
}
def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
bits<2> idx;
@@ -5893,7 +5935,7 @@ multiclass SIMDIns {
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
let Inst{14-13} = idx2;
- let Inst{12-11} = 0;
+ let Inst{12-11} = {?,?};
}
def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
bits<1> idx;
@@ -5901,7 +5943,7 @@ multiclass SIMDIns {
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
let Inst{14} = idx2;
- let Inst{13-11} = 0;
+ let Inst{13-11} = {?,?,?};
}
// For all forms of the INS instruction, the "mov" mnemonic is the
@@ -8517,6 +8559,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
} // end of 'let Predicates = [HasNEON]'
//----------------------------------------------------------------------------
+// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON, HasV8_1a] in {
+
+class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ pattern> {
+ let Inst{21}=0;
+}
+multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm)))))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))))]>;
+}
+
+multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V64, V64, V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128, V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh
+ (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64, V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we
+ // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (insert_subvector
+ (undef),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i32 0))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V64:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128, V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V128:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$dst),
+ (Accum (i32 FPR32Op:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+} // let Predicates = [HasNEON, HasV8_1a]
+
+//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e582ed4..207c34c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
+ RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -617,10 +617,8 @@ AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
int OffsetA = 0, OffsetB = 0;
int WidthA = 0, WidthB = 0;
- assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
- "MIa must be a store or a load");
- assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
- "MIb must be a store or a load");
+ assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");
if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
@@ -707,9 +705,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
- const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
@@ -1527,7 +1524,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
}
for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
@@ -1905,7 +1902,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2003,7 +2000,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
@@ -2069,10 +2066,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-MachineInstr *
-AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
@@ -2367,7 +2364,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
- NopInst.addOperand(MCOperand::CreateImm(0));
+ NopInst.addOperand(MCOperand::createImm(0));
}
/// useMachineCombiner - return true when a target supports MachineCombiner
bool AArch64InstrInfo::useMachineCombiner() const {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index d8f1274..fa4b8b7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -129,10 +129,9 @@ public:
const TargetRegisterInfo *TRI) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a6f09e9..c7d6a69 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -258,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -489,7 +498,7 @@ def i64imm_32bit : ImmLeaf<i64, [{
}]>;
def trunc_imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(i64 i64imm_32bit:$src),
@@ -498,12 +507,12 @@ def : Pat<(i64 i64imm_32bit:$src),
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
@@ -848,57 +857,57 @@ defm UBFM : BitfieldImm<0b10, "ubfm">;
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 31 ? 31 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
@@ -2305,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -2341,8 +2364,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
@@ -2762,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
@@ -2775,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+ (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+ (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+ (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+ (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+ (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+ (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+ (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+ (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+ (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+ (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+ (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+ (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
@@ -2978,6 +3061,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasV8_1a] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
@@ -3431,10 +3528,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
@@ -3462,13 +3559,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
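The switch from fixed five-bit literals to {?,?,?,?,1}-style encodings leaves the upper imm5 bits unconstrained: in DUP (general) only the position of the lowest set bit selects the element size, and the bits above it are not part of this opcode choice. A small decode sketch of that rule (helper name invented, semantics per the A64 encoding tables):

// Lowest set bit of imm5 picks the element size for DUP (general).
static int dupElemSizeBytes(unsigned imm5) {
  if (imm5 & 0x1) return 1; // xxxx1 -> 8-bit elements (.8b/.16b)
  if (imm5 & 0x2) return 2; // xxx10 -> 16-bit elements (.4h/.8h)
  if (imm5 & 0x4) return 4; // xx100 -> 32-bit elements (.2s/.4s)
  if (imm5 & 0x8) return 8; // x1000 -> 64-bit elements (.2d)
  return 0;                 // imm5 == 0b00000 is unallocated
}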
@@ -3515,13 +3612,13 @@ def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
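The x2/x4/x8 transforms above implement the lane remap that the preceding comment describes. As a sanity check on the arithmetic, a little-endian byte-level model (assumed semantics; helper name invented):

#include <cstdint>
#include <cstring>

// Duplicating 64-bit lane L and truncating every copy to 16 bits gives the
// same result as duplicating 16-bit lane 4*L (VecIndex_x4): on a little-endian
// layout, lane 4*L holds exactly the low 16 bits of 64-bit lane L.
static uint16_t truncatedLane(const uint64_t src[2], unsigned lane64) {
  uint16_t view[8];
  std::memcpy(view, src, sizeof view);  // reinterpret the 128-bit register
  return view[4 * lane64];              // == static_cast<uint16_t>(src[lane64])
}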
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
@@ -3724,36 +3821,24 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, possibly fed by an INS if the lane number is
-// anything other than zero.
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (EXTRACT_SUBREG
- (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexD:$idx),
- dsub))>;
+ (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (EXTRACT_SUBREG
- (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexS:$idx),
- ssub))>;
+ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
- (f16 (EXTRACT_SUBREG
- (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexH:$idx),
- hsub))>;
+ (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -3799,121 +3884,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics that have a node equivalent which
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as smov
// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special: lower to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm; return Vd.s[0]
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
-}
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special: lower to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm; return Vd.s[0]
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
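To make the shape of these patterns concrete: the AArch64*v nodes model an across-lanes reduction whose scalar lands in lane 0 of a vector result, and the v2i32 cases are peeled off because there is no two-lane ADDV/SMAXV/etc., so a pairwise op with both operands equal does the job. A sketch under those assumed semantics:

#include <cstdint>

// Across-lanes unsigned add: the scalar result is conceptually returned in
// lane 0; the other lanes are undefined.
static uint32_t uaddv4(const uint32_t v[4]) {
  return v[0] + v[1] + v[2] + v[3];
}

// v2i32 special case: ADDP {a0,a1}, {a0,a1} -> {a0+a1, a0+a1}. Both lanes
// hold the reduction, so lane 0 can be read out directly.
static void addp2(const uint32_t a[2], const uint32_t b[2], uint32_t out[2]) {
  out[0] = a[0] + a[1];
  out[1] = b[0] + b[1];
}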
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
@@ -3976,32 +4083,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
@@ -4324,6 +4405,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
@@ -5092,22 +5177,26 @@ def : Pat<(trap), (BRK 1)>;
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -5122,22 +5211,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
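The rows added in these two hunks only extend an existing table: an AArch64NvCast re-labels the same 64- or 128-bit FP register, so listing v4f16/v8f16 lets half-float vectors avoid a cross-class copy. The value semantics are a plain bit copy, sketched here with uint16_t standing in for the f16 lane type (struct names invented):

#include <cstdint>
#include <cstring>

struct V2i32 { uint32_t v[2]; };
struct V4f16 { uint16_t v[4]; };  // stand-in lane type for f16

// A natural vector cast costs no instruction after isel; its meaning is a
// pure reinterpretation of the same 64 bits.
static V4f16 nvcast(V2i32 src) {
  V4f16 dst;
  static_assert(sizeof dst == sizeof src, "same register width");
  std::memcpy(&dst, &src, sizeof dst);
  return dst;
}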
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8157981..186e71a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,6 +16,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -63,16 +64,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
+ // \p SExtIdx[out] gives the index of the result of the load pair that
+ // must be extended. The value of SExtIdx assumes that the paired load
+ // produces the values in this order: (I, returned iterator), i.e.,
+ // -1 means no value has to be extended, 0 means I, and 1 means the
+ // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward,
+ bool &MergeForward, int &SExtIdx,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
+ // \p SExtIdx index of the result that must be extended for a paired load.
+ // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward,
+ int SExtIdx);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -135,6 +144,8 @@ static bool isUnscaledLdst(unsigned Opc) {
return true;
case AArch64::LDURXi:
return true;
+ case AArch64::LDURSWi:
+ return true;
}
}
@@ -173,6 +184,46 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return 8;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return 4;
+ }
+}
+
+static unsigned getMatchingNonSExtOpcode(unsigned Opc,
+ bool *IsValidLdStrOpc = nullptr) {
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = true;
+ switch (Opc) {
+ default:
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = false;
+ return UINT_MAX;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return Opc;
+ case AArch64::LDRSWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ return AArch64::LDURWi;
}
}
@@ -210,6 +261,9 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRXui:
case AArch64::LDURXi:
return AArch64::LDPXi;
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ return AArch64::LDPSWi;
}
}
@@ -237,6 +291,8 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpre;
case AArch64::LDRXui:
return AArch64::LDRXpre;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpre;
}
}
@@ -264,13 +320,15 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::LDRWpost;
case AArch64::LDRXui:
return AArch64::LDRXpost;
+ case AArch64::LDRSWui:
+ return AArch64::LDRSWpost;
}
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward) {
+ bool MergeForward, int SExtIdx) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -280,11 +338,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
- bool IsUnscaled = isUnscaledLdst(I->getOpcode());
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = isUnscaledLdst(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
- unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
+ unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -299,6 +359,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
RtMI = I;
Rt2MI = Paired;
@@ -325,8 +390,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
+
+ if (SExtIdx != -1) {
+ // Generate the sign extension for the proper result of the ldp.
+ // I.e., with X1, that would be:
+ // %W1<def> = KILL %W1, %X1<imp-def>
+ // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ MachineOperand &DstMO = MIB->getOperand(SExtIdx);
+ // Right now, DstMO has the extended register, since it comes from an
+ // extended opcode.
+ unsigned DstRegX = DstMO.getReg();
+ // Get the W variant of that register.
+ unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ // Update the result of LDP to use the W instead of the X variant.
+ DstMO.setReg(DstRegW);
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ // Make the machine verifier happy by providing a definition for
+ // the X register.
+ // Insert this definition right after the generated LDP, i.e., before
+ // InsertionPoint.
+ MachineInstrBuilder MIBKill =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstRegW)
+ .addReg(DstRegW)
+ .addReg(DstRegX, RegState::Define);
+ MIBKill->getOperand(2).setImplicit();
+ // Create the sign extension.
+ MachineInstrBuilder MIBSXTW =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::SBFMXri), DstRegX)
+ .addReg(DstRegX)
+ .addImm(0)
+ .addImm(31);
+ (void)MIBSXTW;
+ DEBUG(dbgs() << " Extend operand:\n ");
+ DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ } else {
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
// Erase the old instructions.
I->eraseFromParent();
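Putting the pieces of this function together, a hypothetical before/after for the new sign-extending case (register numbers invented; SExtIdx == 0 here, since I, the first instruction, is the extending load):

// Before pairing:
//   ldrsw x1, [x0]           // 32-bit load, sign-extended to 64 bits
//   ldr   w2, [x0, #4]
// After mergePairedInsns:
//   ldp   w1, w2, [x0]       // opcode via getMatchingNonSExtOpcode
//   KILL  w1, implicit-def x1
//   sbfm  x1, x1, #0, #31    // SBFMXri 0, 31, i.e. sxtw x1, w1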
@@ -380,17 +484,41 @@ static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
+static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
+ const AArch64InstrInfo *TII) {
+ // One of the instructions must modify memory.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
+ // Both instructions must be memory operations.
+ if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
+ return false;
+
+ return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
+}
+
+static bool mayAlias(MachineInstr *MIa,
+ SmallVectorImpl<MachineInstr *> &MemInsns,
+ const AArch64InstrInfo *TII) {
+ for (auto &MIb : MemInsns)
+ if (mayAlias(MIa, MIb, TII))
+ return true;
+
+ return false;
+}
+
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, unsigned Limit) {
+ bool &MergeForward, int &SExtIdx,
+ unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
- int Opc = FirstMI->getOpcode();
+ unsigned Opc = FirstMI->getOpcode();
bool MayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
@@ -414,6 +542,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
BitVector ModifiedRegs, UsedRegs;
ModifiedRegs.resize(TRI->getNumRegs());
UsedRegs.resize(TRI->getNumRegs());
+
+ // Remember any instructions that read/write memory between FirstMI and MI.
+ SmallVector<MachineInstr *, 4> MemInsns;
+
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr *MI = MBBI;
// Skip DBG_VALUE instructions. Otherwise debug info can affect the
@@ -424,7 +556,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Now that we know this is a real instruction, count it.
++Count;
- if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
+ bool CanMergeOpc = Opc == MI->getOpcode();
+ SExtIdx = -1;
+ if (!CanMergeOpc) {
+ bool IsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
+ if (!IsValidLdStrOpc)
+ continue;
+ // Opc will be the opcode of the first instruction in the pair.
+ SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
+ }
+
+ if (CanMergeOpc && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -450,6 +594,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the alignment requirements of the paired (scaled) instruction
@@ -458,6 +604,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
if (IsUnscaled && EnableAArch64UnscaledMemOp &&
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the destination register of the loads is the same register, bail
@@ -465,22 +613,29 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// registers the same is UNPREDICTABLE and will result in an exception.
if (MayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
continue;
}
// If the Rt of the second instruction was not modified or used between
- // the two instructions, we can combine the second into the first.
+ // the two instructions and none of the instructions between the second
+ // and first alias with the second, we can combine the second into the
+ // first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !UsedRegs[MI->getOperand(0).getReg()]) {
+ !UsedRegs[MI->getOperand(0).getReg()] &&
+ !mayAlias(MI, MemInsns, TII)) {
MergeForward = false;
return MBBI;
}
// Likewise, if the Rt of the first instruction is not modified or used
- // between the two instructions, we can combine the first into the
- // second.
+ // between the two instructions and none of the instructions between the
+ // first and the second alias with the first, we can combine the first
+ // into the second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !UsedRegs[FirstMI->getOperand(0).getReg()]) {
+ !UsedRegs[FirstMI->getOperand(0).getReg()] &&
+ !mayAlias(FirstMI, MemInsns, TII)) {
MergeForward = true;
return MBBI;
}
@@ -489,21 +644,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
}
- // If the instruction wasn't a matching load or store, but does (or can)
- // modify memory, stop searching, as we don't have alias analysis or
- // anything like that to tell us whether the access is tromping on the
- // locations we care about. The big one we want to catch is calls.
- //
- // FIXME: Theoretically, we can do better than that for SP and FP based
- // references since we can effectively know where those are touching. It's
- // unclear if it's worth the extra code, though. Most paired instructions
- // will be sequential, perhaps with a few intervening non-memory related
- // instructions.
- if (MI->mayStore() || MI->isCall())
- return E;
- // Likewise, if we're matching a store instruction, we don't want to
- // move across a load, as it may be reading the same location.
- if (FirstMI->mayStore() && MI->mayLoad())
+ // If the instruction wasn't a matching load or store, stop searching if we
+ // encounter a call instruction that might modify memory.
+ if (MI->isCall())
return E;
// Update modified / uses register lists.
@@ -513,6 +656,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// return early.
if (ModifiedRegs[BaseReg])
return E;
+
+ // Update list of instructions that read/write memory.
+ if (MI->mayLoadOrStore())
+ MemInsns.push_back(MI);
}
return E;
}
@@ -780,6 +927,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
+ case AArch64::LDRSWui:
// do the unscaled versions as well
case AArch64::STURSi:
case AArch64::STURDi:
@@ -790,7 +938,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURDi:
case AArch64::LDURQi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi: {
// If this is a volatile load/store, don't mess with it.
if (MI->hasOrderedMemoryRef()) {
++MBBI;
@@ -809,13 +958,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool MergeForward = false;
+ int SExtIdx = -1;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
Modified = true;
++NumPairCreated;
@@ -835,7 +985,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
MachineInstr *MI = MBBI;
// Do update merging. It's simpler to keep this separate from the above
// switch, though not strictly necessary.
- int Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
// Just move on to the next instruction.
@@ -931,10 +1081,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const AArch64InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
bool Modified = false;
for (auto &MBB : Fn)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index b829341..72edbf1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -73,7 +73,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(
Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
@@ -148,7 +148,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
@@ -169,16 +169,16 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
// Ignore all implicit register operands.
if (MO.isImplicit())
return false;
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_RegisterMask:
// Regmasks are like implicit defs.
return false;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index f942c4e..5394875 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -235,7 +235,7 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
return true;
}
@@ -312,14 +312,14 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
costs[i + 1][j + 1] = sameParityMax + 1.0;
}
}
- G.setEdgeCosts(edge, std::move(costs));
+ G.updateEdgeCosts(edge, std::move(costs));
}
}
}
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
const MachineInstr &MI) {
- LiveInterval LI = LIs.getInterval(reg);
+ const LiveInterval &LI = LIs.getInterval(reg);
SlotIndex SI = LIs.getInstructionIndex(&MI);
return LI.expiredAt(SI);
}
@@ -328,7 +328,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) {
const MachineFunction &MF = G.getMetadata().MF;
LiveIntervals &LIs = G.getMetadata().LIS;
- TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
DEBUG(MF.dump());
for (const auto &MBB: MF) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 97b0f0e..e1b93bf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -22,7 +22,7 @@
#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
@@ -31,12 +31,14 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -112,44 +114,42 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
}
- /// Type to store a list of User.
- typedef SmallVector<Value::user_iterator, 4> Users;
+ /// Type to store a list of Uses.
+ typedef SmallVector<Use *, 4> Uses;
/// Map an insertion point to all the uses it dominates.
- typedef DenseMap<Instruction *, Users> InsertionPoints;
+ typedef DenseMap<Instruction *, Uses> InsertionPoints;
/// Map a function to the required insertion points of the loads for a
/// global variable.
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
/// Find the closest point that dominates the given Use.
- Instruction *findInsertionPoint(Value::user_iterator &Use);
+ Instruction *findInsertionPoint(Use &Use);
/// Check if the given insertion point is dominated by an existing
/// insertion point.
/// If true, the given use is added to the list of dominated uses for
/// the related existing point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \return true if one of the insertion point in InsertPts dominates NewPt,
/// false otherwise
- bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Check if the given insertion point can be merged with an existing
/// insertion point in a common dominator.
/// If true, the given use is added to the list of the created insertion
/// point.
/// \param NewPt the insertion point to be checked
- /// \param UseIt the use to be added into the list of dominated uses
+ /// \param Use the use to be added into the list of dominated uses
/// \param InsertPts existing insertion points
/// \pre NewPt and all instructions in InsertPts belong to the same function
/// \pre isDominated returns false for the exact same parameters.
/// \return true if there exists an insertion point in InsertPts that could
/// have been merged with NewPt in a common dominator,
/// false otherwise
- bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
- InsertionPoints &InsertPts);
+ bool tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts);
/// Compute the minimal insertion points to dominate all the interesting
/// uses of value.
@@ -182,21 +182,21 @@ private:
bool promoteConstant(Constant *Cst);
/// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
- /// Append UseIt to this list and delete the entry of IPI in InsertPts.
- static void appendAndTransferDominatedUses(Instruction *NewPt,
- Value::user_iterator &UseIt,
+ /// Append Use to this list and delete the entry of IPI in InsertPts.
+ static void appendAndTransferDominatedUses(Instruction *NewPt, Use &Use,
InsertionPoints::iterator &IPI,
InsertionPoints &InsertPts) {
// Record the dominated use.
- IPI->second.push_back(UseIt);
+ IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterators.
- // Keep a copy of the key to find the iterator to erase.
+ // Keep a copy of the key to find the iterator to erase. Keep a copy of the
+ // value so that we don't have to dereference IPI->second.
Instruction *OldInstr = IPI->first;
- InsertPts[NewPt] = std::move(IPI->second);
+ Uses OldUses = std::move(IPI->second);
+ InsertPts[NewPt] = std::move(OldUses);
// Erase IPI.
- IPI = InsertPts.find(OldInstr);
- InsertPts.erase(IPI);
+ InsertPts.erase(OldInstr);
}
};
} // end anonymous namespace
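The iterator-invalidation dance in appendAndTransferDominatedUses is a general DenseMap property, so a standalone sketch may help (types simplified, names invented): insertion can rehash the table, so the key and value must be pulled out of the old slot before the new one is created.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <utility>

using UseList = llvm::SmallVector<int, 4>;

// Move It's entry under NewKey without touching It after the insertion.
static void transferEntry(llvm::DenseMap<int, UseList> &M,
                          llvm::DenseMap<int, UseList>::iterator It,
                          int NewKey) {
  int OldKey = It->first;                 // copy the key first
  UseList Vals = std::move(It->second);   // and move out the value
  M[NewKey] = std::move(Vals);            // may rehash: It is now invalid
  M.erase(OldKey);                        // so erase by key, not by iterator
}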
@@ -328,23 +328,18 @@ static bool shouldConvert(const Constant *Cst) {
return isConstantUsingVectorTy(Cst->getType());
}
-Instruction *
-AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+Instruction *AArch64PromoteConstant::findInsertionPoint(Use &Use) {
+ Instruction *User = cast<Instruction>(Use.getUser());
+
// If this user is a phi, the insertion point is in the related
// incoming basic block.
- PHINode *PhiInst = dyn_cast<PHINode>(*Use);
- Instruction *InsertionPoint;
- if (PhiInst)
- InsertionPoint =
- PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
- else
- InsertionPoint = dyn_cast<Instruction>(*Use);
- assert(InsertionPoint && "User is not an instruction!");
- return InsertionPoint;
+ if (PHINode *PhiInst = dyn_cast<PHINode>(User))
+ return PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
+
+ return User;
}
-bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
@@ -363,15 +358,14 @@ bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
DEBUG(dbgs() << "Insertion point dominated by:\n");
DEBUG(IPI.first->print(dbgs()));
DEBUG(dbgs() << '\n');
- IPI.second.push_back(UseIt);
+ IPI.second.push_back(&Use);
return true;
}
}
return false;
}
-bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
- Value::user_iterator &UseIt,
+bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Use &Use,
InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
@@ -391,7 +385,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << "\nat considered insertion point.\n");
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
@@ -415,7 +409,7 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
DEBUG(dbgs() << '\n');
DEBUG(NewPt->print(dbgs()));
DEBUG(dbgs() << '\n');
- appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+ appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts);
return true;
}
return false;
@@ -424,22 +418,22 @@ bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
void AArch64PromoteConstant::computeInsertionPoints(
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
DEBUG(dbgs() << "** Compute insertion points **\n");
- for (Value::user_iterator UseIt = Val->user_begin(),
- EndUseIt = Val->user_end();
- UseIt != EndUseIt; ++UseIt) {
+ for (Use &Use : Val->uses()) {
+ Instruction *User = dyn_cast<Instruction>(Use.getUser());
+
// If the user is not an Instruction, we cannot modify it.
- if (!isa<Instruction>(*UseIt))
+ if (!User)
continue;
// Filter out uses that should not be converted.
- if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
+ if (!shouldConvertUse(Val, User, Use.getOperandNo()))
continue;
- DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
- DEBUG((*UseIt)->print(dbgs()));
+ DEBUG(dbgs() << "Considered use, opidx " << Use.getOperandNo() << ":\n");
+ DEBUG(User->print(dbgs()));
DEBUG(dbgs() << '\n');
- Instruction *InsertionPoint = findInsertionPoint(UseIt);
+ Instruction *InsertionPoint = findInsertionPoint(Use);
DEBUG(dbgs() << "Considered insertion point:\n");
DEBUG(InsertionPoint->print(dbgs()));
@@ -449,17 +443,17 @@ void AArch64PromoteConstant::computeInsertionPoints(
// by another one.
InsertionPoints &InsertPts =
InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
- if (isDominated(InsertionPoint, UseIt, InsertPts))
+ if (isDominated(InsertionPoint, Use, InsertPts))
continue;
// This insertion point is useful; check if we can merge some insertion
// point in a common dominator or if NewPt dominates an existing one.
- if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
+ if (tryAndMerge(InsertionPoint, Use, InsertPts))
continue;
DEBUG(dbgs() << "Keep considered insertion point\n");
// It is definitely useful on its own.
- InsertPts[InsertionPoint].push_back(UseIt);
+ InsertPts[InsertionPoint].push_back(&Use);
}
}
@@ -470,41 +464,32 @@ bool AArch64PromoteConstant::insertDefinitions(
bool HasChanged = false;
// Traverse all insertion points in all the functions.
- for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
- EndIt = InsPtsPerFunc.end();
- FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
- InsertionPoints &InsertPts = FctToInstPtsIt->second;
+ for (const auto &FctToInstPtsIt : InsPtsPerFunc) {
+ const InsertionPoints &InsertPts = FctToInstPtsIt.second;
// Do more checking for debug purposes.
#ifndef NDEBUG
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
- *FctToInstPtsIt->first).getDomTree();
+ *FctToInstPtsIt.first).getDomTree();
#endif
- GlobalVariable *PromotedGV;
assert(!InsertPts.empty() && "Empty uses does not need a definition");
- Module *M = FctToInstPtsIt->first->getParent();
- DenseMap<Module *, GlobalVariable *>::iterator MapIt =
- ModuleToMergedGV.find(M);
- if (MapIt == ModuleToMergedGV.end()) {
+ Module *M = FctToInstPtsIt.first->getParent();
+ GlobalVariable *&PromotedGV = ModuleToMergedGV[M];
+ if (!PromotedGV) {
PromotedGV = new GlobalVariable(
*M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr,
"_PromotedConst", nullptr, GlobalVariable::NotThreadLocal);
PromotedGV->setInitializer(Cst);
- ModuleToMergedGV[M] = PromotedGV;
DEBUG(dbgs() << "Global replacement: ");
DEBUG(PromotedGV->print(dbgs()));
DEBUG(dbgs() << '\n');
++NumPromoted;
HasChanged = true;
- } else {
- PromotedGV = MapIt->second;
}
- for (InsertionPoints::iterator IPI = InsertPts.begin(),
- EndIPI = InsertPts.end();
- IPI != EndIPI; ++IPI) {
+ for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
- IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
+ IRBuilder<> Builder(IPI.first->getParent(), IPI.first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
@@ -512,18 +497,15 @@ bool AArch64PromoteConstant::insertDefinitions(
DEBUG(dbgs() << '\n');
// Update the dominated uses.
- Users &DominatedUsers = IPI->second;
- for (Value::user_iterator Use : DominatedUsers) {
+ for (Use *Use : IPI.second) {
#ifndef NDEBUG
- assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) ||
- (isa<PHINode>(*Use) &&
- DT.dominates(LoadedCst, findInsertionPoint(Use)))) &&
+ assert(DT.dominates(LoadedCst, findInsertionPoint(*Use)) &&
"Inserted definition does not dominate all its uses!");
#endif
- DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":");
- DEBUG(Use->print(dbgs()));
+ DEBUG(dbgs() << "Use to update " << Use->getOperandNo() << ":");
+ DEBUG(Use->getUser()->print(dbgs()));
DEBUG(dbgs() << '\n');
- Use->setOperand(Use.getOperandNo(), LoadedCst);
+ Use->set(LoadedCst);
++NumPromotedUses;
}
}
@@ -556,22 +538,19 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
// global variable. Create as few loads of this variable as possible and
// update the uses accordingly.
bool LocalChange = false;
- SmallSet<Constant *, 8> AlreadyChecked;
-
- for (auto &MBB : F) {
- for (auto &MI : MBB) {
- // Traverse the operand, looking for constant vectors. Replace them by a
- // load of a global variable of constant vector type.
- for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
- OpIdx != EndOpIdx; ++OpIdx) {
- Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
- // There is no point in promoting global values as they are already
- // global. Do not promote constant expressions either, as they may
- // require some code expansion.
- if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
- AlreadyChecked.insert(Cst).second)
- LocalChange |= promoteConstant(Cst);
- }
+ SmallPtrSet<Constant *, 8> AlreadyChecked;
+
+ for (Instruction &I : inst_range(&F)) {
+ // Traverse the operands, looking for constant vectors. Replace them with a
+ // load of a global variable of constant vector type.
+ for (Value *Op : I.operand_values()) {
+ Constant *Cst = dyn_cast<Constant>(Op);
+ // There is no point in promoting global values as they are already
+ // global. Do not promote constant expressions either, as they may
+ // require some code expansion.
+ if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
+ AlreadyChecked.insert(Cst).second)
+ LocalChange |= promoteConstant(Cst);
}
}
return LocalChange;
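The rewritten loop relies on inst_range from llvm/IR/InstIterator.h (newly included above), which flattens the per-block/per-instruction double loop into one range; a minimal standalone use (later trees renamed this helper to instructions()):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"

// Count every instruction in F without writing the nested loops by hand.
static unsigned countInsts(llvm::Function &F) {
  unsigned N = 0;
  for (llvm::Instruction &I : llvm::inst_range(&F)) {
    (void)I;
    ++N;
  }
  return N;
}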
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 206cdbb..1836682 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,9 +38,8 @@ static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
cl::desc("Reserve X18, making it unavailable as GPR"));
-AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
- const AArch64Subtarget *sti)
- : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
@@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
+ if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
return CSR_AArch64_TLS_ELF_RegMask;
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ if (TFI->hasFP(MF) || TT.isOSDarwin()) {
Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin() || ReserveX18) {
+ if (TT.isOSDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin() || ReserveX18;
+ return TT.isOSDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
+ return TFI->hasFP(MF) || TT.isOSDarwin();
case AArch64::W19:
case AArch64::X19:
return hasBasePointer(MF);
@@ -163,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ // Furthermore, if both variable-sized objects are present and the stack
+ // needs to be dynamically re-aligned, the base pointer is the only
+ // reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
+ if (needsStackRealignment(MF))
+ return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -179,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl(*MF.getFunction())
+ ->getFrameLowering()
+ ->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
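A user-level trigger for the new hook, assuming the usual 16-byte AArch64 stack alignment (the function below is illustrative, not from this patch): the over-aligned local pushes MFI->getMaxAlignment() past StackAlign, so needsStackRealignment returns true and, with variable-sized objects also present, hasBasePointer now returns true as well.

extern void consume(void *p);

// alignas(64) exceeds the default stack alignment, forcing dynamic
// realignment of SP in this function's prologue.
static void overAligned() {
  alignas(64) char buf[64];
  consume(buf);
}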
+
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -269,7 +301,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
@@ -277,7 +309,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
+ if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
@@ -285,6 +317,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
@@ -302,10 +335,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
-
+ const MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -324,6 +358,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -337,6 +374,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
MF.getSubtarget().getFrameLowering());
@@ -389,10 +428,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
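The realignment hooks added above fire when a frame demands more alignment
than AArch64's default 16-byte stack alignment, or when the function carries
an explicit StackAlignment attribute. A minimal sketch of source that trips
the MFI->getMaxAlignment() > StackAlign test (consume() is illustrative, not
from this patch):

    #include <cstdint>

    extern void consume(void *);

    // A 64-byte-aligned local raises the frame's max alignment above the
    // default 16-byte AArch64 stack alignment, so needsStackRealignment()
    // returns true unless the function opts out via "no-realign-stack".
    void overAlignedLocal() {
      alignas(64) std::uint8_t Buf[64];
      consume(Buf);
    }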
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 51a5034..8c379d9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -19,26 +19,24 @@
namespace llvm {
-class AArch64InstrInfo;
-class AArch64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
+class Triple;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
- const AArch64InstrInfo *TII;
- const AArch64Subtarget *STI;
+ const Triple &TT;
public:
- AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
+ AArch64RegisterInfo(const Triple &TT);
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -59,7 +57,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
@@ -73,7 +72,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx,
@@ -94,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 3ec4157..ca4457a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td"
// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
// defining the aliases precludes the need for mapping them using WriteRes. The
// aliases are sufficient for creating a coarse, working model. As the model
-// evolves, InstRWs will be used to override these SchedAliases.
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+// resource lookup for instructions. However, this creates an entry in
+// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+// any SchedReadAdvance since the lookup will fail.
def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
def : SchedAlias<WriteI, A57Write_1cyc_1I>;
@@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
-def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
-def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
@@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+// Shifted Register with Shift == 0
+// ----------------------------------------------------------------------------
+
+def A57WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A57WriteISReg], (instregex ".*rs$")>;
+
+
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 0cfd582..b9c5399 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -28,15 +28,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const char *bzeroEntry =
- (V && V->isNullValue())
- ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
- : nullptr;
+ (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
- const AArch64TargetLowering &TLI =
- *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering();
+ const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
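The rewritten memset lowering keeps the same policy: bzero is only called for
zeroing stores whose size is unknown or larger than 256 bytes. A compact
restatement of that dispatch rule as a standalone sketch (the helper name and
parameters are hypothetical):

    #include <cstdint>

    // Sketch: small fixed-size memsets stay inline; bzero is only worth a
    // call for zeroing when the size is unknown or above 256 bytes.
    bool shouldCallBZero(bool IsZeroing, const char *BZeroEntry,
                         const std::uint64_t *KnownSize) {
      if (!IsZeroing || !BZeroEntry)
        return false;
      return !KnownSize || *KnownSize > 256;
    }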
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 0c36e8f..85b44a2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -30,7 +30,6 @@ class AArch64StorePairSuppress : public MachineFunctionPass {
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
- MachineFunction *MF;
TargetSchedModel SchedModel;
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
@@ -115,20 +114,16 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
}
-bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- TII =
- static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
- TRI = MF->getSubtarget().getRegisterInfo();
- MRI = &MF->getRegInfo();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ TRI = ST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
-
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
+ DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
if (!SchedModel.hasInstrSchedModel()) {
DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
@@ -139,7 +134,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
// precisely determine whether a store pair can be formed. But we do want to
// filter out most situations where we can't form store pairs to avoid
// computing trace metrics in those cases.
- for (auto &MBB : *MF) {
+ for (auto &MBB : MF) {
bool SuppressSTP = false;
unsigned PrevBaseReg = 0;
for (auto &MI : MBB) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 47b5d54..0b97af8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,18 +47,12 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasV8_1aOps(false),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
- TSInfo(&DL), TLInfo(TM) {}
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e2740f1..5454b20 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -37,6 +37,8 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
+ bool HasV8_1aOps;
+
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
@@ -48,13 +50,14 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
+ bool IsLittle;
+
/// CPUString - String name of used CPU.
std::string CPUString;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- const DataLayout DL;
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
@@ -82,15 +85,17 @@ public:
return &TLInfo;
}
const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostMachineScheduler() const override {
return isCortexA53() || isCortexA57();
}
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
+
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
@@ -100,7 +105,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return DL.isLittleEndian(); }
+ bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index d4f19d2..a9059ab 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -13,10 +13,11 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
+#include "AArch64TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -84,7 +85,12 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
- cl::init(true));
+ cl::init(false));
+
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
@@ -103,6 +109,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<AArch64_ELFTargetObjectFile>();
}
+// Helper function to build a DataLayout string
+static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
+ Triple Triple(TT);
+ if (Triple.isOSBinFormatMachO())
+ return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (LittleEndian)
+ return "e-m:e-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i64:64-i128:128-n32:64-S128";
+}
+
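As a sanity check on the helper, the returned strings round-trip through
LLVM's DataLayout; a small sketch, assuming the usual llvm/IR/DataLayout.h
API:

    #include "llvm/IR/DataLayout.h"
    #include <cassert>

    // Sketch: the little-endian ELF string describes 64-bit pointers in a
    // little-endian layout; only the symbol mangling differs from Mach-O.
    void checkLayout() {
      llvm::DataLayout DL(computeDataLayout("aarch64--linux-gnu", true));
      assert(DL.isLittleEndian());
      assert(DL.getPointerSizeInBits() == 64);
    }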
/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
@@ -111,9 +127,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+  // The DataLayout string is computed up front (see computeDataLayout
+  // above) because it must be ready before TLInfo is constructed.
+ : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -121,11 +140,8 @@ AArch64TargetMachine::~AArch64TargetMachine() {}
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -188,12 +204,10 @@ public:
};
} // namespace
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
- // allows the AArch64 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAArch64TargetTransformInfoPass(this));
+TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(AArch64TTIImpl(this, F));
+ });
}
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -233,8 +247,13 @@ bool AArch64PassConfig::addPreISel() {
// get a chance to be merged
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
+ // FIXME: On AArch64, this depends on the type.
+  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
+  // and the offset has to be a multiple of the related size in bytes.
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
+ addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
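EnableGlobalMerge is a cl::boolOrDefault, so the pass now has a tri-state
switch: forced on, forced off, or deferred to the optimization level. A
sketch of that decision with a hypothetical helper name:

    #include "llvm/Support/CommandLine.h"

    // Sketch: an explicit -aarch64-global-merge wins either way; when the
    // flag is unset, the pass only runs at -O3 (Aggressive).
    static bool shouldRunGlobalMerge(llvm::cl::boolOrDefault Opt,
                                     bool Aggressive) {
      if (Opt == llvm::cl::BOU_TRUE)
        return true;
      if (Opt == llvm::cl::BOU_FALSE)
        return false;
      return Aggressive; // cl::BOU_UNSET: defer to the optimization level
    }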
@@ -246,7 +265,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible.
- if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
getOptLevel() != CodeGenOpt::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
@@ -281,10 +300,7 @@ void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- if (TM->getOptLevel() != CodeGenOpt::None &&
- (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
- TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
- usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
@@ -304,6 +320,6 @@ void AArch64PassConfig::addPreEmitPass() {
// range of their destination.
addPass(createAArch64BranchRelaxation());
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
- TM->getSubtarget<AArch64Subtarget>().isTargetMachO())
+ Triple(TM->getTargetTriple()).isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
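With the Subtarget member gone from the TargetMachine, getSubtargetImpl keys
the SubtargetMap on the function's "target-cpu" and "target-features"
attributes, so functions with identical attributes share one lazily built
subtarget. A simplified sketch of that cache (Subtarget here is a stand-in
struct, not the real class):

    #include <map>
    #include <memory>
    #include <string>

    struct Subtarget { std::string CPU, FS; };

    // Sketch: the cache key concatenates the CPU and feature strings,
    // mirroring how getSubtargetImpl deduplicates per-function subtargets.
    static Subtarget &getCachedSubtarget(
        std::map<std::string, std::unique_ptr<Subtarget>> &Cache,
        const std::string &CPU, const std::string &FS) {
      std::unique_ptr<Subtarget> &Entry = Cache[CPU + FS];
      if (!Entry)
        Entry.reset(new Subtarget{CPU, FS});
      return *Entry;
    }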
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 75c65c5..ec34fad 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -24,7 +24,6 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
@@ -34,17 +33,13 @@ public:
CodeGenOpt::Level OL, bool IsLittleEndian);
~AArch64TargetMachine() override;
-
- const AArch64Subtarget *getSubtargetImpl() const override {
- return &Subtarget;
- }
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Register AArch64 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile* getObjFileLowering() const override {
return TLOF.get();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4069038..299b4a5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
@@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
+ : TargetLoweringObjectFileMachO() {
+ SupportGOTPCRelWithOffset = false;
+}
+
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -35,7 +41,7 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
- MCSymbol *PCSym = getContext().CreateTempSymbol();
+ MCSymbol *PCSym = getContext().createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
return MCBinaryExpr::CreateSub(Res, PC, getContext());
@@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+  assert((Offset + MV.getConstant() == 0) &&
+         "AArch64 does not support GOT PC rel with extra offset");
+ // On ARM64 Darwin, we can reference symbols with foo@GOT-., which
+ // is an indirect pc-relative reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().createTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
index 2e595f9..d41f445 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
+ AArch64_MachoTargetObjectFile();
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
@@ -33,6 +35,11 @@ public:
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
+
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1a2914..ed27cf8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
+//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,18 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// AArch64 target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,130 +20,10 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeAArch64TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
- const AArch64TargetMachine *TM;
- const AArch64Subtarget *ST;
- const AArch64TargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- AArch64TTI(const AArch64TargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- unsigned getIntImmCost(int64_t Val) const;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 32;
- return 0;
- }
- return 31;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 128;
- return 0;
- }
- return 64;
- }
-
- unsigned getMaxInterleaveFactor() const override;
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
- override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
- override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
- override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
-
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
- "AArch64 Target Transform Info", true, true, false)
-char AArch64TTI::ID = 0;
-
-ImmutablePass *
-llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
- return new AArch64TTI(TM);
-}
-
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
+unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -163,7 +37,7 @@ unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
}
/// \brief Calculate the cost of materializing the given constant.
-unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -187,25 +61,25 @@ unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return std::max(1U, Cost);
}
-unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
unsigned ImmIdx = ~0U;
switch (Opcode) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::Store:
ImmIdx = 0;
break;
@@ -227,7 +101,7 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::LShr:
case Instruction::AShr:
if (Idx == 1)
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Instruction::Trunc:
case Instruction::ZExt:
@@ -245,26 +119,27 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
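This overload (and the intrinsic-ID one below) gates on the number of 64-bit
pieces an immediate needs, computed with a ceiling division. A standalone
worked example of that arithmetic:

    #include <cassert>

    int main() {
      // A 128-bit immediate splits into ceil(128/64) == 2 pieces, so any
      // materialization cost of at most 2 * TCC_Basic is reported as
      // TCC_Free and constant hoisting leaves the use site alone.
      unsigned BitSize = 128;
      unsigned NumConstants = (BitSize + 63) / 64;
      assert(NumConstants == 2);
      return 0;
    }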
-unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
switch (IID) {
default:
- return TCC_Free;
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -273,35 +148,36 @@ unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free) : Cost;
+ unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return AArch64TTI::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-AArch64TTI::PopcntSupportKind
-AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (TyWidth == 32 || TyWidth == 64)
- return PSK_FastHardware;
+ return TTI::PSK_FastHardware;
// TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
- return PSK_Software;
+ return TTI::PSK_Software;
}
-unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
+unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -309,7 +185,7 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
@@ -380,11 +256,11 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Idx != -1)
return ConversionTbl[Idx].Cost;
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
@@ -408,10 +284,10 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
-unsigned AArch64TTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned AArch64TTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -442,8 +318,8 @@ unsigned AArch64TTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -455,7 +331,7 @@ unsigned AArch64TTI::getArithmeticInstrCost(
}
}
-unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -470,14 +346,14 @@ unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 1;
}
-unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// Vector selects wider than the register width are not lowered well.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
+ const unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -498,12 +374,12 @@ unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return VectorSelectTbl[Idx].Cost;
}
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
@@ -531,7 +407,7 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
unsigned Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
@@ -543,14 +419,103 @@ unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
return Cost;
}
-unsigned AArch64TTI::getMaxInterleaveFactor() const {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
if (ST->isCortexA57())
return 4;
return 2;
}
-void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ // Enable partial unrolling and runtime unrolling.
+ BaseT::getUnrollingPreferences(L, UP);
+
+  // An inner loop is more likely to be hot, and LICM can hoist its runtime
+  // checks out, so the overhead is lower; try a larger threshold to unroll
+  // more loops.
+ if (L->getLoopDepth() > 1)
+ UP.PartialThreshold *= 2;
+
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
+
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+ Type *ExpectedType) {
+ switch (Inst->getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4: {
+    // ExpectedType must be a struct whose elements match the stored values.
+ StructType *ST = dyn_cast<StructType>(ExpectedType);
+ if (!ST)
+ return nullptr;
+ unsigned NumElts = Inst->getNumArgOperands() - 1;
+ if (ST->getNumElements() != NumElts)
+ return nullptr;
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
+ return nullptr;
+ }
+ Value *Res = UndefValue::get(ExpectedType);
+ IRBuilder<> Builder(Inst);
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ Value *L = Inst->getArgOperand(i);
+ Res = Builder.CreateInsertValue(Res, L, i);
+ }
+ return Res;
+ }
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ if (Inst->getType() == ExpectedType)
+ return Inst;
+ return nullptr;
+ }
+}
+
+bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) {
+ switch (Inst->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ Info.ReadMem = true;
+ Info.WriteMem = false;
+ Info.Vol = false;
+ Info.NumMemRefs = 1;
+ Info.PtrVal = Inst->getArgOperand(0);
+ break;
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4:
+ Info.ReadMem = false;
+ Info.WriteMem = true;
+ Info.Vol = false;
+ Info.NumMemRefs = 1;
+ Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
+ break;
+ }
+
+ switch (Inst->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_st2:
+ Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
+ break;
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_st3:
+ Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
+ break;
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_st4:
+ Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
+ break;
+ }
+ return true;
+}
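The MatchingId values let a consumer pair a structured NEON store with a
later structured load of the same arity from the same pointer and forward
the stored values. A sketch of the arity-to-id mapping (the helper is
hypothetical; the enum mirrors the one declared in the new header below):

    #include <cassert>

    enum MemIntrinsicType {
      VECTOR_LDST_TWO_ELEMENTS,
      VECTOR_LDST_THREE_ELEMENTS,
      VECTOR_LDST_FOUR_ELEMENTS
    };

    // Sketch: ld2/st2 share one id, ld3/st3 another, ld4/st4 a third.
    static MemIntrinsicType matchingIdForArity(unsigned NumElts) {
      assert(NumElts >= 2 && NumElts <= 4 && "NEON structured ops use 2-4");
      return NumElts == 2   ? VECTOR_LDST_TWO_ELEMENTS
             : NumElts == 3 ? VECTOR_LDST_THREE_ELEMENTS
                            : VECTOR_LDST_FOUR_ELEMENTS;
    }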
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
new file mode 100644
index 0000000..25c22bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -0,0 +1,147 @@
+//===-- AArch64TargetTransformInfo.h - AArch64 specific TTI -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific to the
+/// AArch64 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+
+namespace llvm {
+
+class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
+ typedef BasicTTIImplBase<AArch64TTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const AArch64TargetMachine *TM;
+ const AArch64Subtarget *ST;
+ const AArch64TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const AArch64Subtarget *getST() const { return ST; }
+ const AArch64TargetLowering *getTLI() const { return TLI; }
+
+ enum MemIntrinsicType {
+ VECTOR_LDST_TWO_ELEMENTS,
+ VECTOR_LDST_THREE_ELEMENTS,
+ VECTOR_LDST_FOUR_ELEMENTS
+ };
+
+public:
+ explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
+ : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AArch64TTIImpl(const AArch64TTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
+ TLI(Arg.TLI) {}
+ AArch64TTIImpl(AArch64TTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
+ ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
+ AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ TM = RHS.TM;
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ TM = std::move(RHS.TM);
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(int64_t Val);
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 32;
+ return 0;
+ }
+ return 31;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+ return 64;
+ }
+
+ unsigned getMaxInterleaveFactor(unsigned VF);
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+ Type *ExpectedType);
+
+ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 8eb906b..38d34e6 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
@@ -113,11 +114,10 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI) {
- MCAsmParserExtension::Initialize(_Parser);
+ AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI) {
+ MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
@@ -205,14 +205,16 @@ private:
struct BarrierOp {
unsigned Val; // Not the enum since not all values have names.
+ const char *Data;
+ unsigned Length;
};
struct SysRegOp {
const char *Data;
unsigned Length;
- uint64_t FeatureBits; // We need to pass through information about which
- // core we are compiling for so that the SysReg
- // Mappers can appropriately conditionalize.
+ uint32_t MRSReg;
+ uint32_t MSRReg;
+ uint32_t PStateField;
};
struct SysCRImmOp {
@@ -221,6 +223,8 @@ private:
struct PrefetchOp {
unsigned Val;
+ const char *Data;
+ unsigned Length;
};
struct ShiftExtendOp {
@@ -254,8 +258,7 @@ private:
MCContext &Ctx;
public:
- AArch64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+ AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {}
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
@@ -349,6 +352,11 @@ public:
return Barrier.Val;
}
+ StringRef getBarrierName() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return StringRef(Barrier.Data, Barrier.Length);
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -374,11 +382,6 @@ public:
return StringRef(SysReg.Data, SysReg.Length);
}
- uint64_t getSysRegFeatureBits() const {
- assert(Kind == k_SysReg && "Invalid access!");
- return SysReg.FeatureBits;
- }
-
unsigned getSysCR() const {
assert(Kind == k_SysCR && "Invalid access!");
return SysCRImm.Val;
@@ -389,6 +392,11 @@ public:
return Prefetch.Val;
}
+ StringRef getPrefetchName() const {
+ assert(Kind == k_Prefetch && "Invalid access!");
+ return StringRef(Prefetch.Data, Prefetch.Length);
+ }
+
AArch64_AM::ShiftExtendType getShiftExtendType() const {
assert(Kind == k_ShiftExtend && "Invalid access!");
return ShiftExtend.Type;
@@ -757,58 +765,47 @@ public:
}
bool isMovZSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovZSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
bool isMovZSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ return isMovWSymbol({
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
AArch64MCExpr::VK_DTPREL_G1,
- };
- return isMovWSymbol(Variants);
+ });
}
bool isMovZSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0,
+ AArch64MCExpr::VK_DTPREL_G0});
}
bool isMovKSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovKSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2_NC};
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
}
bool isMovKSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
+ AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC});
}
bool isMovKSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
@@ -855,28 +852,17 @@ public:
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MRSReg != -1U;
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- Mapper.fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.MSRReg != -1U;
}
bool isSystemPStateField() const {
if (!isSysReg()) return false;
- bool IsKnownRegister;
- AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister);
-
- return IsKnownRegister;
+ return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
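The SysRegOp rework resolves a system-register name once at parse time and
stores the encodings directly, using an all-ones sentinel for "not valid in
this role" (for example, a register readable via MRS but not writable via
MSR). A standalone sketch of that convention:

    #include <cstdint>

    // Sketch: -1U marks an encoding that does not apply to this operand.
    struct SysRegEncodings {
      std::uint32_t MRSReg = -1U;
      std::uint32_t MSRReg = -1U;
      std::uint32_t PStateField = -1U;
    };

    static bool isMRSSystemRegister(const SysRegEncodings &R) {
      return R.MRSReg != -1U;
    }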
@@ -1113,16 +1099,16 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
@@ -1134,26 +1120,26 @@ public:
uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister(
RI->getEncodingValue(getReg()));
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
}
void addVectorReg64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
- Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0));
+ Inst.addOperand(MCOperand::createReg(AArch64::D0 + getReg() - AArch64::Q0));
}
void addVectorReg128Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
assert(
AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addVectorRegLoOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
template <unsigned NumRegs>
@@ -1164,7 +1150,7 @@ public:
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
+ MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
template <unsigned NumRegs>
@@ -1175,32 +1161,32 @@ public:
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
+ MCOperand::createReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
void addVectorIndex1Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexBOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexHOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexSOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndexDOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -1215,16 +1201,16 @@ public:
assert(N == 2 && "Invalid number of operands!");
if (isShiftedImm()) {
addExpr(Inst, getShiftedImmVal());
- Inst.addOperand(MCOperand::CreateImm(getShiftedImmShift()));
+ Inst.addOperand(MCOperand::createImm(getShiftedImmShift()));
} else {
addExpr(Inst, getImm());
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
}
}
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ Inst.addOperand(MCOperand::createImm(getCondCode()));
}
void addAdrpLabelOperands(MCInst &Inst, unsigned N) const {
@@ -1233,7 +1219,7 @@ public:
if (!MCE)
addExpr(Inst, getImm());
else
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 12));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 12));
}
void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
@@ -1246,119 +1232,119 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
return;
}
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / Scale));
}
void addSImm9Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 4));
}
void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 8));
}
void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16));
}
void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_15Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm1_64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_127Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addImm32_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
}
void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
@@ -1366,14 +1352,14 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding =
AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
@@ -1381,7 +1367,7 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
@@ -1389,14 +1375,14 @@ public:
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding =
AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
- Inst.addOperand(MCOperand::CreateImm(encoding));
+ Inst.addOperand(MCOperand::createImm(encoding));
}
void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
@@ -1410,7 +1396,7 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const {
@@ -1424,7 +1410,7 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addBranchTarget14Operands(MCInst &Inst, unsigned N) const {
@@ -1438,64 +1424,52 @@ public:
return;
}
assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(MCE->getValue() >> 2));
}
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ Inst.addOperand(MCOperand::createImm(getFPImm()));
}
void addBarrierOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getBarrier()));
+ Inst.addOperand(MCOperand::createImm(getBarrier()));
}
void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
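+ // The MRS encoding was resolved when the operand was parsed; emit the stored value.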
+ Inst.addOperand(MCOperand::createImm(SysReg.MRSReg));
}
void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
- uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::createImm(SysReg.MSRReg));
}
void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- bool Valid;
- uint32_t Bits =
- AArch64PState::PStateMapper().fromString(getSysReg(), Valid);
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
}
void addSysCROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getSysCR()));
+ Inst.addOperand(MCOperand::createImm(getSysCR()));
}
void addPrefetchOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getPrefetch()));
+ Inst.addOperand(MCOperand::createImm(getPrefetch()));
}
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addExtendOperands(MCInst &Inst, unsigned N) const {
@@ -1503,7 +1477,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW;
unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addExtend64Operands(MCInst &Inst, unsigned N) const {
@@ -1511,15 +1485,15 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX;
unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addMemExtendOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
- Inst.addOperand(MCOperand::CreateImm(IsSigned));
- Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0));
+ Inst.addOperand(MCOperand::createImm(IsSigned));
+ Inst.addOperand(MCOperand::createImm(getShiftExtendAmount() != 0));
}
// For 8-bit load/store instructions with a register offset, both the
@@ -1530,8 +1504,8 @@ public:
assert(N == 2 && "Invalid number of operands!");
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
- Inst.addOperand(MCOperand::CreateImm(IsSigned));
- Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount()));
+ Inst.addOperand(MCOperand::createImm(IsSigned));
+ Inst.addOperand(MCOperand::createImm(hasShiftExtendAmount()));
}
template<int Shift>
@@ -1540,7 +1514,7 @@ public:
const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm((Value >> Shift) & 0xffff));
+ Inst.addOperand(MCOperand::createImm((Value >> Shift) & 0xffff));
}
template<int Shift>
@@ -1549,7 +1523,7 @@ public:
const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
uint64_t Value = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff));
+ Inst.addOperand(MCOperand::createImm((~Value >> Shift) & 0xffff));
}
void print(raw_ostream &OS) const override;
@@ -1636,21 +1610,30 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
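+ // Record the textual name alongside the value so print() can emit it
+ // without a mapper lookup.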
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static std::unique_ptr<AArch64Operand>
- CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ static std::unique_ptr<AArch64Operand> CreateSysReg(StringRef Str, SMLoc S,
+ uint32_t MRSReg,
+ uint32_t MSRReg,
+ uint32_t PStateField,
+ MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
- Op->SysReg.FeatureBits = FeatureBits;
+ Op->SysReg.MRSReg = MRSReg;
+ Op->SysReg.MSRReg = MSRReg;
+ Op->SysReg.PStateField = PStateField;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1665,10 +1648,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
+ Op->Prefetch.Data = Str.data();
+ Op->Prefetch.Length = Str.size();
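+ // Note: this writes the prefetch operand's own string fields; see the
+ // matching getPrefetchName() accessor used by print().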
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1696,21 +1683,20 @@ void AArch64Operand::print(raw_ostream &OS) const {
<< AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
break;
case k_Barrier: {
- bool Valid;
- StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
- if (Valid)
+ StringRef Name = getBarrierName();
+ if (!Name.empty())
OS << "<barrier " << Name << ">";
else
OS << "<barrier invalid #" << getBarrier() << ">";
break;
}
case k_Immediate:
- getImm()->print(OS);
+ OS << *getImm();
break;
case k_ShiftedImm: {
unsigned Shift = getShiftedImmShift();
OS << "<shiftedimm ";
- getShiftedImmVal()->print(OS);
+ OS << *getShiftedImmVal();
OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">";
break;
}
@@ -1741,9 +1727,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "c" << getSysCR();
break;
case k_Prefetch: {
- bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
- if (Valid)
+ StringRef Name = getPrefetchName();
+ if (!Name.empty())
OS << "<prfop " << Name << ">";
else
OS << "<prfop invalid #" << getPrefetch() << ">";
@@ -1986,7 +1971,12 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
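+ // Recover the symbolic name for this prfop value, if one exists, so the
+ // operand can later be printed symbolically.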
+ bool Valid;
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
+ S, getContext()));
return MatchOperand_Success;
}
@@ -1996,14 +1986,17 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
bool Valid;
- unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ unsigned prfop =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat identifier token.
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(),
+ S, getContext()));
return MatchOperand_Success;
}
@@ -2100,15 +2093,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
Parser.Lex(); // Eat the token.
// Check for out of range values. As an exception, we let Zero through,
// as we handle that special case in post-processing before matching in
// order to use the zero register for it.
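+ // The sign was already folded into RealVal above, so a negative zero no
+ // longer slips through this check.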
- if (Val == -1 && !RealVal.isZero()) {
+ if (Val == -1 && !RealVal.isPosZero()) {
TokError("expected compatible register or floating-point constant");
return MatchOperand_ParseFail;
}
@@ -2605,8 +2599,12 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ bool Valid;
+ auto Mapper = AArch64DB::DBarrierMapper();
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Operands.push_back(AArch64Operand::CreateBarrier(MCE->getValue(), Name,
+ ExprLoc, getContext()));
return MatchOperand_Success;
}
@@ -2616,7 +2614,9 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
- unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64DB::DBarrierMapper();
+ unsigned Opt =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2628,8 +2628,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+ Operands.push_back(AArch64Operand::CreateBarrier(Opt, Tok.getString(),
+ getLoc(), getContext()));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -2643,8 +2643,27 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(),
- STI.getFeatureBits(), getContext()));
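+ // Resolve the name against all three system-register namespaces up front;
+ // the operand stores each encoding so the add*Operands methods need no lookup.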
+ bool IsKnown;
+ auto MRSMapper = AArch64SysReg::MRSMapper();
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
+ assert(IsKnown == (MRSReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ auto MSRMapper = AArch64SysReg::MSRMapper();
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
+ assert(IsKnown == (MSRReg != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ auto PStateMapper = AArch64PState::PStateMapper();
+ uint32_t PStateField =
+ PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
+ assert(IsKnown == (PStateField != -1U) &&
+ "register should be -1 if and only if it's unknown");
+
+ Operands.push_back(AArch64Operand::CreateSysReg(
+ Tok.getString(), getLoc(), MRSReg, MSRReg, PStateField, getContext()));
Parser.Lex(); // Eat identifier
return MatchOperand_Success;
@@ -3626,6 +3645,60 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Op3.getEndLoc(), getContext());
}
}
+ } else if (NumOperands == 4 && Tok == "bfc") {
+ // FIXME: Horrible hack to handle BFC->BFM alias.
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &LSBOp = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &WidthOp = static_cast<AArch64Operand &>(*Operands[3]);
+
+ if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) {
+ const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm());
+ const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm());
+
+ if (LSBCE && WidthCE) {
+ uint64_t LSB = LSBCE->getValue();
+ uint64_t Width = WidthCE->getValue();
+
+ uint64_t RegWidth = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op1.getReg()))
+ RegWidth = 64;
+ else
+ RegWidth = 32;
+
+ if (LSB >= RegWidth)
+ return Error(LSBOp.getStartLoc(),
+ "expected integer in range [0, 31]");
+ if (Width < 1 || Width > RegWidth)
+ return Error(WidthOp.getStartLoc(),
+ "expected integer in range [1, 32]");
+
+ uint64_t ImmR = 0;
+ if (RegWidth == 32)
+ ImmR = (32 - LSB) & 0x1f;
+ else
+ ImmR = (64 - LSB) & 0x3f;
+
+ uint64_t ImmS = Width - 1;
+
+ if (ImmR != 0 && ImmS >= ImmR)
+ return Error(WidthOp.getStartLoc(),
+ "requested insert overflows register");
+
+ const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext());
+ const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext());
+ Operands[0] = AArch64Operand::CreateToken(
+ "bfm", false, Op.getStartLoc(), getContext());
+ Operands[2] = AArch64Operand::CreateReg(
+ RegWidth == 32 ? AArch64::WZR : AArch64::XZR, false, SMLoc(),
+ SMLoc(), getContext());
+ Operands[3] = AArch64Operand::CreateImm(
+ ImmRExpr, LSBOp.getStartLoc(), LSBOp.getEndLoc(), getContext());
+ Operands.emplace_back(
+ AArch64Operand::CreateImm(ImmSExpr, WidthOp.getStartLoc(),
+ WidthOp.getEndLoc(), getContext()));
+ }
+ }
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
@@ -3657,8 +3730,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
- if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1.getReg()))
+ if (RegWidth == 32)
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -4033,13 +4105,13 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
if (getParser().parseIdentifier(Name))
return Error(L, "expected symbol after directive");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext());
MCInst Inst;
Inst.setOpcode(AArch64::TLSDESCCALL);
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
getParser().getStreamer().EmitInstruction(Inst, STI);
return false;
@@ -4082,7 +4154,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- Args.push_back(getContext().GetOrCreateSymbol(Name));
+ Args.push_back(getContext().getOrCreateSymbol(Name));
if (Idx + 1 == NbArgs)
break;
@@ -4139,7 +4211,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
auto pair = std::make_pair(IsVector, RegNum);
- if (!RegisterReqs.insert(std::make_pair(Name, pair)).second)
+ if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
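+ // An identical redefinition of an alias is harmless; only a conflicting
+ // one draws the warning.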
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 878e29c..a1ed703 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -221,13 +221,11 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
static MCSymbolizer *
createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp,
- void *DisInfo, MCContext *Ctx,
- MCRelocationInfo *RelInfo) {
- return new llvm::AArch64ExternalSymbolizer(
- *Ctx,
- std::unique_ptr<MCRelocationInfo>(RelInfo),
- GetOpInfo, SymbolLookUp, DisInfo);
+ LLVMSymbolLookupCallback SymbolLookUp,
+ void *DisInfo, MCContext *Ctx,
+ std::unique_ptr<MCRelocationInfo> &&RelInfo) {
+ return new llvm::AArch64ExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
+ SymbolLookUp, DisInfo);
}
extern "C" void LLVMInitializeAArch64Disassembler() {
@@ -263,7 +261,7 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR128DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -292,7 +290,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -313,7 +311,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -334,7 +332,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR16DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -355,7 +353,7 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = FPR8DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -376,7 +374,7 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = GPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -388,7 +386,7 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
unsigned Register = GPR64DecoderTable[RegNo];
if (Register == AArch64::XZR)
Register = AArch64::SP;
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -409,7 +407,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = GPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -422,7 +420,7 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
unsigned Register = GPR32DecoderTable[RegNo];
if (Register == AArch64::WZR)
Register = AArch64::WSP;
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -443,7 +441,7 @@ static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
return Fail;
unsigned Register = VectorDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -463,7 +461,7 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -486,7 +484,7 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -510,7 +508,7 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = QQQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -530,7 +528,7 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -553,7 +551,7 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -577,7 +575,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
unsigned Register = DDDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -586,14 +584,14 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
const void *Decoder) {
// scale{5} is asserted as 1 in tblgen.
Imm |= 0x20;
- Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
@@ -609,21 +607,21 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr,
Inst.getOpcode() != AArch64::LDRXl, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(ImmVal));
return Success;
}
static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1));
- Inst.addOperand(MCOperand::CreateImm(Imm & 1));
+ Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1));
+ Inst.addOperand(MCOperand::createImm(Imm & 1));
return Success;
}
static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
// Every system register in the encoding space is valid with the syntax
// S<op0>_<op1>_<Cn>_<Cm>_<op2>, so decoding system registers always succeeds.
@@ -633,7 +631,7 @@ static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
return Success;
}
@@ -656,20 +654,20 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
}
// Add the lane
- Inst.addOperand(MCOperand::CreateImm(1));
+ Inst.addOperand(MCOperand::createImm(1));
return Success;
}
static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm(Add - Imm));
+ Inst.addOperand(MCOperand::createImm(Add - Imm));
return Success;
}
static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1)));
+ Inst.addOperand(MCOperand::createImm((Imm + Add) & (Add - 1)));
return Success;
}
@@ -789,7 +787,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
break;
}
- Inst.addOperand(MCOperand::CreateImm(shift));
+ Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
@@ -821,8 +819,8 @@ static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
Inst.getOpcode() == AArch64::MOVKXi)
Inst.addOperand(Inst.getOperand(0));
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm(shift));
+ Inst.addOperand(MCOperand::createImm(imm));
+ Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
@@ -840,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
return Fail;
case AArch64::PRFMui:
// Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
+ Inst.addOperand(MCOperand::createImm(Rt));
break;
case AArch64::STRBBui:
case AArch64::LDRBBui:
@@ -883,7 +881,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
return Success;
}
@@ -958,7 +956,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
return Fail;
case AArch64::PRFUMi:
// Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
+ Inst.addOperand(MCOperand::createImm(Rt));
break;
case AArch64::STURBBi:
case AArch64::LDURBBi:
@@ -1059,7 +1057,7 @@ static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
bool IsLoad = fieldFromInstruction(insn, 22, 1);
bool IsIndexed = fieldFromInstruction(insn, 10, 2) != 0;
@@ -1104,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLRW:
case AArch64::STLRB:
case AArch64::STLRH:
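+ // The ARMv8.1 LOR (limited ordering regions) variants use the same
+ // register classes as their acquire/release counterparts.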
+ case AArch64::STLLRW:
+ case AArch64::STLLRB:
+ case AArch64::STLLRH:
+ case AArch64::LDLARW:
+ case AArch64::LDLARB:
+ case AArch64::LDLARH:
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXRX:
@@ -1114,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::LDAXRX:
case AArch64::LDXRX:
case AArch64::STLRX:
+ case AArch64::LDLARX:
+ case AArch64::STLLRX:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXPW:
@@ -1262,7 +1268,7 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
+ Inst.addOperand(MCOperand::createImm(offset));
// You shouldn't load to the same register twice in an instruction...
if (IsLoad && Rt == Rt2)
@@ -1329,7 +1335,7 @@ static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
break;
}
- Inst.addOperand(MCOperand::CreateImm(extend));
+ Inst.addOperand(MCOperand::createImm(extend));
return Success;
}
@@ -1360,7 +1366,7 @@ static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32))
return Fail;
}
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1377,7 +1383,7 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
else
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
switch (Inst.getOpcode()) {
default:
@@ -1390,13 +1396,13 @@ static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
case AArch64::MOVIv4i32:
case AArch64::MVNIv2i32:
case AArch64::MVNIv4i32:
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+ Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
break;
case AArch64::MOVIv2s_msl:
case AArch64::MOVIv4s_msl:
case AArch64::MVNIv2s_msl:
case AArch64::MVNIv4s_msl:
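+ // cmode<0> selects the MSL amount (shifter-imm encoding): 0x108 is
+ // MSL #8, 0x110 is MSL #16.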
- Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
+ Inst.addOperand(MCOperand::createImm(cmode & 1 ? 0x110 : 0x108));
break;
}
@@ -1415,8 +1421,8 @@ static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+ Inst.addOperand(MCOperand::createImm(imm));
+ Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
return Success;
}
@@ -1435,7 +1441,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1471,8 +1477,8 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
}
if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
- Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
return Success;
}
@@ -1488,7 +1494,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
imm |= ~((1LL << 26) - 1);
if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
@@ -1502,11 +1508,14 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
uint64_t pstate_field = (op1 << 3) | op2;
- Inst.addOperand(MCOperand::CreateImm(pstate_field));
- Inst.addOperand(MCOperand::CreateImm(crm));
+ Inst.addOperand(MCOperand::createImm(pstate_field));
+ Inst.addOperand(MCOperand::createImm(crm));
bool ValidNamed;
- (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
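+ // PState name lookups are now feature-gated, so reach the subtarget
+ // through the decoder context.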
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ (void)AArch64PState::PStateMapper().toString(pstate_field,
+ Dis->getSubtargetInfo().getFeatureBits(), ValidNamed);
return ValidNamed ? Success : Fail;
}
@@ -1528,9 +1537,9 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(bit));
+ Inst.addOperand(MCOperand::createImm(bit));
if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
- Inst.addOperand(MCOperand::CreateImm(dst));
+ Inst.addOperand(MCOperand::createImm(dst));
return Success;
}
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2057c51..07e4a45 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -165,7 +165,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
if (Variant != MCSymbolRefExpr::VK_None)
Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
@@ -180,7 +180,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, Ctx);
} else {
Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
@@ -214,7 +214,7 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
Expr = MCConstantExpr::Create(0, Ctx);
}
- MI.addOperand(MCOperand::CreateExpr(Expr));
+ MI.addOperand(MCOperand::createExpr(Expr));
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 46a1d79..02bd929 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -34,18 +34,13 @@ using namespace llvm;
AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+ const MCRegisterInfo &MRI)
+ : AArch64InstPrinter(MAI, MII, MRI) {}
void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// This is for .cfi directives.
@@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
// Check for special encodings and print the canonical alias instead.
unsigned Opcode = MI->getOpcode();
@@ -166,11 +162,23 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
int ImmR = MI->getOperand(3).getImm();
int ImmS = MI->getOperand(4).getImm();
- // BFI alias
- if (ImmS < ImmR) {
+ if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
+ (ImmR == 0 || ImmS < ImmR)) {
+ // BFC takes precedence over its entire range, slightly differently from BFI.
+ int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+ int LSB = (BitWidth - ImmR) % BitWidth;
+ int Width = ImmS + 1;
+
+ O << "\tbfc\t" << getRegisterName(Op0.getReg())
+ << ", #" << LSB << ", #" << Width;
+ printAnnotation(O, Annot);
+ return;
+ } else if (ImmS < ImmR) {
+ // BFI alias
int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
int LSB = (BitWidth - ImmR) % BitWidth;
int Width = ImmS + 1;
+
O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
<< getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
printAnnotation(O, Annot);
@@ -210,8 +218,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
@@ -614,7 +622,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
}
void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
StringRef Layout, Mnemonic;
@@ -624,7 +633,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
<< getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
unsigned ListOpNum = IsTbx ? 2 : 1;
- printVectorList(MI, ListOpNum, O, "");
+ printVectorList(MI, ListOpNum, STI, O, "");
O << ", "
<< getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
@@ -638,7 +647,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Now onto the operands: first a vector list with possible lane
// specifier. E.g. { v0 }[2]
int OpNum = LdStDesc->ListOperand;
- printVectorList(MI, OpNum++, O, "");
+ printVectorList(MI, OpNum++, STI, O, "");
if (LdStDesc->HasLane)
O << '[' << MI->getOperand(OpNum++).getImm() << ']';
@@ -662,7 +671,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- AArch64InstPrinter::printInst(MI, O, Annot);
+ AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
@@ -889,6 +898,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
@@ -903,6 +913,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
O << format("#%#llx", Op.getImm());
@@ -922,6 +933,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isReg() && "Non-register vreg operand!");
@@ -930,6 +942,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
@@ -937,6 +950,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isImm()) {
@@ -946,18 +960,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << Val;
if (Shift != 0)
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
if (CommentStream)
*CommentStream << '=' << (Val << Shift) << '\n';
} else {
assert(MO.isExpr() && "Unexpected operand type!");
O << *MO.getExpr();
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
}
void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -965,6 +980,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -972,6 +988,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
// LSL #0 should not be printed.
@@ -983,18 +1000,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printArithExtend(MI, OpNum + 1, O);
+ printArithExtend(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
@@ -1038,24 +1058,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(CC);
}
void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
}
void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
}
template<int Scale>
void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '#' << Scale * MI->getOperand(OpNum).getImm();
}
@@ -1085,10 +1109,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned prfop = MI->getOperand(OpNum).getImm();
bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+ StringRef Name =
+ AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid);
if (Valid)
O << Name;
else
@@ -1096,6 +1122,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
float FPImm =
@@ -1151,6 +1178,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
}
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
StringRef LayoutSuffix) {
unsigned Reg = MI->getOperand(OpNum).getReg();
@@ -1193,14 +1221,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
O << " }";
}
-void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- printVectorList(MI, OpNum, O, "");
+void
+AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printVectorList(MI, OpNum, STI, O, "");
}
template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
std::string Suffix(".");
if (NumLanes)
@@ -1208,15 +1239,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
else
Suffix += LaneKind;
- printVectorList(MI, OpNum, O, Suffix);
+ printVectorList(MI, OpNum, STI, O, Suffix);
}
void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1241,6 +1274,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1256,6 +1290,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
unsigned Opcode = MI->getOpcode();
@@ -1263,9 +1298,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
bool Valid;
StringRef Name;
if (Opcode == AArch64::ISB)
- Name = AArch64ISB::ISBMapper().toString(Val, Valid);
+ Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
else
- Name = AArch64DB::DBarrierMapper().toString(Val, Valid);
+ Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
if (Valid)
O << Name;
else
@@ -1273,31 +1310,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MRSMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MSRMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
bool Valid;
- StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid);
+ StringRef Name =
+ AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
O << StringRef(Name.str()).upper();
else
@@ -1305,6 +1346,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned RawVal = MI->getOperand(OpNo).getImm();
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 5f51621..c2077a0 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -26,16 +26,21 @@ class MCOperand;
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
@@ -45,90 +50,126 @@ public:
protected:
bool printSysAlias(const MCInst *MI, raw_ostream &O);
// Operand printers
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
- template<int Amount>
- void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ template <int Amount>
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printPostIncOperand(MI, OpNo, Amount, O);
}
- void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
char SrcRegKind, unsigned Width);
template <char SrcRegKind, unsigned Width>
- void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ void printMemExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printMemExtend(MI, OpNum, O, SrcRegKind, Width);
}
- void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
- template<int Scale>
- void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int Scale>
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printUImm12Offset(MI, OpNum, Scale, O);
}
- template<int BitWidth>
- void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int BitWidth>
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
}
- void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- template<int Scale>
- void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <int Scale>
+ void printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ void printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O,
StringRef LayoutSuffix);
/// Print a list of vector registers where the type suffix is implicit
/// (i.e. attached to the instruction rather than the registers).
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
template <unsigned NumLanes, char LaneKind>
- void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarrierOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSystemPStateField(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
public:
AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
- void printInstruction(const MCInst *MI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
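The InstPrinter change above threads const MCSubtargetInfo &STI through printInst and every operand printer instead of caching a subtarget at construction, so one printer instance can serve code built for different subtargets. A minimal sketch of the pattern this enables (the free function and its name are hypothetical, not part of the patch; the usual MC and AArch64 headers are assumed):

  // Hypothetical operand printer: the feature query follows the
  // instruction being printed, not state captured at construction time.
  static void printPStateName(const MCInst *MI, unsigned OpNo,
                              const MCSubtargetInfo &STI, raw_ostream &O) {
    uint64_t Imm = MI->getOperand(OpNo).getImm();
    if (Imm == AArch64PState::PAN &&
        STI.getFeatureBits()[AArch64::HasV8_1aOps])
      O << "pan";            // extension spelling when the feature is present
    else
      O << "#" << Imm;       // otherwise fall back to the raw immediate
  }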
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 4db9dea..ed24343 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -237,15 +237,15 @@ static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
if (isShiftedMask_64(Imm)) {
I = countTrailingZeros(Imm);
assert(I < 64 && "undefined behavior");
- CTO = CountTrailingOnes_64(Imm >> I);
+ CTO = countTrailingOnes(Imm >> I);
} else {
Imm |= ~Mask;
if (!isShiftedMask_64(~Imm))
return false;
- unsigned CLO = CountLeadingOnes_64(Imm);
+ unsigned CLO = countLeadingOnes(Imm);
I = 64 - CLO;
- CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size);
+ CTO = CLO + countTrailingOnes(Imm) - (64 - Size);
}
// Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
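The hunk above also migrates from the old CountTrailingOnes_64 family to the templated camelCase helpers in llvm/Support/MathExtras.h. A worked example of the decomposition they perform on a shifted mask (illustration only):

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  void demo() {
    uint64_t Imm = 0xFF0;                        // 8 ones rotated left by 4
    unsigned I   = countTrailingZeros(Imm);      // 4: the rotation amount
    unsigned CTO = countTrailingOnes(Imm >> I);  // 8: the run of ones
    (void)I; (void)CTO;
  }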
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 27cbac9..31fceb6 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -246,10 +247,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
- if ((Count & 3) != 0) {
- for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
- OW->Write8(0);
- }
+ OW->WriteZeros(Count % 4);
// We are properly aligned, so write NOPs as requested.
Count /= 4;
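WriteZeros(Count % 4) collapses the deleted byte loop into a single call, and the Count /= 4 that follows converts the remaining byte count into a NOP count; for Count = 11 that is 3 zero bytes plus 2 four-byte NOPs, 11 bytes total. The same arithmetic in isolation (a sketch against this tree's MCObjectWriter interface):

  #include "llvm/MC/MCObjectWriter.h"
  using namespace llvm;

  void writePadding(MCObjectWriter &OW, uint64_t Count) {
    OW.WriteZeros(Count % 4);             // sub-word tail: 11 -> 3 bytes
    for (uint64_t I = 0, E = Count / 4; I != E; ++I)
      OW.Write32(0xd503201f);             // AArch64 NOP: 11 -> 2 of these
  }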
@@ -312,47 +310,11 @@ public:
DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
: AArch64AsmBackend(T), MRI(MRI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Any section for which the linker breaks things into atoms needs to
- // preserve symbols, including assembler local symbols, to identify
- // those atoms. These sections are:
- // Sections of type:
- //
- // S_CSTRING_LITERALS (e.g. __cstring)
- // S_LITERAL_POINTERS (e.g. objc selector pointers)
- // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
- //
- // Sections named:
- //
- // __TEXT,__eh_frame
- // __TEXT,__ustring
- // __DATA,__cfstring
- // __DATA,__objc_classrefs
- // __DATA,__objc_catlist
- //
- // FIXME: It would be better if the compiler used actual linker local
- // symbols for each of these sections rather than preserving what
- // are ostensibly assembler local symbols.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
- return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
- SMO.getType() == MachO::S_4BYTE_LITERALS ||
- SMO.getType() == MachO::S_8BYTE_LITERALS ||
- SMO.getType() == MachO::S_16BYTE_LITERALS ||
- SMO.getType() == MachO::S_LITERAL_POINTERS ||
- (SMO.getSegmentName() == "__TEXT" &&
- (SMO.getSectionName() == "__eh_frame" ||
- SMO.getSectionName() == "__ustring")) ||
- (SMO.getSegmentName() == "__DATA" &&
- (SMO.getSectionName() == "__cfstring" ||
- SMO.getSectionName() == "__objc_classrefs" ||
- SMO.getSectionName() == "__objc_catlist")));
- }
-
/// \brief Generate the compact unwind encoding from the CFI directives.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
@@ -496,7 +458,7 @@ public:
ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
: AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
}
@@ -529,14 +491,28 @@ void ELFAArch64AsmBackend::processFixupValue(
IsResolved = false;
}
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+ MCValue Val;
+ if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+ return false;
+
+ if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+ return false;
+
+ const MCSectionELF *SecELF =
+ dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+ return SecELF->getSectionName() == ".eh_frame";
+}
+
void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
- const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
- if (SecELF && SecELF->getSectionName() == ".eh_frame")
+ if (isByteSwappedFixup(Fixup.getValue()))
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
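The isByteSwappedFixup helper above replaces the FindAssociatedSection lookup with a relocatable evaluation of the fixup expression, so the byte swap of 4-byte .eh_frame fixups described in the comment still fires once the old section query is gone. A self-contained sketch of the same lookup, with the null checks made explicit (cast<> instead of dyn_cast<> is my assumption, mirroring the fact that the ELF writer only ever sees ELF sections):

  #include "llvm/MC/MCExpr.h"
  #include "llvm/MC/MCSectionELF.h"
  #include "llvm/MC/MCValue.h"
  using namespace llvm;

  static bool targetsEhFrame(const MCExpr *E) {
    MCValue Val;
    if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr) || !Val.getSymA() ||
        Val.getSymA()->getSymbol().isUndefined())
      return false;
    const MCSection &Sec = Val.getSymA()->getSymbol().getSection();
    return cast<MCSectionELF>(Sec).getSectionName() == ".eh_frame";
  }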
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 5ea49c3..1f516d1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
- virtual ~AArch64ELFObjectWriter();
+ ~AArch64ELFObjectWriter() override;
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
llvm_unreachable("Unimplemented fixup -> relocation");
}
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian) {
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW =
new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
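createObjectWriter and the create*ObjectWriter entry points now take raw_pwrite_stream, a raw_ostream subclass that adds positional writes; object emitters use it to back-patch headers whose final values are only known after the sections are written. A minimal sketch of the added capability (illustration only, not from the patch):

  #include "llvm/Support/raw_ostream.h"

  void reserveAndPatch(llvm::raw_pwrite_stream &OS) {
    uint64_t Off = OS.tell();
    OS.write("\0\0\0\0", 4);              // placeholder size field
    // ... emit contents whose size is not yet known ...
    uint32_t Size = 42;                   // now known
    OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size), Off);
  }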
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8dc6c30..204a1ab 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64TargetStreamer.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
@@ -59,7 +60,7 @@ AArch64TargetAsmStreamer::AArch64TargetAsmStreamer(MCStreamer &S,
: AArch64TargetStreamer(S), OS(OS) {}
void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
- OS << "\t.inst\t0x" << utohexstr(Inst) << "\n";
+ OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n";
}
class AArch64TargetELFStreamer : public AArch64TargetStreamer {
@@ -89,15 +90,12 @@ class AArch64ELFStreamer : public MCELFStreamer {
public:
friend class AArch64TargetELFStreamer;
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
- ~AArch64ELFStreamer() {}
-
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override {
+ void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
@@ -117,15 +115,8 @@ public:
}
void emitInst(uint32_t Inst) {
- char Buffer[4];
- const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian();
-
EmitA64MappingSymbol();
- for (unsigned II = 0; II != 4; ++II) {
- const unsigned I = LittleEndian ? (4 - II - 1) : II;
- Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
- }
- MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
+ MCELFStreamer::EmitIntValue(Inst, 4);
}
/// This is one of the functions used to emit data into an ELF section, so the
@@ -167,10 +158,10 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().CreateTempSymbol();
+ MCSymbol *Start = getContext().createTempSymbol();
EmitLabel(Start);
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++));
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -189,8 +180,6 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
-
- /// @}
};
} // end anonymous namespace
@@ -203,24 +192,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new AArch64TargetAsmStreamer(*S, OS);
- return S;
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AArch64TargetAsmStreamer(S, OS);
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
}
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new AArch64TargetELFStreamer(S);
+ return nullptr;
+}
}
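emitInst above now defers to MCELFStreamer::EmitIntValue(Inst, 4), which takes the byte order from the target's MCAsmInfo, so one call serves both aarch64 and aarch64_be. For reference, the deleted loop performed the same serialization by hand; trimmed to the little-endian case it amounted to this (sketch):

  static void emitLE32(llvm::MCStreamer &Streamer, uint32_t Inst) {
    char Buffer[4];
    for (unsigned I = 0; I != 4; ++I)
      Buffer[I] = uint8_t(Inst >> (I * 8));  // least significant byte first
    Streamer.EmitBytes(llvm::StringRef(Buffer, 4));
  }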
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 71b05cc..ef48203 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -19,8 +19,8 @@
namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index f048474..ab2cad6 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -48,6 +48,10 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
UseDataRegionDirectives = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use
+ // LShr instead of AShr.
+ UseLogicalShr = true;
}
const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
@@ -59,7 +63,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
MCContext &Context = Streamer.getContext();
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
- MCSymbol *PCSym = Context.CreateTempSymbol();
+ MCSymbol *PCSym = Context.createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
return MCBinaryExpr::CreateSub(Res, PC, Context);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 5d03c21..9b88de7 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Target;
@@ -27,7 +28,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
MCStreamer &Streamer) const override;
};
-struct AArch64MCAsmInfoELF : public MCAsmInfo {
+struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(StringRef TT);
};
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a192..277ea9f 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,11 +38,9 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
- ~AArch64MCCodeEmitter() {}
+ ~AArch64MCCodeEmitter() override {}
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -186,7 +184,7 @@ public:
}
}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -205,9 +203,8 @@ public:
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+ return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -235,7 +232,7 @@ AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx,
else {
assert(MO.isExpr() && "unable to encode load/store imm operand");
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
}
@@ -259,7 +256,7 @@ AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind = MI.getOpcode() == AArch64::ADR
? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21)
: MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
MCNumFixups += 1;
@@ -289,7 +286,7 @@ AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
// Encode the 12 bits of the fixup.
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumFixups;
@@ -309,7 +306,7 @@ uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue(
assert(MO.isExpr() && "Unexpected target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -331,7 +328,7 @@ AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -358,7 +355,7 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
return MO.getImm();
assert(MO.isExpr() && "Unexpected movz/movk immediate");
- Fixups.push_back(MCFixup::Create(
+ Fixups.push_back(MCFixup::create(
0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc()));
++MCNumFixups;
@@ -379,7 +376,7 @@ uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue(
assert(MO.isExpr() && "Unexpected ADR target type!");
MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -403,7 +400,7 @@ AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind = MI.getOpcode() == AArch64::BL
? MCFixupKind(AArch64::fixup_aarch64_pcrel_call26)
: MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
++MCNumFixups;
@@ -601,7 +598,7 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
return EncodedValue & ~(1u << 30);
}
-void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MI.getOpcode() == AArch64::TLSDESCCALL) {
@@ -609,7 +606,7 @@ void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// following (BLR) instruction. It doesn't emit any code itself so it
// doesn't go through the normal TableGenerated channels.
MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
- Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
+ Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
return;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index e396df8..74b81af 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
@@ -85,7 +86,7 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
-const MCSection *AArch64MCExpr::FindAssociatedSection() const {
+MCSection *AArch64MCExpr::FindAssociatedSection() const {
llvm_unreachable("FIXME: what goes here?");
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index db48ac9..95d2277 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -120,7 +120,7 @@ public:
/// @{
/// Get the kind of this expression.
- VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
+ VariantKind getKind() const { return Kind; }
/// Get the expression this modifier applies to.
const MCExpr *getSubExpr() const { return Expr; }
@@ -149,7 +149,7 @@ public:
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override;
+ MCSection *FindAssociatedSection() const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f7a6b8..2e22de0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -105,112 +105,78 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
RM = Reloc::Static;
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return new AArch64InstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new AArch64AppleInstPrinter(MAI, MII, MRI, STI);
+ return new AArch64AppleInstPrinter(MAI, MII, MRI);
return nullptr;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &TAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
+static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ DWARFMustBeAtTheEnd,
+ /*LabelSections*/ true);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createAArch64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
- createAArch64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64Target,
- createAArch64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createAArch64MCSubtargetInfo);
+ for (Target *T :
+ {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+ // Register the obj streamers.
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createAArch64ObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T,
+ createAArch64AsmTargetStreamer);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+ }
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
+ for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
- createAArch64leAsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createAArch64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARM64Target,
- createAArch64MCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createAArch64MCInstPrinter);
}
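LLVMInitializeAArch64TargetMC now registers everything common in one range-for over the three Target objects, leaving only the asm backend split between the big-endian target and the two little-endian ones. On the consumer side the registrations are reached through TargetRegistry; a minimal sketch of that lookup (driver code is illustrative and assumes the initializer above has run):

  #include "llvm/MC/MCAsmInfo.h"
  #include "llvm/MC/MCRegisterInfo.h"
  #include "llvm/Support/TargetRegistry.h"
  #include <memory>

  void demo() {
    std::string Err;
    const llvm::Target *T = llvm::TargetRegistry::lookupTarget("aarch64", Err);
    if (!T)
      return;  // target not registered or not compiled in
    std::unique_ptr<llvm::MCRegisterInfo> MRI(T->createMCRegInfo("aarch64"));
    std::unique_ptr<llvm::MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, "aarch64"));
  }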
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..4705bdf 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,18 +28,20 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
@@ -47,17 +49,22 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian);
-MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
- uint32_t CPUSubtype);
+MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index e12a24b..d425975 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -33,7 +34,7 @@ public:
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
@@ -91,7 +92,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
default:
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
@@ -112,8 +113,35 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
}
}
+static bool canUseLocalRelocation(const MCSectionMachO &Section,
+ const MCSymbol &Symbol, unsigned Log2Size) {
+ // Debug info sections can use local relocations.
+ if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
+ return true;
+
+ // Otherwise, only pointer sized relocations are supported.
+ if (Log2Size != 3)
+ return false;
+
+ // But only if they don't point to a few forbidden sections.
+ if (!Symbol.isInSection())
+ return true;
+ const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ if (RefSec.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (RefSec.getSegmentName() == "__DATA" &&
+ RefSec.getSectionName() == "__objc_classrefs")
+ return false;
+
+ // FIXME: ld64 currently handles internal pointer-sized relocations
+ // incorrectly (applying the addend twice). We should be able to return true
+ // unconditionally by this point when that's fixed.
+ return false;
+}
+
void AArch64MachObjectWriter::RecordRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
@@ -123,9 +151,9 @@ void AArch64MachObjectWriter::RecordRelocation(
unsigned Log2Size = 0;
int64_t Value = 0;
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
unsigned Kind = Fixup.getKind();
+ const MCSymbol *RelSymbol = nullptr;
FixupOffset += Fixup.getOffset();
@@ -143,7 +171,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
if (Kind == AArch64::fixup_aarch64_pcrel_branch19) {
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"conditional branch requires assembler-local"
" label. '" +
Target.getSymA()->getSymbol().getName() +
@@ -154,14 +182,14 @@ void AArch64MachObjectWriter::RecordRelocation(
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) {
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"Invalid relocation on conditional branch!");
return;
}
if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
Asm)) {
- Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
+ Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
return;
}
@@ -171,11 +199,9 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Should this always be extern?
// SymbolNum of 0 indicates the absolute section.
Type = MachO::ARM64_RELOC_UNSIGNED;
- Index = 0;
if (IsPCRel) {
- IsExtern = 1;
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"PC relative absolute relocation!");
// FIXME: x86_64 sets the type to a branch reloc here. Should we do
@@ -184,39 +210,36 @@ void AArch64MachObjectWriter::RecordRelocation(
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
const MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+ const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
const MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+ const MCSymbol *B_Base = Asm.getAtom(*B);
// Check for "_foo@got - .", which comes through here as:
// Ltmp0:
// ... _foo@got - Ltmp0
if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT &&
Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None &&
- Layout.getSymbolOffset(&B_SD) ==
+ Layout.getSymbolOffset(*B) ==
Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) {
// SymB is the PC, so use a PC-rel pointer-to-GOT relocation.
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_POINTER_TO_GOT;
IsPCRel = 1;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
// Otherwise, neither symbol can be modified.
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation of modified symbol");
// We don't support PCrel relocations of differences.
if (IsPCRel)
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported pc-relative relocation of "
"difference");
@@ -227,50 +250,52 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: We should probably just synthesize an external symbol and use
// that.
if (!A_Base)
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + A->getName() +
"'. Must have non-local symbol earlier in section.");
if (!B_Base)
- Asm.getContext().FatalError(
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + B->getName() +
"'. Must have non-local symbol earlier in section.");
if (A_Base == B_Base && A_Base)
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation with identical base");
- Value += (!A_SD.getFragment() ? 0
- : Writer->getSymbolAddress(&A_SD, Layout)) -
- (!A_Base || !A_Base->getFragment()
+ Value += (!A_SD.getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
+ (!A_Base || !A_Base->getData().getFragment()
? 0
- : Writer->getSymbolAddress(A_Base, Layout));
- Value -= (!B_SD.getFragment() ? 0
- : Writer->getSymbolAddress(&B_SD, Layout)) -
- (!B_Base || !B_Base->getFragment()
+ : Writer->getSymbolAddress(*A_Base, Layout));
+ Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) -
+ (!B_Base || !B_Base->getData().getFragment()
? 0
- : Writer->getSymbolAddress(B_Base, Layout));
+ : Writer->getSymbolAddress(*B_Base, Layout));
- Index = A_Base->getIndex();
- IsExtern = 1;
Type = MachO::ARM64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
- Index = B_Base->getIndex();
- IsExtern = 1;
+ RelSymbol = B_Base;
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
- const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
- Fragment->getParent()->getSection());
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO &>(*Fragment->getParent());
+
+ bool CanUseLocalRelocation =
+ canUseLocalRelocation(Section, *Symbol, Log2Size);
+ if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
+
+ const MCSymbol *Base = Asm.getAtom(*Symbol);
// If the symbol is a variable and we weren't able to get a Base for it
// (i.e., it's not in the symbol table associated with a section) resolve
@@ -279,7 +304,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// If the evaluation is an absolute value, just use that directly
// to keep things easy.
int64_t Res;
- if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
@@ -290,7 +315,7 @@ void AArch64MachObjectWriter::RecordRelocation(
// the FixedValue?
if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout,
&Fixup))
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unable to resolve variable '" +
Symbol->getName() + "'");
return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
@@ -310,42 +335,38 @@ void AArch64MachObjectWriter::RecordRelocation(
// sections, and for pointer-sized relocations (.quad), we allow section
// relocations. It's code sections that run into trouble.
if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
+ RelSymbol = Base;
// Add the local offset, if needed.
- if (Base != &SD)
- Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ if (Base != Symbol)
+ Value +=
+ Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base);
} else if (Symbol->isInSection()) {
- // Pointer-sized relocations can use a local relocation. Otherwise,
- // we have to be in a debug info section.
- if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3)
- Asm.getContext().FatalError(
+ if (!CanUseLocalRelocation)
+ Asm.getContext().reportFatalError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
"'. Must have non-local symbol earlier in section.");
// Adjust the relocation to be section-relative.
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD =
- Asm.getSectionData(SD.getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- IsExtern = 0;
- Value += Writer->getSymbolAddress(&SD, Layout);
+ const MCSection &Sec = Symbol->getSection();
+ Index = Sec.getOrdinal() + 1;
+ Value += Writer->getSymbolAddress(*Symbol, Layout);
if (IsPCRel)
Value -= Writer->getFragmentAddress(Fragment, Layout) +
Fixup.getOffset() + (1ULL << Log2Size);
} else {
// Resolve constant variables.
- if (SD.getSymbol().isVariable()) {
+ if (Symbol->isVariable()) {
int64_t Res;
- if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (Symbol->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
}
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation of variable '" +
Symbol->getName() + "'");
}
@@ -362,16 +383,16 @@ void AArch64MachObjectWriter::RecordRelocation(
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
// Now set up the Addend relocation.
Type = MachO::ARM64_RELOC_ADDEND;
Index = Value;
+ RelSymbol = nullptr;
IsPCRel = 0;
Log2Size = 2;
- IsExtern = 0;
// Put zero into the instruction itself. The addend is in the relocation.
Value = 0;
@@ -383,14 +404,14 @@ void AArch64MachObjectWriter::RecordRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
- (IsExtern << 27) | (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype) {
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
return createMachObjectWriter(
new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
/*IsLittleEndian=*/true);
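Throughout RecordRelocation the explicit Index/IsExtern bookkeeping is replaced by handing the relevant symbol (RelSymbol) to addRelocation, which derives r_symbolnum and r_extern itself; r_word1 keeps only the remaining fields. Their layout, matching the packing above (field widths per the Mach-O relocation_info definition):

  static uint32_t packWord1(unsigned Index, unsigned IsPCRel,
                            unsigned Log2Size, unsigned Type) {
    // bits  0-23  Index: symbol number, section ordinal, or ADDEND payload
    // bit     24  r_pcrel   (IsPCRel)
    // bits 25-26  r_length  (Log2Size: log2 of the access size in bytes)
    // bit     27  r_extern  (formerly IsExtern; now derived from RelSymbol)
    // bits 28-31  r_type    (Type)
    return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
  }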
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index e3112fa..52b000d 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -1,4 +1,4 @@
-//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class --*- C++ -*---------===//
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,9 @@
// This file implements the AArch64TargetStreamer class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
-#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/ConstantPools.h"
using namespace llvm;
//
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
new file mode 100644
index 0000000..fcc0d05
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -0,0 +1,42 @@
+//===-- AArch64TargetStreamer.h - AArch64 Target Streamer ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64TARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class AArch64TargetStreamer : public MCTargetStreamer {
+public:
+ AArch64TargetStreamer(MCStreamer &S);
+ ~AArch64TargetStreamer() override;
+
+ void finish() override;
+
+ /// Callback used to implement the ldr= pseudo.
+ /// Add a new entry to the constant pool for the current section and return an
+ /// MCExpr that can be used to refer to the constant pool location.
+ const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size);
+
+ /// Callback used to implement the .ltorg directive.
+ /// Emit contents of constant pool for the current section.
+ void emitCurrentConstantPool();
+
+ /// Callback used to implement the .inst directive.
+ virtual void emitInst(uint32_t Inst);
+
+private:
+ std::unique_ptr<AssemblerConstantPools> ConstantPools;
+};
+
+} // end namespace llvm
+
+#endif
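The new header centralizes the constant-pool machinery shared by the asm and ELF target streamers: ldr= parks an expression in the current section's pool, and .ltorg (or finish()) flushes it. A sketch of the expected call pattern (the two driver functions are hypothetical; only the callbacks come from the header above):

  #include "AArch64TargetStreamer.h"

  void handleLdrPseudo(llvm::AArch64TargetStreamer &TS,
                       const llvm::MCExpr *Value) {
    // "ldr x0, =<expr>": pool the expression, load via the returned label.
    const llvm::MCExpr *PoolRef = TS.addConstantPoolEntry(Value, /*Size=*/8);
    (void)PoolRef;  // becomes the ldr's pc-relative operand
  }

  void handleLtorg(llvm::AArch64TargetStreamer &TS) {
    TS.emitCurrentConstantPool();  // emit pooled constants at this point
  }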
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index bc6c7a9..28b8e7e 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,11 +18,12 @@
using namespace llvm;
-StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Value == Value) {
+StringRef AArch64NamedImmMapper::toString(uint32_t Value,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].isValueEqual(Value, FeatureBits)) {
Valid = true;
- return Pairs[i].Name;
+ return Mappings[i].Name;
}
}
@@ -30,12 +31,13 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
return StringRef();
}
-uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+uint32_t AArch64NamedImmMapper::fromString(StringRef Name,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
std::string LowerCaseName = Name.lower();
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Name == LowerCaseName) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) {
Valid = true;
- return Pairs[i].Value;
+ return Mappings[i].Value;
}
}
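Each Mapping now carries a FeatureBitset alongside the name/value pair, and toString/fromString take the subtarget's feature bits, so entries such as the v8.1a-only "pan" PState added below resolve only when the extension is present. A sketch of a gated lookup through the new interface (the wrapper function is illustrative):

  static uint32_t lookupPan(const llvm::MCSubtargetInfo &STI, bool &Valid) {
    llvm::AArch64PState::PStateMapper Mapper;
    // Valid comes back true only for subtargets with AArch64::HasV8_1aOps.
    return Mapper.fromString("pan", STI.getFeatureBits(), Valid);
  }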
@@ -47,747 +49,781 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
- {"s1e1r", S1E1R},
- {"s1e2r", S1E2R},
- {"s1e3r", S1E3R},
- {"s1e1w", S1E1W},
- {"s1e2w", S1E2W},
- {"s1e3w", S1E3W},
- {"s1e0r", S1E0R},
- {"s1e0w", S1E0W},
- {"s12e1r", S12E1R},
- {"s12e1w", S12E1W},
- {"s12e0r", S12E0R},
- {"s12e0w", S12E0W},
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
+ {"s1e1r", S1E1R, {}},
+ {"s1e2r", S1E2R, {}},
+ {"s1e3r", S1E3R, {}},
+ {"s1e1w", S1E1W, {}},
+ {"s1e2w", S1E2W, {}},
+ {"s1e3w", S1E3W, {}},
+ {"s1e0r", S1E0R, {}},
+ {"s1e0w", S1E0W, {}},
+ {"s12e1r", S12E1R, {}},
+ {"s12e1w", S12E1W, {}},
+ {"s12e0r", S12E0R, {}},
+ {"s12e0w", S12E0W, {}},
};
AArch64AT::ATMapper::ATMapper()
- : AArch64NamedImmMapper(ATPairs, 0) {}
-
-const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
- {"oshld", OSHLD},
- {"oshst", OSHST},
- {"osh", OSH},
- {"nshld", NSHLD},
- {"nshst", NSHST},
- {"nsh", NSH},
- {"ishld", ISHLD},
- {"ishst", ISHST},
- {"ish", ISH},
- {"ld", LD},
- {"st", ST},
- {"sy", SY}
+ : AArch64NamedImmMapper(ATMappings, 0) {}
+
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
+ {"oshld", OSHLD, {}},
+ {"oshst", OSHST, {}},
+ {"osh", OSH, {}},
+ {"nshld", NSHLD, {}},
+ {"nshst", NSHST, {}},
+ {"nsh", NSH, {}},
+ {"ishld", ISHLD, {}},
+ {"ishst", ISHST, {}},
+ {"ish", ISH, {}},
+ {"ld", LD, {}},
+ {"st", ST, {}},
+ {"sy", SY, {}}
};
AArch64DB::DBarrierMapper::DBarrierMapper()
- : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
-
-const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
- {"zva", ZVA},
- {"ivac", IVAC},
- {"isw", ISW},
- {"cvac", CVAC},
- {"csw", CSW},
- {"cvau", CVAU},
- {"civac", CIVAC},
- {"cisw", CISW}
+ : AArch64NamedImmMapper(DBarrierMappings, 16u) {}
+
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
+ {"zva", ZVA, {}},
+ {"ivac", IVAC, {}},
+ {"isw", ISW, {}},
+ {"cvac", CVAC, {}},
+ {"csw", CSW, {}},
+ {"cvau", CVAU, {}},
+ {"civac", CIVAC, {}},
+ {"cisw", CISW, {}}
};
AArch64DC::DCMapper::DCMapper()
- : AArch64NamedImmMapper(DCPairs, 0) {}
+ : AArch64NamedImmMapper(DCMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
- {"ialluis", IALLUIS},
- {"iallu", IALLU},
- {"ivau", IVAU}
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
+ {"ialluis", IALLUIS, {}},
+ {"iallu", IALLU, {}},
+ {"ivau", IVAU, {}}
};
AArch64IC::ICMapper::ICMapper()
- : AArch64NamedImmMapper(ICPairs, 0) {}
+ : AArch64NamedImmMapper(ICMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
- {"sy", SY},
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
+ {"sy", SY, {}},
};
AArch64ISB::ISBMapper::ISBMapper()
- : AArch64NamedImmMapper(ISBPairs, 16) {}
-
-const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
- {"pldl1keep", PLDL1KEEP},
- {"pldl1strm", PLDL1STRM},
- {"pldl2keep", PLDL2KEEP},
- {"pldl2strm", PLDL2STRM},
- {"pldl3keep", PLDL3KEEP},
- {"pldl3strm", PLDL3STRM},
- {"plil1keep", PLIL1KEEP},
- {"plil1strm", PLIL1STRM},
- {"plil2keep", PLIL2KEEP},
- {"plil2strm", PLIL2STRM},
- {"plil3keep", PLIL3KEEP},
- {"plil3strm", PLIL3STRM},
- {"pstl1keep", PSTL1KEEP},
- {"pstl1strm", PSTL1STRM},
- {"pstl2keep", PSTL2KEEP},
- {"pstl2strm", PSTL2STRM},
- {"pstl3keep", PSTL3KEEP},
- {"pstl3strm", PSTL3STRM}
+ : AArch64NamedImmMapper(ISBMappings, 16) {}
+
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
+ {"pldl1keep", PLDL1KEEP, {}},
+ {"pldl1strm", PLDL1STRM, {}},
+ {"pldl2keep", PLDL2KEEP, {}},
+ {"pldl2strm", PLDL2STRM, {}},
+ {"pldl3keep", PLDL3KEEP, {}},
+ {"pldl3strm", PLDL3STRM, {}},
+ {"plil1keep", PLIL1KEEP, {}},
+ {"plil1strm", PLIL1STRM, {}},
+ {"plil2keep", PLIL2KEEP, {}},
+ {"plil2strm", PLIL2STRM, {}},
+ {"plil3keep", PLIL3KEEP, {}},
+ {"plil3strm", PLIL3STRM, {}},
+ {"pstl1keep", PSTL1KEEP, {}},
+ {"pstl1strm", PSTL1STRM, {}},
+ {"pstl2keep", PSTL2KEEP, {}},
+ {"pstl2strm", PSTL2STRM, {}},
+ {"pstl3keep", PSTL3KEEP, {}},
+ {"pstl3strm", PSTL3STRM, {}}
};
AArch64PRFM::PRFMMapper::PRFMMapper()
- : AArch64NamedImmMapper(PRFMPairs, 32) {}
+ : AArch64NamedImmMapper(PRFMMappings, 32) {}
-const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
- {"spsel", SPSel},
- {"daifset", DAIFSet},
- {"daifclr", DAIFClr}
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
+ {"spsel", SPSel, {}},
+ {"daifset", DAIFSet, {}},
+ {"daifclr", DAIFClr, {}},
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
};
AArch64PState::PStateMapper::PStateMapper()
- : AArch64NamedImmMapper(PStatePairs, 0) {}
-
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
- {"mdccsr_el0", MDCCSR_EL0},
- {"dbgdtrrx_el0", DBGDTRRX_EL0},
- {"mdrar_el1", MDRAR_EL1},
- {"oslsr_el1", OSLSR_EL1},
- {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
- {"pmceid0_el0", PMCEID0_EL0},
- {"pmceid1_el0", PMCEID1_EL0},
- {"midr_el1", MIDR_EL1},
- {"ccsidr_el1", CCSIDR_EL1},
- {"clidr_el1", CLIDR_EL1},
- {"ctr_el0", CTR_EL0},
- {"mpidr_el1", MPIDR_EL1},
- {"revidr_el1", REVIDR_EL1},
- {"aidr_el1", AIDR_EL1},
- {"dczid_el0", DCZID_EL0},
- {"id_pfr0_el1", ID_PFR0_EL1},
- {"id_pfr1_el1", ID_PFR1_EL1},
- {"id_dfr0_el1", ID_DFR0_EL1},
- {"id_afr0_el1", ID_AFR0_EL1},
- {"id_mmfr0_el1", ID_MMFR0_EL1},
- {"id_mmfr1_el1", ID_MMFR1_EL1},
- {"id_mmfr2_el1", ID_MMFR2_EL1},
- {"id_mmfr3_el1", ID_MMFR3_EL1},
- {"id_isar0_el1", ID_ISAR0_EL1},
- {"id_isar1_el1", ID_ISAR1_EL1},
- {"id_isar2_el1", ID_ISAR2_EL1},
- {"id_isar3_el1", ID_ISAR3_EL1},
- {"id_isar4_el1", ID_ISAR4_EL1},
- {"id_isar5_el1", ID_ISAR5_EL1},
- {"id_aa64pfr0_el1", ID_A64PFR0_EL1},
- {"id_aa64pfr1_el1", ID_A64PFR1_EL1},
- {"id_aa64dfr0_el1", ID_A64DFR0_EL1},
- {"id_aa64dfr1_el1", ID_A64DFR1_EL1},
- {"id_aa64afr0_el1", ID_A64AFR0_EL1},
- {"id_aa64afr1_el1", ID_A64AFR1_EL1},
- {"id_aa64isar0_el1", ID_A64ISAR0_EL1},
- {"id_aa64isar1_el1", ID_A64ISAR1_EL1},
- {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1},
- {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1},
- {"mvfr0_el1", MVFR0_EL1},
- {"mvfr1_el1", MVFR1_EL1},
- {"mvfr2_el1", MVFR2_EL1},
- {"rvbar_el1", RVBAR_EL1},
- {"rvbar_el2", RVBAR_EL2},
- {"rvbar_el3", RVBAR_EL3},
- {"isr_el1", ISR_EL1},
- {"cntpct_el0", CNTPCT_EL0},
- {"cntvct_el0", CNTVCT_EL0},
+ : AArch64NamedImmMapper(PStateMappings, 0) {}
+
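
[Editor's note: the new third field makes feature gating visible at lookup time, so the one PState table now serves both plain v8 and v8.1a subtargets. An illustrative usage sketch, not part of the patch:]

    // "pan" resolves only when the querying subtarget has v8.1a enabled.
    bool Valid;
    AArch64PState::PStateMapper Mapper;
    FeatureBitset Bits;                                  // no features set
    Mapper.fromString("pan", Bits, Valid);               // Valid == false
    Bits.set(AArch64::HasV8_1aOps);
    uint32_t V = Mapper.fromString("pan", Bits, Valid);  // Valid == true, V == 0x04
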
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
+ {"mdccsr_el0", MDCCSR_EL0, {}},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0, {}},
+ {"mdrar_el1", MDRAR_EL1, {}},
+ {"oslsr_el1", OSLSR_EL1, {}},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, {}},
+ {"pmceid0_el0", PMCEID0_EL0, {}},
+ {"pmceid1_el0", PMCEID1_EL0, {}},
+ {"midr_el1", MIDR_EL1, {}},
+ {"ccsidr_el1", CCSIDR_EL1, {}},
+ {"clidr_el1", CLIDR_EL1, {}},
+ {"ctr_el0", CTR_EL0, {}},
+ {"mpidr_el1", MPIDR_EL1, {}},
+ {"revidr_el1", REVIDR_EL1, {}},
+ {"aidr_el1", AIDR_EL1, {}},
+ {"dczid_el0", DCZID_EL0, {}},
+ {"id_pfr0_el1", ID_PFR0_EL1, {}},
+ {"id_pfr1_el1", ID_PFR1_EL1, {}},
+ {"id_dfr0_el1", ID_DFR0_EL1, {}},
+ {"id_afr0_el1", ID_AFR0_EL1, {}},
+ {"id_mmfr0_el1", ID_MMFR0_EL1, {}},
+ {"id_mmfr1_el1", ID_MMFR1_EL1, {}},
+ {"id_mmfr2_el1", ID_MMFR2_EL1, {}},
+ {"id_mmfr3_el1", ID_MMFR3_EL1, {}},
+ {"id_isar0_el1", ID_ISAR0_EL1, {}},
+ {"id_isar1_el1", ID_ISAR1_EL1, {}},
+ {"id_isar2_el1", ID_ISAR2_EL1, {}},
+ {"id_isar3_el1", ID_ISAR3_EL1, {}},
+ {"id_isar4_el1", ID_ISAR4_EL1, {}},
+ {"id_isar5_el1", ID_ISAR5_EL1, {}},
+ {"id_aa64pfr0_el1", ID_A64PFR0_EL1, {}},
+ {"id_aa64pfr1_el1", ID_A64PFR1_EL1, {}},
+ {"id_aa64dfr0_el1", ID_A64DFR0_EL1, {}},
+ {"id_aa64dfr1_el1", ID_A64DFR1_EL1, {}},
+ {"id_aa64afr0_el1", ID_A64AFR0_EL1, {}},
+ {"id_aa64afr1_el1", ID_A64AFR1_EL1, {}},
+ {"id_aa64isar0_el1", ID_A64ISAR0_EL1, {}},
+ {"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}},
+ {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}},
+ {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}},
+ {"mvfr0_el1", MVFR0_EL1, {}},
+ {"mvfr1_el1", MVFR1_EL1, {}},
+ {"mvfr2_el1", MVFR2_EL1, {}},
+ {"rvbar_el1", RVBAR_EL1, {}},
+ {"rvbar_el2", RVBAR_EL2, {}},
+ {"rvbar_el3", RVBAR_EL3, {}},
+ {"isr_el1", ISR_EL1, {}},
+ {"cntpct_el0", CNTPCT_EL0, {}},
+ {"cntvct_el0", CNTVCT_EL0, {}},
// Trace registers
- {"trcstatr", TRCSTATR},
- {"trcidr8", TRCIDR8},
- {"trcidr9", TRCIDR9},
- {"trcidr10", TRCIDR10},
- {"trcidr11", TRCIDR11},
- {"trcidr12", TRCIDR12},
- {"trcidr13", TRCIDR13},
- {"trcidr0", TRCIDR0},
- {"trcidr1", TRCIDR1},
- {"trcidr2", TRCIDR2},
- {"trcidr3", TRCIDR3},
- {"trcidr4", TRCIDR4},
- {"trcidr5", TRCIDR5},
- {"trcidr6", TRCIDR6},
- {"trcidr7", TRCIDR7},
- {"trcoslsr", TRCOSLSR},
- {"trcpdsr", TRCPDSR},
- {"trcdevaff0", TRCDEVAFF0},
- {"trcdevaff1", TRCDEVAFF1},
- {"trclsr", TRCLSR},
- {"trcauthstatus", TRCAUTHSTATUS},
- {"trcdevarch", TRCDEVARCH},
- {"trcdevid", TRCDEVID},
- {"trcdevtype", TRCDEVTYPE},
- {"trcpidr4", TRCPIDR4},
- {"trcpidr5", TRCPIDR5},
- {"trcpidr6", TRCPIDR6},
- {"trcpidr7", TRCPIDR7},
- {"trcpidr0", TRCPIDR0},
- {"trcpidr1", TRCPIDR1},
- {"trcpidr2", TRCPIDR2},
- {"trcpidr3", TRCPIDR3},
- {"trccidr0", TRCCIDR0},
- {"trccidr1", TRCCIDR1},
- {"trccidr2", TRCCIDR2},
- {"trccidr3", TRCCIDR3},
+ {"trcstatr", TRCSTATR, {}},
+ {"trcidr8", TRCIDR8, {}},
+ {"trcidr9", TRCIDR9, {}},
+ {"trcidr10", TRCIDR10, {}},
+ {"trcidr11", TRCIDR11, {}},
+ {"trcidr12", TRCIDR12, {}},
+ {"trcidr13", TRCIDR13, {}},
+ {"trcidr0", TRCIDR0, {}},
+ {"trcidr1", TRCIDR1, {}},
+ {"trcidr2", TRCIDR2, {}},
+ {"trcidr3", TRCIDR3, {}},
+ {"trcidr4", TRCIDR4, {}},
+ {"trcidr5", TRCIDR5, {}},
+ {"trcidr6", TRCIDR6, {}},
+ {"trcidr7", TRCIDR7, {}},
+ {"trcoslsr", TRCOSLSR, {}},
+ {"trcpdsr", TRCPDSR, {}},
+ {"trcdevaff0", TRCDEVAFF0, {}},
+ {"trcdevaff1", TRCDEVAFF1, {}},
+ {"trclsr", TRCLSR, {}},
+ {"trcauthstatus", TRCAUTHSTATUS, {}},
+ {"trcdevarch", TRCDEVARCH, {}},
+ {"trcdevid", TRCDEVID, {}},
+ {"trcdevtype", TRCDEVTYPE, {}},
+ {"trcpidr4", TRCPIDR4, {}},
+ {"trcpidr5", TRCPIDR5, {}},
+ {"trcpidr6", TRCPIDR6, {}},
+ {"trcpidr7", TRCPIDR7, {}},
+ {"trcpidr0", TRCPIDR0, {}},
+ {"trcpidr1", TRCPIDR1, {}},
+ {"trcpidr2", TRCPIDR2, {}},
+ {"trcpidr3", TRCPIDR3, {}},
+ {"trccidr0", TRCCIDR0, {}},
+ {"trccidr1", TRCCIDR1, {}},
+ {"trccidr2", TRCCIDR2, {}},
+ {"trccidr3", TRCCIDR3, {}},
// GICv3 registers
- {"icc_iar1_el1", ICC_IAR1_EL1},
- {"icc_iar0_el1", ICC_IAR0_EL1},
- {"icc_hppir1_el1", ICC_HPPIR1_EL1},
- {"icc_hppir0_el1", ICC_HPPIR0_EL1},
- {"icc_rpr_el1", ICC_RPR_EL1},
- {"ich_vtr_el2", ICH_VTR_EL2},
- {"ich_eisr_el2", ICH_EISR_EL2},
- {"ich_elsr_el2", ICH_ELSR_EL2}
+ {"icc_iar1_el1", ICC_IAR1_EL1, {}},
+ {"icc_iar0_el1", ICC_IAR0_EL1, {}},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1, {}},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1, {}},
+ {"icc_rpr_el1", ICC_RPR_EL1, {}},
+ {"ich_vtr_el2", ICH_VTR_EL2, {}},
+ {"ich_eisr_el2", ICH_EISR_EL2, {}},
+ {"ich_elsr_el2", ICH_ELSR_EL2, {}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorid_el1", LORID_EL1, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
- InstPairs = &MRSPairs[0];
- NumInstPairs = llvm::array_lengthof(MRSPairs);
+AArch64SysReg::MRSMapper::MRSMapper() {
+ InstMappings = &MRSMappings[0];
+ NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
- {"dbgdtrtx_el0", DBGDTRTX_EL0},
- {"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0},
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0, {}},
+ {"oslar_el1", OSLAR_EL1, {}},
+ {"pmswinc_el0", PMSWINC_EL0, {}},
// Trace registers
- {"trcoslar", TRCOSLAR},
- {"trclar", TRCLAR},
+ {"trcoslar", TRCOSLAR, {}},
+ {"trclar", TRCLAR, {}},
// GICv3 registers
- {"icc_eoir1_el1", ICC_EOIR1_EL1},
- {"icc_eoir0_el1", ICC_EOIR0_EL1},
- {"icc_dir_el1", ICC_DIR_EL1},
- {"icc_sgi1r_el1", ICC_SGI1R_EL1},
- {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
- {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+ {"icc_eoir1_el1", ICC_EOIR1_EL1, {}},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1, {}},
+ {"icc_dir_el1", ICC_DIR_EL1, {}},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1, {}},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1, {}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
};
-AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
- InstPairs = &MSRPairs[0];
- NumInstPairs = llvm::array_lengthof(MSRPairs);
+AArch64SysReg::MSRMapper::MSRMapper() {
+ InstMappings = &MSRMappings[0];
+ NumInstMappings = llvm::array_lengthof(MSRMappings);
}
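
[Editor's note: MRS and MSR keep separate instruction-specific tables because some registers are readable but not writable, or the reverse; each constructor simply points the shared SysRegMapper machinery at its own table. A hedged usage sketch:]

    // mdccsr_el0 is read-only and lives in MRSMappings; oslar_el1 is
    // write-only and lives in MSRMappings. Each mapper resolves only its own.
    bool Valid;
    FeatureBitset Bits;
    uint32_t RO = AArch64SysReg::MRSMapper().fromString("mdccsr_el0", Bits, Valid);
    uint32_t WO = AArch64SysReg::MSRMapper().fromString("oslar_el1", Bits, Valid);
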
-const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
- {"osdtrrx_el1", OSDTRRX_EL1},
- {"osdtrtx_el1", OSDTRTX_EL1},
- {"teecr32_el1", TEECR32_EL1},
- {"mdccint_el1", MDCCINT_EL1},
- {"mdscr_el1", MDSCR_EL1},
- {"dbgdtr_el0", DBGDTR_EL0},
- {"oseccr_el1", OSECCR_EL1},
- {"dbgvcr32_el2", DBGVCR32_EL2},
- {"dbgbvr0_el1", DBGBVR0_EL1},
- {"dbgbvr1_el1", DBGBVR1_EL1},
- {"dbgbvr2_el1", DBGBVR2_EL1},
- {"dbgbvr3_el1", DBGBVR3_EL1},
- {"dbgbvr4_el1", DBGBVR4_EL1},
- {"dbgbvr5_el1", DBGBVR5_EL1},
- {"dbgbvr6_el1", DBGBVR6_EL1},
- {"dbgbvr7_el1", DBGBVR7_EL1},
- {"dbgbvr8_el1", DBGBVR8_EL1},
- {"dbgbvr9_el1", DBGBVR9_EL1},
- {"dbgbvr10_el1", DBGBVR10_EL1},
- {"dbgbvr11_el1", DBGBVR11_EL1},
- {"dbgbvr12_el1", DBGBVR12_EL1},
- {"dbgbvr13_el1", DBGBVR13_EL1},
- {"dbgbvr14_el1", DBGBVR14_EL1},
- {"dbgbvr15_el1", DBGBVR15_EL1},
- {"dbgbcr0_el1", DBGBCR0_EL1},
- {"dbgbcr1_el1", DBGBCR1_EL1},
- {"dbgbcr2_el1", DBGBCR2_EL1},
- {"dbgbcr3_el1", DBGBCR3_EL1},
- {"dbgbcr4_el1", DBGBCR4_EL1},
- {"dbgbcr5_el1", DBGBCR5_EL1},
- {"dbgbcr6_el1", DBGBCR6_EL1},
- {"dbgbcr7_el1", DBGBCR7_EL1},
- {"dbgbcr8_el1", DBGBCR8_EL1},
- {"dbgbcr9_el1", DBGBCR9_EL1},
- {"dbgbcr10_el1", DBGBCR10_EL1},
- {"dbgbcr11_el1", DBGBCR11_EL1},
- {"dbgbcr12_el1", DBGBCR12_EL1},
- {"dbgbcr13_el1", DBGBCR13_EL1},
- {"dbgbcr14_el1", DBGBCR14_EL1},
- {"dbgbcr15_el1", DBGBCR15_EL1},
- {"dbgwvr0_el1", DBGWVR0_EL1},
- {"dbgwvr1_el1", DBGWVR1_EL1},
- {"dbgwvr2_el1", DBGWVR2_EL1},
- {"dbgwvr3_el1", DBGWVR3_EL1},
- {"dbgwvr4_el1", DBGWVR4_EL1},
- {"dbgwvr5_el1", DBGWVR5_EL1},
- {"dbgwvr6_el1", DBGWVR6_EL1},
- {"dbgwvr7_el1", DBGWVR7_EL1},
- {"dbgwvr8_el1", DBGWVR8_EL1},
- {"dbgwvr9_el1", DBGWVR9_EL1},
- {"dbgwvr10_el1", DBGWVR10_EL1},
- {"dbgwvr11_el1", DBGWVR11_EL1},
- {"dbgwvr12_el1", DBGWVR12_EL1},
- {"dbgwvr13_el1", DBGWVR13_EL1},
- {"dbgwvr14_el1", DBGWVR14_EL1},
- {"dbgwvr15_el1", DBGWVR15_EL1},
- {"dbgwcr0_el1", DBGWCR0_EL1},
- {"dbgwcr1_el1", DBGWCR1_EL1},
- {"dbgwcr2_el1", DBGWCR2_EL1},
- {"dbgwcr3_el1", DBGWCR3_EL1},
- {"dbgwcr4_el1", DBGWCR4_EL1},
- {"dbgwcr5_el1", DBGWCR5_EL1},
- {"dbgwcr6_el1", DBGWCR6_EL1},
- {"dbgwcr7_el1", DBGWCR7_EL1},
- {"dbgwcr8_el1", DBGWCR8_EL1},
- {"dbgwcr9_el1", DBGWCR9_EL1},
- {"dbgwcr10_el1", DBGWCR10_EL1},
- {"dbgwcr11_el1", DBGWCR11_EL1},
- {"dbgwcr12_el1", DBGWCR12_EL1},
- {"dbgwcr13_el1", DBGWCR13_EL1},
- {"dbgwcr14_el1", DBGWCR14_EL1},
- {"dbgwcr15_el1", DBGWCR15_EL1},
- {"teehbr32_el1", TEEHBR32_EL1},
- {"osdlr_el1", OSDLR_EL1},
- {"dbgprcr_el1", DBGPRCR_EL1},
- {"dbgclaimset_el1", DBGCLAIMSET_EL1},
- {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
- {"csselr_el1", CSSELR_EL1},
- {"vpidr_el2", VPIDR_EL2},
- {"vmpidr_el2", VMPIDR_EL2},
- {"sctlr_el1", SCTLR_EL1},
- {"sctlr_el2", SCTLR_EL2},
- {"sctlr_el3", SCTLR_EL3},
- {"actlr_el1", ACTLR_EL1},
- {"actlr_el2", ACTLR_EL2},
- {"actlr_el3", ACTLR_EL3},
- {"cpacr_el1", CPACR_EL1},
- {"hcr_el2", HCR_EL2},
- {"scr_el3", SCR_EL3},
- {"mdcr_el2", MDCR_EL2},
- {"sder32_el3", SDER32_EL3},
- {"cptr_el2", CPTR_EL2},
- {"cptr_el3", CPTR_EL3},
- {"hstr_el2", HSTR_EL2},
- {"hacr_el2", HACR_EL2},
- {"mdcr_el3", MDCR_EL3},
- {"ttbr0_el1", TTBR0_EL1},
- {"ttbr0_el2", TTBR0_EL2},
- {"ttbr0_el3", TTBR0_EL3},
- {"ttbr1_el1", TTBR1_EL1},
- {"tcr_el1", TCR_EL1},
- {"tcr_el2", TCR_EL2},
- {"tcr_el3", TCR_EL3},
- {"vttbr_el2", VTTBR_EL2},
- {"vtcr_el2", VTCR_EL2},
- {"dacr32_el2", DACR32_EL2},
- {"spsr_el1", SPSR_EL1},
- {"spsr_el2", SPSR_EL2},
- {"spsr_el3", SPSR_EL3},
- {"elr_el1", ELR_EL1},
- {"elr_el2", ELR_EL2},
- {"elr_el3", ELR_EL3},
- {"sp_el0", SP_EL0},
- {"sp_el1", SP_EL1},
- {"sp_el2", SP_EL2},
- {"spsel", SPSel},
- {"nzcv", NZCV},
- {"daif", DAIF},
- {"currentel", CurrentEL},
- {"spsr_irq", SPSR_irq},
- {"spsr_abt", SPSR_abt},
- {"spsr_und", SPSR_und},
- {"spsr_fiq", SPSR_fiq},
- {"fpcr", FPCR},
- {"fpsr", FPSR},
- {"dspsr_el0", DSPSR_EL0},
- {"dlr_el0", DLR_EL0},
- {"ifsr32_el2", IFSR32_EL2},
- {"afsr0_el1", AFSR0_EL1},
- {"afsr0_el2", AFSR0_EL2},
- {"afsr0_el3", AFSR0_EL3},
- {"afsr1_el1", AFSR1_EL1},
- {"afsr1_el2", AFSR1_EL2},
- {"afsr1_el3", AFSR1_EL3},
- {"esr_el1", ESR_EL1},
- {"esr_el2", ESR_EL2},
- {"esr_el3", ESR_EL3},
- {"fpexc32_el2", FPEXC32_EL2},
- {"far_el1", FAR_EL1},
- {"far_el2", FAR_EL2},
- {"far_el3", FAR_EL3},
- {"hpfar_el2", HPFAR_EL2},
- {"par_el1", PAR_EL1},
- {"pmcr_el0", PMCR_EL0},
- {"pmcntenset_el0", PMCNTENSET_EL0},
- {"pmcntenclr_el0", PMCNTENCLR_EL0},
- {"pmovsclr_el0", PMOVSCLR_EL0},
- {"pmselr_el0", PMSELR_EL0},
- {"pmccntr_el0", PMCCNTR_EL0},
- {"pmxevtyper_el0", PMXEVTYPER_EL0},
- {"pmxevcntr_el0", PMXEVCNTR_EL0},
- {"pmuserenr_el0", PMUSERENR_EL0},
- {"pmintenset_el1", PMINTENSET_EL1},
- {"pmintenclr_el1", PMINTENCLR_EL1},
- {"pmovsset_el0", PMOVSSET_EL0},
- {"mair_el1", MAIR_EL1},
- {"mair_el2", MAIR_EL2},
- {"mair_el3", MAIR_EL3},
- {"amair_el1", AMAIR_EL1},
- {"amair_el2", AMAIR_EL2},
- {"amair_el3", AMAIR_EL3},
- {"vbar_el1", VBAR_EL1},
- {"vbar_el2", VBAR_EL2},
- {"vbar_el3", VBAR_EL3},
- {"rmr_el1", RMR_EL1},
- {"rmr_el2", RMR_EL2},
- {"rmr_el3", RMR_EL3},
- {"contextidr_el1", CONTEXTIDR_EL1},
- {"tpidr_el0", TPIDR_EL0},
- {"tpidr_el2", TPIDR_EL2},
- {"tpidr_el3", TPIDR_EL3},
- {"tpidrro_el0", TPIDRRO_EL0},
- {"tpidr_el1", TPIDR_EL1},
- {"cntfrq_el0", CNTFRQ_EL0},
- {"cntvoff_el2", CNTVOFF_EL2},
- {"cntkctl_el1", CNTKCTL_EL1},
- {"cnthctl_el2", CNTHCTL_EL2},
- {"cntp_tval_el0", CNTP_TVAL_EL0},
- {"cnthp_tval_el2", CNTHP_TVAL_EL2},
- {"cntps_tval_el1", CNTPS_TVAL_EL1},
- {"cntp_ctl_el0", CNTP_CTL_EL0},
- {"cnthp_ctl_el2", CNTHP_CTL_EL2},
- {"cntps_ctl_el1", CNTPS_CTL_EL1},
- {"cntp_cval_el0", CNTP_CVAL_EL0},
- {"cnthp_cval_el2", CNTHP_CVAL_EL2},
- {"cntps_cval_el1", CNTPS_CVAL_EL1},
- {"cntv_tval_el0", CNTV_TVAL_EL0},
- {"cntv_ctl_el0", CNTV_CTL_EL0},
- {"cntv_cval_el0", CNTV_CVAL_EL0},
- {"pmevcntr0_el0", PMEVCNTR0_EL0},
- {"pmevcntr1_el0", PMEVCNTR1_EL0},
- {"pmevcntr2_el0", PMEVCNTR2_EL0},
- {"pmevcntr3_el0", PMEVCNTR3_EL0},
- {"pmevcntr4_el0", PMEVCNTR4_EL0},
- {"pmevcntr5_el0", PMEVCNTR5_EL0},
- {"pmevcntr6_el0", PMEVCNTR6_EL0},
- {"pmevcntr7_el0", PMEVCNTR7_EL0},
- {"pmevcntr8_el0", PMEVCNTR8_EL0},
- {"pmevcntr9_el0", PMEVCNTR9_EL0},
- {"pmevcntr10_el0", PMEVCNTR10_EL0},
- {"pmevcntr11_el0", PMEVCNTR11_EL0},
- {"pmevcntr12_el0", PMEVCNTR12_EL0},
- {"pmevcntr13_el0", PMEVCNTR13_EL0},
- {"pmevcntr14_el0", PMEVCNTR14_EL0},
- {"pmevcntr15_el0", PMEVCNTR15_EL0},
- {"pmevcntr16_el0", PMEVCNTR16_EL0},
- {"pmevcntr17_el0", PMEVCNTR17_EL0},
- {"pmevcntr18_el0", PMEVCNTR18_EL0},
- {"pmevcntr19_el0", PMEVCNTR19_EL0},
- {"pmevcntr20_el0", PMEVCNTR20_EL0},
- {"pmevcntr21_el0", PMEVCNTR21_EL0},
- {"pmevcntr22_el0", PMEVCNTR22_EL0},
- {"pmevcntr23_el0", PMEVCNTR23_EL0},
- {"pmevcntr24_el0", PMEVCNTR24_EL0},
- {"pmevcntr25_el0", PMEVCNTR25_EL0},
- {"pmevcntr26_el0", PMEVCNTR26_EL0},
- {"pmevcntr27_el0", PMEVCNTR27_EL0},
- {"pmevcntr28_el0", PMEVCNTR28_EL0},
- {"pmevcntr29_el0", PMEVCNTR29_EL0},
- {"pmevcntr30_el0", PMEVCNTR30_EL0},
- {"pmccfiltr_el0", PMCCFILTR_EL0},
- {"pmevtyper0_el0", PMEVTYPER0_EL0},
- {"pmevtyper1_el0", PMEVTYPER1_EL0},
- {"pmevtyper2_el0", PMEVTYPER2_EL0},
- {"pmevtyper3_el0", PMEVTYPER3_EL0},
- {"pmevtyper4_el0", PMEVTYPER4_EL0},
- {"pmevtyper5_el0", PMEVTYPER5_EL0},
- {"pmevtyper6_el0", PMEVTYPER6_EL0},
- {"pmevtyper7_el0", PMEVTYPER7_EL0},
- {"pmevtyper8_el0", PMEVTYPER8_EL0},
- {"pmevtyper9_el0", PMEVTYPER9_EL0},
- {"pmevtyper10_el0", PMEVTYPER10_EL0},
- {"pmevtyper11_el0", PMEVTYPER11_EL0},
- {"pmevtyper12_el0", PMEVTYPER12_EL0},
- {"pmevtyper13_el0", PMEVTYPER13_EL0},
- {"pmevtyper14_el0", PMEVTYPER14_EL0},
- {"pmevtyper15_el0", PMEVTYPER15_EL0},
- {"pmevtyper16_el0", PMEVTYPER16_EL0},
- {"pmevtyper17_el0", PMEVTYPER17_EL0},
- {"pmevtyper18_el0", PMEVTYPER18_EL0},
- {"pmevtyper19_el0", PMEVTYPER19_EL0},
- {"pmevtyper20_el0", PMEVTYPER20_EL0},
- {"pmevtyper21_el0", PMEVTYPER21_EL0},
- {"pmevtyper22_el0", PMEVTYPER22_EL0},
- {"pmevtyper23_el0", PMEVTYPER23_EL0},
- {"pmevtyper24_el0", PMEVTYPER24_EL0},
- {"pmevtyper25_el0", PMEVTYPER25_EL0},
- {"pmevtyper26_el0", PMEVTYPER26_EL0},
- {"pmevtyper27_el0", PMEVTYPER27_EL0},
- {"pmevtyper28_el0", PMEVTYPER28_EL0},
- {"pmevtyper29_el0", PMEVTYPER29_EL0},
- {"pmevtyper30_el0", PMEVTYPER30_EL0},
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1, {}},
+ {"osdtrtx_el1", OSDTRTX_EL1, {}},
+ {"teecr32_el1", TEECR32_EL1, {}},
+ {"mdccint_el1", MDCCINT_EL1, {}},
+ {"mdscr_el1", MDSCR_EL1, {}},
+ {"dbgdtr_el0", DBGDTR_EL0, {}},
+ {"oseccr_el1", OSECCR_EL1, {}},
+ {"dbgvcr32_el2", DBGVCR32_EL2, {}},
+ {"dbgbvr0_el1", DBGBVR0_EL1, {}},
+ {"dbgbvr1_el1", DBGBVR1_EL1, {}},
+ {"dbgbvr2_el1", DBGBVR2_EL1, {}},
+ {"dbgbvr3_el1", DBGBVR3_EL1, {}},
+ {"dbgbvr4_el1", DBGBVR4_EL1, {}},
+ {"dbgbvr5_el1", DBGBVR5_EL1, {}},
+ {"dbgbvr6_el1", DBGBVR6_EL1, {}},
+ {"dbgbvr7_el1", DBGBVR7_EL1, {}},
+ {"dbgbvr8_el1", DBGBVR8_EL1, {}},
+ {"dbgbvr9_el1", DBGBVR9_EL1, {}},
+ {"dbgbvr10_el1", DBGBVR10_EL1, {}},
+ {"dbgbvr11_el1", DBGBVR11_EL1, {}},
+ {"dbgbvr12_el1", DBGBVR12_EL1, {}},
+ {"dbgbvr13_el1", DBGBVR13_EL1, {}},
+ {"dbgbvr14_el1", DBGBVR14_EL1, {}},
+ {"dbgbvr15_el1", DBGBVR15_EL1, {}},
+ {"dbgbcr0_el1", DBGBCR0_EL1, {}},
+ {"dbgbcr1_el1", DBGBCR1_EL1, {}},
+ {"dbgbcr2_el1", DBGBCR2_EL1, {}},
+ {"dbgbcr3_el1", DBGBCR3_EL1, {}},
+ {"dbgbcr4_el1", DBGBCR4_EL1, {}},
+ {"dbgbcr5_el1", DBGBCR5_EL1, {}},
+ {"dbgbcr6_el1", DBGBCR6_EL1, {}},
+ {"dbgbcr7_el1", DBGBCR7_EL1, {}},
+ {"dbgbcr8_el1", DBGBCR8_EL1, {}},
+ {"dbgbcr9_el1", DBGBCR9_EL1, {}},
+ {"dbgbcr10_el1", DBGBCR10_EL1, {}},
+ {"dbgbcr11_el1", DBGBCR11_EL1, {}},
+ {"dbgbcr12_el1", DBGBCR12_EL1, {}},
+ {"dbgbcr13_el1", DBGBCR13_EL1, {}},
+ {"dbgbcr14_el1", DBGBCR14_EL1, {}},
+ {"dbgbcr15_el1", DBGBCR15_EL1, {}},
+ {"dbgwvr0_el1", DBGWVR0_EL1, {}},
+ {"dbgwvr1_el1", DBGWVR1_EL1, {}},
+ {"dbgwvr2_el1", DBGWVR2_EL1, {}},
+ {"dbgwvr3_el1", DBGWVR3_EL1, {}},
+ {"dbgwvr4_el1", DBGWVR4_EL1, {}},
+ {"dbgwvr5_el1", DBGWVR5_EL1, {}},
+ {"dbgwvr6_el1", DBGWVR6_EL1, {}},
+ {"dbgwvr7_el1", DBGWVR7_EL1, {}},
+ {"dbgwvr8_el1", DBGWVR8_EL1, {}},
+ {"dbgwvr9_el1", DBGWVR9_EL1, {}},
+ {"dbgwvr10_el1", DBGWVR10_EL1, {}},
+ {"dbgwvr11_el1", DBGWVR11_EL1, {}},
+ {"dbgwvr12_el1", DBGWVR12_EL1, {}},
+ {"dbgwvr13_el1", DBGWVR13_EL1, {}},
+ {"dbgwvr14_el1", DBGWVR14_EL1, {}},
+ {"dbgwvr15_el1", DBGWVR15_EL1, {}},
+ {"dbgwcr0_el1", DBGWCR0_EL1, {}},
+ {"dbgwcr1_el1", DBGWCR1_EL1, {}},
+ {"dbgwcr2_el1", DBGWCR2_EL1, {}},
+ {"dbgwcr3_el1", DBGWCR3_EL1, {}},
+ {"dbgwcr4_el1", DBGWCR4_EL1, {}},
+ {"dbgwcr5_el1", DBGWCR5_EL1, {}},
+ {"dbgwcr6_el1", DBGWCR6_EL1, {}},
+ {"dbgwcr7_el1", DBGWCR7_EL1, {}},
+ {"dbgwcr8_el1", DBGWCR8_EL1, {}},
+ {"dbgwcr9_el1", DBGWCR9_EL1, {}},
+ {"dbgwcr10_el1", DBGWCR10_EL1, {}},
+ {"dbgwcr11_el1", DBGWCR11_EL1, {}},
+ {"dbgwcr12_el1", DBGWCR12_EL1, {}},
+ {"dbgwcr13_el1", DBGWCR13_EL1, {}},
+ {"dbgwcr14_el1", DBGWCR14_EL1, {}},
+ {"dbgwcr15_el1", DBGWCR15_EL1, {}},
+ {"teehbr32_el1", TEEHBR32_EL1, {}},
+ {"osdlr_el1", OSDLR_EL1, {}},
+ {"dbgprcr_el1", DBGPRCR_EL1, {}},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1, {}},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, {}},
+ {"csselr_el1", CSSELR_EL1, {}},
+ {"vpidr_el2", VPIDR_EL2, {}},
+ {"vmpidr_el2", VMPIDR_EL2, {}},
+ {"sctlr_el1", SCTLR_EL1, {}},
+ {"sctlr_el2", SCTLR_EL2, {}},
+ {"sctlr_el3", SCTLR_EL3, {}},
+ {"actlr_el1", ACTLR_EL1, {}},
+ {"actlr_el2", ACTLR_EL2, {}},
+ {"actlr_el3", ACTLR_EL3, {}},
+ {"cpacr_el1", CPACR_EL1, {}},
+ {"hcr_el2", HCR_EL2, {}},
+ {"scr_el3", SCR_EL3, {}},
+ {"mdcr_el2", MDCR_EL2, {}},
+ {"sder32_el3", SDER32_EL3, {}},
+ {"cptr_el2", CPTR_EL2, {}},
+ {"cptr_el3", CPTR_EL3, {}},
+ {"hstr_el2", HSTR_EL2, {}},
+ {"hacr_el2", HACR_EL2, {}},
+ {"mdcr_el3", MDCR_EL3, {}},
+ {"ttbr0_el1", TTBR0_EL1, {}},
+ {"ttbr0_el2", TTBR0_EL2, {}},
+ {"ttbr0_el3", TTBR0_EL3, {}},
+ {"ttbr1_el1", TTBR1_EL1, {}},
+ {"tcr_el1", TCR_EL1, {}},
+ {"tcr_el2", TCR_EL2, {}},
+ {"tcr_el3", TCR_EL3, {}},
+ {"vttbr_el2", VTTBR_EL2, {}},
+ {"vtcr_el2", VTCR_EL2, {}},
+ {"dacr32_el2", DACR32_EL2, {}},
+ {"spsr_el1", SPSR_EL1, {}},
+ {"spsr_el2", SPSR_EL2, {}},
+ {"spsr_el3", SPSR_EL3, {}},
+ {"elr_el1", ELR_EL1, {}},
+ {"elr_el2", ELR_EL2, {}},
+ {"elr_el3", ELR_EL3, {}},
+ {"sp_el0", SP_EL0, {}},
+ {"sp_el1", SP_EL1, {}},
+ {"sp_el2", SP_EL2, {}},
+ {"spsel", SPSel, {}},
+ {"nzcv", NZCV, {}},
+ {"daif", DAIF, {}},
+ {"currentel", CurrentEL, {}},
+ {"spsr_irq", SPSR_irq, {}},
+ {"spsr_abt", SPSR_abt, {}},
+ {"spsr_und", SPSR_und, {}},
+ {"spsr_fiq", SPSR_fiq, {}},
+ {"fpcr", FPCR, {}},
+ {"fpsr", FPSR, {}},
+ {"dspsr_el0", DSPSR_EL0, {}},
+ {"dlr_el0", DLR_EL0, {}},
+ {"ifsr32_el2", IFSR32_EL2, {}},
+ {"afsr0_el1", AFSR0_EL1, {}},
+ {"afsr0_el2", AFSR0_EL2, {}},
+ {"afsr0_el3", AFSR0_EL3, {}},
+ {"afsr1_el1", AFSR1_EL1, {}},
+ {"afsr1_el2", AFSR1_EL2, {}},
+ {"afsr1_el3", AFSR1_EL3, {}},
+ {"esr_el1", ESR_EL1, {}},
+ {"esr_el2", ESR_EL2, {}},
+ {"esr_el3", ESR_EL3, {}},
+ {"fpexc32_el2", FPEXC32_EL2, {}},
+ {"far_el1", FAR_EL1, {}},
+ {"far_el2", FAR_EL2, {}},
+ {"far_el3", FAR_EL3, {}},
+ {"hpfar_el2", HPFAR_EL2, {}},
+ {"par_el1", PAR_EL1, {}},
+ {"pmcr_el0", PMCR_EL0, {}},
+ {"pmcntenset_el0", PMCNTENSET_EL0, {}},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0, {}},
+ {"pmovsclr_el0", PMOVSCLR_EL0, {}},
+ {"pmselr_el0", PMSELR_EL0, {}},
+ {"pmccntr_el0", PMCCNTR_EL0, {}},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0, {}},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0, {}},
+ {"pmuserenr_el0", PMUSERENR_EL0, {}},
+ {"pmintenset_el1", PMINTENSET_EL1, {}},
+ {"pmintenclr_el1", PMINTENCLR_EL1, {}},
+ {"pmovsset_el0", PMOVSSET_EL0, {}},
+ {"mair_el1", MAIR_EL1, {}},
+ {"mair_el2", MAIR_EL2, {}},
+ {"mair_el3", MAIR_EL3, {}},
+ {"amair_el1", AMAIR_EL1, {}},
+ {"amair_el2", AMAIR_EL2, {}},
+ {"amair_el3", AMAIR_EL3, {}},
+ {"vbar_el1", VBAR_EL1, {}},
+ {"vbar_el2", VBAR_EL2, {}},
+ {"vbar_el3", VBAR_EL3, {}},
+ {"rmr_el1", RMR_EL1, {}},
+ {"rmr_el2", RMR_EL2, {}},
+ {"rmr_el3", RMR_EL3, {}},
+ {"contextidr_el1", CONTEXTIDR_EL1, {}},
+ {"tpidr_el0", TPIDR_EL0, {}},
+ {"tpidr_el2", TPIDR_EL2, {}},
+ {"tpidr_el3", TPIDR_EL3, {}},
+ {"tpidrro_el0", TPIDRRO_EL0, {}},
+ {"tpidr_el1", TPIDR_EL1, {}},
+ {"cntfrq_el0", CNTFRQ_EL0, {}},
+ {"cntvoff_el2", CNTVOFF_EL2, {}},
+ {"cntkctl_el1", CNTKCTL_EL1, {}},
+ {"cnthctl_el2", CNTHCTL_EL2, {}},
+ {"cntp_tval_el0", CNTP_TVAL_EL0, {}},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2, {}},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1, {}},
+ {"cntp_ctl_el0", CNTP_CTL_EL0, {}},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2, {}},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1, {}},
+ {"cntp_cval_el0", CNTP_CVAL_EL0, {}},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2, {}},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1, {}},
+ {"cntv_tval_el0", CNTV_TVAL_EL0, {}},
+ {"cntv_ctl_el0", CNTV_CTL_EL0, {}},
+ {"cntv_cval_el0", CNTV_CVAL_EL0, {}},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0, {}},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0, {}},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0, {}},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0, {}},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0, {}},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0, {}},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0, {}},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0, {}},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0, {}},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0, {}},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0, {}},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0, {}},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0, {}},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0, {}},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0, {}},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0, {}},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0, {}},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0, {}},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0, {}},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0, {}},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0, {}},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0, {}},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0, {}},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0, {}},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0, {}},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0, {}},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0, {}},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0, {}},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0, {}},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0, {}},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0, {}},
+ {"pmccfiltr_el0", PMCCFILTR_EL0, {}},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0, {}},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0, {}},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0, {}},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0, {}},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0, {}},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0, {}},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0, {}},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0, {}},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0, {}},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0, {}},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0, {}},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0, {}},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0, {}},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0, {}},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0, {}},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0, {}},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0, {}},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0, {}},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0, {}},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0, {}},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0, {}},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0, {}},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0, {}},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0, {}},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0, {}},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0, {}},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0, {}},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0, {}},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0, {}},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0, {}},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0, {}},
// Trace registers
- {"trcprgctlr", TRCPRGCTLR},
- {"trcprocselr", TRCPROCSELR},
- {"trcconfigr", TRCCONFIGR},
- {"trcauxctlr", TRCAUXCTLR},
- {"trceventctl0r", TRCEVENTCTL0R},
- {"trceventctl1r", TRCEVENTCTL1R},
- {"trcstallctlr", TRCSTALLCTLR},
- {"trctsctlr", TRCTSCTLR},
- {"trcsyncpr", TRCSYNCPR},
- {"trcccctlr", TRCCCCTLR},
- {"trcbbctlr", TRCBBCTLR},
- {"trctraceidr", TRCTRACEIDR},
- {"trcqctlr", TRCQCTLR},
- {"trcvictlr", TRCVICTLR},
- {"trcviiectlr", TRCVIIECTLR},
- {"trcvissctlr", TRCVISSCTLR},
- {"trcvipcssctlr", TRCVIPCSSCTLR},
- {"trcvdctlr", TRCVDCTLR},
- {"trcvdsacctlr", TRCVDSACCTLR},
- {"trcvdarcctlr", TRCVDARCCTLR},
- {"trcseqevr0", TRCSEQEVR0},
- {"trcseqevr1", TRCSEQEVR1},
- {"trcseqevr2", TRCSEQEVR2},
- {"trcseqrstevr", TRCSEQRSTEVR},
- {"trcseqstr", TRCSEQSTR},
- {"trcextinselr", TRCEXTINSELR},
- {"trccntrldvr0", TRCCNTRLDVR0},
- {"trccntrldvr1", TRCCNTRLDVR1},
- {"trccntrldvr2", TRCCNTRLDVR2},
- {"trccntrldvr3", TRCCNTRLDVR3},
- {"trccntctlr0", TRCCNTCTLR0},
- {"trccntctlr1", TRCCNTCTLR1},
- {"trccntctlr2", TRCCNTCTLR2},
- {"trccntctlr3", TRCCNTCTLR3},
- {"trccntvr0", TRCCNTVR0},
- {"trccntvr1", TRCCNTVR1},
- {"trccntvr2", TRCCNTVR2},
- {"trccntvr3", TRCCNTVR3},
- {"trcimspec0", TRCIMSPEC0},
- {"trcimspec1", TRCIMSPEC1},
- {"trcimspec2", TRCIMSPEC2},
- {"trcimspec3", TRCIMSPEC3},
- {"trcimspec4", TRCIMSPEC4},
- {"trcimspec5", TRCIMSPEC5},
- {"trcimspec6", TRCIMSPEC6},
- {"trcimspec7", TRCIMSPEC7},
- {"trcrsctlr2", TRCRSCTLR2},
- {"trcrsctlr3", TRCRSCTLR3},
- {"trcrsctlr4", TRCRSCTLR4},
- {"trcrsctlr5", TRCRSCTLR5},
- {"trcrsctlr6", TRCRSCTLR6},
- {"trcrsctlr7", TRCRSCTLR7},
- {"trcrsctlr8", TRCRSCTLR8},
- {"trcrsctlr9", TRCRSCTLR9},
- {"trcrsctlr10", TRCRSCTLR10},
- {"trcrsctlr11", TRCRSCTLR11},
- {"trcrsctlr12", TRCRSCTLR12},
- {"trcrsctlr13", TRCRSCTLR13},
- {"trcrsctlr14", TRCRSCTLR14},
- {"trcrsctlr15", TRCRSCTLR15},
- {"trcrsctlr16", TRCRSCTLR16},
- {"trcrsctlr17", TRCRSCTLR17},
- {"trcrsctlr18", TRCRSCTLR18},
- {"trcrsctlr19", TRCRSCTLR19},
- {"trcrsctlr20", TRCRSCTLR20},
- {"trcrsctlr21", TRCRSCTLR21},
- {"trcrsctlr22", TRCRSCTLR22},
- {"trcrsctlr23", TRCRSCTLR23},
- {"trcrsctlr24", TRCRSCTLR24},
- {"trcrsctlr25", TRCRSCTLR25},
- {"trcrsctlr26", TRCRSCTLR26},
- {"trcrsctlr27", TRCRSCTLR27},
- {"trcrsctlr28", TRCRSCTLR28},
- {"trcrsctlr29", TRCRSCTLR29},
- {"trcrsctlr30", TRCRSCTLR30},
- {"trcrsctlr31", TRCRSCTLR31},
- {"trcssccr0", TRCSSCCR0},
- {"trcssccr1", TRCSSCCR1},
- {"trcssccr2", TRCSSCCR2},
- {"trcssccr3", TRCSSCCR3},
- {"trcssccr4", TRCSSCCR4},
- {"trcssccr5", TRCSSCCR5},
- {"trcssccr6", TRCSSCCR6},
- {"trcssccr7", TRCSSCCR7},
- {"trcsscsr0", TRCSSCSR0},
- {"trcsscsr1", TRCSSCSR1},
- {"trcsscsr2", TRCSSCSR2},
- {"trcsscsr3", TRCSSCSR3},
- {"trcsscsr4", TRCSSCSR4},
- {"trcsscsr5", TRCSSCSR5},
- {"trcsscsr6", TRCSSCSR6},
- {"trcsscsr7", TRCSSCSR7},
- {"trcsspcicr0", TRCSSPCICR0},
- {"trcsspcicr1", TRCSSPCICR1},
- {"trcsspcicr2", TRCSSPCICR2},
- {"trcsspcicr3", TRCSSPCICR3},
- {"trcsspcicr4", TRCSSPCICR4},
- {"trcsspcicr5", TRCSSPCICR5},
- {"trcsspcicr6", TRCSSPCICR6},
- {"trcsspcicr7", TRCSSPCICR7},
- {"trcpdcr", TRCPDCR},
- {"trcacvr0", TRCACVR0},
- {"trcacvr1", TRCACVR1},
- {"trcacvr2", TRCACVR2},
- {"trcacvr3", TRCACVR3},
- {"trcacvr4", TRCACVR4},
- {"trcacvr5", TRCACVR5},
- {"trcacvr6", TRCACVR6},
- {"trcacvr7", TRCACVR7},
- {"trcacvr8", TRCACVR8},
- {"trcacvr9", TRCACVR9},
- {"trcacvr10", TRCACVR10},
- {"trcacvr11", TRCACVR11},
- {"trcacvr12", TRCACVR12},
- {"trcacvr13", TRCACVR13},
- {"trcacvr14", TRCACVR14},
- {"trcacvr15", TRCACVR15},
- {"trcacatr0", TRCACATR0},
- {"trcacatr1", TRCACATR1},
- {"trcacatr2", TRCACATR2},
- {"trcacatr3", TRCACATR3},
- {"trcacatr4", TRCACATR4},
- {"trcacatr5", TRCACATR5},
- {"trcacatr6", TRCACATR6},
- {"trcacatr7", TRCACATR7},
- {"trcacatr8", TRCACATR8},
- {"trcacatr9", TRCACATR9},
- {"trcacatr10", TRCACATR10},
- {"trcacatr11", TRCACATR11},
- {"trcacatr12", TRCACATR12},
- {"trcacatr13", TRCACATR13},
- {"trcacatr14", TRCACATR14},
- {"trcacatr15", TRCACATR15},
- {"trcdvcvr0", TRCDVCVR0},
- {"trcdvcvr1", TRCDVCVR1},
- {"trcdvcvr2", TRCDVCVR2},
- {"trcdvcvr3", TRCDVCVR3},
- {"trcdvcvr4", TRCDVCVR4},
- {"trcdvcvr5", TRCDVCVR5},
- {"trcdvcvr6", TRCDVCVR6},
- {"trcdvcvr7", TRCDVCVR7},
- {"trcdvcmr0", TRCDVCMR0},
- {"trcdvcmr1", TRCDVCMR1},
- {"trcdvcmr2", TRCDVCMR2},
- {"trcdvcmr3", TRCDVCMR3},
- {"trcdvcmr4", TRCDVCMR4},
- {"trcdvcmr5", TRCDVCMR5},
- {"trcdvcmr6", TRCDVCMR6},
- {"trcdvcmr7", TRCDVCMR7},
- {"trccidcvr0", TRCCIDCVR0},
- {"trccidcvr1", TRCCIDCVR1},
- {"trccidcvr2", TRCCIDCVR2},
- {"trccidcvr3", TRCCIDCVR3},
- {"trccidcvr4", TRCCIDCVR4},
- {"trccidcvr5", TRCCIDCVR5},
- {"trccidcvr6", TRCCIDCVR6},
- {"trccidcvr7", TRCCIDCVR7},
- {"trcvmidcvr0", TRCVMIDCVR0},
- {"trcvmidcvr1", TRCVMIDCVR1},
- {"trcvmidcvr2", TRCVMIDCVR2},
- {"trcvmidcvr3", TRCVMIDCVR3},
- {"trcvmidcvr4", TRCVMIDCVR4},
- {"trcvmidcvr5", TRCVMIDCVR5},
- {"trcvmidcvr6", TRCVMIDCVR6},
- {"trcvmidcvr7", TRCVMIDCVR7},
- {"trccidcctlr0", TRCCIDCCTLR0},
- {"trccidcctlr1", TRCCIDCCTLR1},
- {"trcvmidcctlr0", TRCVMIDCCTLR0},
- {"trcvmidcctlr1", TRCVMIDCCTLR1},
- {"trcitctrl", TRCITCTRL},
- {"trcclaimset", TRCCLAIMSET},
- {"trcclaimclr", TRCCLAIMCLR},
+ {"trcprgctlr", TRCPRGCTLR, {}},
+ {"trcprocselr", TRCPROCSELR, {}},
+ {"trcconfigr", TRCCONFIGR, {}},
+ {"trcauxctlr", TRCAUXCTLR, {}},
+ {"trceventctl0r", TRCEVENTCTL0R, {}},
+ {"trceventctl1r", TRCEVENTCTL1R, {}},
+ {"trcstallctlr", TRCSTALLCTLR, {}},
+ {"trctsctlr", TRCTSCTLR, {}},
+ {"trcsyncpr", TRCSYNCPR, {}},
+ {"trcccctlr", TRCCCCTLR, {}},
+ {"trcbbctlr", TRCBBCTLR, {}},
+ {"trctraceidr", TRCTRACEIDR, {}},
+ {"trcqctlr", TRCQCTLR, {}},
+ {"trcvictlr", TRCVICTLR, {}},
+ {"trcviiectlr", TRCVIIECTLR, {}},
+ {"trcvissctlr", TRCVISSCTLR, {}},
+ {"trcvipcssctlr", TRCVIPCSSCTLR, {}},
+ {"trcvdctlr", TRCVDCTLR, {}},
+ {"trcvdsacctlr", TRCVDSACCTLR, {}},
+ {"trcvdarcctlr", TRCVDARCCTLR, {}},
+ {"trcseqevr0", TRCSEQEVR0, {}},
+ {"trcseqevr1", TRCSEQEVR1, {}},
+ {"trcseqevr2", TRCSEQEVR2, {}},
+ {"trcseqrstevr", TRCSEQRSTEVR, {}},
+ {"trcseqstr", TRCSEQSTR, {}},
+ {"trcextinselr", TRCEXTINSELR, {}},
+ {"trccntrldvr0", TRCCNTRLDVR0, {}},
+ {"trccntrldvr1", TRCCNTRLDVR1, {}},
+ {"trccntrldvr2", TRCCNTRLDVR2, {}},
+ {"trccntrldvr3", TRCCNTRLDVR3, {}},
+ {"trccntctlr0", TRCCNTCTLR0, {}},
+ {"trccntctlr1", TRCCNTCTLR1, {}},
+ {"trccntctlr2", TRCCNTCTLR2, {}},
+ {"trccntctlr3", TRCCNTCTLR3, {}},
+ {"trccntvr0", TRCCNTVR0, {}},
+ {"trccntvr1", TRCCNTVR1, {}},
+ {"trccntvr2", TRCCNTVR2, {}},
+ {"trccntvr3", TRCCNTVR3, {}},
+ {"trcimspec0", TRCIMSPEC0, {}},
+ {"trcimspec1", TRCIMSPEC1, {}},
+ {"trcimspec2", TRCIMSPEC2, {}},
+ {"trcimspec3", TRCIMSPEC3, {}},
+ {"trcimspec4", TRCIMSPEC4, {}},
+ {"trcimspec5", TRCIMSPEC5, {}},
+ {"trcimspec6", TRCIMSPEC6, {}},
+ {"trcimspec7", TRCIMSPEC7, {}},
+ {"trcrsctlr2", TRCRSCTLR2, {}},
+ {"trcrsctlr3", TRCRSCTLR3, {}},
+ {"trcrsctlr4", TRCRSCTLR4, {}},
+ {"trcrsctlr5", TRCRSCTLR5, {}},
+ {"trcrsctlr6", TRCRSCTLR6, {}},
+ {"trcrsctlr7", TRCRSCTLR7, {}},
+ {"trcrsctlr8", TRCRSCTLR8, {}},
+ {"trcrsctlr9", TRCRSCTLR9, {}},
+ {"trcrsctlr10", TRCRSCTLR10, {}},
+ {"trcrsctlr11", TRCRSCTLR11, {}},
+ {"trcrsctlr12", TRCRSCTLR12, {}},
+ {"trcrsctlr13", TRCRSCTLR13, {}},
+ {"trcrsctlr14", TRCRSCTLR14, {}},
+ {"trcrsctlr15", TRCRSCTLR15, {}},
+ {"trcrsctlr16", TRCRSCTLR16, {}},
+ {"trcrsctlr17", TRCRSCTLR17, {}},
+ {"trcrsctlr18", TRCRSCTLR18, {}},
+ {"trcrsctlr19", TRCRSCTLR19, {}},
+ {"trcrsctlr20", TRCRSCTLR20, {}},
+ {"trcrsctlr21", TRCRSCTLR21, {}},
+ {"trcrsctlr22", TRCRSCTLR22, {}},
+ {"trcrsctlr23", TRCRSCTLR23, {}},
+ {"trcrsctlr24", TRCRSCTLR24, {}},
+ {"trcrsctlr25", TRCRSCTLR25, {}},
+ {"trcrsctlr26", TRCRSCTLR26, {}},
+ {"trcrsctlr27", TRCRSCTLR27, {}},
+ {"trcrsctlr28", TRCRSCTLR28, {}},
+ {"trcrsctlr29", TRCRSCTLR29, {}},
+ {"trcrsctlr30", TRCRSCTLR30, {}},
+ {"trcrsctlr31", TRCRSCTLR31, {}},
+ {"trcssccr0", TRCSSCCR0, {}},
+ {"trcssccr1", TRCSSCCR1, {}},
+ {"trcssccr2", TRCSSCCR2, {}},
+ {"trcssccr3", TRCSSCCR3, {}},
+ {"trcssccr4", TRCSSCCR4, {}},
+ {"trcssccr5", TRCSSCCR5, {}},
+ {"trcssccr6", TRCSSCCR6, {}},
+ {"trcssccr7", TRCSSCCR7, {}},
+ {"trcsscsr0", TRCSSCSR0, {}},
+ {"trcsscsr1", TRCSSCSR1, {}},
+ {"trcsscsr2", TRCSSCSR2, {}},
+ {"trcsscsr3", TRCSSCSR3, {}},
+ {"trcsscsr4", TRCSSCSR4, {}},
+ {"trcsscsr5", TRCSSCSR5, {}},
+ {"trcsscsr6", TRCSSCSR6, {}},
+ {"trcsscsr7", TRCSSCSR7, {}},
+ {"trcsspcicr0", TRCSSPCICR0, {}},
+ {"trcsspcicr1", TRCSSPCICR1, {}},
+ {"trcsspcicr2", TRCSSPCICR2, {}},
+ {"trcsspcicr3", TRCSSPCICR3, {}},
+ {"trcsspcicr4", TRCSSPCICR4, {}},
+ {"trcsspcicr5", TRCSSPCICR5, {}},
+ {"trcsspcicr6", TRCSSPCICR6, {}},
+ {"trcsspcicr7", TRCSSPCICR7, {}},
+ {"trcpdcr", TRCPDCR, {}},
+ {"trcacvr0", TRCACVR0, {}},
+ {"trcacvr1", TRCACVR1, {}},
+ {"trcacvr2", TRCACVR2, {}},
+ {"trcacvr3", TRCACVR3, {}},
+ {"trcacvr4", TRCACVR4, {}},
+ {"trcacvr5", TRCACVR5, {}},
+ {"trcacvr6", TRCACVR6, {}},
+ {"trcacvr7", TRCACVR7, {}},
+ {"trcacvr8", TRCACVR8, {}},
+ {"trcacvr9", TRCACVR9, {}},
+ {"trcacvr10", TRCACVR10, {}},
+ {"trcacvr11", TRCACVR11, {}},
+ {"trcacvr12", TRCACVR12, {}},
+ {"trcacvr13", TRCACVR13, {}},
+ {"trcacvr14", TRCACVR14, {}},
+ {"trcacvr15", TRCACVR15, {}},
+ {"trcacatr0", TRCACATR0, {}},
+ {"trcacatr1", TRCACATR1, {}},
+ {"trcacatr2", TRCACATR2, {}},
+ {"trcacatr3", TRCACATR3, {}},
+ {"trcacatr4", TRCACATR4, {}},
+ {"trcacatr5", TRCACATR5, {}},
+ {"trcacatr6", TRCACATR6, {}},
+ {"trcacatr7", TRCACATR7, {}},
+ {"trcacatr8", TRCACATR8, {}},
+ {"trcacatr9", TRCACATR9, {}},
+ {"trcacatr10", TRCACATR10, {}},
+ {"trcacatr11", TRCACATR11, {}},
+ {"trcacatr12", TRCACATR12, {}},
+ {"trcacatr13", TRCACATR13, {}},
+ {"trcacatr14", TRCACATR14, {}},
+ {"trcacatr15", TRCACATR15, {}},
+ {"trcdvcvr0", TRCDVCVR0, {}},
+ {"trcdvcvr1", TRCDVCVR1, {}},
+ {"trcdvcvr2", TRCDVCVR2, {}},
+ {"trcdvcvr3", TRCDVCVR3, {}},
+ {"trcdvcvr4", TRCDVCVR4, {}},
+ {"trcdvcvr5", TRCDVCVR5, {}},
+ {"trcdvcvr6", TRCDVCVR6, {}},
+ {"trcdvcvr7", TRCDVCVR7, {}},
+ {"trcdvcmr0", TRCDVCMR0, {}},
+ {"trcdvcmr1", TRCDVCMR1, {}},
+ {"trcdvcmr2", TRCDVCMR2, {}},
+ {"trcdvcmr3", TRCDVCMR3, {}},
+ {"trcdvcmr4", TRCDVCMR4, {}},
+ {"trcdvcmr5", TRCDVCMR5, {}},
+ {"trcdvcmr6", TRCDVCMR6, {}},
+ {"trcdvcmr7", TRCDVCMR7, {}},
+ {"trccidcvr0", TRCCIDCVR0, {}},
+ {"trccidcvr1", TRCCIDCVR1, {}},
+ {"trccidcvr2", TRCCIDCVR2, {}},
+ {"trccidcvr3", TRCCIDCVR3, {}},
+ {"trccidcvr4", TRCCIDCVR4, {}},
+ {"trccidcvr5", TRCCIDCVR5, {}},
+ {"trccidcvr6", TRCCIDCVR6, {}},
+ {"trccidcvr7", TRCCIDCVR7, {}},
+ {"trcvmidcvr0", TRCVMIDCVR0, {}},
+ {"trcvmidcvr1", TRCVMIDCVR1, {}},
+ {"trcvmidcvr2", TRCVMIDCVR2, {}},
+ {"trcvmidcvr3", TRCVMIDCVR3, {}},
+ {"trcvmidcvr4", TRCVMIDCVR4, {}},
+ {"trcvmidcvr5", TRCVMIDCVR5, {}},
+ {"trcvmidcvr6", TRCVMIDCVR6, {}},
+ {"trcvmidcvr7", TRCVMIDCVR7, {}},
+ {"trccidcctlr0", TRCCIDCCTLR0, {}},
+ {"trccidcctlr1", TRCCIDCCTLR1, {}},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0, {}},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1, {}},
+ {"trcitctrl", TRCITCTRL, {}},
+ {"trcclaimset", TRCCLAIMSET, {}},
+ {"trcclaimclr", TRCCLAIMCLR, {}},
// GICv3 registers
- {"icc_bpr1_el1", ICC_BPR1_EL1},
- {"icc_bpr0_el1", ICC_BPR0_EL1},
- {"icc_pmr_el1", ICC_PMR_EL1},
- {"icc_ctlr_el1", ICC_CTLR_EL1},
- {"icc_ctlr_el3", ICC_CTLR_EL3},
- {"icc_sre_el1", ICC_SRE_EL1},
- {"icc_sre_el2", ICC_SRE_EL2},
- {"icc_sre_el3", ICC_SRE_EL3},
- {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
- {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
- {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
- {"icc_seien_el1", ICC_SEIEN_EL1},
- {"icc_ap0r0_el1", ICC_AP0R0_EL1},
- {"icc_ap0r1_el1", ICC_AP0R1_EL1},
- {"icc_ap0r2_el1", ICC_AP0R2_EL1},
- {"icc_ap0r3_el1", ICC_AP0R3_EL1},
- {"icc_ap1r0_el1", ICC_AP1R0_EL1},
- {"icc_ap1r1_el1", ICC_AP1R1_EL1},
- {"icc_ap1r2_el1", ICC_AP1R2_EL1},
- {"icc_ap1r3_el1", ICC_AP1R3_EL1},
- {"ich_ap0r0_el2", ICH_AP0R0_EL2},
- {"ich_ap0r1_el2", ICH_AP0R1_EL2},
- {"ich_ap0r2_el2", ICH_AP0R2_EL2},
- {"ich_ap0r3_el2", ICH_AP0R3_EL2},
- {"ich_ap1r0_el2", ICH_AP1R0_EL2},
- {"ich_ap1r1_el2", ICH_AP1R1_EL2},
- {"ich_ap1r2_el2", ICH_AP1R2_EL2},
- {"ich_ap1r3_el2", ICH_AP1R3_EL2},
- {"ich_hcr_el2", ICH_HCR_EL2},
- {"ich_misr_el2", ICH_MISR_EL2},
- {"ich_vmcr_el2", ICH_VMCR_EL2},
- {"ich_vseir_el2", ICH_VSEIR_EL2},
- {"ich_lr0_el2", ICH_LR0_EL2},
- {"ich_lr1_el2", ICH_LR1_EL2},
- {"ich_lr2_el2", ICH_LR2_EL2},
- {"ich_lr3_el2", ICH_LR3_EL2},
- {"ich_lr4_el2", ICH_LR4_EL2},
- {"ich_lr5_el2", ICH_LR5_EL2},
- {"ich_lr6_el2", ICH_LR6_EL2},
- {"ich_lr7_el2", ICH_LR7_EL2},
- {"ich_lr8_el2", ICH_LR8_EL2},
- {"ich_lr9_el2", ICH_LR9_EL2},
- {"ich_lr10_el2", ICH_LR10_EL2},
- {"ich_lr11_el2", ICH_LR11_EL2},
- {"ich_lr12_el2", ICH_LR12_EL2},
- {"ich_lr13_el2", ICH_LR13_EL2},
- {"ich_lr14_el2", ICH_LR14_EL2},
- {"ich_lr15_el2", ICH_LR15_EL2}
-};
-
-const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
- {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+ {"icc_bpr1_el1", ICC_BPR1_EL1, {}},
+ {"icc_bpr0_el1", ICC_BPR0_EL1, {}},
+ {"icc_pmr_el1", ICC_PMR_EL1, {}},
+ {"icc_ctlr_el1", ICC_CTLR_EL1, {}},
+ {"icc_ctlr_el3", ICC_CTLR_EL3, {}},
+ {"icc_sre_el1", ICC_SRE_EL1, {}},
+ {"icc_sre_el2", ICC_SRE_EL2, {}},
+ {"icc_sre_el3", ICC_SRE_EL3, {}},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, {}},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, {}},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, {}},
+ {"icc_seien_el1", ICC_SEIEN_EL1, {}},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1, {}},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1, {}},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1, {}},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1, {}},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1, {}},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1, {}},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1, {}},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1, {}},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2, {}},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2, {}},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2, {}},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2, {}},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2, {}},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2, {}},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2, {}},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2, {}},
+ {"ich_hcr_el2", ICH_HCR_EL2, {}},
+ {"ich_misr_el2", ICH_MISR_EL2, {}},
+ {"ich_vmcr_el2", ICH_VMCR_EL2, {}},
+ {"ich_vseir_el2", ICH_VSEIR_EL2, {}},
+ {"ich_lr0_el2", ICH_LR0_EL2, {}},
+ {"ich_lr1_el2", ICH_LR1_EL2, {}},
+ {"ich_lr2_el2", ICH_LR2_EL2, {}},
+ {"ich_lr3_el2", ICH_LR3_EL2, {}},
+ {"ich_lr4_el2", ICH_LR4_EL2, {}},
+ {"ich_lr5_el2", ICH_LR5_EL2, {}},
+ {"ich_lr6_el2", ICH_LR6_EL2, {}},
+ {"ich_lr7_el2", ICH_LR7_EL2, {}},
+ {"ich_lr8_el2", ICH_LR8_EL2, {}},
+ {"ich_lr9_el2", ICH_LR9_EL2, {}},
+ {"ich_lr10_el2", ICH_LR10_EL2, {}},
+ {"ich_lr11_el2", ICH_LR11_EL2, {}},
+ {"ich_lr12_el2", ICH_LR12_EL2, {}},
+ {"ich_lr13_el2", ICH_LR13_EL2, {}},
+ {"ich_lr14_el2", ICH_LR14_EL2, {}},
+ {"ich_lr15_el2", ICH_LR15_EL2, {}},
+
+ // Cyclone registers
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, {AArch64::ProcCyclone}},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorsa_el1", LORSA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorea_el1", LOREA_EL1, {AArch64::HasV8_1aOps}},
+ {"lorn_el1", LORN_EL1, {AArch64::HasV8_1aOps}},
+ {"lorc_el1", LORC_EL1, {AArch64::HasV8_1aOps}},
+
+ // v8.1a "Virtualization host extensions" system registers
+ {"ttbr1_el2", TTBR1_EL2, {AArch64::HasV8_1aOps}},
+ {"contextidr_el2", CONTEXTIDR_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_tval_el2", CNTHV_TVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_cval_el2", CNTHV_CVAL_EL2, {AArch64::HasV8_1aOps}},
+ {"cnthv_ctl_el2", CNTHV_CTL_EL2, {AArch64::HasV8_1aOps}},
+ {"sctlr_el12", SCTLR_EL12, {AArch64::HasV8_1aOps}},
+ {"cpacr_el12", CPACR_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr0_el12", TTBR0_EL12, {AArch64::HasV8_1aOps}},
+ {"ttbr1_el12", TTBR1_EL12, {AArch64::HasV8_1aOps}},
+ {"tcr_el12", TCR_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr0_el12", AFSR0_EL12, {AArch64::HasV8_1aOps}},
+ {"afsr1_el12", AFSR1_EL12, {AArch64::HasV8_1aOps}},
+ {"esr_el12", ESR_EL12, {AArch64::HasV8_1aOps}},
+ {"far_el12", FAR_EL12, {AArch64::HasV8_1aOps}},
+ {"mair_el12", MAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"amair_el12", AMAIR_EL12, {AArch64::HasV8_1aOps}},
+ {"vbar_el12", VBAR_EL12, {AArch64::HasV8_1aOps}},
+ {"contextidr_el12", CONTEXTIDR_EL12, {AArch64::HasV8_1aOps}},
+ {"cntkctl_el12", CNTKCTL_EL12, {AArch64::HasV8_1aOps}},
+ {"cntp_tval_el02", CNTP_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_ctl_el02", CNTP_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntp_cval_el02", CNTP_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_tval_el02", CNTV_TVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_ctl_el02", CNTV_CTL_EL02, {AArch64::HasV8_1aOps}},
+ {"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}},
+ {"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}},
+ {"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}},
};
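
[Editor's note: since availability is now checked per query, one table answers differently for different subtargets; a gated name that fails the feature test behaves as if absent. Illustrative sketch, with the fallback spelling assumed from the generic-name code below:]

    // The v8.1a-gated ttbr1_el2 (0xe101) is only nameable with the feature on.
    bool Valid;
    AArch64SysReg::MRSMapper Mapper;
    FeatureBitset Plain, V81a;
    V81a.set(AArch64::HasV8_1aOps);
    Mapper.fromString("ttbr1_el2", V81a, Valid);   // Valid == true -> 0xe101
    Mapper.fromString("ttbr1_el2", Plain, Valid);  // Valid == false
    Mapper.toString(0xe101, Plain);                // generic form, e.g. "s3_4_c2_c0_1"
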
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+AArch64SysReg::SysRegMapper::fromString(StringRef Name,
+ const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
- return SysRegPairs[i].Value;
- }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Name == NameLower) {
- Valid = true;
- return CycloneSysRegPairs[i].Value;
- }
+ return SysRegMappings[i].Value;
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
- return InstPairs[i].Value;
+ return InstMappings[i].Value;
}
}
@@ -814,28 +850,20 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
+ const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Value == Bits) {
- return SysRegPairs[i].Name;
- }
- }
-
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Value == Bits) {
- return CycloneSysRegPairs[i].Name;
- }
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) {
+ return SysRegMappings[i].Name;
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Value == Bits) {
- return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].isValueEqual(Bits, FeatureBits)) {
+ return InstMappings[i].Name;
}
}
@@ -850,40 +878,40 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
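
[Editor's note: the fallback name is assembled from the five operand fields packed into the 16-bit encoding, matching the 2+3+4+4+3 bit groupings shown in the enum comments in the header below. A worked example with PAN (0xc213, commented "11 000 0100 0010 011"):]

    // Worked example of the field split used by the generic-name fallback.
    uint32_t Bits = 0xc213;             // PAN
    uint32_t Op0 = (Bits >> 14) & 0x3;  // 0b11   -> 3
    uint32_t Op1 = (Bits >> 11) & 0x7;  // 0b000  -> 0
    uint32_t CRn = (Bits >> 7)  & 0xf;  // 0b0100 -> 4
    uint32_t CRm = (Bits >> 3)  & 0xf;  // 0b0010 -> 2
    uint32_t Op2 = Bits & 0x7;          // 0b011  -> 3
    // Generic name: "s3_0_c4_c2_3"
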
-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
- {"ipas2e1is", IPAS2E1IS},
- {"ipas2le1is", IPAS2LE1IS},
- {"vmalle1is", VMALLE1IS},
- {"alle2is", ALLE2IS},
- {"alle3is", ALLE3IS},
- {"vae1is", VAE1IS},
- {"vae2is", VAE2IS},
- {"vae3is", VAE3IS},
- {"aside1is", ASIDE1IS},
- {"vaae1is", VAAE1IS},
- {"alle1is", ALLE1IS},
- {"vale1is", VALE1IS},
- {"vale2is", VALE2IS},
- {"vale3is", VALE3IS},
- {"vmalls12e1is", VMALLS12E1IS},
- {"vaale1is", VAALE1IS},
- {"ipas2e1", IPAS2E1},
- {"ipas2le1", IPAS2LE1},
- {"vmalle1", VMALLE1},
- {"alle2", ALLE2},
- {"alle3", ALLE3},
- {"vae1", VAE1},
- {"vae2", VAE2},
- {"vae3", VAE3},
- {"aside1", ASIDE1},
- {"vaae1", VAAE1},
- {"alle1", ALLE1},
- {"vale1", VALE1},
- {"vale2", VALE2},
- {"vale3", VALE3},
- {"vmalls12e1", VMALLS12E1},
- {"vaale1", VAALE1}
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
+ {"ipas2e1is", IPAS2E1IS, {}},
+ {"ipas2le1is", IPAS2LE1IS, {}},
+ {"vmalle1is", VMALLE1IS, {}},
+ {"alle2is", ALLE2IS, {}},
+ {"alle3is", ALLE3IS, {}},
+ {"vae1is", VAE1IS, {}},
+ {"vae2is", VAE2IS, {}},
+ {"vae3is", VAE3IS, {}},
+ {"aside1is", ASIDE1IS, {}},
+ {"vaae1is", VAAE1IS, {}},
+ {"alle1is", ALLE1IS, {}},
+ {"vale1is", VALE1IS, {}},
+ {"vale2is", VALE2IS, {}},
+ {"vale3is", VALE3IS, {}},
+ {"vmalls12e1is", VMALLS12E1IS, {}},
+ {"vaale1is", VAALE1IS, {}},
+ {"ipas2e1", IPAS2E1, {}},
+ {"ipas2le1", IPAS2LE1, {}},
+ {"vmalle1", VMALLE1, {}},
+ {"alle2", ALLE2, {}},
+ {"alle3", ALLE3, {}},
+ {"vae1", VAE1, {}},
+ {"vae2", VAE2, {}},
+ {"vae3", VAE3, {}},
+ {"aside1", ASIDE1, {}},
+ {"vaae1", VAAE1, {}},
+ {"alle1", ALLE1, {}},
+ {"vale1", VALE1, {}},
+ {"vale2", VALE2, {}},
+ {"vale3", VALE3, {}},
+ {"vmalls12e1", VMALLS12E1, {}},
+ {"vaale1", VAALE1, {}}
};
AArch64TLBI::TLBIMapper::TLBIMapper()
- : AArch64NamedImmMapper(TLBIPairs, 0) {}
+ : AArch64NamedImmMapper(TLBIMappings, 0) {}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 6d0337c..7125f14 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -280,22 +281,45 @@ struct AArch64NamedImmMapper {
struct Mapping {
const char *Name;
uint32_t Value;
+    // Set of features for which this mapping is available.
+    // An empty FeatureBitSet means the mapping is always available.
+ FeatureBitset FeatureBitSet;
+
+ bool isNameEqual(std::string Other,
+ const FeatureBitset& FeatureBits) const {
+ if (FeatureBitSet.any() &&
+ (FeatureBitSet & FeatureBits).none())
+ return false;
+ return Name == Other;
+ }
+
+ bool isValueEqual(uint32_t Other,
+ const FeatureBitset& FeatureBits) const {
+ if (FeatureBitSet.any() &&
+ (FeatureBitSet & FeatureBits).none())
+ return false;
+ return Value == Other;
+ }
};
template<int N>
- AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
- : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+ AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+ : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
- StringRef toString(uint32_t Value, bool &Valid) const;
- uint32_t fromString(StringRef Name, bool &Valid) const;
+  // Maps a value to its string form, subject to availability under the given FeatureBits
+ StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
+  // Maps a string to its value, subject to availability under the given FeatureBits
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
  /// a simple immediate. Currently the only valid forms are ranges [0, N); an
  /// N of 0 indicates that no immediate syntax-form is allowed.
bool validImm(uint32_t Value) const;
protected:
- const Mapping *Pairs;
- size_t NumPairs;
+ const Mapping *Mappings;
+ size_t NumMappings;
uint32_t TooBigImm;
};
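
[Editor's note: the two-step test in isNameEqual/isValueEqual gives an empty FeatureBitSet the meaning "always available", while a non-empty set requires at least one overlapping bit with the query. Illustrative check, using the struct as declared above:]

    // Gating behaviour: empty set -> ungated; non-empty set -> needs overlap.
    AArch64NamedImmMapper::Mapping Gated = {"pan", 0x04, {AArch64::HasV8_1aOps}};
    AArch64NamedImmMapper::Mapping Open  = {"sy", 0xf, {}};
    FeatureBitset None, V81a;
    V81a.set(AArch64::HasV8_1aOps);
    Gated.isNameEqual("pan", None);  // false: feature missing
    Gated.isNameEqual("pan", V81a);  // true
    Open.isNameEqual("sy", None);    // true: always available
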
@@ -317,7 +341,7 @@ namespace AArch64AT {
};
struct ATMapper : AArch64NamedImmMapper {
- const static Mapping ATPairs[];
+ const static Mapping ATMappings[];
ATMapper();
};
@@ -341,7 +365,7 @@ namespace AArch64DB {
};
struct DBarrierMapper : AArch64NamedImmMapper {
- const static Mapping DBarrierPairs[];
+ const static Mapping DBarrierMappings[];
DBarrierMapper();
};
@@ -361,7 +385,7 @@ namespace AArch64DC {
};
struct DCMapper : AArch64NamedImmMapper {
- const static Mapping DCPairs[];
+ const static Mapping DCMappings[];
DCMapper();
};
@@ -378,7 +402,7 @@ namespace AArch64IC {
struct ICMapper : AArch64NamedImmMapper {
- const static Mapping ICPairs[];
+ const static Mapping ICMappings[];
ICMapper();
};
@@ -394,7 +418,7 @@ namespace AArch64ISB {
SY = 0xf
};
struct ISBMapper : AArch64NamedImmMapper {
- const static Mapping ISBPairs[];
+ const static Mapping ISBMappings[];
ISBMapper();
};
@@ -424,7 +448,7 @@ namespace AArch64PRFM {
};
struct PRFMMapper : AArch64NamedImmMapper {
- const static Mapping PRFMPairs[];
+ const static Mapping PRFMMappings[];
PRFMMapper();
};
@@ -435,11 +459,14 @@ namespace AArch64PState {
Invalid = -1,
SPSel = 0x05,
DAIFSet = 0x1e,
- DAIFClr = 0x1f
+ DAIFClr = 0x1f,
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ PAN = 0x04,
};
struct PStateMapper : AArch64NamedImmMapper {
- const static Mapping PStatePairs[];
+ const static Mapping PStateMappings[];
PStateMapper();
};
@@ -1122,11 +1149,48 @@ namespace AArch64SysReg {
ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111
- };
- // Cyclone specific system registers
- enum CycloneSysRegValues {
- CPM_IOACC_CTL_EL3 = 0xff90
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ PAN = 0xc213, // 11 000 0100 0010 011
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ LORSA_EL1 = 0xc520, // 11 000 1010 0100 000
+ LOREA_EL1 = 0xc521, // 11 000 1010 0100 001
+ LORN_EL1 = 0xc522, // 11 000 1010 0100 010
+ LORC_EL1 = 0xc523, // 11 000 1010 0100 011
+ LORID_EL1 = 0xc527, // 11 000 1010 0100 111
+
+ // v8.1a "Virtualization host extensions" system registers
+ TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001
+ CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001
+ CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000
+ CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010
+ CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001
+ SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000
+ CPACR_EL12 = 0xe882, // 11 101 0001 0000 010
+ TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000
+ TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001
+ TCR_EL12 = 0xe902, // 11 101 0010 0000 010
+ AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000
+ AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001
+ ESR_EL12 = 0xea90, // 11 101 0101 0010 000
+ FAR_EL12 = 0xeb00, // 11 101 0110 0000 000
+ MAIR_EL12 = 0xed10, // 11 101 1010 0010 000
+ AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000
+ VBAR_EL12 = 0xee00, // 11 101 1100 0000 000
+ CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001
+ CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000
+ CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000
+ CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001
+ CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010
+ CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000
+ CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001
+ CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010
+ SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
+ ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+
+ // Cyclone specific system registers
+ CPM_IOACC_CTL_EL3 = 0xff90,
};
// Note that these do not inherit from AArch64NamedImmMapper. This class is
@@ -1134,26 +1198,25 @@ namespace AArch64SysReg {
// burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const AArch64NamedImmMapper::Mapping SysRegPairs[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegMappings[];
- const AArch64NamedImmMapper::Mapping *InstPairs;
- size_t NumInstPairs;
- uint64_t FeatureBits;
+ const AArch64NamedImmMapper::Mapping *InstMappings;
+ size_t NumInstMappings;
- SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
- uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits) const;
+ SysRegMapper() { }
+ uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
+ bool &Valid) const;
+ std::string toString(uint32_t Bits, const FeatureBitset& FeatureBits) const;
};
struct MSRMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MSRPairs[];
- MSRMapper(uint64_t FeatureBits);
+ static const AArch64NamedImmMapper::Mapping MSRMappings[];
+ MSRMapper();
};
struct MRSMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MRSPairs[];
- MRSMapper(uint64_t FeatureBits);
+ static const AArch64NamedImmMapper::Mapping MRSMappings[];
+ MRSMapper();
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
@@ -1197,7 +1260,7 @@ namespace AArch64TLBI {
};
struct TLBIMapper : AArch64NamedImmMapper {
- const static Mapping TLBIPairs[];
+ const static Mapping TLBIMappings[];
TLBIMapper();
};
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 387f1f6..7a1865c 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -27,12 +27,15 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
@@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (!(STI.isCortexA15() && STI.hasNEON()))
+ return false;
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
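
[Editor's note: the rewritten entry point shows the usual way to retire a pass on unsupported subtargets: query the typed subtarget once, bail out before touching anything, and pull TII/TRI from the same object. A distilled sketch; the pass name is hypothetical:]

    // Hypothetical pass illustrating the early-exit gating pattern above.
    bool MyA15Pass::runOnMachineFunction(MachineFunction &Fn) {
      const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
      if (!(STI.isCortexA15() && STI.hasNEON()))
        return false;                // leave the function unmodified
      TII = STI.getInstrInfo();      // typed subtarget: no static_cast needed
      TRI = STI.getRegisterInfo();
      // ... rewrite instructions, tracking whether anything changed ...
      return Modified;
    }
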
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 02db53a..d3cc068 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -34,16 +34,12 @@ FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
-FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
-
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 244014b..c7ea18a 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
+def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features.">;
+
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -162,9 +165,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
+ "Support ARM v6k instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6MOps, FeatureThumb2]>;
+ [HasV6MOps, HasV6KOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
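
The implied-features lists above form a chain: enabling HasV6T2Ops now also enables HasV6KOps and, transitively, HasV6Ops, so code can test the weakest capability it needs. A hedged illustration (the helper is ours; the predicate comes from the generated ARMSubtarget):

    // Any v6t2 or v7 core satisfies this, because v6t2 implies v6k above.
    static bool canUseV6KExtensions(const ARMSubtarget &STI) {
      return STI.hasV6KOps();
    }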
@@ -172,6 +178,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions",
+ [HasV8Ops, FeatureAClass, FeatureCRC]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -249,6 +258,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureTrustZone, FeatureT2XtPk,
FeatureCrypto, FeatureCRC]>;
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors",
+ [FeatureHWDiv,
+ FeatureAvoidPartialCPSR,
+ FeatureDSPThumb2, FeatureT2XtPk,
+ HasV7Ops, FeatureDB, FeatureHasRAS,
+ FeatureRClass]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc,
@@ -315,16 +332,24 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+ FeatureDB, FeatureMClass]>;
+
+// V6K Processors.
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
@@ -377,6 +402,16 @@ def : ProcessorModel<"krait", CortexA9Model,
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: R4 currently has the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4", CortexA8Model,
+ [ProcR4]>;
+
+// FIXME: R4F currently has the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4f", CortexA8Model,
+ [ProcR4,
+ FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+
// FIXME: R5 currently has the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
@@ -384,10 +419,20 @@ def : ProcessorModel<"cortex-r5", CortexA8Model,
FeatureHasRAS, FeatureVFPOnlySP,
FeatureD16, FeatureRClass]>;
+// FIXME: R7 currently has the same ProcessorModel as A8 and is modelled as an R5.
+def : ProcessorModel<"cortex-r7", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureD16, FeatureMP, FeatureRClass]>;
+
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureMClass]>;
+def : ProcNoItin<"sc300", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureMClass]>;
// V7EM Processors.
def : ProcNoItin<"cortex-m4", [HasV7Ops,
@@ -416,6 +461,10 @@ def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
FeatureDB, FeatureFPARMv8,
FeatureNEON, FeatureDSPThumb2]>;
+// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
+def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
// Cyclone is very similar to swift
def : ProcessorModel<"cyclone", SwiftModel,
@@ -444,7 +493,15 @@ def ARMInstrInfo : InstrInfo;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
+ let AssemblyWriters = [ARMAsmWriter];
}
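
The new ARMAsmWriter record asks TableGen for subtarget-aware printing: with PassSubtarget set, the generated printer receives the MCSubtargetInfo alongside each instruction. The expected shape of the generated entry point, shown as a sketch rather than a verbatim quote of ARMGenAsmWriter.inc:

    // With PassSubtarget = 1 the auto-generated printer takes the
    // subtarget explicitly, so printing decisions can vary per function
    // instead of being fixed when the printer is constructed.
    void ARMInstPrinter::printInstruction(const MCInst *MI,
                                          const MCSubtargetInfo &STI,
                                          raw_ostream &O);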
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index b17d4aa..04503b8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -15,7 +15,6 @@
#include "ARMAsmPrinter.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMFPUName.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -44,6 +43,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -57,27 +57,31 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false) {}
+
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
// of the function.
if (!InConstantPool)
return;
InConstantPool = false;
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
- OutStreamer.EmitAssemblerFlag(MCAF_Code16);
- OutStreamer.EmitThumbFunc(CurrentFnSym);
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer->EmitThumbFunc(CurrentFnSym);
}
- OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer->EmitLabel(CurrentFnSym);
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -90,7 +94,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
: MCSymbolRefExpr::VK_None),
OutContext);
- OutStreamer.EmitValue(E, Size);
+ OutStreamer->EmitValue(E, Size);
}
/// runOnMachineFunction - This uses the EmitInstruction()
@@ -99,6 +103,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AFI = MF.getInfo<ARMFunctionInfo>();
MCP = MF.getConstantPool();
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
@@ -108,15 +113,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Scl);
- OutStreamer.EmitCOFFSymbolType(Type);
- OutStreamer.EndCOFFSymbolDef();
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer->EmitCOFFSymbolType(Type);
+ OutStreamer->EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
@@ -124,11 +126,11 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
if (! ThumbIndirectPads.empty()) {
- OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
EmitAlignment(1);
for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) {
- OutStreamer.EmitLabel(ThumbIndirectPads[i].second);
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
+ OutStreamer->EmitLabel(ThumbIndirectPads[i].second);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
.addReg(ThumbIndirectPads[i].first)
// Add predicate operands.
.addImm(ARMCC::AL)
@@ -142,7 +144,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
unsigned TF = MO.getTargetFlags();
@@ -163,11 +165,9 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_Immediate: {
int64_t Imm = MO.getImm();
O << '#';
- if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
- (TF == ARMII::MO_LO16))
+ if (TF == ARMII::MO_LO16)
O << ":lower16:";
- else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
- (TF == ARMII::MO_HI16))
+ else if (TF == ARMII::MO_HI16)
O << ":upper16:";
O << Imm;
break;
@@ -177,11 +177,9 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
return;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
- if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
- (TF & ARMII::MO_LO16))
+ if (TF & ARMII::MO_LO16)
O << ":lower16:";
- else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
- (TF & ARMII::MO_HI16))
+ else if (TF & ARMII::MO_HI16)
O << ":upper16:";
O << *GetARMGVSymbol(GV, TF);
@@ -199,21 +197,21 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
//===--------------------------------------------------------------------===//
MCSymbol *ARMAsmPrinter::
-GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+GetARMJTIPICJumpTableLabel(unsigned uid) const {
+ const DataLayout *DL = TM.getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
- << getFunctionNumber() << '_' << uid << '_' << uid2;
- return OutContext.GetOrCreateSymbol(Name.str());
+ << getFunctionNumber() << '_' << uid;
+ return OutContext.getOrCreateSymbol(Name);
}
MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.getOrCreateSymbol(Name);
}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -417,7 +415,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
static bool isThumb(const MCSubtargetInfo& STI) {
- return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ return STI.getFeatureBits()[ARM::ModeThumb];
}
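
This one-line change is part of the merge-wide switch from a uint64_t feature mask to the std::bitset-backed FeatureBitset, which lifts the 64-feature ceiling per target. A hypothetical helper contrasting the two idioms:

    // ARM::ModeThumb used to be a 64-bit mask; it is now a bit index.
    static bool hasFeature(const MCSubtargetInfo &STI, unsigned Bit) {
      // was: return (STI.getFeatureBits() & Mask) != 0;
      return STI.getFeatureBits()[Bit];
    }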
void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
@@ -426,79 +424,28 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
// the start mode, then restore the start mode.
const bool WasThumb = isThumb(StartInfo);
if (!EndInfo || WasThumb != isThumb(*EndInfo)) {
- OutStreamer.EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
+ OutStreamer->EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
}
}
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
- Reloc::Model RelocM = TM.getRelocationModel();
- if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
- // Declare all the text sections up front (before the DWARF sections
- // emitted by AsmPrinter::doInitialization) so the assembler will keep
- // them together at the beginning of the object file. This helps
- // avoid out-of-range branches that are due a fundamental limitation of
- // the way symbol offsets are encoded with the current Darwin ARM
- // relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(
- getObjFileLowering());
-
- // Collect the set of sections our functions will go into.
- SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
- SmallPtrSet<const MCSection *, 8> > TextSections;
- // Default text section comes first.
- TextSections.insert(TLOFMacho.getTextSection());
- // Now any user defined text sections from function attributes.
- for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM));
- // Now the coalescable sections.
- TextSections.insert(TLOFMacho.getTextCoalSection());
- TextSections.insert(TLOFMacho.getConstTextCoalSection());
-
- // Emit the sections in the .s file header to fix the order.
- for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
- OutStreamer.SwitchSection(TextSections[i]);
-
- if (RelocM == Reloc::DynamicNoPIC) {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__symbol_stub4",
- MachO::S_SYMBOL_STUBS,
- 12, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- } else {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
- MachO::S_SYMBOL_STUBS,
- 16, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- }
- const MCSection *StaticInitSect =
- OutContext.getMachOSection("__TEXT", "__StaticInit",
- MachO::S_REGULAR |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- OutStreamer.SwitchSection(StaticInitSect);
- }
-
- // Compiling with debug info should not affect the code
- // generation. Ensure the cstring section comes before the
- // optional __DWARF secion. Otherwise, PC-relative loads would
- // have to use different instruction sequences at "-g" in order to
- // reach global data in the same object file.
- OutStreamer.SwitchSection(getObjFileLowering().getCStringSection());
- }
-
+ Triple TT(TM.getTargetTriple());
// Use unified assembler syntax.
- OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
+ OutStreamer->EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
- if (Subtarget->isTargetELF())
+ if (TT.isOSBinFormatELF())
emitAttributes();
- if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb())
- OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ // Use the triple's architecture and subarchitecture to determine
+ // if we're thumb for the purposes of the top level code16 assembler
+ // flag.
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ if (!M.getModuleInlineAsm().empty() && isThumb)
+ OutStreamer->EmitAssemblerFlag(MCAF_Code16);
}
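
The same four-way triple test reappears below in ARMAsmPrinter.h's getISAEncoding(); if it grows more callers it could be factored into a helper along these lines (our suggestion, not part of the patch):

    // Thumb-only triples: the thumb/thumbeb architectures plus the
    // M-profile sub-architectures, which have no ARM mode at all.
    static bool isThumbOnlyTriple(const Triple &TT) {
      return TT.getArch() == Triple::thumb ||
             TT.getArch() == Triple::thumbeb ||
             TT.getSubArch() == Triple::ARMSubArch_v7m ||
             TT.getSubArch() == Triple::ARMSubArch_v6m;
    }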
static void
@@ -526,7 +473,8 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+ if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -538,26 +486,26 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
for (auto &Stub : Stubs)
- emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
for (auto &Stub : Stubs)
- emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -565,29 +513,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- }
-
- // Emit a .data.rel section containing any stubs that were created.
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (auto &stub: Stubs) {
- OutStreamer.EmitLabel(stub.first);
- OutStreamer.EmitSymbolValue(stub.second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
}
@@ -626,75 +552,101 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
}
void ARMAsmPrinter::emitAttributes() {
- MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09");
ATS.switchVendor("aeabi");
- std::string CPUString = Subtarget->getCPUString();
-
- // FIXME: remove krait check when GNU tools support krait cpu
- if (CPUString != "generic" && CPUString != "krait")
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ // Compute ARM ELF Attributes based on the default subtarget that
+ // we'd have constructed. The existing ARM behavior isn't LTO clean
+ // anyhow.
+ // FIXME: For ifunc related functions we could iterate over and look
+ // for a feature string that doesn't match the default one.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
+ else
+ ArchFS = FS;
+ }
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+ const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
+
+ std::string CPUString = STI.getCPUString();
+
+ if (CPUString.find("generic") != 0) { // CPUString doesn't start with "generic"
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.isKrait()) {
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We treat krait as a "cortex-a9" + hwdiv CPU.
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasDivide() || STI.hasDivideInARMMode())
+ ATS.emitArchExtension(ARM::AEK_HWDIV);
+ } else
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
- getArchForCPU(CPUString, Subtarget));
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
// Tag_CPU_arch_profile must have the default value of 0 when "Architecture
// profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (Subtarget->hasV7Ops()) {
- if (Subtarget->isAClass()) {
+ if (STI.hasV7Ops()) {
+ if (STI.isAClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::ApplicationProfile);
- } else if (Subtarget->isRClass()) {
+ } else if (STI.isRClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::RealTimeProfile);
- } else if (Subtarget->isMClass()) {
+ } else if (STI.isMClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::MicroControllerProfile);
}
}
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
- ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
- if (Subtarget->isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- } else if (Subtarget->hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ STI.hasARMOps() ? ARMBuildAttrs::Allowed
+ : ARMBuildAttrs::Not_Allowed);
+ if (STI.isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ } else if (STI.hasThumb2()) {
ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
ARMBuildAttrs::AllowThumb32);
}
- if (Subtarget->hasNEON()) {
+ if (STI.hasNEON()) {
/* NEON is not exactly a VFP architecture, but GAS emits one of
* neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasFPARMv8()) {
- if (Subtarget->hasCrypto())
- ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
+ if (STI.hasFPARMv8()) {
+ if (STI.hasCrypto())
+ ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
else
- ATS.emitFPU(ARM::NEON_FP_ARMV8);
- }
- else if (Subtarget->hasVFP4())
- ATS.emitFPU(ARM::NEON_VFPV4);
+ ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasVFP4())
+ ATS.emitFPU(ARM::FK_NEON_VFPV4);
else
- ATS.emitFPU(ARM::NEON);
+ ATS.emitFPU(ARM::FK_NEON);
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (Subtarget->hasV8Ops())
+ if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8);
+ STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
+ ARMBuildAttrs::AllowNeonARMv8);
} else {
- if (Subtarget->hasFPARMv8())
+ if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8);
- else if (Subtarget->hasVFP4())
- ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
- else if (Subtarget->hasVFP3())
- ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
- else if (Subtarget->hasVFP2())
- ATS.emitFPU(ARM::VFPV2);
+ ATS.emitFPU(STI.hasD16() ? ARM::FK_FPV5_D16 : ARM::FK_FP_ARMV8);
+ else if (STI.hasVFP4())
+ ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV4_D16 : ARM::FK_VFPV4);
+ else if (STI.hasVFP3())
+ ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
+ else if (STI.hasVFP2())
+ ATS.emitFPU(ARM::FK_VFPV2);
}
if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -715,26 +667,24 @@ void ARMAsmPrinter::emitAttributes() {
if (!TM.Options.UnsafeFPMath) {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::IEEEDenormals);
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
// If the user has permitted this code to choose the IEEE 754
// rounding at run-time, emit the rounding attribute.
if (TM.Options.HonorSignDependentRoundingFPMathOption)
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
} else {
- if (!Subtarget->hasVFP2()) {
+ if (!STI.hasVFP2()) {
// When the target doesn't have an FPU (by design or
// intention), the assumptions made on the software support
// mirror that of the equivalent hardware support *if it
// existed*. For v7 and better we indicate that denormals are
// flushed preserving sign, and for V6 we indicate that
// denormals are flushed to positive zero.
- if (Subtarget->hasV7Ops())
+ if (STI.hasV7Ops())
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::PreserveFPSign);
- } else if (Subtarget->hasVFP3()) {
+ } else if (STI.hasVFP3()) {
// In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
// the sign bit of the zero matches the sign bit of the input or
// result that is being flushed to zero.
@@ -758,7 +708,7 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEE754);
- if (Subtarget->allowsUnalignedMem())
+ if (STI.allowsUnalignedMem())
ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
ARMBuildAttrs::Allowed);
else
@@ -771,18 +721,18 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
// ABI_HardFP_use attribute to indicate single precision FP.
- if (Subtarget->isFPOnlySP())
+ if (STI.isFPOnlySP())
ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
ARMBuildAttrs::HardFPSinglePrecision);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
// FIXME: Should we signal R9 usage?
- if (Subtarget->hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+ if (STI.hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
// FIXME: To support emitting this build attribute as GCC does, the
// -mfp16-format option and associated plumbing must be
@@ -791,8 +741,8 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (Subtarget->hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+ if (STI.hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
// Hardware divide in ARM mode is part of base arch, starting from ARMv8.
// If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
@@ -800,8 +750,8 @@ void ARMAsmPrinter::emitAttributes() {
// arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
// AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
// otherwise, the default value (AllowDIVIfExists) applies.
- if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+ if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
@@ -833,22 +783,20 @@ void ARMAsmPrinter::emitAttributes() {
// it as another callee-saved register, but not as SB or a TLS pointer; it
// would instead be nicer to push this from the frontend as metadata, as we do
// for the wchar and enum size tags
- if (Subtarget->isR9Reserved())
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
- ARMBuildAttrs::R9Reserved);
+ if (STI.isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
else
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
- ARMBuildAttrs::R9IsGPR);
-
- if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (Subtarget->hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (Subtarget->hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+
+ if (STI.hasTrustZone() && STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (STI.hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
ATS.finishAttributeSection();
}
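
Condensing the emitAttributes() rewrite above: build attributes are now derived from a subtarget reconstructed out of the TargetMachine's triple, CPU, and feature string, rather than from whatever Subtarget the last function left cached. A non-authoritative sketch of the setup, using the patch's own identifiers:

    // Inside emitAttributes(): rebuild the full feature string first.
    StringRef TT = TM.getTargetTriple();
    StringRef CPU = TM.getTargetCPU();
    StringRef FS = TM.getTargetFeatureString();
    std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
    if (!FS.empty())
      ArchFS = ArchFS.empty() ? FS.str()
                              : (Twine(ArchFS) + "," + FS).str();
    // A module-level subtarget, independent of per-function codegen state.
    const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());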
@@ -858,7 +806,7 @@ void ARMAsmPrinter::emitAttributes() {
static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
unsigned LabelId, MCContext &Ctx) {
- MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
return Label;
}
@@ -908,7 +856,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
Name = "__imp_";
getNameWithPrefix(Name, GV);
- return OutContext.GetOrCreateSymbol(Name);
+ return OutContext.getOrCreateSymbol(Name);
} else if (Subtarget->isTargetELF()) {
return getSymbol(GV);
}
@@ -917,18 +865,14 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
- int Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout *DL = TM.getDataLayout();
+ int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
- MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ MCSym = getCurExceptionSym();
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
@@ -968,14 +912,14 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
if (ACPV->mustAddCurrentAddress()) {
// We want "(<expr> - .)", but MC doesn't have a concept of the '.'
// label, so just emit a local label at the current position and reference that instead.
- MCSymbol *DotSym = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(DotSym);
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
}
Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
}
- OutStreamer.EmitValue(Expr, Size);
+ OutStreamer->EmitValue(Expr, Size);
}
void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
@@ -987,15 +931,14 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
OpNum = 3;
const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
// Emit a label for the jump table.
- MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
- OutStreamer.EmitLabel(JTISymbol);
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
// Mark the jump table as data-in-code.
- OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1023,21 +966,20 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
else if (AFI->isThumbFunction())
Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
OutContext);
- OutStreamer.EmitValue(Expr, 4);
+ OutStreamer->EmitValue(Expr, 4);
}
// Mark the end of jump table data-in-code region.
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
- MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
- OutStreamer.EmitLabel(JTISymbol);
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+ OutStreamer->EmitLabel(JTISymbol);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1047,11 +989,11 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
if (MI->getOpcode() == ARM::t2TBB_JT) {
OffsetWidth = 1;
// Mark the jump table as data-in-code.
- OutStreamer.EmitDataRegion(MCDR_DataRegionJT8);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT8);
} else if (MI->getOpcode() == ARM::t2TBH_JT) {
OffsetWidth = 2;
// Mark the jump table as data-in-code.
- OutStreamer.EmitDataRegion(MCDR_DataRegionJT16);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT16);
}
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
@@ -1060,7 +1002,7 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
.addExpr(MBBSymbolExpr)
.addImm(ARMCC::AL)
.addReg(0));
@@ -1081,20 +1023,20 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
OutContext);
- OutStreamer.EmitValue(Expr, OffsetWidth);
+ OutStreamer->EmitValue(Expr, OffsetWidth);
}
// Mark the end of jump table data-in-code region. 32-bit offsets use
// actual branch instructions here, so we don't mark those as a data-region
// at all.
if (OffsetWidth != 4)
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
}
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
- MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -1235,11 +1177,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
@@ -1249,7 +1191,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitUnwindingInstruction(MI);
// Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
@@ -1265,8 +1207,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::t2LEApcrel: {
// FIXME: Need to also handle globals and externals
MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
- EmitToStreamer(OutStreamer, MCInstBuilder(MI->getOpcode() ==
- ARM::t2LEApcrel ? ARM::t2ADR
+ EmitToStreamer(*OutStreamer, MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
: (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
: ARM::ADR))
.addReg(MI->getOperand(0).getReg())
@@ -1280,23 +1222,22 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tLEApcrelJT:
case ARM::t2LEApcrelJT: {
MCSymbol *JTIPICSymbol =
- GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
- MI->getOperand(2).getImm());
- EmitToStreamer(OutStreamer, MCInstBuilder(MI->getOpcode() ==
- ARM::t2LEApcrelJT ? ARM::t2ADR
+ GetARMJTIPICJumpTableLabel(MI->getOperand(1).getIndex());
+ EmitToStreamer(*OutStreamer, MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
: (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
: ARM::ADR))
.addReg(MI->getOperand(0).getReg())
.addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
// Add predicate operands.
- .addImm(MI->getOperand(3).getImm())
- .addReg(MI->getOperand(4).getReg()));
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
return;
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
case ARM::BX_CALL: {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
.addReg(ARM::LR)
.addReg(ARM::PC)
// Add predicate operands.
@@ -1305,7 +1246,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::BX)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
.addReg(MI->getOperand(0).getReg()));
return;
}
@@ -1329,19 +1270,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
if (!TRegSym) {
- TRegSym = OutContext.CreateTempSymbol();
+ TRegSym = OutContext.createTempSymbol();
ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym));
}
// Create a link-saving branch to the Reg Indirect Jump Pad.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL)
// Predicate comes first here.
.addImm(ARMCC::AL).addReg(0)
.addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext)));
return;
}
case ARM::BMOVPCRX_CALL: {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
.addReg(ARM::LR)
.addReg(ARM::PC)
// Add predicate operands.
@@ -1350,7 +1291,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
@@ -1361,7 +1302,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::BMOVPCB_CALL: {
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr)
.addReg(ARM::LR)
.addReg(ARM::PC)
// Add predicate operands.
@@ -1375,7 +1316,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const unsigned TF = Op.getTargetFlags();
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
// Add predicate operands.
.addImm(ARMCC::AL)
@@ -1386,7 +1327,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
unsigned TF = MI->getOperand(1).getTargetFlags();
const GlobalValue *GV = MI->getOperand(1).getGlobal();
@@ -1403,14 +1344,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::CreateAdd(LabelSymExpr,
MCConstantExpr::Create(PCAdj, OutContext),
OutContext), OutContext), OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case ARM::MOVTi16_ga_pcrel:
@@ -1418,8 +1359,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
? ARM::MOVTi16 : ARM::t2MOVTi16);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
unsigned TF = MI->getOperand(2).getTargetFlags();
const GlobalValue *GV = MI->getOperand(2).getGlobal();
@@ -1436,13 +1377,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::CreateAdd(LabelSymExpr,
MCConstantExpr::Create(PCAdj, OutContext),
OutContext), OutContext), OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case ARM::tPICADD: {
@@ -1452,12 +1393,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(), MI->getOperand(2).getImm(),
- OutContext));
+ OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(),
+ OutContext));
// Form and emit the add.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(0).getReg())
.addReg(ARM::PC)
@@ -1473,12 +1415,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(), MI->getOperand(2).getImm(),
- OutContext));
+ OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(),
+ OutContext));
// Form and emit the add.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDrr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
.addReg(MI->getOperand(0).getReg())
.addReg(ARM::PC)
.addReg(MI->getOperand(1).getReg())
@@ -1504,9 +1447,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// a PC-relative address at the ldr instruction.
// Emit the label.
- OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(), MI->getOperand(2).getImm(),
- OutContext));
+ OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(),
+ OutContext));
// Form and emit the load
unsigned Opcode;
@@ -1522,7 +1466,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
}
- EmitToStreamer(OutStreamer, MCInstBuilder(Opcode)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opcode)
.addReg(MI->getOperand(0).getReg())
.addReg(ARM::PC)
.addReg(MI->getOperand(1).getReg())
@@ -1544,11 +1488,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
- OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ OutStreamer->EmitDataRegion(MCDR_DataRegion);
InConstantPool = true;
}
- OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+ OutStreamer->EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
if (MCPE.isMachineConstantPoolEntry())
@@ -1559,7 +1503,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
@@ -1572,7 +1516,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2TBB)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
@@ -1587,7 +1531,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2TBH)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
// Add predicate operands.
@@ -1606,15 +1550,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
ARM::MOVr : ARM::tMOVr;
TmpInst.setOpcode(Opc);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
// Add 's' bit operand (always reg0 for this)
if (Opc == ARM::MOVr)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
// Make sure the Thumb jump table is 4-byte aligned.
if (Opc == ARM::tMOVr)
@@ -1631,20 +1575,20 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->getOperand(1).getReg() == 0) {
// literal offset
TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createImm(MI->getOperand(2).getImm()));
} else {
TmpInst.setOpcode(ARM::LDRrs);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::createReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::createImm(0));
}
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ EmitToStreamer(*OutStreamer, TmpInst);
// Output the data for the jump table itself
EmitJumpTable(MI);
@@ -1653,7 +1597,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::BR_JTadd: {
// Lower and emit the instruction itself, then the jump table following it.
// add pc, target, idx
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDrr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
.addReg(ARM::PC)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -1668,7 +1612,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::SPACE:
- OutStreamer.EmitZeros(MI->getOperand(1).getImm());
+ OutStreamer->EmitZeros(MI->getOperand(1).getImm());
return;
case ARM::TRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
@@ -1676,8 +1620,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!Subtarget->isTargetMachO()) {
//.long 0xe7ffdefe @ trap
uint32_t Val = 0xe7ffdefeUL;
- OutStreamer.AddComment("trap");
- OutStreamer.EmitIntValue(Val, 4);
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 4);
return;
}
break;
@@ -1685,8 +1629,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::TRAPNaCl: {
//.long 0xe7fedef0 @ trap
uint32_t Val = 0xe7fedef0UL;
- OutStreamer.AddComment("trap");
- OutStreamer.EmitIntValue(Val, 4);
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 4);
return;
}
case ARM::tTRAP: {
@@ -1695,8 +1639,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!Subtarget->isTargetMachO()) {
//.short 57086 @ trap
uint16_t Val = 0xdefe;
- OutStreamer.AddComment("trap");
- OutStreamer.EmitIntValue(Val, 2);
+ OutStreamer->AddComment("trap");
+ OutStreamer->EmitIntValue(Val, 2);
return;
}
break;
@@ -1715,15 +1659,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = GetARMSJLJEHLabel();
- OutStreamer.AddComment("eh_setjmp begin");
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr)
+ OutStreamer->AddComment("eh_setjmp begin");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
.addReg(ARM::PC)
// Predicate.
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tADDi3)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi3)
.addReg(ValReg)
// 's' bit operand
.addReg(ARM::CPSR)
@@ -1733,7 +1677,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tSTRi)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tSTRi)
.addReg(ValReg)
.addReg(SrcReg)
// The offset immediate is #4. The operand value is scaled by 4 for the
@@ -1743,7 +1687,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVi8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
.addReg(ARM::R0)
.addReg(ARM::CPSR)
.addImm(0)
@@ -1752,13 +1696,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tB)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB)
.addExpr(SymbolExpr)
.addImm(ARMCC::AL)
.addReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVi8)
+ OutStreamer->AddComment("eh_setjmp end");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
.addReg(ARM::R0)
.addReg(ARM::CPSR)
.addImm(1)
@@ -1766,7 +1710,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- OutStreamer.EmitLabel(Label);
+ OutStreamer->EmitLabel(Label);
return;
}
@@ -1781,8 +1725,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- OutStreamer.AddComment("eh_setjmp begin");
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDri)
+ OutStreamer->AddComment("eh_setjmp begin");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri)
.addReg(ValReg)
.addReg(ARM::PC)
.addImm(8)
@@ -1792,7 +1736,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// 's' bit operand (always reg0 for this).
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::STRi12)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::STRi12)
.addReg(ValReg)
.addReg(SrcReg)
.addImm(4)
@@ -1800,7 +1744,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVi)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVi)
.addReg(ARM::R0)
.addImm(0)
// Predicate.
@@ -1809,7 +1753,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// 's' bit operand (always reg0 for this).
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDri)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri)
.addReg(ARM::PC)
.addReg(ARM::PC)
.addImm(0)
@@ -1819,8 +1763,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// 's' bit operand (always reg0 for this).
.addReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVi)
+ OutStreamer->AddComment("eh_setjmp end");
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVi)
.addReg(ARM::R0)
.addImm(1)
// Predicate.
@@ -1837,7 +1781,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
.addReg(ARM::SP)
.addReg(SrcReg)
.addImm(8)
@@ -1845,7 +1789,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
.addReg(ScratchReg)
.addReg(SrcReg)
.addImm(4)
@@ -1853,7 +1797,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
.addReg(ARM::R7)
.addReg(SrcReg)
.addImm(0)
@@ -1861,7 +1805,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::BX)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
.addReg(ScratchReg)
// Predicate.
.addImm(ARMCC::AL)
@@ -1876,7 +1820,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
// The offset immediate is #8. The operand value is scaled by 4 for the
@@ -1886,14 +1830,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ARM::SP)
.addReg(ScratchReg)
// Predicate.
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
.addImm(1)
@@ -1901,7 +1845,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ARM::R7)
.addReg(SrcReg)
.addImm(0)
@@ -1909,7 +1853,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
.addReg(ScratchReg)
// Predicate.
.addImm(ARMCC::AL)
@@ -1921,7 +1865,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
//===----------------------------------------------------------------------===//
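Aside: the long run of OutStreamer -> *OutStreamer rewrites above follows from the printer taking ownership of its streamer through std::unique_ptr, so helpers that take a reference now receive the dereferenced pointer. A minimal sketch of the pattern, with invented names (Streamer, emitToStreamer) — not code from this patch:

#include <memory>

struct Streamer {
  void emitInstruction(int Inst) { (void)Inst; } // stand-in for MCStreamer
};

static void emitToStreamer(Streamer &S, int Inst) { S.emitInstruction(Inst); }

struct Printer {
  std::unique_ptr<Streamer> OutStreamer; // owned; previously a plain reference
  void emit(int Inst) {
    emitToStreamer(*OutStreamer, Inst);  // hence the '*' at every call site
  }
};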
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index a621491..7bfb944 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -52,18 +52,14 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
public:
- explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr),
- InConstantPool(false) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- }
+ explicit ARMAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
const char *getPassName() const override {
return "ARM Assembly / Object Emitter";
}
- void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char *Modifier = nullptr);
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
@@ -108,15 +104,19 @@ private:
public:
unsigned getISAEncoding() override {
// ARM/Darwin adds ISA to the DWARF info for each function.
- if (!Subtarget->isTargetMachO())
+ Triple TT(TM.getTargetTriple());
+ if (!TT.isOSBinFormatMachO())
return 0;
- return Subtarget->isThumb() ?
- ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
- MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const;
MCSymbol *GetARMSJLJEHLabel() const;
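Aside: a hedged sketch of the triple-only Thumb test introduced in getISAEncoding above — the ISA is now derived purely from the target triple, so it stays valid even when no ARMSubtarget is in scope. It uses llvm::Triple's real API; the sample triples are illustrative only:

#include "llvm/ADT/Triple.h"

static bool isThumbTriple(const llvm::Triple &TT) {
  using llvm::Triple;
  return TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb ||
         TT.getSubArch() == Triple::ARMSubArch_v7m ||
         TT.getSubArch() == Triple::ARMSubArch_v6m;
}

// e.g. isThumbTriple(llvm::Triple("thumbv7-apple-ios"))            -> true
//      isThumbTriple(llvm::Triple("armv7-unknown-linux-gnueabi")) -> false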
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c1244225..c5d6b25 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -409,6 +410,8 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
+ // For conditional branches, we use addOperand to preserve CPSR flags.
+
if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
@@ -417,13 +420,13 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]);
return 1;
}
// Two-way conditional branch.
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]);
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
else
@@ -652,7 +655,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != nullptr);
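Aside: this jump-table index tweak (and the matching ones in ARMConstantIslandPass.cpp below) follows from the jump-table pseudos dropping a trailing immediate operand, so every offset counted from the end of the operand list shrinks by one. A sketch of just the arithmetic; the helper name is invented:

static unsigned jumpTableOpIdx(unsigned NumOps, bool Predicable) {
  return NumOps - (Predicable ? 2 : 1); // was NumOps - (Predicable ? 3 : 2)
}

// For a non-predicable pseudo whose operands now end [..., $jt]:
// jumpTableOpIdx(4, false) == 3, i.e. the last operand.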
@@ -1431,7 +1434,7 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
- int Opcode = MI0->getOpcode();
+ unsigned Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
@@ -1684,6 +1687,33 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
if (!NumCycles)
return false;
+ // If we are optimizing for size, see if the branch in the predecessor can be
+ // lowered to cbn?z by the constant island lowering pass, and return false if
+ // so. This results in a shorter instruction sequence.
+ const Function *F = MBB.getParent()->getFunction();
+ if (F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize)) {
+ MachineBasicBlock *Pred = *MBB.pred_begin();
+ if (!Pred->empty()) {
+ MachineInstr *LastMI = &*Pred->rbegin();
+ if (LastMI->getOpcode() == ARM::t2Bcc) {
+ MachineBasicBlock::iterator CmpMI = LastMI;
+ if (CmpMI != Pred->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8 ||
+ CmpMI->getOpcode() == ARM::t2CMPri) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg);
+ if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg))
+ return false;
+ }
+ }
+ }
+ }
+ }
+
// Attempt to estimate the relative costs of predication versus branching.
unsigned UnpredCost = Probability.getNumerator() * NumCycles;
UnpredCost /= Probability.getDenominator();
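Aside: the comparison being set up here weights the cost of keeping the branch by the probability of actually entering the block; a rough sketch with invented numbers, not the full heuristic:

#include <cstdint>

static uint64_t expectedBranchCost(uint64_t Num, uint64_t Denom,
                                   uint64_t NumCycles) {
  return Num * NumCycles / Denom; // probability-weighted cycles, as above
}

// A 4-cycle block reached 1 time in 4 costs ~1 cycle on average
// (expectedBranchCost(1, 4, 4) == 1), so unconditionally predicating all
// 4 of its instructions would usually be a loss.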
@@ -1741,7 +1771,7 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
}
-int llvm::getMatchingCondBranchOpcode(int Opc) {
+unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
if (Opc == ARM::tB)
@@ -1809,8 +1839,7 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
return nullptr;
}
bool DontMoveAcrossStores = true;
- if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
- DontMoveAcrossStores))
+ if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
return nullptr;
return MI;
}
@@ -1891,6 +1920,13 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
SeenMIs.insert(NewMI);
SeenMIs.erase(DefMI);
+ // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
+ // DefMI would be invalid when transferred inside the loop. Checking for a
+ // loop is expensive, but at least remove kill flags if they are in different
+ // BBs.
+ if (DefMI->getParent() != MI->getParent())
+ NewMI->clearKillInfo();
+
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
@@ -1991,8 +2027,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize))
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2286,16 +2321,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
return true;
break;
- case ARM::COPY: {
- // Walk down one instruction which is potentially an 'and'.
- const MachineInstr &Copy = *MI;
- MachineBasicBlock::iterator AND(
- std::next(MachineBasicBlock::iterator(MI)));
- if (AND == MI->getParent()->end()) return false;
- MI = AND;
- return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
- CmpMask, true);
- }
}
return false;
@@ -3665,9 +3690,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
return Latency;
@@ -4118,19 +4141,21 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
- // if they are not predicated.
- if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
- // CortexA9 is particularly picky about mixing the two and wants these
- // converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
- (MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR ||
- MI->getOpcode() == ARM::VMOVS))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
+ // If we don't have access to NEON instructions then we won't be able
+ // to swizzle anything to the NEON domain. Check to make sure.
+ if (Subtarget.hasNEON()) {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+ }
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
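Aside: getExecutionDomain returns a (current domain, legal-domain bitmask) pair; a freestanding sketch of that convention using the same enum values, offered as illustration rather than the backend's actual code:

#include <cstdint>
#include <utility>

enum ExeDomainSketch { ExeGeneric = 0, ExeVFP = 1, ExeNEON = 2 };

static std::pair<uint16_t, uint16_t> vmovDomain(bool HasNEON) {
  if (HasNEON) // runs as VFP today, but NEON is also legal -> swizzlable
    return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
  return std::make_pair(ExeVFP, 1 << ExeVFP); // pinned to VFP
}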
@@ -4223,6 +4248,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
@@ -4510,7 +4538,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
}
bool ARMBaseInstrInfo::hasNOP() const {
- return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ return Subtarget.getFeatureBits()[ARM::HasV6KOps];
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
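Aside: hasNOP shows the migration from a uint64_t feature mask to a bitset indexed by feature number, which keeps working once a target grows past 64 features. A generic sketch with an invented size:

#include <bitset>

constexpr unsigned NumFeaturesSketch = 128;
using FeatureBitsSketch = std::bitset<NumFeaturesSketch>;

static bool hasFeature(const FeatureBitsSketch &Bits, unsigned Feature) {
  return Bits[Feature]; // was: (Bits & (1ULL << Feature)) != 0
}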
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index ecbcf5c..c7185fe 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -439,7 +439,7 @@ static inline bool isPushOpcode(int Opc) {
/// register by reference.
ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
-int getMatchingCondBranchOpcode(int Opc);
+unsigned getMatchingCondBranchOpcode(unsigned Opc);
/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
/// opcode of the SSA instruction representing the conditional MI.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 8744f1c..3f79a9b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,26 +45,27 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ARMBaseRegisterInfo::ARMBaseRegisterInfo()
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+
+static unsigned getFramePointerReg(const ARMSubtarget &STI) {
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
- FramePtr = ARM::R7;
+ return ARM::R7;
else
- FramePtr = ARM::R11;
+ return ARM::R11;
} else if (STI.isTargetWindows())
- FramePtr = ARM::R11;
+ return ARM::R11;
else // ARM EABI
- FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.isThumb() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
const MCPhysReg *RegList =
STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
- if (!MF) return RegList;
-
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
@@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return RegList;
}
-const uint32_t*
-ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
@@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
-const uint32_t*
-ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i32 argument (which must also be the register used to return a
@@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::FPSCR);
Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(FramePtr);
+ Reserved.set(getFramePointerReg(STI));
if (hasBasePointer(MF))
Reserved.set(BasePtr);
// Some targets reserve R9.
@@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &) const {
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
@@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
switch (RC->getID()) {
default:
@@ -238,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
// Check if the other part of the pair has already been assigned, and provide
// the paired register as the first hint.
+ unsigned Paired = Hint.second;
+ if (Paired == 0)
+ return;
+
unsigned PairedPhys = 0;
- if (VRM && VRM->hasPhys(Hint.second)) {
- PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
- if (PairedPhys && MRI.isReserved(PairedPhys))
- PairedPhys = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(Paired)) {
+ PairedPhys = Paired;
+ } else if (VRM && VRM->hasPhys(Paired)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
}
// First prefer the paired physreg.
@@ -264,7 +275,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
void
-ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const {
MachineRegisterInfo *MRI = &MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
@@ -277,32 +288,14 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
// change.
unsigned OtherReg = Hint.second;
Hint = MRI->getRegAllocationHint(OtherReg);
- if (Hint.second == Reg)
- // Make sure the pair has not already divorced.
+ // Make sure the pair has not already divorced.
+ if (Hint.second == Reg) {
MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
- }
-}
-
-bool
-ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- // CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isLikeA9())
- return false;
-
- switch (RC->getID()) {
- case ARM::DPRRegClassID:
- case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::QPRRegClassID:
- case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::SPRRegClassID:
- case ARM::SPR_8RegClassID:
- // Avoid reusing S, D, and Q registers.
- // Don't increase register pressure for QQ and QQQQ.
- return true;
- default:
- return false;
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ MRI->setRegAllocationHint(NewReg,
+ Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
+ : ARMRI::RegPairOdd, OtherReg);
+ }
}
}
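Aside: the hint machinery above keeps LDRD/STRD register pairs in sync — when one half is reassigned, the other half's hint flips to the opposite parity. A simplified stand-in for getPairedGPR, assuming consecutive register numbering (the real helper goes through subregister indices):

static unsigned pairedGPRSketch(unsigned PhysReg, bool WantOdd) {
  if ((PhysReg & 1) == static_cast<unsigned>(WantOdd))
    return PhysReg;                             // already the requested half
  return WantOdd ? PhysReg + 1 : PhysReg - 1;   // step to the partner
}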
@@ -350,14 +343,11 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// Stack realignment requires a frame pointer. If we already started
// register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
+ if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->hasReservedCallFrame(MF))
+ if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -368,14 +358,10 @@ bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
+ unsigned StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -391,10 +377,11 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
if (TFI->hasFP(MF))
- return FramePtr;
+ return getFramePointerReg(STI);
return ARM::SP;
}
@@ -546,21 +533,20 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relative to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset = -Offset;
Offset += MFI->getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
Offset += 128; // 128 bytes of spill slots
- // If there is a frame pointer, try using it.
+ // If there's a frame pointer and the addressing mode allows it, try using it.
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
- if (isFrameOffsetLegal(MI, FPOffset))
+ if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
// If we can reference via the stack pointer, try that.
@@ -568,7 +554,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -631,7 +617,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -675,7 +661,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 5;
+ NumBits = (BaseReg == ARM::SP ? 8 : 5);
Scale = 4;
isSigned = false;
break;
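Aside: the AddrModeT1_s change above widens the legal frame offset when the base register is SP — Thumb1 SP-relative loads (tLDRspi) carry an 8-bit word-scaled immediate, while other bases get only 5 bits. A small sketch of the resulting ceilings:

static unsigned maxT1FrameOffset(bool BaseIsSP) {
  unsigned NumBits = BaseIsSP ? 8 : 5;
  unsigned Scale = 4;                    // immediates count words
  return ((1u << NumBits) - 1) * Scale;  // 1020 bytes vs 124 bytes
}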
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index e9bc412..fdc1ef9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -21,10 +21,6 @@
#include "ARMGenRegisterInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
- class Type;
-
/// Register allocation hints.
namespace ARMRI {
enum {
@@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg,
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMSubtarget &STI;
-
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
-
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo();
// Return the opcode that implements 'Op', or 0 if no opcode
unsigned getOpcode(int Op) const;
public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
@@ -113,7 +104,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -124,7 +116,8 @@ public:
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -135,11 +128,9 @@ public:
const MachineFunction &MF,
const VirtRegMap *VRM) const override;
- void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override;
-
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -152,7 +143,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
bool cannotEliminateFrame(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index a421f62..d687568 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -31,7 +31,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
+ if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else {
// For the 2nd half of a v2f64, do not fail.
@@ -46,7 +46,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
// Try to get the second register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
+ if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
@@ -76,11 +76,11 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
- unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
if (Reg == 0) {
// If we had R3 unallocated only, now we still must waste it.
- Reg = State.AllocateReg(GPRArgRegs, 4);
+ Reg = State.AllocateReg(GPRArgRegs);
assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
// For the 2nd half of a v2f64, do not just fail.
@@ -126,7 +126,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
if (Reg == 0)
return false; // we didn't handle it
@@ -205,7 +205,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
switch (LocVT.SimpleTy) {
case MVT::i32: {
RegList = RRegList;
- unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
// First consume all registers that would give an unaligned object. Whether
// we go on stack or in regs, no-one will be using them in future.
@@ -246,7 +246,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
// If nothing else has used the stack until this point, a non-HFA aggregate
// can be split between regs and stack.
- unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
for (auto &It : PendingMembers) {
if (RegIdx >= RegList.size())
It.convertToMem(State.AllocateStack(Size, Size));
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 5d29531..6fa5ad7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -53,11 +53,6 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
-// FIXME: This option should be removed once it has received sufficient testing.
-static cl::opt<bool>
-AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
- cl::desc("Align constant islands in code"));
-
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
@@ -235,8 +230,8 @@ namespace {
MachineInstr *MI;
unsigned MaxDisp : 31;
bool isCond : 1;
- int UncondBr;
- ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ unsigned UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -306,6 +301,8 @@ namespace {
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
+ unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg,
+ unsigned IdxReg);
bool optimizeThumb2JumpTables();
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
@@ -383,11 +380,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMBaseInstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ TII = STI->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
- STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
@@ -414,13 +409,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF->RenumberBlocks();
}
- // Thumb1 functions containing constant pools get 4-byte alignment.
- // This is so we can keep exact track of where the alignment padding goes.
-
- // ARM and Thumb2 functions need to be 4-byte aligned.
- if (!isThumb1)
- MF->ensureAlignment(2); // 2 = log2(4)
-
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
@@ -437,6 +425,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
CPEMIs.clear();
DEBUG(dumpBBs());
+ // Functions with jump tables need an alignment of 4 because they use the ADR
+ // instruction, which aligns the PC to 4 bytes before adding an offset.
+ if (!T2JumpTables.empty())
+ MF->ensureAlignment(2);
/// Remove dead constant pool entries.
MadeChange |= removeUnusedCPEntries();
@@ -515,8 +507,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
// Mark the basic block as required by the const-pool.
- // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
- BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+ BB->setAlignment(MaxAlign);
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
@@ -532,7 +523,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getSubtarget().getDataLayout();
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -606,10 +597,6 @@ ARMConstantIslands::CPEntry
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
- // Everything is 4-byte aligned unless AlignConstantIslands is set.
- if (!AlignConstantIslands)
- return 2;
-
unsigned CPI = CPEMI->getOperand(1).getIndex();
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
unsigned Align = MCP->getConstants()[CPI].getAlignment();
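Aside: alignments in this pass are log2-encoded — ensureAlignment(2) above means 1 << 2 = 4 bytes, and Log2_32 performs the conversion. A freestanding sketch of the convention:

#include <cassert>

static unsigned logAlignSketch(unsigned AlignBytes) {
  assert(AlignBytes && !(AlignBytes & (AlignBytes - 1)) && "power of two");
  unsigned Log = 0;
  while ((1u << Log) < AlignBytes)
    ++Log;
  return Log; // logAlignSketch(4) == 2, logAlignSketch(8) == 3
}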
@@ -669,7 +656,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
if (I->isDebugValue())
continue;
- int Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode();
if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
@@ -1764,8 +1751,13 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
- for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
- ImmBranch &Br = ImmBranches[i];
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
+ for (unsigned i = ImmBranches.size(); i != 0; --i) {
+ ImmBranch &Br = ImmBranches[i-1];
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
@@ -1852,6 +1844,79 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
return MadeChange;
}
+/// If we've formed a TBB or TBH instruction, the base register is now
+/// redundant. In most cases, the instructions defining it will now be dead and
+/// can be tidied up. This function removes them if so, and returns the number
+/// of bytes saved.
+unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI,
+ unsigned BaseReg,
+ unsigned IdxReg) {
+ unsigned BytesRemoved = 0;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
+ MachineBasicBlock::iterator PrevI = MI;
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() ||
+ PrevI->mayStore())
+ return BytesRemoved;
+
+ MachineInstr *AddrMI = PrevI;
+ unsigned NewBaseReg = 0;
+
+ // Examine the instruction that calculates the jumptable entry address. Make
+ // sure it only defines the base register and kills any uses other than the
+ // index register. We also need precisely one use to trace backwards to
+ // (hopefully) the LEA.
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
+ const MachineOperand &MO = AddrMI->getOperand(k);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() != BaseReg)
+ return BytesRemoved;
+
+ if (MO.isUse() && MO.getReg() != IdxReg) {
+ if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg()))
+ return BytesRemoved;
+ NewBaseReg = MO.getReg();
+ }
+ }
+
+ // We want to continue scanning backwards from AddrMI, but there are two
+ // problems: AddrMI is going away soon, and even decrementing once may be
+ // invalid.
+ if (PrevI != B)
+ PrevI = std::prev(PrevI);
+
+ DEBUG(dbgs() << "remove addr: " << *AddrMI);
+ BytesRemoved += TII->GetInstSizeInBytes(AddrMI);
+ AddrMI->eraseFromParent();
+
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
+ // to delete it as well.
+ MachineInstr *LeaMI = PrevI;
+ if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
+ LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
+ LeaMI->getOperand(0).getReg() != NewBaseReg)
+ return BytesRemoved;
+
+ DEBUG(dbgs() << "remove lea: " << *LeaMI);
+ BytesRemoved += TII->GetInstSizeInBytes(LeaMI);
+ LeaMI->eraseFromParent();
+ return BytesRemoved;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -1867,7 +1932,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1899,78 +1964,22 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
- // Scan backwards to find the instruction that defines the base
- // register. Due to post-RA scheduling, we can't count on it
- // immediately preceding the branch instruction.
- MachineBasicBlock::iterator PrevI = MI;
- MachineBasicBlock::iterator B = MBB->begin();
- while (PrevI != B && !PrevI->definesRegister(BaseReg))
- --PrevI;
-
- // If for some reason we didn't find it, we can't do anything, so
- // just skip this one.
- if (!PrevI->definesRegister(BaseReg))
- continue;
-
- MachineInstr *AddrMI = PrevI;
- bool OptOk = true;
- // Examine the instruction that calculates the jumptable entry address.
- // Make sure it only defines the base register and kills any uses
- // other than the index register.
- for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
- const MachineOperand &MO = AddrMI->getOperand(k);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && MO.getReg() != BaseReg) {
- OptOk = false;
- break;
- }
- if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
- OptOk = false;
- break;
- }
- }
- if (!OptOk)
- continue;
-
- // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
- // that gave us the initial base register definition.
- for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
- ;
-
- // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
- // to delete it as well.
- MachineInstr *LeaMI = PrevI;
- if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
- LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
- LeaMI->getOperand(0).getReg() != BaseReg)
- OptOk = false;
-
- if (!OptOk)
- continue;
-
- DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
- << " lea: " << *LeaMI);
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
- .addJumpTableIndex(JTI, JTOP.getTargetFlags())
- .addImm(MI->getOperand(JTOpIdx+1).getImm());
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags());
DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
// is 2-byte aligned. For now, asm printer will fix it up.
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
- OrigSize += TII->GetInstSizeInBytes(LeaMI);
- OrigSize += TII->GetInstSizeInBytes(MI);
-
- AddrMI->eraseFromParent();
- LeaMI->eraseFromParent();
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
MI->eraseFromParent();
- int delta = OrigSize - NewSize;
+ int delta = OrigSize - NewSize + DeadSize;
BBInfo[MBB->getNumber()].Size -= delta;
adjustBBOffsetsAfter(MBB);
@@ -1995,7 +2004,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 13bef54..36f63e2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -86,7 +86,7 @@ protected:
}
public:
- virtual ~ARMConstantPoolValue();
+ ~ARMConstantPoolValue() override;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
const char *getModifierText() const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7ddf879..4438f50 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1132,7 +1132,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- MIB.addReg(D0).addReg(D1);
+ MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
+ .addReg(D1, SrcIsKill ? RegState::Kill : 0);
if (SrcIsKill) // Add an implicit kill for the Q register.
MIB->addRegisterKilled(SrcReg, TRI, true);
@@ -1345,11 +1346,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- STI = &TM.getSubtarget<ARMSubtarget>();
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.def b/contrib/llvm/lib/Target/ARM/ARMFPUName.def
deleted file mode 100644
index 34ce85d..0000000
--- a/contrib/llvm/lib/Target/ARM/ARMFPUName.def
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the list of the supported ARM FPU names.
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
-
-#ifndef ARM_FPU_NAME
-#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.h"
-#endif
-
-ARM_FPU_NAME("vfp", VFP)
-ARM_FPU_NAME("vfpv2", VFPV2)
-ARM_FPU_NAME("vfpv3", VFPV3)
-ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
-ARM_FPU_NAME("vfpv4", VFPV4)
-ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
-ARM_FPU_NAME("fpv5-d16", FPV5_D16)
-ARM_FPU_NAME("fp-armv8", FP_ARMV8)
-ARM_FPU_NAME("neon", NEON)
-ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
-ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8)
-ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8)
-ARM_FPU_NAME("softvfp", SOFTVFP)
-
-#undef ARM_FPU_NAME
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 29462f7..4175b4a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -93,11 +93,11 @@ class ARMFastISel final : public FastISel {
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo),
+ Subtarget(
+ &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()),
- TII(*TM.getSubtargetImpl()->getInstrInfo()),
- TLI(*TM.getSubtargetImpl()->getTargetLowering()) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
+ TLI(*Subtarget->getTargetLowering()) {
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
@@ -189,9 +189,7 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
- const TargetLowering *getTargetLowering() {
- return TM.getSubtargetImpl()->getTargetLowering();
- }
+ const TargetLowering *getTargetLowering() { return &TLI; }
// Call handling routines.
private:
@@ -1659,12 +1657,12 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
if (Op2Reg == 0) return false;
}
- unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
- CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
AddOptionalDefs(
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
.addReg(CondReg)
- .addImm(0));
+ .addImm(1));
unsigned MovCCOpc;
const TargetRegisterClass *RC;
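Aside: the CMP -> TST switch above is a correctness fix — an i1 condition register is only guaranteed in bit 0, so the select must test that bit rather than compare the whole register against zero. In C terms, with invented names:

#include <cstdint>

static int selectSketch(uint32_t CondReg, int TrueV, int FalseV) {
  return (CondReg & 1) ? TrueV : FalseV; // decide on bit 0 only
}

// With CondReg == 0xFFFFFFFE (bit 0 clear, upper bits junk), the old
// compare-against-zero form would pick TrueV; testing bit 0 correctly
// picks FalseV.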
@@ -1796,6 +1794,10 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
if (!FPVT.isSimple()) return false;
MVT VT = FPVT.getSimpleVT();
+ // FIXME: Support vector types where possible.
+ if (VT.isVector())
+ return false;
+
// We can get here in the case when we want to use NEON for our fp
// operations, but can't figure out how to. Just use the vfp instructions
// if we have them.
@@ -2267,7 +2269,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
@@ -2418,7 +2420,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
@@ -2491,8 +2493,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
: &ARM::GPRRegClass;
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
@@ -3064,23 +3065,9 @@ bool ARMFastISel::fastLowerArguments() {
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
- const TargetMachine &TM = funcInfo.MF->getTarget();
-
- const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
- bool UseFastISel = false;
- UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only();
- UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
- UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
-
- if (UseFastISel) {
- // iOS always has a FP for backtracking, force other targets
- // to keep their FP when doing FastISel. The emitted code is
- // currently superior, and in cases like test-suite's lencod
- // FastISel isn't quite correct when FP is eliminated.
- TM.Options.NoFramePointerElim = true;
+ if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
return new ARMFastISel(funcInfo, libInfo);
- }
+
return nullptr;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 45c2c30..a52e497 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -43,6 +43,14 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
STI(sti) {}
+bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+ // iOS always has a FP for backtracking, force other targets to keep their FP
+ // when doing FastISel. The emitted code is currently superior, and in cases
+ // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
+ return TargetFrameLowering::noFramePointerElim(MF) ||
+ MF.getSubtarget<ARMSubtarget>().useFastISel();
+}
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -164,9 +172,13 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) {
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
size_t StackSizeInBytes) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MFI->getStackProtectorIndex() > 0)
- return StackSizeInBytes >= 4080;
- return StackSizeInBytes >= 4096;
+ const Function *F = MF.getFunction();
+ unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
+ if (F->hasFnAttribute("stack-probe-size"))
+ F->getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackSizeInBytes >= StackProbeSize;
}
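Aside: a sketch of the "stack-probe-size" override added above, assuming LLVM's StringRef API. The patch ignores getAsInteger's error return, so the seeded default is only guaranteed to survive when the attribute is absent:

#include "llvm/ADT/StringRef.h"

static unsigned stackProbeSizeSketch(bool HasAttr, llvm::StringRef AttrValue,
                                     unsigned Default) {
  unsigned Size = Default;
  if (HasAttr)
    AttrValue.getAsInteger(0, Size); // radix 0 autodetects, as in the patch
  return Size;
}

// stackProbeSizeSketch(true, "8192", 4096) == 8192
// stackProbeSizeSketch(false, "", 4096)    == 4096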
namespace {
@@ -225,7 +237,8 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
DebugLoc DL, const unsigned Reg,
const unsigned Alignment,
const bool MustBeSingleInstruction) {
- const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &AST =
+ static_cast<const ARMSubtarget &>(MF.getSubtarget());
const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
const unsigned AlignMask = Alignment - 1;
const unsigned NrBitsToZero = countTrailingZeros(Alignment);
@@ -273,8 +286,9 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
}
}
-void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void ARMFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -282,16 +296,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align =
- TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned Align = STI.getFrameLowering()->getStackAlignment();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -309,6 +320,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
StackAdjustingInsts DefCFAOffsetCandidates;
+ bool HasFP = hasFP(MF);
// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
@@ -325,6 +337,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
DefCFAOffsetCandidates.addInst(std::prev(MBBI),
NumBytes - ArgRegsSaveSize, true);
}
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
return;
}
@@ -373,7 +386,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Determine starting offsets of spill areas.
- bool HasFP = hasFP(MF);
unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
@@ -740,11 +752,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -1468,25 +1476,20 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
- if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
return;
// Don't bother if the default stack alignment is sufficiently high.
- if (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment() >= 8)
+ if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
return;
// Aligned spills require stack realignment.
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- if (!RegInfo->canRealignStack(MF))
+ if (!static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
return;
// We always spill contiguous d-registers starting from d8. Count how many
@@ -1694,8 +1697,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill high register if the function is thumb1
- if (!AFI->isThumb1OnlyFunction() ||
+ // Don't spill high register if the function is thumb
+ if (!AFI->isThumbFunction() ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
MRI.setPhysRegUsed(Reg);
if (!MRI.isReserved(Reg))
@@ -1867,10 +1870,11 @@ static const uint64_t kSplitStackAvailable = 256;
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
-void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+void ARMFrameLowering::adjustForSegmentedStacks(
+ MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
unsigned Opcode;
unsigned CFIIndex;
- const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
bool Thumb = ST->isThumb();
// Sadly, this currently doesn't support varargs, platforms other than
@@ -1880,7 +1884,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- MachineBasicBlock &prologueMBB = MF.front();
+ assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -1908,8 +1912,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
- e = prologueMBB.livein_end();
+ for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
+ e = PrologueMBB.livein_end();
i != e; ++i) {
AllocMBB->addLiveIn(*i);
GetMBB->addLiveIn(*i);
@@ -2162,7 +2166,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
// Organizing MBB lists
- PostStackMBB->addSuccessor(&prologueMBB);
+ PostStackMBB->addSuccessor(&PrologueMBB);
AllocMBB->addSuccessor(PostStackMBB);
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index b7be436..d763d17 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -28,7 +28,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
@@ -43,6 +43,8 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
+ bool noFramePointerElim(const MachineFunction &MF) const override;
+
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
@@ -55,7 +57,8 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const override;
- void adjustForSegmentedStacks(MachineFunction &MF) const override;
+ void adjustForSegmentedStacks(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0e4f81c..0157c0a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -44,16 +44,14 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
- const TargetMachine &TM =
- MI->getParent()->getParent()->getTarget();
+ const MachineFunction *MF = MI->getParent()->getParent();
const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ MF->getSubtarget().getInstrInfo());
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(TII.getSubtarget().isLikeA9() &&
- (LastMI->mayLoad() || LastMI->mayStore())) &&
+ !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
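This hunk repeats the migration that dominates the rest of this merge: code that
used to reach the subtarget through the TargetMachine now asks the MachineFunction.
A before/after sketch of the pattern:

  // Before: one subtarget per TargetMachine.
  const TargetMachine &TM = MF->getTarget();
  const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
  // After: the subtarget hangs off the MachineFunction, so per-function
  // attributes (e.g. a "target-cpu" override) can select a different one.
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();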
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9621743..4405625e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -70,7 +70,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
- Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ Subtarget = &MF.getSubtarget<ARMSubtarget>();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -83,8 +83,8 @@ public:
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
SDNode *Select(SDNode *N) override;
@@ -134,7 +134,7 @@ public:
bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
const ConstantSDNode *CN = cast<ConstantSDNode>(N);
- Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
return true;
}
@@ -257,7 +257,7 @@ private:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Form pairs of consecutive R, S, D, or Q registers.
@@ -272,7 +272,8 @@ private:
SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
- SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+ SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
+ bool is64BitVector);
};
}
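The getI32Imm change above is one instance of the API migration that accounts for
most of this file's churn: SelectionDAG constant factories now take a debug
location, so selected constants carry source-line info. A minimal before/after
sketch:

  // Before:
  SDValue C = CurDAG->getTargetConstant(Imm, MVT::i32);
  // After: pass the SDLoc of the node being selected.
  SDValue C = CurDAG->getTargetConstant(Imm, SDLoc(N), MVT::i32);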
@@ -394,11 +395,13 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
// Now make the transformation.
Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
Srl.getOperand(0),
- CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
+ MVT::i32));
N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
- Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ Srl,
+ CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
- N1, CurDAG->getConstant(TZ, MVT::i32));
+ N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
}
}
@@ -483,7 +486,7 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
if (!RHS) return false;
ShImmVal = RHS->getZExtValue() & 31;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -510,7 +513,7 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
return false;
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -527,7 +530,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -536,7 +539,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
Base = N.getOperand(0);
} else
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -551,14 +554,14 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
}
// Base only.
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -583,7 +586,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
Base = Offset = N.getOperand(0);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
ARM_AM::lsl),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
}
@@ -654,7 +657,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -682,7 +685,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = Offset = N.getOperand(0);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
ARM_AM::lsl),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return AM2_SHOP;
}
}
@@ -703,7 +706,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return AM2_BASE;
}
@@ -726,7 +729,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
ARM_AM::no_shift),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return AM2_BASE;
}
}
@@ -737,7 +740,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return AM2_BASE;
}
@@ -792,7 +795,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return AM2_SHOP;
}
@@ -828,7 +831,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
}
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -844,7 +847,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
if (AddSub == ARM_AM::sub) Val *= -1;
Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(Val, MVT::i32);
+ Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
return true;
}
@@ -865,7 +868,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
ARM_AM::no_shift),
- MVT::i32);
+ SDLoc(Op), MVT::i32);
return true;
}
@@ -884,7 +887,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
// X - C is canonicalized to X + -C; no need to handle it here.
Base = N.getOperand(0);
Offset = N.getOperand(1);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
+ MVT::i32);
return true;
}
@@ -895,7 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
+ MVT::i32);
return true;
}
@@ -915,13 +920,15 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
AddSub = ARM_AM::sub;
RHSC = -RHSC;
}
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
+ MVT::i32);
return true;
}
Base = N.getOperand(0);
Offset = N.getOperand(1);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
+ MVT::i32);
return true;
}
@@ -936,12 +943,14 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
int Val;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
+ MVT::i32);
return true;
}
Offset = N;
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
+ MVT::i32);
return true;
}
@@ -957,7 +966,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -977,13 +986,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
RHSC = -RHSC;
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
Base = N;
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -992,21 +1001,27 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
Addr = N;
unsigned Alignment = 0;
- if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+
+ MemSDNode *MemN = cast<MemSDNode>(Parent);
+
+ if (isa<LSBaseSDNode>(MemN) ||
+ ((MemN->getOpcode() == ARMISD::VST1_UPD ||
+ MemN->getOpcode() == ARMISD::VLD1_UPD) &&
+ MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
// The maximum alignment is equal to the memory size being referenced.
- unsigned LSNAlign = LSN->getAlignment();
- unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign >= MemSize && MemSize > 1)
+ unsigned MMOAlign = MemN->getAlignment();
+ unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
+ if (MMOAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
// the raw alignment value; it will be refined later based on the legal
// alignment operands for the intrinsic.
- Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ Alignment = MemN->getAlignment();
}
- Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
return true;
}
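A worked example of the Alignment computation above: for a VLD1-lane of a 32-bit
element, MemSize is 4; a memory operand known to be 8-byte aligned satisfies
MMOAlign >= MemSize, so the operand asserts Alignment = 4 (the most the
instruction can use), while a 2-byte-aligned operand leaves Alignment = 0,
asserting nothing.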
@@ -1030,7 +1045,7 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32);
+ SDLoc(N), MVT::i32);
return true;
}
@@ -1135,7 +1150,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
Base = N;
}
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1152,7 +1167,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (LHSC != 0 || RHSC != 0) return false;
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1160,12 +1175,12 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1191,8 +1206,13 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ // Only multiples of 4 are allowed for the offset, so the frame object
+ // alignment must be at least 4.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1208,9 +1228,14 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ // For LHS+RHS to result in an offset that's a multiple of 4 the object
+ // indexed by the LHS must be 4-byte aligned.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
}
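The alignment bumps above exist because Thumb1 SP-relative loads and stores encode
their offset as an 8-bit immediate scaled by 4, so only offsets 0, 4, ..., 1020
are representable; raising the frame object's alignment to 4 guarantees the folded
offset stays a multiple of 4. Illustratively, an object at SP+6 could not be
addressed directly, but after realignment it sits at SP+8.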
@@ -1239,7 +1264,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
unsigned ShImmVal = 0;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShImmVal = RHS->getZExtValue() & 31;
- Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
return true;
}
@@ -1257,7 +1282,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1268,7 +1293,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return false; // We want to select t2LDRpci instead.
} else
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1287,14 +1312,14 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
}
// Base only.
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
@@ -1316,7 +1341,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
return true;
}
}
@@ -1333,8 +1358,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
int RHSC;
if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
- ? CurDAG->getTargetConstant(RHSC, MVT::i32)
- : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
return true;
}
@@ -1383,7 +1408,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
}
}
- ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+ ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
return true;
}
@@ -1393,7 +1418,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
// This *must* succeed since it's used for the irreplaceable ldrex and strex
// instructions.
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
return true;
@@ -1412,15 +1437,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
- OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
return true;
}
//===--------------------------------------------------------------------===//
/// getAL - Returns an ARMCC::AL immediate node.
-static inline SDValue getAL(SelectionDAG *CurDAG) {
- return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
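getAL supplies the "always" condition for the two-operand predicate suffix that
predicable ARM machine nodes expect. A usage sketch (values illustrative):

  SDValue Pred = getAL(CurDAG, dl);                   // ARMCC::AL == 14
  SDValue PredReg = CurDAG->getRegister(0, MVT::i32); // no CPSR input
  // Most predicable nodes below end their operand list with Pred, PredReg.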
SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
@@ -1479,14 +1504,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
+ SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
MVT::i32, MVT::Other, Ops);
@@ -1535,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
if (Match) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+ SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
MVT::Other, Ops);
@@ -1548,9 +1573,9 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
- CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
@@ -1559,9 +1584,9 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
- CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
@@ -1569,9 +1594,10 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
- SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
@@ -1579,9 +1605,10 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
/// \brief Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
- SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
@@ -1591,11 +1618,11 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
SDValue RegClass =
- CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
@@ -1605,11 +1632,12 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
- SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
@@ -1619,11 +1647,12 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
- SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
+ MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
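All of these helpers emit one REG_SEQUENCE whose operands alternate value and
subregister-index pairs after the leading register-class ID. The shape, for the
D-pair case above:

  // Ops = { QPRRegClassID, V0, dsub_0, V1, dsub_1 }
  // => a single Q-register value with V0 in d-lane 0 and V1 in d-lane 1.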
@@ -1632,8 +1661,8 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
-SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
- bool is64BitVector) {
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
+ unsigned NumVecs, bool is64BitVector) {
unsigned NumRegs = NumVecs;
if (!is64BitVector && NumVecs < 3)
NumRegs *= 2;
@@ -1648,7 +1677,7 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
else
Alignment = 0;
- return CurDAG->getTargetConstant(Alignment, MVT::i32);
+ return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}
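The clamp table itself is mostly cut off by the hunk boundary, but the visible
shape is: scale NumVecs to a D-register count first, then round the requested
alignment down to the largest value the instruction can encode. A worked example
under that reading: one 128-bit vector gives NumRegs = 2, so a 32-byte-aligned
request is clamped to 16, and anything under 8 bytes becomes 0 (no alignment
asserted).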
static bool isVLDfixed(unsigned Opc)
@@ -1768,7 +1797,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
bool is64BitVector = VT.is64BitVector();
- Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+ Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1805,7 +1834,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDNode *VLd;
SmallVector<SDValue, 7> Ops;
@@ -1905,7 +1934,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
- Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+ Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1932,7 +1961,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 7> Ops;
@@ -2068,7 +2097,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
if (Alignment == 1)
Alignment = 0;
}
- Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -2096,7 +2125,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 8> Ops;
@@ -2126,7 +2155,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
- Ops.push_back(getI32Imm(Lane));
+ Ops.push_back(getI32Imm(Lane, dl));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
@@ -2181,7 +2210,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
if (Alignment == 1)
Alignment = 0;
}
- Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+ Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -2192,7 +2221,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
case MVT::v2i32: OpcodeIndex = 2; break;
}
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue SuperReg;
unsigned Opc = Opcodes[OpcodeIndex];
@@ -2263,7 +2292,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
Ops.push_back(N->getOperand(1));
Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
- Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(getAL(CurDAG, dl)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
@@ -2276,6 +2305,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Opc = isSigned
? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
+ SDLoc dl(N);
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
@@ -2292,7 +2322,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
// Note: The width operand is encoded as width-1.
- unsigned Width = CountTrailingOnes_32(And_imm) - 1;
+ unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
@@ -2302,25 +2332,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
if (Subtarget->isThumb()) {
Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- CurDAG->getTargetConstant(LSB, MVT::i32),
- getAL(CurDAG), Reg0, Reg0 };
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
// ARM models shift instructions as MOVsi with shifter operand.
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
SDValue ShOpc =
- CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
- getAL(CurDAG), Reg0, Reg0 };
+ getAL(CurDAG, dl), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
}
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- CurDAG->getTargetConstant(LSB, MVT::i32),
- CurDAG->getTargetConstant(Width, MVT::i32),
- getAL(CurDAG), Reg0 };
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
@@ -2341,9 +2371,9 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return nullptr;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- CurDAG->getTargetConstant(LSB, MVT::i32),
- CurDAG->getTargetConstant(Width, MVT::i32),
- getAL(CurDAG), Reg0 };
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
@@ -2360,9 +2390,9 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- CurDAG->getTargetConstant(LSB, MVT::i32),
- CurDAG->getTargetConstant(Width - 1, MVT::i32),
- getAL(CurDAG), Reg0 };
+ CurDAG->getTargetConstant(LSB, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
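A worked example of the width-1 encoding noted above: (x >> 4) & 0xff has
And_imm = 0xff, so countTrailingOnes gives 8 and the UBFX is emitted with
LSB = 4 and a width operand of 7 (= 8 - 1), extracting bits [11:4].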
@@ -2468,7 +2498,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *ResNode;
if (Subtarget->isThumb()) {
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
@@ -2476,8 +2506,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
} else {
SDValue Ops[] = {
CPIdx,
- CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG),
+ CurDAG->getTargetConstant(0, dl, MVT::i32),
+ getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
@@ -2496,13 +2526,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
+ // Set the alignment of the frame object to 4, to avoid having to generate
+ // more than one ADD.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
+ CurDAG->getTargetConstant(0, dl, MVT::i32));
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
@@ -2528,13 +2563,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
- SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
} else {
- SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
+ Reg0 };
return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
}
}
@@ -2544,13 +2580,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
- SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
+ SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
} else {
- SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
+ Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
}
}
@@ -2589,9 +2626,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
(N1CVal & 0xffffU) == 0xffffU &&
(N2CVal & 0xffffU) == 0x0U) {
SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
- MVT::i32);
+ dl, MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
}
@@ -2599,18 +2636,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VMOVRRD:
return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- N->getOperand(0), getAL(CurDAG),
+ N->getOperand(0), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMULL : ARM::UMULLv5,
@@ -2622,11 +2659,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMULL : ARM::SMULLv5,
@@ -2636,12 +2673,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::UMLAL:{
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), getAL(CurDAG),
+ N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), getAL(CurDAG),
+ N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
@@ -2652,12 +2689,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::SMLAL:{
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), getAL(CurDAG),
+ N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), getAL(CurDAG),
+ N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
@@ -2701,7 +2738,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
assert(N3.getOpcode() == ISD::Register);
SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
+ cast<ConstantSDNode>(N2)->getZExtValue()), dl,
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
@@ -2730,7 +2767,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
@@ -2750,7 +2787,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
@@ -2769,7 +2806,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- SDValue Pred = getAL(CurDAG);
+ SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
@@ -3017,7 +3054,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(getAL(CurDAG));
+ Ops.push_back(getAL(CurDAG, dl));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
@@ -3033,7 +3070,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (isThumb)
Result = SDValue(Ld, 0);
else {
- SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
@@ -3045,7 +3083,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (isThumb)
Result = SDValue(Ld, 1);
else {
- SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
Result = SDValue(ResNode,0);
@@ -3065,7 +3104,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Store exclusive double returns an i32 value which is the return status
// of the issued store.
- EVT ResTys[] = { MVT::i32, MVT::Other };
+ const EVT ResTys[] = {MVT::i32, MVT::Other};
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
@@ -3077,7 +3116,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// arm_strexd uses GPRPair.
Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
- Ops.push_back(getAL(CurDAG));
+ Ops.push_back(getAL(CurDAG, dl));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
@@ -3269,7 +3308,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(1));
- Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(getAL(CurDAG, dl)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
@@ -3285,7 +3324,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(2));
- Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(getAL(CurDAG, dl)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
}
@@ -3430,7 +3469,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
- Flag, MVT::i32);
+ Flag, dl, MVT::i32);
// Add the new register node and skip the original two GPRs.
AsmNodeOperands.push_back(PairedReg);
// Skip the next two GPRs.
@@ -3451,9 +3490,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
bool ARMDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ assert(ConstraintID == InlineAsm::Constraint_m &&
+ "unexpected asm memory constraint");
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a3a76e6..629cc90 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,6 +41,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -47,6 +49,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
@@ -156,18 +159,18 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
- Itins = TM.getSubtargetImpl()->getInstrItineraryData();
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
+ RegInfo = Subtarget->getRegisterInfo();
+ Itins = Subtarget->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
- Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
+ Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -398,7 +401,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
- if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
@@ -565,16 +568,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
- MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16,
- MVT::v2i32};
- for (unsigned i = 0; i < 6; ++i) {
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
}
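Expanding one iteration of the loop above (result type v4i16, memory type v4i8)
shows what each triple legalizes:

  // "a v4i8 in memory may be loaded and extended to v4i16 in one operation":
  setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);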
@@ -613,11 +615,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
for (MVT VT : MVT::fp_valuetypes()) {
@@ -812,7 +820,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
@@ -853,7 +861,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
@@ -867,15 +875,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
}
// Various VFP goodness
- if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
- // int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (Subtarget->hasVFP2()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- }
-
+ if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -932,7 +932,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
!Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
@@ -956,6 +956,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+bool ARMTargetLowering::useSoftFloat() const {
+ return Subtarget->useSoftFloat();
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -966,13 +970,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
-std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(MVT VT) const{
+std::pair<const TargetRegisterClass *, uint8_t>
+ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
- return TargetLowering::findRepresentativeClass(VT);
+ return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as representative register class for all floating point
// and vector types. Since there are 32 SPR registers and 32 DPR registers,
// the cost is 1 for both f32 and f64.
@@ -1004,11 +1009,12 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((ARMISD::NodeType)Opcode) {
+ case ARMISD::FIRST_NUMBER: break;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
@@ -1031,11 +1037,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RBIT: return "ARMISD::RBIT";
- case ARMISD::FTOSI: return "ARMISD::FTOSI";
- case ARMISD::FTOUI: return "ARMISD::FTOUI";
- case ARMISD::SITOF: return "ARMISD::SITOF";
- case ARMISD::UITOF: return "ARMISD::UITOF";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1090,6 +1091,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
+ case ARMISD::VSLI: return "ARMISD::VSLI";
+ case ARMISD::VSRI: return "ARMISD::VSRI";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
@@ -1140,6 +1143,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
+ return nullptr;
}
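The rewritten switch deliberately has no default case: casting the opcode to
ARMISD::NodeType lets -Wswitch flag any newly added ARMISD node that lacks a name
string, while generic ISD opcodes fall through to the trailing return. Sketch of
the pattern:

  switch ((ARMISD::NodeType)Opcode) {
  case ARMISD::FIRST_NUMBER: break;   // not a real node
  case ARMISD::CALL: return "ARMISD::CALL";
  // ... every other ARMISD enumerator ...
  }
  return nullptr;                     // non-ARMISD opcodes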
EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -1162,6 +1166,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
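A sketch of how callers would presumably use the new hook: for a call to a memory
intrinsic such as memcpy, it reports MinSize = 8 (objects smaller than that are
not worth padding) and, on v6-or-later application cores, a preferred alignment
of 8, so the middle end may raise the alignment of the pointed-to objects and the
backend can then use the faster 8-byte-aligned LDM/STM forms; M-class and pre-v6
cores request only 4.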
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
@@ -1169,12 +1187,6 @@ ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return ARM::createFastISel(funcInfo, libInfo);
}
-/// getMaximalGlobalOffset - Returns the maximal possible offset which can
-/// be used for loads / stores from the global.
-unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
- return (Subtarget->isThumb1Only() ? 127 : 4095);
-}
-
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -1193,8 +1205,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
@@ -1369,7 +1380,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
@@ -1383,7 +1394,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
@@ -1414,7 +1425,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
@@ -1454,7 +1465,7 @@ SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
+ SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
@@ -1508,8 +1519,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(NumBytes, dl, true), dl);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1548,9 +1559,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
@@ -1595,7 +1606,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
- SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
@@ -1614,14 +1625,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
StkPtrOff);
- SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
- SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
+ SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
+ MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
@@ -1771,7 +1783,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else {
@@ -1786,8 +1798,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1818,21 +1829,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1842,15 +1851,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (isTailCall)
+ if (isTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ }
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -1865,58 +1876,58 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
-void
-ARMTargetLowering::HandleByVal(
- CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
- if (Subtarget->isAAPCS_ABI() && Align > 4) {
- unsigned AlignInRegs = Align / 4;
- unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
- for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs, 4);
- }
- if (reg != 0) {
- unsigned excess = 4 * (ARM::R4 - reg);
-
- // Special case when NSAA != SP and parameter size greater than size of
- // all remained GPR regs. In that case we can't split parameter, we must
- // send it to stack. We also must set NCRN to R4, so waste all
- // remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
- if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
- return;
- }
+  // Byval (like all stack) slots are always at least 4-byte aligned.
+ Align = std::max(Align, 4U);
- // First register for byval parameter is the first register that wasn't
- // allocated before this method call, so it would be "reg".
- // If parameter is small enough to be saved in range [reg, r4), then
- // the end (first after last) register would be reg + param-size-in-regs,
- // else parameter would be splitted between registers and stack,
- // end register would be r4 in this case.
- unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
- State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
- // Note, first register is allocated in the beginning of function already,
- // allocate remained amount of registers we need.
- for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs, 4);
- // A byval parameter that is split between registers and memory needs its
- // size truncated here.
- // In the case where the entire structure fits in registers, we set the
- // size in memory to zero.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+  // Special case when NSAA != SP and the parameter size is greater than the
+  // size of all remaining GPR regs. In that case we can't split the
+  // parameter; we must send it to the stack. We also must set the NCRN to
+  // R4, so all remaining registers are wasted.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
}
+
+  // The first register for a byval parameter is the first register that
+  // wasn't allocated before this method call, i.e. "Reg". If the parameter
+  // is small enough to be saved in the range [Reg, r4), the end (first
+  // after last) register is Reg + param-size-in-regs; otherwise the
+  // parameter is split between registers and stack, and the end register
+  // is r4.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+  // Note: the first register was already allocated at the beginning of the
+  // function; allocate the remaining registers we need here.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
}
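The splitting logic above can be checked with a small standalone model. A sketch under the assumption that r0-r3 are modeled as indices 0..3 and 4 means "no register left" (mirroring ARM::R4); the NSAA special case is omitted, and none of these names are LLVM API:

    #include <algorithm>
    #include <cstdio>

    struct ByValSplit { unsigned RegBegin, RegEnd, StackSize; };

    static ByValSplit splitByVal(unsigned FirstFreeReg, unsigned Size,
                                 unsigned Align) {
      Align = std::max(Align, 4u);                 // stack slots are >= 4-byte aligned
      unsigned AlignInRegs = Align / 4;
      unsigned Reg = FirstFreeReg + (4 - FirstFreeReg) % AlignInRegs; // skip "Waste" regs
      if (Reg >= 4)
        return {4, 4, Size};                       // everything goes on the stack
      unsigned Excess = 4 * (4 - Reg);             // bytes we can still pass in regs
      unsigned RegEnd = std::min(Reg + Size / 4, 4u);
      unsigned StackSize = Size > Excess ? Size - Excess : 0;
      return {Reg, RegEnd, StackSize};
    }

    int main() {
      // A 12-byte, 8-byte-aligned byval with r0 taken: r1 is wasted for
      // alignment, r2-r3 carry 8 bytes, the last 4 bytes go on the stack.
      ByValSplit S = splitByVal(1, 12, 8);
      std::printf("regs [%u,%u), %u stack bytes\n", S.RegBegin, S.RegEnd,
                  S.StackSize);
    }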
/// MatchingStackOffset - Return true if the given stack call argument is
@@ -1999,7 +2010,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
@@ -2089,8 +2100,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -2165,7 +2175,8 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
report_fatal_error("Unsupported interrupt attribute. If present, value "
"must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
- RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
+ RetOps.insert(RetOps.begin() + 1,
+ DAG.getConstant(LROffset, DL, MVT::i32, false));
return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
@@ -2218,7 +2229,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
@@ -2237,7 +2248,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Extract the 2nd half and fall through to handle it as an f64 value.
Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
@@ -2418,7 +2429,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
@@ -2442,7 +2453,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
false, false, false, 0);
SDValue Chain = Argument.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
@@ -2494,7 +2505,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
false, false, false, 0);
Chain = Offset.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2648,14 +2659,14 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- SDValue Val = DAG.getConstant(0, MVT::i32);
+ SDValue Val = DAG.getConstant(0, dl, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
@@ -2665,7 +2676,7 @@ SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
- Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+ Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
SDValue
@@ -2704,7 +2715,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
false, false, false, 0);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
@@ -2730,7 +2741,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
}
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
@@ -2747,8 +2758,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
}
return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
- DAG.getConstant(Domain, MVT::i32));
+ DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
+ DAG.getConstant(Domain, dl, MVT::i32));
}
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
@@ -2774,8 +2785,8 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
}
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
- Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
- DAG.getConstant(isData, MVT::i32));
+ Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
+ DAG.getConstant(isData, dl, MVT::i32));
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -2828,55 +2839,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize)
- const {
- unsigned NumGPRs;
- if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
- unsigned RBegin, REnd;
- CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- NumGPRs = REnd - RBegin;
- } else {
- unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
- sizeof(GPRArgRegs) /
- sizeof(GPRArgRegs[0]));
- NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
- }
-
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- ArgRegsSize = NumGPRs * 4;
-
- // If parameter is split between stack and GPRs...
- if (NumGPRs && Align > 4 &&
- (ArgRegsSize < ArgSize ||
- InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs. For example,
- // if Align == 8, its last byte must be at address K*8 - 1.
- // We need to do it, since remained (stack) part of parameter has
- // stack alignment, and we need to "attach" "GPRs head" without gaps
- // to it:
- // Stack:
- // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
- // [ [padding] [GPRs head] ] [ Tail passed via stack ....
- //
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Padding =
- OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
- ArgRegsSaveSize = ArgRegsSize + Padding;
- } else
- // We don't need to extend regs save size for byval parameters if they
- // are passed via GPRs only.
- ArgRegsSaveSize = ArgRegsSize;
-}
-
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
@@ -2890,13 +2852,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const {
-
+ int ArgOffset,
+ unsigned ArgSize) const {
// Currently, two use-cases are possible:
// Case #1. Non-var-args function, and we meet the first byval parameter.
//          Set up the first unallocated register as the first byval register;
@@ -2911,83 +2868,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex, lastRegToSaveIndex;
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- firstRegToSaveIndex = RBegin - ARM::R0;
- lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated
- (GPRArgRegs, array_lengthof(GPRArgRegs));
- lastRegToSaveIndex = 4;
- }
-
- unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
- ArgRegsSize, ArgRegsSaveSize);
-
- // Store any by-val regs to their spots on the stack so that they may be
- // loaded by deferencing the result of formal parameter pointer or va_next.
- // Note: once stack area for byval/varargs registers
- // was initialized, it can't be initialized again.
- if (ArgRegsSaveSize) {
- unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
- if (Padding) {
- assert(AFI->getStoredByValParamsPadding() == 0 &&
- "The only parameter may be padded.");
- AFI->setStoredByValParamsPadding(Padding);
- }
-
- int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
- Padding +
- ByValStoreOffset -
- (int64_t)TotalArgRegsSaveSize,
- false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (Padding) {
- MFI->CreateFixedObject(Padding,
- ArgOffset + ByValStoreOffset -
- (int64_t)ArgRegsSaveSize,
- false);
- }
-
- SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
- ++firstRegToSaveIndex, ++i) {
- const TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = &ARM::tGPRRegClass;
- else
- RC = &ARM::GPRRegClass;
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
- unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
- AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- return FrameIndex;
- } else {
- if (ArgSize == 0) {
- // We cannot allocate a zero-byte object for the first variadic argument,
- // so just make up a size.
- ArgSize = 4;
- }
- // This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(
- ArgSize, ArgOffset, !ForceMutable);
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, dl, getPointerTy()));
}
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
}
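The offset passed to CreateFixedObject follows from the layout: the registers RBegin..r3 are spilled contiguously so that r3's slot ends at the CFA. A quick worked check of that arithmetic (plain C++, not LLVM API):

    #include <cstdio>

    int main() {
      // Model r0..r3 as 0..3 and ARM::R4 as 4; if r2 is the first register
      // saved, its fixed object must start 8 bytes below the CFA so that
      // r2 lands at CFA-8 and r3 at CFA-4.
      unsigned RBegin = 2, R4 = 4;
      int ArgOffset = -4 * int(R4 - RBegin);
      std::printf("save area starts at CFA%+d\n", ArgOffset); // CFA-8
    }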
// Set up the stack frame; the va_list pointer will start from it.
@@ -3005,11 +2918,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// the result of va_next.
// If there are no regs to be stored, just point the address after the last
// argument passed via the stack.
- int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
- 0, TotalArgRegsSaveSize);
-
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3035,7 +2946,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
- int lastInsIndex = -1;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -3045,50 +2955,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
- unsigned ByValStoreOffset = 0;
- unsigned TotalArgRegsSaveSize = 0;
- unsigned ArgRegsSaveSizeMaxAlign = 4;
-
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
CCValAssign &VA = ArgLocs[i];
- if (VA.isMemLoc()) {
- int index = VA.getValNo();
- if (index != lastInsIndex) {
- ISD::ArgFlagsTy Flags = Ins[index].Flags;
- if (Flags.isByVal()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
- Flags.getByValSize(),
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
- if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
- ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
- CCInfo.nextInRegsParam();
- }
- lastInsIndex = index;
- }
- }
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
- lastInsIndex = -1;
+
+ int lastInsIndex = -1;
if (isVarArg && MFI->hasVAStart()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
- // If the arg regs save area contains N-byte aligned values, the
- // bottom of it must be at least N-byte aligned.
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
- TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3121,9 +3021,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
}
ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
+ ArgValue, ArgValue1,
+ DAG.getIntPtrConstant(0, dl));
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
+ ArgValue, ArgValue2,
+ DAG.getIntPtrConstant(1, dl));
} else
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
@@ -3193,18 +3095,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
- int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- Flags.getByValSize(),
- true /*force mutable frames*/,
- ByValStoreOffset,
- TotalArgRegsSaveSize);
- ByValStoreOffset += Flags.getByValSize();
- ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex, VA.getLocMemOffset(),
+ Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
@@ -3278,28 +3171,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
case ISD::SETGE:
if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
- RHS = DAG.getConstant(C-1, MVT::i32);
+ RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
- RHS = DAG.getConstant(C-1, MVT::i32);
+ RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
- RHS = DAG.getConstant(C+1, MVT::i32);
+ RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
- RHS = DAG.getConstant(C+1, MVT::i32);
+ RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
}
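Each rewrite in this switch replaces a comparison with an equivalent one whose constant may encode as a legal ARM immediate; the guards (C != 0x80000000 and friends) rule out the wrap-around cases. A brute-force check of the four identities:

    #include <cassert>
    #include <cstdint>

    int main() {
      // x <  C  <=>  x <= C-1,   x >= C  <=>  x > C-1   (needs C != INT32_MIN)
      // x <= C  <=>  x <  C+1,   x >  C  <=>  x >= C+1  (needs C != INT32_MAX)
      const int32_t C = 1;
      for (int32_t x = -4; x <= 4; ++x) {
        assert((x <  C) == (x <= C - 1));
        assert((x >= C) == (x >  C - 1));
        assert((x <= C) == (x <  C + 1));
        assert((x >  C) == (x >= C + 1));
      }
    }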
@@ -3318,7 +3211,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CompareType = ARMISD::CMPZ;
break;
}
- ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
@@ -3364,7 +3257,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
SDValue Value, OverflowCmp;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
-
+ SDLoc dl(Op);
// FIXME: We are currently always generating CMPs because we don't support
// generating CMN through the backend. This is not as good as the natural
@@ -3375,24 +3268,24 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
- ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
- Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::UADDO:
- ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
- Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::SSUBO:
- ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
- Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
case ISD::USUBO:
- ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
- Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
} // switch (...)
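For the unsigned cases the CMP against the original operand is all that is needed: an unsigned add wrapped exactly when the truncated sum compares below an addend. A scalar sketch of that invariant (plain C++, not the DAG form):

    #include <cassert>
    #include <cstdint>

    static bool uaddOverflow(uint32_t a, uint32_t b) {
      uint32_t sum = a + b;   // wraps modulo 2^32
      return sum < a;         // wrapped iff the true sum exceeded UINT32_MAX
    }

    int main() {
      assert(!uaddOverflow(1, 2));
      assert(uaddOverflow(0xFFFFFFFFu, 1));
    }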
@@ -3410,16 +3303,17 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDLoc dl(Op);
// We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, MVT::i32);
- SDValue FVal = DAG.getConstant(0, MVT::i32);
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
- SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
ARMcc, CCR, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
@@ -3442,7 +3336,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
OverflowCmp, DAG);
}
@@ -3485,19 +3379,13 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
// undefined bits before doing a full-word comparison with zero.
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
- DAG.getConstant(1, Cond.getValueType()));
+ DAG.getConstant(1, dl, Cond.getValueType()));
return DAG.getSelectCC(dl, Cond,
- DAG.getConstant(0, Cond.getValueType()),
+ DAG.getConstant(0, dl, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
}
-static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
- if (CC == ISD::SETNE)
- return ISD::SETEQ;
- return ISD::getSetCCInverse(CC, true);
-}
-
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
@@ -3589,7 +3477,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3605,12 +3493,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
- CC = getInverseCCForVSEL(CC);
+ CC = ISD::getSetCCInverse(CC, true);
std::swap(TrueVal, FalseVal);
}
}
@@ -3624,26 +3512,113 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- // Try to generate VSEL on ARMv8.
- if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
- // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // Try to generate VMAXNM/VMINNM on ARMv8.
+ if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can use VMAXNM/VMINNM for a compare followed by a select with the
// same operands, as follows:
- // c = fcmp [ogt, olt, ugt, ult] a, b
+ // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
- // We only do this in unsafe-fp-math, because signed zeros and NaNs are
- // handled differently than the original code sequence.
- if (getTargetMachine().Options.UnsafeFPMath) {
- if (LHS == TrueVal && RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
- } else if (LHS == FalseVal && RHS == TrueVal) {
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
+ bool swapSides = false;
+ if (!getTargetMachine().Options.NoNaNsFPMath) {
+      // Transformability may depend on which way around we compare.
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ // the non-NaN should be RHS
+ swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
+ break;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // the non-NaN should be LHS
+ swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
+ break;
+ }
+ }
+ swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
+ if (swapSides) {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ std::swap(LHS, RHS);
+ }
+ if (LHS == TrueVal && RHS == FalseVal) {
+ bool canTransform = true;
+ // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
+ if (!getTargetMachine().Options.UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ const ConstantFPSDNode *Zero;
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ // RHS must not be -0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
+ !Zero->isNegative();
+ break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGE:
+ // LHS must not be -0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
+ !Zero->isNegative();
+ break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETLT:
+ // RHS must not be +0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
+ Zero->isNegative();
+ break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLE:
+ // LHS must not be +0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
+ Zero->isNegative();
+ break;
+ }
+ }
+ if (canTransform) {
+ // Note: If one of the elements in a pair is a number and the other
+ // element is NaN, the corresponding result element is the number.
+ // This is consistent with the IEEE 754-2008 standard.
+ // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
+ }
}
}
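The NaN rule the comments lean on is the IEEE 754-2008 maxNum/minNum behavior, which C's fmaxf shares: a quiet NaN loses to a number. A quick illustration (the signed-zero caveat is why the constant-zero checks above exist):

    #include <cassert>
    #include <cmath>

    int main() {
      float nan = std::nanf("");
      assert(std::fmaxf(1.0f, nan) == 1.0f);  // the number wins over NaN
      assert(std::fmaxf(nan, 1.0f) == 1.0f);
      // fmaxf(-0, +0) may return either zero; VMAXNM-style selection is
      // only safe around zero when the sign is known, hence the checks.
      assert(std::fmaxf(-0.0f, 0.0f) == 0.0f); // holds since -0.0f == 0.0f
    }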
@@ -3660,12 +3635,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
- SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
- SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
@@ -3698,7 +3673,7 @@ static bool canChangeToInt(SDValue Op, bool &SeenZero,
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (isFloatingPointZero(Op))
- return DAG.getConstant(0, MVT::i32);
+ return DAG.getConstant(0, SDLoc(Op), MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, SDLoc(Op),
@@ -3711,15 +3686,17 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue &RetVal1, SDValue &RetVal2) {
+ SDLoc dl(Op);
+
if (isFloatingPointZero(Op)) {
- RetVal1 = DAG.getConstant(0, MVT::i32);
- RetVal2 = DAG.getConstant(0, MVT::i32);
+ RetVal1 = DAG.getConstant(0, dl, MVT::i32);
+ RetVal2 = DAG.getConstant(0, dl, MVT::i32);
return;
}
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
+ RetVal1 = DAG.getLoad(MVT::i32, dl,
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3727,9 +3704,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
- PtrType, Ptr, DAG.getConstant(4, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
+ SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
+ PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, dl,
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
@@ -3764,7 +3741,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
- SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
+ SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
@@ -3784,7 +3761,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
- ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
@@ -3808,7 +3785,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
@@ -3834,14 +3811,14 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
- ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
@@ -3856,11 +3833,9 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
EVT PTy = getPointerTy();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
- ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
- SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
- Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
- Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
if (Subtarget->isThumb2()) {
// Thumb2 uses a two-level jump. That is, it jumps into the jump table
@@ -3868,7 +3843,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// to translate it to TBB / TBH later.
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
- Addr, Op.getOperand(2), JTI, UId);
+ Addr, Op.getOperand(2), JTI);
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
@@ -3876,13 +3851,13 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
- return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(),
false, false, false, 0);
Chain = Addr.getValue(1);
- return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
}
@@ -3909,7 +3884,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
@@ -3922,20 +3896,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::FP_TO_SINT:
- Opc = ARMISD::FTOSI;
- break;
- case ISD::FP_TO_UINT:
- Opc = ARMISD::FTOUI;
- break;
- }
- Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -3975,7 +3936,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
@@ -3988,21 +3948,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::SINT_TO_FP:
- Opc = ARMISD::SITOF;
- break;
- case ISD::UINT_TO_FP:
- Opc = ARMISD::UITOF;
- break;
- }
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(Opc, dl, VT, Op);
+ return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
@@ -4020,12 +3966,12 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Use VBSL to copy the sign bit.
unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
- DAG.getTargetConstant(EncodedVal, MVT::i32));
+ DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
if (VT == MVT::f64)
Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
else /*if (VT == MVT::f32)*/
Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
if (SrcVT == MVT::f32) {
@@ -4033,16 +3979,16 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
} else if (VT == MVT::f32)
Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
- MVT::i32);
+ dl, MVT::i32);
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
@@ -4053,7 +3999,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f32) {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
} else {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
}
@@ -4068,8 +4014,8 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
- SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
- SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
if (VT == MVT::f32) {
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
@@ -4100,7 +4046,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(4, MVT::i32);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
@@ -4162,9 +4108,9 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, DstVT,
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
@@ -4196,7 +4142,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
- SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -4219,17 +4165,17 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMcc, DAG, dl);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
CCR, Cmp);
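The select above implements the usual double-word shift right: for counts below 32 the low word mixes in bits from the high word, and from 32 upward only the shifted high word survives. A scalar model of the low-word result for the logical (SRL) case:

    #include <cassert>
    #include <cstdint>

    static uint32_t lsr64Lo(uint32_t lo, uint32_t hi, unsigned amt) {
      if (amt >= 32)                              // TrueVal path: ExtraShAmt >= 0
        return hi >> (amt - 32);
      return (lo >> amt) | (hi << (32 - amt));    // FalseVal path
    }

    int main() {
      uint64_t v = 0x1122334455667788ULL;
      for (unsigned amt = 1; amt < 64; ++amt)
        assert(lsr64Lo((uint32_t)v, (uint32_t)(v >> 32), amt) ==
               (uint32_t)(v >> amt));
    }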
@@ -4253,17 +4199,17 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMcc, DAG, dl);
+ SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
CCR, Cmp);
@@ -4280,14 +4226,14 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::arm_get_fpscr,
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
MVT::i32));
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
- DAG.getConstant(1U << 22, MVT::i32));
+ DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
- DAG.getConstant(22, MVT::i32));
+ DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
- DAG.getConstant(3, MVT::i32));
+ DAG.getConstant(3, dl, MVT::i32));
}
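The add-then-extract works because the two encodings differ by a rotation: FPSCR bits [23:22] hold {RN,RP,RM,RZ} as {0,1,2,3}, while FLT_ROUNDS expects {1,2,3,0}. A worked check of the mapping:

    #include <cassert>

    int main() {
      for (unsigned rmode = 0; rmode < 4; ++rmode) {
        unsigned fpscr = rmode << 22;             // other FPSCR bits zero here
        unsigned fltRounds = ((fpscr + (1u << 22)) >> 22) & 3;
        assert(fltRounds == (rmode + 1) % 4);     // 0->1, 1->2, 2->3, 3->0
      }
    }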
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
@@ -4345,10 +4291,10 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
- BitCounts, DAG.getIntPtrConstant(0));
+ BitCounts, DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
}
}
@@ -4387,10 +4333,10 @@ static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
}
}
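Both CTPOP lowerings follow the same shape: count bits per byte with VCNT.8, then fold neighbouring lanes together with pairwise adds (VPADDL) until the counts reach the requested lane width. A scalar model of that pipeline for one 32-bit lane:

    #include <cassert>
    #include <cstdint>

    static unsigned bitsInByte(uint8_t b) {
      unsigned n = 0;
      for (; b; b &= b - 1) ++n;       // what VCNT.8 does per byte lane
      return n;
    }

    static unsigned ctpop32(uint32_t x) {
      unsigned c0 = bitsInByte(x), c1 = bitsInByte(x >> 8),
               c2 = bitsInByte(x >> 16), c3 = bitsInByte(x >> 24);
      unsigned h0 = c0 + c1, h1 = c2 + c3;   // vpaddl.u8: byte pairs -> u16 lanes
      return h0 + h1;                        // vpaddl.u16: u16 pair -> u32 lane
    }

    int main() { assert(ctpop32(0xF00F0001u) == 9); }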
@@ -4424,7 +4370,8 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
// Left shifts translate directly to the vshiftu intrinsic.
if (N->getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
+ MVT::i32),
N->getOperand(0), N->getOperand(1));
assert((N->getOpcode() == ISD::SRA ||
@@ -4441,7 +4388,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
Intrinsic::arm_neon_vshifts :
Intrinsic::arm_neon_vshiftu);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(vshiftInt, MVT::i32),
+ DAG.getConstant(vshiftInt, dl, MVT::i32),
N->getOperand(0), NegatedCount);
}
@@ -4467,9 +4414,9 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
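The RRX expansion that comment describes is easy to check in scalar form: the flag-setting shift moves the high word right by one and parks its low bit in the carry, and RRX rotates that carry into bit 31 of the low word:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t v = 0x0000000100000000ULL;
      uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
      unsigned carry = hi & 1;                    // SRL_FLAG: carry out of hi
      uint32_t newHi = hi >> 1;
      uint32_t newLo = (lo >> 1) | (carry << 31); // RRX: carry into bit 31
      assert(((uint64_t)newHi << 32 | newLo) == v >> 1);
    }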
@@ -4622,7 +4569,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- EVT &VT, bool is128Bits, NEONModImmType type) {
+ SDLoc dl, EVT &VT, bool is128Bits,
+ NEONModImmType type) {
unsigned OpCmode, Imm;
// SplatBitSize is set to the smallest size that splats the vector, so a
@@ -4752,7 +4700,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
- return DAG.getTargetConstant(EncodedVal, MVT::i32);
+ return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
}
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
@@ -4782,11 +4730,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// It's a float and we are trying to use NEON operations where
// possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
- SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
// The rest of our options are NEON only, make sure that's allowed before
@@ -4804,8 +4752,8 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
- SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
- false, VMOVModImm);
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
+ VMovVT, false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -4817,11 +4765,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
@@ -4834,7 +4782,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
return SDValue();
@@ -5097,10 +5045,10 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
if (ST->isThumb1Only()) {
if (Val <= 255 || ~Val <= 255)
- return DAG.getConstant(Val, MVT::i32);
+ return DAG.getConstant(Val, dl, MVT::i32);
} else {
if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
- return DAG.getConstant(Val, MVT::i32);
+ return DAG.getConstant(Val, dl, MVT::i32);
}
return SDValue();
}
@@ -5122,7 +5070,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VmovVT, VT.is128BitVector(),
+ DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -5133,7 +5081,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VmovVT, VT.is128BitVector(),
+ DAG, dl, VmovVT, VT.is128BitVector(),
VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
@@ -5144,7 +5092,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
- SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
}
}
@@ -5226,8 +5174,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
- Value, DAG.getConstant(index, MVT::i32)),
- DAG.getConstant(index, MVT::i32));
+ Value, DAG.getConstant(index, dl, MVT::i32)),
+ DAG.getConstant(index, dl, MVT::i32));
} else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
@@ -5243,7 +5191,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 3> Ops;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
- Ops.push_back(DAG.getConstant(I, MVT::i32));
+ Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
@@ -5307,7 +5255,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
@@ -5410,24 +5358,25 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
VEXTOffsets[i] = NumElts;
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ DAG.getIntPtrConstant(NumElts, dl));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
SourceVecs[i],
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
SourceVecs[i],
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ DAG.getIntPtrConstant(NumElts, dl));
ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ DAG.getConstant(VEXTOffsets[i], dl,
+ MVT::i32));
}
}
@@ -5561,13 +5510,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VDUP2:
case OP_VDUP3:
return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
- OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+ OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3:
return DAG.getNode(ARMISD::VEXT, dl, VT,
OpLHS, OpRHS,
- DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+ DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
case OP_VUZPL:
case OP_VUZPR:
return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
@@ -5594,7 +5543,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
- VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+ VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
if (V2.getNode()->getOpcode() == ISD::UNDEF)
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
@@ -5618,7 +5567,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
// into the bottom double word. The v8i16 case is similar.
unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
- DAG.getConstant(ExtractNum, MVT::i32));
+ DAG.getConstant(ExtractNum, DL, MVT::i32));
}
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
@@ -5662,7 +5611,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i32));
+ DAG.getConstant(Lane, dl, MVT::i32));
}
bool ReverseVEXT;
@@ -5671,7 +5620,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (ReverseVEXT)
std::swap(V1, V2);
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
if (isVREVMask(ShuffleMask, VT, 64))
@@ -5684,7 +5633,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (V2->getOpcode() == ISD::UNDEF &&
isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
// Check for Neon shuffles that modify both input vectors in place.
@@ -5752,7 +5701,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
ShuffleMask[i] < (int)NumElts ? V1 : V2,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
- MVT::i32)));
+ dl, MVT::i32)));
}
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -5807,11 +5756,11 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
if (Op0.getOpcode() != ISD::UNDEF)
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
if (Op1.getOpcode() != ISD::UNDEF)
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}
@@ -5983,14 +5932,15 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
+ SDLoc dl(N);
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
- Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(TruncVT, NumElts), Ops);
}
@@ -6103,14 +6053,15 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// Get reciprocal estimate.
// float4 recip = vrecpeq_f32(yf);
Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ Y);
// Because char has a smaller range than uchar, we can actually get away
// without any newton steps. This requires that we use a weird bias
// of 0xb000, however (again, this has been exhaustively tested).
// float4 result = as_float4(as_int4(xf*recip) + 0xb000);
X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
- Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getConstant(0xb000, dl, MVT::i32);
Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
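
As a rough scalar model of the v4i8 path above (an illustration under our own names, not the DAG code): multiply by a reciprocal estimate, then bias the product's raw bits so truncation lands on the exact i8 quotient; 0xb000 is the empirically-validated constant from the comment.

    #include <cstdint>
    #include <cstring>

    // Stand-in for the NEON vrecpe estimate; the real instruction is only
    // approximate, which is why the bit-level bias below is needed.
    static float recipEstimate(float Y) { return 1.0f / Y; }

    static int8_t sdivViaRecip(int8_t X, int8_t Y) {
      float P = float(X) * recipEstimate(float(Y));
      uint32_t Bits;
      std::memcpy(&Bits, &P, sizeof(Bits));
      Bits += 0xb000;                  // nudge slightly-low products upward
      std::memcpy(&P, &Bits, sizeof(P));
      return int8_t(P);                // truncating convert back to i8
    }

The v4i16 and v4i32 variants below follow the same recipe, trading extra refinement steps (vrecps) against the size of the bias constant.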
@@ -6135,9 +6086,10 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ N1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Because short has a smaller range than ushort, we can actually get away
@@ -6146,7 +6098,7 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 0x89);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(0x89, MVT::i32);
+ N1 = DAG.getConstant(0x89, dl, MVT::i32);
N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
@@ -6172,13 +6124,13 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
@@ -6207,13 +6159,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
@@ -6222,7 +6174,8 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
N0 = LowerCONCAT_VECTORS(N0, DAG);
N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
- DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
+ MVT::i32),
N0);
return N0;
}
@@ -6240,13 +6193,14 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// recip *= vrecpsq_f32(yf, recip);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
+ BN1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
- DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Simply multiplying by the reciprocal estimate can leave us a few ulps
@@ -6255,7 +6209,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 2);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getConstant(2, dl, MVT::i32);
N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
@@ -6342,7 +6296,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
// Address of cos field.
SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
- DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
MachinePointerInfo(), false, false, false, 0);
@@ -6372,12 +6326,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
// Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain
- DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
- DAG.getConstant(15, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(9, MVT::i32),
- DAG.getConstant(13, MVT::i32),
- DAG.getConstant(0, MVT::i32)
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(9, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32)
};
Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
@@ -6387,13 +6341,13 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
// Intrinsic is defined to return 0 on unsupported platforms. Technically
// there are older ARM CPUs that have implementation-specific ways of
// obtaining this information (FIXME!).
- Cycles32 = DAG.getConstant(0, MVT::i32);
+ Cycles32 = DAG.getConstant(0, DL, MVT::i32);
OutChain = DAG.getEntryNode();
}
SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
- Cycles32, DAG.getConstant(0, MVT::i32));
+ Cycles32, DAG.getConstant(0, DL, MVT::i32));
Results.push_back(Cycles64);
Results.push_back(OutChain);
}
@@ -6509,8 +6463,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6622,14 +6575,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
}
}
-MachineBasicBlock *ARMTargetLowering::
-EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
MachineFrameInfo *MFI = MF->getFrameInfo();
int FI = MFI->getFunctionContextIndex();
@@ -6685,7 +6636,6 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
- unsigned UId = AFI->createJumpTableUId();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -6768,8 +6718,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
- .addJumpTableIndex(MJTI)
- .addImm(UId));
+ .addJumpTableIndex(MJTI));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
@@ -6782,8 +6731,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ .addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
@@ -6828,8 +6776,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
- .addJumpTableIndex(MJTI)
- .addImm(UId));
+ .addJumpTableIndex(MJTI));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
@@ -6858,8 +6805,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ .addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
@@ -6920,8 +6866,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
- .addJumpTableIndex(MJTI)
- .addImm(UId));
+ .addJumpTableIndex(MJTI));
MachineMemOperand *JTMMOLd =
MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
@@ -6938,13 +6883,11 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg4)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ .addJumpTableIndex(MJTI);
} else {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
.addReg(NewVReg5, RegState::Kill)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ .addJumpTableIndex(MJTI);
}
}
@@ -7020,8 +6963,6 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// The instruction is gone now.
MI->eraseFromParent();
-
- return MBB;
}
static
@@ -7139,8 +7080,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
@@ -7166,9 +7106,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
@@ -7259,16 +7197,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (IsThumb2) {
+ if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(Vtmp).addImm(LoopSize >> 16));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
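
For the useMovt branch above, the immediate is materialized in at most two instructions; a small model of the MOVW/MOVT pair (helper name hypothetical):

    #include <cstdint>

    // MOVW writes the low half and zeroes the rest; MOVT overwrites only
    // the high half, so it is skipped when (Imm & 0xFFFF0000) == 0.
    uint32_t materializeImm(uint32_t Imm) {
      uint32_t Rd = Imm & 0xFFFF;                  // MOVW Rd, #lo16
      if (Imm & 0xFFFF0000)
        Rd = (Imm & 0xFFFF0000) | (Rd & 0xFFFF);   // MOVT Rd, #hi16
      return Rd;
    }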
@@ -7371,7 +7313,7 @@ MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetWindows() &&
@@ -7436,8 +7378,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
@@ -7630,6 +7571,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned int ABSSrcReg = MI->getOperand(1).getReg();
unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool ABSSrcKill = MI->getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
@@ -7663,7 +7605,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, RegState::Kill)
+ .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
// insert PHI in SinkBB,
@@ -7700,8 +7642,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
@@ -7805,6 +7746,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
return false;
// Fall through.
case ISD::SIGN_EXTEND: {
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
if (CC.getValueType() != MVT::i1)
@@ -7813,12 +7755,13 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
if (AllOnes)
// When looking for an AllOnes constant, N is an sext, and the 'other'
// value is 0.
- OtherOp = DAG.getConstant(0, VT);
+ OtherOp = DAG.getConstant(0, dl, VT);
else if (N->getOpcode() == ISD::ZERO_EXTEND)
// When looking for a 0 constant, N can be zext or sext.
- OtherOp = DAG.getConstant(1, VT);
+ OtherOp = DAG.getConstant(1, dl, VT);
else
- OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
+ VT);
return true;
}
}
@@ -7957,9 +7900,11 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+
// Build operand list.
SmallVector<SDValue, 8> Ops;
- Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
TLI.getPointerTy()));
// Input is the vector.
@@ -7978,9 +7923,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
- return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
+ return DAG.getNode(ExtOp, dl, VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
@@ -8005,13 +7950,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// a glue link from the first add to the second add.
// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
// a S/UMLAL instruction.
- //     loAdd   UMUL_LOHI
- //       \    / :lo    \ :hi
- //        \  /          \          [no multiline comment]
- //       ADDC  |  hiAdd
- //        \ :glue /  /
- //         \    /  /
- //          ADDE
+ //            UMUL_LOHI
+ //           / :lo    \ :hi
+ //          /          \          [no multiline comment]
+ // loAdd -> ADDE        |
+ //            \ :glue   /
+ //             \       /
+ //              ADDC <- hiAdd
//
assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
SDValue AddcOp0 = AddcNode->getOperand(0);
@@ -8065,29 +8010,35 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
else
IsLeftOperandMUL = true;
if (MULOp == SDValue())
- return SDValue();
+ return SDValue();
// Figure out the right opcode.
unsigned Opc = MULOp->getOpcode();
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
// Figure out the high and low input values to the MLAL node.
- SDValue* HiMul = &MULOp;
SDValue* HiAdd = nullptr;
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
+ // Ensure that the ADDE comes from the high result of ISD::SMUL_LOHI.
+ if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
+ return SDValue();
+
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
else
HiAdd = &AddeOp0;
- if (AddcOp0->getOpcode() == Opc) {
+ // Ensure that LoMul and LowAdd are taken from the ISD::SMUL_LOHI node
+ // whose low result feeds the ADDC we are checking.
+
+ if (AddcOp0 == MULOp.getValue(0)) {
LoMul = &AddcOp0;
LowAdd = &AddcOp1;
}
- if (AddcOp1->getOpcode() == Opc) {
+ if (AddcOp1 == MULOp.getValue(0)) {
LoMul = &AddcOp1;
LowAdd = &AddcOp0;
}
@@ -8095,9 +8046,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
if (!LoMul)
return SDValue();
- if (LoMul->getNode() != HiMul->getNode())
- return SDValue();
-
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
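
For reference while reading this matcher, the semantics of the node being formed, as a sketch (our helper, not LLVM API): SMLAL/UMLAL accumulate a 32x32->64 product into a 64-bit value split across two registers.

    #include <cstdint>

    // {RdHi:RdLo} += Rn * Rm, the accumulation UMLAL performs.
    uint64_t umlal(uint32_t RdLo, uint32_t RdHi, uint32_t Rn, uint32_t Rm) {
      return ((uint64_t(RdHi) << 32) | RdLo) + uint64_t(Rn) * uint64_t(Rm);
    }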
@@ -8271,14 +8219,14 @@ static SDValue PerformMULCombine(SDNode *N,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
- DAG.getConstant(Log2_32(MulAmt - 1),
+ DAG.getConstant(Log2_32(MulAmt - 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT,
V,
- DAG.getConstant(Log2_32(MulAmt + 1),
+ DAG.getConstant(Log2_32(MulAmt + 1), DL,
MVT::i32)),
V);
} else
@@ -8291,7 +8239,7 @@ static SDValue PerformMULCombine(SDNode *N,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
- DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmtAbs - 1)) {
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
@@ -8299,10 +8247,10 @@ static SDValue PerformMULCombine(SDNode *N,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
- DAG.getConstant(Log2_32(MulAmtAbs-1),
+ DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
MVT::i32)));
Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, MVT::i32),Res);
+ DAG.getConstant(0, DL, MVT::i32), Res);
} else
return SDValue();
@@ -8310,7 +8258,7 @@ static SDValue PerformMULCombine(SDNode *N,
if (ShiftAmt != 0)
Res = DAG.getNode(ISD::SHL, DL, VT,
- Res, DAG.getConstant(ShiftAmt, MVT::i32));
+ Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
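
The algebraic identities this combine relies on are easy to check in isolation; a minimal sketch (helper names ours):

    #include <cstdint>

    // x * (2^N + 1)  ==  (x << N) + x
    uint32_t mulPow2Plus1(uint32_t X, unsigned N)  { return (X << N) + X; }
    // x * (2^N - 1)  ==  (x << N) - x
    uint32_t mulPow2Minus1(uint32_t X, unsigned N) { return (X << N) - X; }
    // x * -(2^N - 1) ==  x - (x << N); two's-complement wraparound makes
    // the unsigned arithmetic agree with the signed identity.
    uint32_t mulNegPow2Minus1(uint32_t X, unsigned N) { return X - (X << N); }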
@@ -8339,7 +8287,7 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VbicVT;
SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VbicVT, VT.is128BitVector(),
+ DAG, dl, VbicVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
@@ -8382,7 +8330,7 @@ static SDValue PerformORCombine(SDNode *N,
EVT VorrVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VorrVT, VT.is128BitVector(),
+ DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
@@ -8486,8 +8434,8 @@ static SDValue PerformORCombine(SDNode *N,
Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
- DAG.getConstant(Val, MVT::i32),
- DAG.getConstant(Mask, MVT::i32));
+ DAG.getConstant(Val, DL, MVT::i32),
+ DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
@@ -8512,9 +8460,9 @@ static SDValue PerformORCombine(SDNode *N,
// 2a
unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
- DAG.getConstant(amt, MVT::i32));
+ DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
- DAG.getConstant(Mask, MVT::i32));
+ DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
@@ -8528,9 +8476,9 @@ static SDValue PerformORCombine(SDNode *N,
// 2b
unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
- DAG.getConstant(lsb, MVT::i32));
+ DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
- DAG.getConstant(Mask2, MVT::i32));
+ DAG.getConstant(Mask2, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
@@ -8549,7 +8497,7 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
- DAG.getConstant(~Mask, MVT::i32));
+ DAG.getConstant(~Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
@@ -8630,7 +8578,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
LD->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
@@ -8796,7 +8744,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
- SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32);
+ SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
}
Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
@@ -8884,18 +8832,21 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask.data());
}
-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
-/// NEON load/store intrinsics to merge base address updates.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
- bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
- unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ const bool isStore = N->getOpcode() == ISD::STORE;
+ const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
+ MemSDNode *MemN = cast<MemSDNode>(N);
+ SDLoc dl(N);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
@@ -8911,7 +8862,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Find the new opcode for the updating load/store.
- bool isLoad = true;
+ bool isLoadOp = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
@@ -8934,19 +8885,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLoad = false; break;
+ NumVecs = 1; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
- NumVecs = 2; isLoad = false; break;
+ NumVecs = 2; isLoadOp = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
- NumVecs = 3; isLoad = false; break;
+ NumVecs = 3; isLoadOp = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
- NumVecs = 4; isLoad = false; break;
+ NumVecs = 4; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
- NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
- NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
- NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
@@ -8955,15 +8906,24 @@ static SDValue CombineBaseUpdate(SDNode *N,
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; isLaneOp = false; break;
+ case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
- if (isLoad)
+ if (isLoadOp) {
VecTy = N->getValueType(0);
- else
+ } else if (isIntrinsic) {
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ } else {
+ assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
@@ -8980,32 +8940,99 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
}
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrinsics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
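
A sketch of the retyping arithmetic just above, with minimal types of our own rather than LLVM's: keep the byte count but shrink the element so the annotated alignment is standard for it; e.g. a v2i64 load annotated align 2 re-types to v8i16.

    struct SimpleVecTy { unsigned EltBits, NumElts; };

    SimpleVecTy alignedVecTy(SimpleVecTy T, unsigned AlignBytes) {
      unsigned EltBytes = T.EltBits / 8;
      unsigned NumBytes = EltBytes * T.NumElts;
      if (AlignBytes == 0)
        AlignBytes = 1;
      if (AlignBytes < EltBytes)              // under-aligned: re-type
        return {AlignBytes * 8, NumBytes / AlignBytes};
      return T;                               // standard alignment: keep
    }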
// Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
EVT Tys[6];
- unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = VecTy;
+ Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+
+ // Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
- for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
- Ops.push_back(N->getOperand(i));
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
+ Ops.push_back(N->getOperand(i));
}
- MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops, MemInt->getMemoryVT(),
- MemInt->getMemOperand());
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size()-2];
+ StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ }
+
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys,
+ Ops, AlignedVecTy,
+ MemN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
- for (unsigned i = 0; i < NumResultVecs; ++i) {
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ // If this is a non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
}
+
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -9015,6 +9042,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
return SDValue();
}
+static SDValue PerformVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ return CombineBaseUpdate(N, DCI);
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -9128,6 +9163,18 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+static SDValue PerformLOADCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // If this is a legal vector load, try to combine it into a VLD1_UPD.
+ if (ISD::isNormalLoad(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
@@ -9196,7 +9243,7 @@ static SDValue PerformSTORECombine(SDNode *N,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL,
TLI.getPointerTy());
SDValue BasePtr = St->getBasePtr();
@@ -9205,7 +9252,7 @@ static SDValue PerformSTORECombine(SDNode *N,
for (unsigned I = 0; I < E; I++) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
StoreType, ShuffWide,
- DAG.getIntPtrConstant(I));
+ DAG.getIntPtrConstant(I, DL));
SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
@@ -9233,7 +9280,7 @@ static SDValue PerformSTORECombine(SDNode *N,
St->isNonTemporal(), St->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(), St->isVolatile(),
@@ -9266,6 +9313,11 @@ static SDValue PerformSTORECombine(SDNode *N,
St->getAAInfo());
}
+ // If this is a legal vector store, try to combine it into a VST1_UPD.
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
return SDValue();
}
@@ -9335,15 +9387,17 @@ static SDValue PerformVCVTCombine(SDNode *N,
return SDValue();
}
+ SDLoc dl(N);
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
+ SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
- DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
- DAG.getConstant(Log2_64(C), MVT::i32));
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
+ N0,
+ DAG.getConstant(Log2_64(C), dl, MVT::i32));
if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
- FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);
+ FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
}
@@ -9385,19 +9439,20 @@ static SDValue PerformVDIVCombine(SDNode *N,
return SDValue();
}
+ SDLoc dl(N);
SDValue ConvInput = Op.getOperand(0);
unsigned NumLanes = Op.getValueType().getVectorNumElements();
if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
- DAG.getConstant(IntrinsicOpcode, MVT::i32),
- ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
+ ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -9558,8 +9613,9 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
- return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
- N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vshiftins: {
@@ -9575,9 +9631,10 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
- return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
N->getOperand(1), N->getOperand(2),
- DAG.getConstant(Cnt, MVT::i32));
+ DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vqrshifts:
@@ -9622,9 +9679,11 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
default: llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
- if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
- return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
+ SDLoc dl(N);
+ return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
break;
case ISD::SRA:
@@ -9632,8 +9691,9 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
- return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ SDLoc dl(N);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
}
}
return SDValue();
@@ -9859,10 +9919,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
@@ -9882,7 +9943,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
default: break;
}
break;
@@ -9945,10 +10006,8 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if ((!IsMemset || ZeroMemset) &&
- Subtarget->hasNEON() &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
@@ -9993,6 +10052,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+ // If there's more than one user instruction, the loadext is desirable no
+ // matter what. There can be two uses by the same instruction.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -10206,9 +10287,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
@@ -10219,7 +10300,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
- int64_t AbsImm = llvm::abs64(Imm);
+ int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
@@ -10243,7 +10324,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
if (RHSC < 0 && RHSC > -256) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
- Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
@@ -10257,7 +10338,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
if (RHSC < 0 && RHSC > -0x1000) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
- Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
Base = Ptr->getOperand(0);
return true;
}
@@ -10300,11 +10381,11 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
- Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
} else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
isInc = Ptr->getOpcode() == ISD::ADD;
- Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+ Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
@@ -10546,7 +10627,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
-ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
@@ -10592,7 +10674,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -10751,7 +10833,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
}
- Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
break;
}
@@ -10819,7 +10901,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
SDValue Size = Op.getOperand(1);
SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue Flag;
Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
@@ -10872,11 +10954,7 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
// there can be 1's on either or both "outsides", all the "inside"
// bits must be 0's
- unsigned TO = CountTrailingOnes_32(v);
- unsigned LO = CountLeadingOnes_32(v);
- v = (v >> TO) << TO;
- v = (v << LO) >> LO;
- return v == 0;
+ return isShiftedMask_32(~v);
}
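
The one-liner above leans on a bit trick worth spelling out. A plain C++ model, assuming LLVM's isShiftedMask_32 semantics: a bitfield-inverted mask (e.g. 0xff0000ff) is all ones outside a single contiguous run of zeros, so its complement (0x00ffff00) must be a shifted mask.

    #include <cstdint>

    static bool isMask32(uint32_t V) {          // 0b0...01...1, nonzero
      return V && ((V + 1) & V) == 0;
    }
    static bool isShiftedMask32(uint32_t V) {   // one run of ones, anywhere
      return V && isMask32((V - 1) | V);
    }
    bool isBitFieldInvertedMaskModel(uint32_t V) {
      return isShiftedMask32(~V);
    }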
/// isFPImmLegal - Returns true if the target can instruction select the
@@ -11118,9 +11196,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= (Subtarget->isMClass() ? 32U : 64U);
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
// This has so far only been implemented for MachO.
@@ -11213,17 +11294,17 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ return Builder.CreateCall(Strex, {Lo, Hi, Addr});
}
Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateCall2(
- Strex, Builder.CreateZExtOrBitCast(
- Val, Strex->getFunctionType()->getParamType(0)),
- Addr);
+ return Builder.CreateCall(
+ Strex, {Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr});
}
enum HABaseType {
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 89b0c31..63e87c5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -27,7 +27,7 @@ namespace llvm {
namespace ARMISD {
// ARM Specific DAG Nodes
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -65,11 +65,6 @@ namespace llvm {
RBIT, // ARM bitreverse instruction
- FTOSI, // FP to sint within a FP register.
- FTOUI, // FP to uint within a FP register.
- SITOF, // sint to FP within a FP register.
- UITOF, // uint to FP within a FP register.
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -232,9 +227,11 @@ namespace llvm {
class ARMTargetLowering : public TargetLowering {
public:
- explicit ARMTargetLowering(const TargetMachine &TM);
+ explicit ARMTargetLowering(const TargetMachine &TM,
+ const ARMSubtarget &STI);
unsigned getJumpTableEncoding() const override;
+ bool useSoftFloat() const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -282,6 +279,8 @@ namespace llvm {
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
@@ -332,9 +331,10 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -344,6 +344,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
@@ -352,16 +358,15 @@ namespace llvm {
/// specified value type.
const TargetRegisterClass *getRegClassFor(MVT VT) const override;
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
return true;
}
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -406,7 +411,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -414,8 +420,9 @@ namespace llvm {
unsigned &Cost) const override;
protected:
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(MVT VT) const override;
+ std::pair<const TargetRegisterClass *, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const override;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -526,12 +533,8 @@ namespace llvm {
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const;
+ int ArgOffset,
+ unsigned ArgSize) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
@@ -539,12 +542,6 @@ namespace llvm {
unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
- void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize) const;
-
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -596,8 +593,7 @@ namespace llvm {
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
- MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ void EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 7d27cf3..e79608d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
-
+class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2];
+}
+class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 17d1ffa..84f95be 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -30,23 +30,22 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
if (hasNOP()) {
NopInst.setOpcode(ARM::HINT);
- NopInst.addOperand(MCOperand::CreateImm(0));
- NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::createImm(0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
} else {
NopInst.setOpcode(ARM::MOVr);
- NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
- NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
- NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- NopInst.addOperand(MCOperand::CreateReg(0));
- NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::createReg(ARM::R0));
+ NopInst.addOperand(MCOperand::createReg(ARM::R0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
+ NopInst.addOperand(MCOperand::createReg(0));
}
}
@@ -93,7 +92,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const {
MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
if (!Subtarget.useMovt(MF)) {
if (RM == Reloc::PIC_)
@@ -144,21 +143,24 @@ namespace {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (AFI->getGlobalBaseReg() == 0)
return false;
-
- const ARMTargetMachine *TM =
- static_cast<const ARMTargetMachine *>(&MF.getTarget());
- if (TM->getRelocationModel() != Reloc::PIC_)
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ // Don't do this for Thumb1.
+ if (STI.isThumb1Only())
+ return false;
+
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.getRelocationModel() != Reloc::PIC_)
return false;
LLVMContext *Context = &MF.getFunction()->getContext();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8;
+ unsigned PCAdj = STI.isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
*Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
- unsigned Align =
- TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment(
- Type::getInt32PtrTy(*Context));
+ unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
+ Type::getInt32PtrTy(*Context));
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
MachineBasicBlock &FirstMBB = MF.front();
@@ -166,9 +168,8 @@ namespace {
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
unsigned TempReg =
MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
- unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
- ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo();
+ unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
@@ -178,15 +179,13 @@ namespace {
// Fix the GOT address by adding pc.
unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD
- : ARM::PICADD;
+ Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
if (Opc == ARM::PICADD)
AddDefaultPred(MIB);
-
return true;
}
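
   [The pass now reaches the subtarget through the MachineFunction instead of
   casting the TargetMachine, as the hunk above shows. A sketch of the access
   pattern, including the PC bias that PCAdj accounts for (an ARM-state PC
   reads as the instruction address plus 8, a Thumb-state PC plus 4):

       static unsigned pcAdjustFor(const llvm::MachineFunction &MF) {
         const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
         return STI.isThumb() ? 4 : 8;   // pipeline PC bias per ISA state
       }
   ]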
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 2340670..d64b4ba 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -33,13 +33,12 @@ def SDT_ARMCMov : SDTypeProfile<1, 3,
def SDT_ARMBrcond : SDTypeProfile<0, 2,
[SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
-def SDT_ARMBrJT : SDTypeProfile<0, 3,
- [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
+def SDT_ARMBrJT : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
-def SDT_ARMBr2JT : SDTypeProfile<0, 4,
+def SDT_ARMBr2JT : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+ SDTCisVT<2, i32>]>;
def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
[SDTCisVT<0, i32>,
@@ -96,7 +95,7 @@ def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
-def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
@@ -199,12 +198,17 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
AssemblerPredicate<"HasV8Ops", "armv8">;
def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -318,12 +322,12 @@ class RegConstraint<string C> {
// imm_neg_XFORM - Return the negation of an i32 immediate value.
def imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
// imm_not_XFORM - Return the complement of a i32 immediate value.
def imm_not_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
@@ -338,7 +342,8 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, SDLoc(N),
+ MVT::i32);
}]>;
def lo16AllZero : PatLeaf<(i32 imm), [{
@@ -383,6 +388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Immediate operands with a shared generic asm render method.
class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+// Operands that are part of a memory addressing mode.
+class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
+
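
   [The new MemOperand class tags an operand as OPERAND_MEMORY in the generated
   instruction tables, which is what the many Operand<i32> to MemOperand
   changes below feed. A sketch of how generic code can consume the tag:

       #include "llvm/MC/MCInstrDesc.h"

       // Memory operands become identifiable by type instead of by name.
       static bool isMemOperand(const llvm::MCOperandInfo &Info) {
         return Info.OperandType == llvm::MCOI::OPERAND_MEMORY;
       }
   ]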
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -477,10 +485,10 @@ def neon_vcvt_imm32 : Operand<i32> {
def rot_imm_XFORM: SDNodeXForm<imm, [{
switch (N->getZExtValue()){
default: llvm_unreachable(nullptr);
- case 0: return CurDAG->getTargetConstant(0, MVT::i32);
- case 8: return CurDAG->getTargetConstant(1, MVT::i32);
- case 16: return CurDAG->getTargetConstant(2, MVT::i32);
- case 24: return CurDAG->getTargetConstant(3, MVT::i32);
+ case 0: return CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ case 8: return CurDAG->getTargetConstant(1, SDLoc(N), MVT::i32);
+ case 16: return CurDAG->getTargetConstant(2, SDLoc(N), MVT::i32);
+ case 24: return CurDAG->getTargetConstant(3, SDLoc(N), MVT::i32);
}
}]>;
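
   [The rotations accepted here are the four byte rotations 0/8/16/24, encoded
   as rot/8; a hypothetical equivalent of the switch above:

       unsigned encodeRotImm(unsigned Rot) {
         assert(Rot % 8 == 0 && Rot < 32 && "only byte rotations are encodable");
         return Rot / 8;   // 0->0, 8->1, 16->2, 24->3
       }
   ]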
def RotImmAsmOperand : AsmOperandClass {
@@ -759,7 +767,8 @@ def bf_inv_mask_imm : Operand<i32>,
}
def imm1_32_XFORM: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
+ MVT::i32);
}]>;
def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
@@ -772,7 +781,8 @@ def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
}
def imm1_16_XFORM: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32);
+ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
+ MVT::i32);
}]>;
def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
@@ -785,7 +795,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-class AddrMode_Imm12 : Operand<i32>,
+class AddrMode_Imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
@@ -808,7 +818,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 {
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
-def ldst_so_reg : Operand<i32>,
+def ldst_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
let EncoderMethod = "getLdStSORegOpValue";
// FIXME: Simplify the printer
@@ -824,7 +834,7 @@ def ldst_so_reg : Operand<i32>,
// {8} 1 if imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value.
def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
-def postidx_imm8 : Operand<i32> {
+def postidx_imm8 : MemOperand {
let PrintMethod = "printPostIdxImm8Operand";
let ParserMatchClass = PostIdxImm8AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -836,7 +846,7 @@ def postidx_imm8 : Operand<i32> {
// {8} 1 if imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value, scaled by 4.
def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
-def postidx_imm8s4 : Operand<i32> {
+def postidx_imm8s4 : MemOperand {
let PrintMethod = "printPostIdxImm8s4Operand";
let ParserMatchClass = PostIdxImm8s4AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -849,7 +859,7 @@ def PostIdxRegAsmOperand : AsmOperandClass {
let Name = "PostIdxReg";
let ParserMethod = "parsePostIdxReg";
}
-def postidx_reg : Operand<i32> {
+def postidx_reg : MemOperand {
let EncoderMethod = "getPostIdxRegOpValue";
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
@@ -864,7 +874,7 @@ def postidx_reg : Operand<i32> {
// FIXME: addrmode2 should be refactored the rest of the way to always
// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
-def addrmode2 : Operand<i32>,
+def addrmode2 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
@@ -876,7 +886,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass {
let Name = "PostIdxRegShifted";
let ParserMethod = "parsePostIdxReg";
}
-def am2offset_reg : Operand<i32>,
+def am2offset_reg : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -889,7 +899,7 @@ def am2offset_reg : Operand<i32>,
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
// the GPR is purely vestigial at this point.
def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
-def am2offset_imm : Operand<i32>,
+def am2offset_imm : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -904,7 +914,7 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-class AddrMode3 : Operand<i32>,
+class AddrMode3 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let ParserMatchClass = AddrMode3AsmOperand;
@@ -927,7 +937,7 @@ def AM3OffsetAsmOperand : AsmOperandClass {
let Name = "AM3Offset";
let ParserMethod = "parseAM3Offset";
}
-def am3offset : Operand<i32>,
+def am3offset : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode3Offset",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode3OffsetOpValue";
@@ -946,7 +956,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-class AddrMode5 : Operand<i32>,
+class AddrMode5 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
@@ -965,7 +975,7 @@ def addrmode5_pre : AddrMode5 {
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
-def addrmode6 : Operand<i32>,
+def addrmode6 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -974,7 +984,7 @@ def addrmode6 : Operand<i32>,
let ParserMatchClass = AddrMode6AsmOperand;
}
-def am6offset : Operand<i32>,
+def am6offset : MemOperand,
ComplexPattern<i32, 1, "SelectAddrMode6Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
@@ -985,7 +995,7 @@ def am6offset : Operand<i32>,
// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
// (single element from one lane) for size 32.
-def addrmode6oneL32 : Operand<i32>,
+def addrmode6oneL32 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -993,7 +1003,7 @@ def addrmode6oneL32 : Operand<i32>,
}
// Base class for addrmode6 with specific alignment restrictions.
-class AddrMode6Align : Operand<i32>,
+class AddrMode6Align : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -1069,7 +1079,7 @@ def addrmode6align64or128or256 : AddrMode6Align {
// Special version of addrmode6 to handle alignment encoding for VLD-dup
// instructions, specifically VLD4-dup.
-def addrmode6dup : Operand<i32>,
+def addrmode6dup : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1080,7 +1090,7 @@ def addrmode6dup : Operand<i32>,
}
// Base class for addrmode6dup with specific alignment restrictions.
-class AddrMode6DupAlign : Operand<i32>,
+class AddrMode6DupAlign : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1144,7 +1154,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign {
// addrmodepc := pc + reg
//
-def addrmodepc : Operand<i32>,
+def addrmodepc : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModePC", []> {
let PrintMethod = "printAddrModePCOperand";
let MIOperandInfo = (ops GPR, i32imm);
@@ -1153,7 +1163,7 @@ def addrmodepc : Operand<i32>,
// addr_offset_none := reg
//
def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
-def addr_offset_none : Operand<i32>,
+def addr_offset_none : MemOperand,
ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
let PrintMethod = "printAddrMode7Operand";
let DecoderMethod = "DecodeAddrMode7Operand";
@@ -1412,7 +1422,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0,
+ string rrDecoderMethod = ""> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
[(opnode GPR:$Rn, mod_imm:$imm)]>,
@@ -1440,6 +1451,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
let Inst{15-12} = 0b0000;
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
+ let DecoderMethod = rrDecoderMethod;
let Unpredictable{15-12} = 0b1111;
}
@@ -1835,11 +1847,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
let Inst{7-0} = imm;
}
-def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
@@ -2077,7 +2089,7 @@ def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
- (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
}
@@ -2214,22 +2226,22 @@ let isBranch = 1, isTerminator = 1 in {
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
- (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ (ins GPR:$target, i32imm:$jt),
0, IIC_Br,
- [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>,
+ [(ARMbrjt GPR:$target, tjumptable:$jt)]>,
Sched<[WriteBr]>;
// FIXME: This shouldn't use the generic "addrmode2," but rather be split
// into i12 and rs suffixed versions.
def BR_JTm : ARMPseudoInst<(outs),
- (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ (ins addrmode2:$target, i32imm:$jt),
0, IIC_Br,
- [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
- imm:$id)]>, Sched<[WriteBrTbl]>;
+ [(ARMbrjt (i32 (load addrmode2:$target)),
+ tjumptable:$jt)]>, Sched<[WriteBrTbl]>;
def BR_JTadd : ARMPseudoInst<(outs),
- (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt),
0, IIC_Br,
- [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
- imm:$id)]>, Sched<[WriteBrTbl]>;
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt)]>,
+ Sched<[WriteBrTbl]>;
} // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
@@ -2243,6 +2255,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
bits<25> target;
let Inst{23-0} = target{24-1};
let Inst{24} = target{0};
+ let isCall = 1;
}
// Branch and Exchange Jazelle
@@ -2253,6 +2266,7 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
let Inst{19-8} = 0xfff;
let Inst{7-4} = 0b0010;
let Inst{3-0} = func;
+ let isBranch = 1;
}
// Tail calls.
@@ -4258,6 +4272,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
//===----------------------------------------------------------------------===//
+// ARMv8.1a Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan",
+ "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010001;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-10} = 0b000000;
+ let Inst{9} = imm;
+ let Inst{8} = 0b0;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = 0b0000;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{15-10} = 0b111111;
+ let Unpredictable{8} = 0b1;
+ let Unpredictable{3-0} = 0b1111;
+}
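
   [The fixed fields above pin every bit except imm; a hypothetical helper
   showing the resulting ARM-mode SETPAN instruction word:

       // SETPAN #imm1 (ARM encoding), per the Inst{} assignments above.
       uint32_t encodeSETPAN(unsigned Imm1) {
         assert(Imm1 < 2);
         return 0xF1100000u | (Imm1 << 9);   // 1111 0001 0001 ..., imm at bit 9
       }
   ]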
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4361,7 +4399,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1,
+ "DecodeTSTInstruction">;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
@@ -5299,8 +5338,8 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
Requires<[IsARM, UseMovt]>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (LEApcrelJT tjumptable:$dst, imm:$id)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst),
+ (LEApcrelJT tjumptable:$dst)>;
// TODO: add,sub,and, 3-instr forms?
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 2a7b4b5..f035d61 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -2393,36 +2393,41 @@ def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
// Extract D sub-registers of Q registers.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
+ MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
+ MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
+ MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
}]>;
// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
}]>;
// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
}]>;
//===----------------------------------------------------------------------===//
@@ -2790,7 +2795,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode ShOp>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2826,7 +2831,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
- SDNode MulOp, SDNode ShOp>
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -3674,7 +3679,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDPatternOperator ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
@@ -3711,27 +3716,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
// Neon 3-argument intrinsics,
-// element sizes of 8, 16 and 32 bits:
-multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itinD, InstrItinClass itinQ,
+// element sizes of 16 and 32 bits:
+multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
+ itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
@@ -4305,6 +4321,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+let Predicates = [HasNEON, HasV8_1a] in {
+ // v8.1a Neon Rounding Double Multiply-Op vector operations,
+ // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
+ // (Q += D * D)
+ defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
+ imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+ // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
+ // (Q -= D * D)
+ defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+}
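
   [Per lane, the patterns above match a saturating add/subtract of a rounding
   doubling multiply-high. A scalar reference for the 16-bit case, as a sketch
   of the semantics being pattern-matched (not LLVM code):

       #include <cstdint>

       static int16_t sat16(int32_t X) {
         return X > INT16_MAX ? INT16_MAX : X < INT16_MIN ? INT16_MIN : (int16_t)X;
       }

       // sqrdmulh: (2*a*b + 2^15) >> 16, saturated.
       static int16_t sqrdmulh16(int16_t A, int16_t B) {
         int64_t P = (2 * (int64_t)A * B + (1 << 15)) >> 16;
         return sat16((int32_t)P);
       }

       // vqrdmlah acc,n,m == sqadd(acc, sqrdmulh(n,m)); vqrdmlsh uses sqsub.
       static int16_t vqrdmlah16(int16_t Acc, int16_t N, int16_t M) {
         return sat16((int32_t)Acc + sqrdmulh16(N, M));
       }
   ]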
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
@@ -6158,6 +6315,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+class NVCVTIFPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode GPR:$a)),
+ (f32 (EXTRACT_SUBREG
+ (v2f32 (Inst
+ (INSERT_SUBREG
+ (v2f32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
+ ssub_0))>;
+class NVCVTFIPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(i32 (OpNode SPR:$a)),
+ (i32 (EXTRACT_SUBREG
+ (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, ssub_0))),
+ ssub_0))>;
+
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
@@ -6173,10 +6345,22 @@ def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
-def : N2VSPat<arm_ftosi, VCVTf2sd>;
-def : N2VSPat<arm_ftoui, VCVTf2ud>;
-def : N2VSPat<arm_sitof, VCVTs2fd>;
-def : N2VSPat<arm_uitof, VCVTu2fd>;
+def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
+def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
+def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
+def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
+
+// NEON doesn't have any f64 conversions, so provide patterns to make
+// sure the VFP conversions match when extracting from a vector.
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
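
   [At the source level, the four VFPPat defs above cover lane-to-double
   conversions: the s32 lane is extracted as an S register and converted with
   the scalar VSITOD/VUITOD, since NEON has no f64 converts. A sketch of the
   C shapes these patterns lower:

       double s32_lane_to_f64(const int32_t Lanes[4], int I)  { return (double)Lanes[I]; }
       double u32_lane_to_f64(const uint32_t Lanes[4], int I) { return (double)Lanes[I]; }
   ]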
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index cc953c6..0fecfa1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -21,7 +21,7 @@ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
- return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32);
+ return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
def imm_sr : Operand<i32>, PatLeaf<(imm), [{
@@ -33,7 +33,8 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
}
def imm_comp_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
}]>;
def imm0_7_neg : PatLeaf<(i32 imm), [{
@@ -61,12 +62,12 @@ def thumb_immshifted : PatLeaf<(imm), [{
def thumb_immshifted_val : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
}]>;
def thumb_immshifted_shamt : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
}]>;
// Scaled 4 immediate.
@@ -142,7 +143,7 @@ def t_blxtarget : Operand<i32> {
// t_addrmode_pc := <label> => pc + imm8 * 4
//
-def t_addrmode_pc : Operand<i32> {
+def t_addrmode_pc : MemOperand {
let EncoderMethod = "getAddrModePCOpValue";
let DecoderMethod = "DecodeThumbAddrModePC";
let PrintMethod = "printThumbLdrLabelOperand";
@@ -153,7 +154,7 @@ def t_addrmode_pc : Operand<i32> {
// t_addrmode_rr := reg + reg
//
def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
-def t_addrmode_rr : Operand<i32>,
+def t_addrmode_rr : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -169,7 +170,7 @@ def t_addrmode_rr : Operand<i32>,
// the reg+imm forms will match instead. This is a horrible way to do that,
// as it forces tight coupling between the methods, but it's how selectiondag
// currently works.
-def t_addrmode_rrs1 : Operand<i32>,
+def t_addrmode_rrs1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -177,7 +178,7 @@ def t_addrmode_rrs1 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs2 : Operand<i32>,
+def t_addrmode_rrs2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -185,7 +186,7 @@ def t_addrmode_rrs2 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs4 : Operand<i32>,
+def t_addrmode_rrs4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -197,7 +198,7 @@ def t_addrmode_rrs4 : Operand<i32>,
// t_addrmode_is4 := reg + imm5 * 4
//
def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
-def t_addrmode_is4 : Operand<i32>,
+def t_addrmode_is4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -209,7 +210,7 @@ def t_addrmode_is4 : Operand<i32>,
// t_addrmode_is2 := reg + imm5 * 2
//
def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
-def t_addrmode_is2 : Operand<i32>,
+def t_addrmode_is2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -221,7 +222,7 @@ def t_addrmode_is2 : Operand<i32>,
// t_addrmode_is1 := reg + imm5
//
def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
-def t_addrmode_is1 : Operand<i32>,
+def t_addrmode_is1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -235,7 +236,7 @@ def t_addrmode_is1 : Operand<i32>,
// FIXME: This really shouldn't have an explicit SP operand at all. It should
// be implicit, just like in the instruction encoding itself.
def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
-def t_addrmode_sp : Operand<i32>,
+def t_addrmode_sp : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let EncoderMethod = "getAddrModeThumbSPOpValue";
let DecoderMethod = "DecodeThumbAddrModeSP";
@@ -521,9 +522,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
Sched<[WriteBrTbl]>;
def tBR_JTr : tPseudoInst<(outs),
- (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ (ins tGPR:$target, i32imm:$jt),
0, IIC_Br,
- [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
@@ -1254,7 +1255,7 @@ def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
let hasSideEffects = 1 in
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
- (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ (ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
//===----------------------------------------------------------------------===//
@@ -1355,8 +1356,8 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
// JumpTable
-def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+def : T1Pat<(ARMWrapperJT tjumptable:$dst),
+ (tLEApcrelJT tjumptable:$dst)>;
// Direct calls
def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
@@ -1375,6 +1376,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+// extload from the stack -> word load from the stack, as it avoids having to
+// materialize the base in a separate register. This only works when a word
+// load puts the byte/halfword value in the same place in the register that the
+// byte/halfword load would, i.e. when little-endian.
+def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+
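
   [The little-endian restriction in the comment above is the whole trick; a
   sketch of why the word load can stand in for the narrow load, assuming LE
   byte layout:

       #include <cstdint>
       #include <cstring>

       // On little-endian, the low byte of a loaded word sits exactly where a
       // byte load from the same address would put it.
       uint8_t extload_byte_via_word(const unsigned char *SpSlot) {
         uint32_t W;
         std::memcpy(&W, SpSlot, sizeof W);
         return (uint8_t)W;               // == *SpSlot on little-endian
       }
   ]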
// extload -> zextload
def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 5e41ea1..814b524 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -54,12 +54,14 @@ def t2_so_reg : Operand<i32>, // reg imm
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
}]>;
// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
+ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), SDLoc(N),
+ MVT::i32);
}]>;
// so_imm_notSext_XFORM - Return a so_imm value packed into the format
@@ -68,7 +70,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
APInt apIntN = N->getAPIntValue();
unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
- return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32);
+ return CurDAG->getTargetConstant(~N16bitSignExt, SDLoc(N), MVT::i32);
}]>;
// t2_so_imm - Match a 32-bit immediate operand, which is an
@@ -148,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
-def t2addrmode_imm12 : Operand<i32>,
+def t2addrmode_imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
@@ -178,7 +180,7 @@ def t2adrlabel : Operand<i32> {
// t2addrmode_posimm8 := reg + imm8
def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
-def t2addrmode_posimm8 : Operand<i32> {
+def t2addrmode_posimm8 : MemOperand {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -188,7 +190,7 @@ def t2addrmode_posimm8 : Operand<i32> {
// t2addrmode_negimm8 := reg - imm8
def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
-def t2addrmode_negimm8 : Operand<i32>,
+def t2addrmode_negimm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
@@ -199,7 +201,7 @@ def t2addrmode_negimm8 : Operand<i32>,
// t2addrmode_imm8 := reg +/- imm8
def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
-class T2AddrMode_Imm8 : Operand<i32>,
+class T2AddrMode_Imm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -215,7 +217,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 {
let PrintMethod = "printT2AddrModeImm8Operand<true>";
}
-def t2am_imm8_offset : Operand<i32>,
+def t2am_imm8_offset : MemOperand,
ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
@@ -225,7 +227,7 @@ def t2am_imm8_offset : Operand<i32>,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : Operand<i32> {
+class T2AddrMode_Imm8s4 : MemOperand {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -241,7 +243,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
}
def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
-def t2am_imm8s4_offset : Operand<i32> {
+def t2am_imm8s4_offset : MemOperand {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
let EncoderMethod = "getT2Imm8s4OpValue";
let DecoderMethod = "DecodeT2Imm8S4";
@@ -251,7 +253,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
let Name = "MemImm0_1020s4Offset";
}
-def t2addrmode_imm0_1020s4 : Operand<i32>,
+def t2addrmode_imm0_1020s4 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
@@ -262,7 +264,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>,
// t2addrmode_so_reg := reg + (reg << imm2)
def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
-def t2addrmode_so_reg : Operand<i32>,
+def t2addrmode_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
let EncoderMethod = "getT2AddrModeSORegOpValue";
@@ -273,13 +275,13 @@ def t2addrmode_so_reg : Operand<i32>,
// Addresses for the TBB/TBH instructions.
def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
-def addrmode_tbb : Operand<i32> {
+def addrmode_tbb : MemOperand {
let PrintMethod = "printAddrModeTBB";
let ParserMatchClass = addrmode_tbb_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
-def addrmode_tbh : Operand<i32> {
+def addrmode_tbh : MemOperand {
let PrintMethod = "printAddrModeTBH";
let ParserMatchClass = addrmode_tbh_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
@@ -1185,7 +1187,8 @@ class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
class T2I_exta_rrot_np<bits<3> opcod, string opc>
: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> {
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1246,7 +1249,7 @@ def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
- (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ (ins i32imm:$label, pred:$p),
4, IIC_iALUi,
[]>, Sched<[WriteALU, ReadALU]>;
@@ -3530,18 +3533,18 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
- (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ (ins GPR:$target, GPR:$index, i32imm:$jt),
0, IIC_Br,
- [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
Sched<[WriteBr]>;
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
Sched<[WriteBr]>;
def t2TBH_JT : t2PseudoInst<(outs),
- (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>,
+ (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
Sched<[WriteBr]>;
def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
@@ -3629,8 +3632,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
- Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> {
+def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -3874,8 +3877,8 @@ def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
Requires<[IsThumb2, UseMovt]>;
-def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+def : T2Pat<(ARMWrapperJT tjumptable:$dst),
+ (t2LEApcrelJT tjumptable:$dst)>;
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
@@ -4280,6 +4283,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
//===----------------------------------------------------------------------===//
+// ARMv8.1 Privilege Access Never extension
+//
+// SETPAN #imm1
+
+def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>,
+ T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{4} = 0b1;
+ let Inst{3} = imm;
+ let Inst{2-0} = 0b000;
+
+ let Unpredictable{4} = 0b1;
+ let Unpredictable{2-0} = 0b111;
+}
+
+//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -4585,17 +4605,21 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
(t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
// Extend instruction optional rotate operand.
-def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
- (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
- (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
- (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
(t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"sxtb16${p} $Rd, $Rm",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"sxth${p} $Rd, $Rm",
(t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
@@ -4603,19 +4627,23 @@ def : t2InstAlias<"sxtb${p}.w $Rd, $Rm",
def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
(t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
- (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
- (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
- (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>;
+def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-def : t2InstAlias<"uxtb16${p} $Rd, $Rm",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"uxth${p} $Rd, $Rm",
(t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
-
def : t2InstAlias<"uxtb${p}.w $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
@@ -4624,15 +4652,17 @@ def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
// Extend instruction w/o the ".w" optional width specifier.
def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
-def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
-def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e0a9314..e83f8c8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -11,16 +11,10 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
@@ -43,7 +37,7 @@ def vfp_f32imm : Operand<f32>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = FPImmOperand;
@@ -55,7 +49,7 @@ def vfp_f64imm : Operand<f64>,
}], SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP64Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = FPImmOperand;
@@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Inst{8} = 1;
}
}
+
+ let Predicates = [HasFPARMv8] in {
+ def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SS") SPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"US") SPR:$a),
+ GPR)>;
+ }
+ let Predicates = [HasFPARMv8, HasDPVFP] in {
+ def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SD") DPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"UD") DPR:$a),
+ GPR)>;
+ }
}
defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
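With the arm_ftosi/arm_ftoui nodes gone, these conversions are now matched from the generic fp_to_sint/fp_to_uint nodes, and the SPR result is moved into a GPR via COPY_TO_REGCLASS. A sketch of source that should exercise the new patterns on an FPARMv8 target, assuming frnd here is the ISD::FROUND (round half away from zero) node that __builtin_roundf lowers to:

    // Expected selection: vcvta.s32.f32 s0, s0 followed by vmov r0, s0
    // (the vmov is the COPY_TO_REGCLASS from SPR to GPR).
    int round_to_int(float x) {
      return (int)__builtin_roundf(x);
    }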
@@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 1; // s32
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOD (VLDRS addrmode5:$a))>;
+}
+
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+ (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOS (VLDRS addrmode5:$a))>;
+
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 0; // u32
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOD (VLDRS addrmode5:$a))>;
+}
+
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+ (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOS (VLDRS addrmode5:$a))>;
+
// FP -> Int:
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
// FIXME: Verify encoding after integrated assembler is working.
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index fda3e81..5b62a21 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -19,7 +19,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -102,7 +103,7 @@ namespace {
DebugLoc dl, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, int Opcode,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
DebugLoc dl,
ArrayRef<std::pair<unsigned, bool> > Regs,
@@ -115,14 +116,14 @@ namespace {
int Offset,
unsigned Base,
bool BaseKill,
- int Opcode,
+ unsigned Opcode,
ARMCC::CondCodes Pred,
unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- int Opcode, unsigned Size,
+ unsigned Opcode, unsigned Size,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
@@ -158,7 +159,7 @@ static bool definesCPSR(const MachineInstr *MI) {
}
static int getMemoryOpOffset(const MachineInstr *MI) {
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
@@ -170,7 +171,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return OffField;
// Thumb1 immediate offsets are scaled by 4
- if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
+ Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
return OffField * 4;
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
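A worked example of the scaling rule the hunk above extends to the SP-relative opcodes: Thumb1 word loads and stores keep a word-scaled immediate, so the byte offset is OffField * 4. A minimal sketch (hypothetical helper name):

    // For "ldr r0, [sp, #12]" (tLDRspi) the immediate operand is 3,
    // so getMemoryOpOffset() returns 3 * 4 == 12 bytes.
    static int thumb1WordOffsetBytes(unsigned OffField) {
      return static_cast<int>(OffField) * 4;
    }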
@@ -184,7 +186,7 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return Offset;
}
-static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::LDRi12:
@@ -206,6 +208,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ib: return ARM::STMIB;
}
case ARM::tLDRi:
+ case ARM::tLDRspi:
// tLDMIA is writeback-only - unless the base register is in the input
// reglist.
++NumLDMGened;
@@ -214,6 +217,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::tLDMIA;
}
case ARM::tSTRi:
+ case ARM::tSTRspi:
// There is no non-writeback tSTMIA either.
++NumSTMGened;
switch (Mode) {
@@ -270,7 +274,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
namespace llvm {
namespace ARM_AM {
-AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::LDMIA_RET:
@@ -328,7 +332,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
- return Opc == ARM::tLDRi;
+ return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}
static bool isT2i32Load(unsigned Opc) {
@@ -340,7 +344,7 @@ static bool isi32Load(unsigned Opc) {
}
static bool isT1i32Store(unsigned Opc) {
- return Opc == ARM::tSTRi;
+ return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}
static bool isT2i32Store(unsigned Opc) {
@@ -356,6 +360,8 @@ static unsigned getImmScale(unsigned Opc) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
return 1;
case ARM::tLDRHi:
case ARM::tSTRHi:
@@ -441,8 +447,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
return;
}
@@ -460,8 +465,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (MBBI != MBB.end()) --MBBI;
AddDefaultT1CC(
BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
}
}
@@ -472,7 +476,7 @@ bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill,
- int Opcode, ARMCC::CondCodes Pred,
+ unsigned Opcode, ARMCC::CondCodes Pred,
unsigned PredReg, unsigned Scratch, DebugLoc dl,
ArrayRef<std::pair<unsigned, bool> > Regs,
ArrayRef<unsigned> ImpDefs) {
@@ -493,8 +497,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
if (isThumb1)
- for (unsigned I = 0; I < NumRegs; ++I)
- if (Base == Regs[I].first) {
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (Base == R.first) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
if (Opcode == ARM::tLDRi) {
Writeback = false;
break;
@@ -515,7 +520,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
@@ -545,6 +550,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
(isThumb1 && Offset < 8) ? ARM::tADDi3 :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
@@ -552,7 +558,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Offset = - Offset;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
- (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
+ (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
}
@@ -566,12 +572,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// or
// MOV NewBase, Base
// ADDS NewBase, #imm8.
- if (Base != NewBase && Offset >= 8) {
- const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
- .getSubtarget<ARMSubtarget>();
+ if (Base != NewBase &&
+ (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
// Need to insert a MOV to the new base first.
if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
- !Subtarget.hasV6Ops()) {
+ !STI->hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
return false;
@@ -586,9 +591,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Base = NewBase;
BaseKill = false;
}
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ if (BaseOpc == ARM::tADDrSPi) {
+ assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ .addImm(Pred).addReg(PredReg);
+ } else
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
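The tADDrSPi special case exists because, like the SP-relative loads and stores, it encodes a word-scaled immediate; hence the Offset/4 above. A sketch of the operand computation (hypothetical helper name):

    #include <cassert>

    // Rewriting a base of SP+24 builds "add rN, sp, #24" whose encoded
    // immediate is 24 / 4 == 6.
    static unsigned tADDrSPiEncodedImm(int OffsetBytes) {
      assert(OffsetBytes % 4 == 0 && "tADDrSPi offset is scaled by 4");
      return static_cast<unsigned>(OffsetBytes) / 4;
    }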
@@ -643,13 +654,13 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB.addImm(Pred).addReg(PredReg);
- for (unsigned i = 0; i != NumRegs; ++i)
- MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
- | getKillRegState(Regs[i].second));
+ for (const std::pair<unsigned, bool> &R : Regs)
+ MIB = MIB.addReg(R.first, getDefRegState(isDef)
+ | getKillRegState(R.second));
// Add implicit defs for super-registers.
- for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
- MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
return true;
}
@@ -717,7 +728,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
unsigned memOpsBegin, unsigned memOpsEnd,
unsigned insertAfter, int Offset,
unsigned Base, bool BaseKill,
- int Opcode,
+ unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
@@ -816,7 +827,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, int Opcode, unsigned Size,
+ unsigned Base, unsigned Opcode, unsigned Size,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
@@ -906,7 +917,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
- // fallthrough
+ break;
case ARM::tSUBspi:
break;
}
@@ -941,7 +952,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
- // fallthrough
+ break;
case ARM::tADDspi:
break;
}
@@ -969,6 +980,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1095,7 +1108,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
// Can't use an updating ld/st if the base register is also a dest
@@ -1233,7 +1246,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
@@ -1391,7 +1404,7 @@ static bool isMemoryOp(const MachineInstr *MI) {
MI->getOperand(1).isUndef())
return false;
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
case ARM::VLDRS:
@@ -1404,6 +1417,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1580,7 +1595,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
- int CurrOpc = -1;
+ unsigned CurrOpc = ~0u;
unsigned CurrSize = 0;
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned CurrPredReg = 0;
@@ -1595,11 +1610,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool Advance = false;
bool TryMerge = false;
- bool Clobber = false;
bool isMemOp = isMemoryOp(MBBI);
if (isMemOp) {
- int Opcode = MBBI->getOpcode();
+ unsigned Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
@@ -1617,7 +1631,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// looks like the later ldr(s) use the same base register. Try to
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
- Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
// Watch out for:
// r4 := ldr [r0, #8]
@@ -1736,7 +1750,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
CurrBase = 0;
- CurrOpc = -1;
+ CurrOpc = ~0u;
CurrSize = 0;
CurrPred = ARMCC::AL;
CurrPredReg = 0;
@@ -1798,12 +1812,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
- TL = TM.getSubtargetImpl()->getTargetLowering();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getSubtargetImpl()->getInstrInfo();
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- STI = &TM.getSubtarget<ARMSubtarget>();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1813,7 +1826,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= LoadStoreMultipleOpti(MBB);
- if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
}
@@ -1861,10 +1874,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getSubtarget().getDataLayout();
- TII = Fn.getSubtarget().getInstrInfo();
- TRI = Fn.getSubtarget().getRegisterInfo();
+ TD = Fn.getTarget().getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index fd4f5ff..e370b96 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -61,7 +61,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCConstantExpr::Create(MO.getOffset(),
OutContext),
OutContext);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
@@ -74,13 +74,13 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
return false;
assert(!MO.getSubReg() && "Subregs should be eliminated!");
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_GlobalAddress: {
@@ -105,7 +105,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
APFloat Val = MO.getFPImm()->getValueAPF();
bool ignored;
Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
- MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ MCOp = MCOperand::createFPImm(Val.convertToDouble());
break;
}
case MachineOperand::MO_RegisterMask:
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 892b269..f5250ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -14,11 +14,11 @@ using namespace llvm;
void ARMFunctionInfo::anchor() { }
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
- : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
GlobalBaseReg(0) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index ddfdb52..14dd9ef 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -98,10 +98,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// registers also aren't included in DPRCSSize above.
unsigned NumAlignedDPRCS2Regs;
- /// JumpTableUId - Unique id for jumptables.
- ///
- unsigned JumpTableUId;
-
unsigned PICLabelUId;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
@@ -136,8 +132,7 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
- NumAlignedDPRCS2Regs(0),
- JumpTableUId(0), PICLabelUId(0),
+ NumAlignedDPRCS2Regs(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -149,11 +144,7 @@ public:
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
- unsigned getArgRegsSaveSize(unsigned Align = 0) const {
- if (!Align)
- return ArgRegsSaveSize;
- return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
- }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
unsigned getReturnRegsCount() const { return ReturnRegsCount; }
@@ -195,14 +186,6 @@ public:
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
- unsigned createJumpTableUId() {
- return JumpTableUId++;
- }
-
- unsigned getNumJumpTables() const {
- return JumpTableUId;
- }
-
void initPICLabelUId(unsigned UId) {
PICLabelUId = UId;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 1c50f9e..30baf42 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -30,8 +30,6 @@ public:
const char *getPassName() const override {
return "optimise barriers pass";
}
-
-private:
};
char ARMOptimizeBarriersPass::ID = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index 80b4b48..e6e8cdf 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -16,6 +16,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
index b623173..e2e650e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,7 +23,7 @@ class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMSubtarget &STI);
+ ARMRegisterInfo();
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index b290e7f..45cc9ea 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -199,7 +199,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
// Thumb1 instructions that know how to use hi regs.
let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -209,7 +209,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -219,7 +219,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> {
let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -237,7 +237,7 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>;
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
let AltOrderSelect = [{
- return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -255,7 +255,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
let AltOrders = [(and tcGPR, tGPR)];
let AltOrderSelect = [{
- return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index fa30ac3..a59cf98 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -24,6 +24,114 @@ ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
+// Emit, if possible, a specialized version of the given Libcall. Typically this
+// means selecting the appropriately aligned version, but we also convert memset
+// of 0 into memclr.
+SDValue ARMSelectionDAGInfo::
+EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ RTLIB::Libcall LC) const {
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
+ const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
+
+ // Only use a specialized AEABI function if the default version of this
+ // Libcall is an AEABI function.
+ if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
+ return SDValue();
+
+ // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
+ // able to translate memset to memclr and use the value to index the function
+ // name array.
+ enum {
+ AEABI_MEMCPY = 0,
+ AEABI_MEMMOVE,
+ AEABI_MEMSET,
+ AEABI_MEMCLR
+ } AEABILibcall;
+ switch (LC) {
+ case RTLIB::MEMCPY:
+ AEABILibcall = AEABI_MEMCPY;
+ break;
+ case RTLIB::MEMMOVE:
+ AEABILibcall = AEABI_MEMMOVE;
+ break;
+ case RTLIB::MEMSET:
+ AEABILibcall = AEABI_MEMSET;
+ if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
+ if (ConstantSrc->getZExtValue() == 0)
+ AEABILibcall = AEABI_MEMCLR;
+ break;
+ default:
+ return SDValue();
+ }
+
+ // Choose the most-aligned libcall variant that we can
+ enum {
+ ALIGN1 = 0,
+ ALIGN4,
+ ALIGN8
+ } AlignVariant;
+ if ((Align & 7) == 0)
+ AlignVariant = ALIGN8;
+ else if ((Align & 3) == 0)
+ AlignVariant = ALIGN4;
+ else
+ AlignVariant = ALIGN1;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+ if (AEABILibcall == AEABI_MEMCLR) {
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ } else if (AEABILibcall == AEABI_MEMSET) {
+ // Adjust parameters for memset: the EABI uses the format (ptr, size, value),
+ // while the GNU library uses (ptr, value, size).
+ // See RTABI section 4.3.4.
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else if (Src.getValueType().bitsLT(MVT::i32))
+ Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ } else {
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ }
+
+ char const *FunctionNames[4][3] = {
+ { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
+ { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
+ { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
+ { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
+ };
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(LC),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy()), std::move(Args), 0)
+ .setDiscardResult();
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+
+ return CallResult.second;
+}
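The two enums index the name table directly, so the callee name falls out of the (libcall, alignment) pair. A standalone sketch of that selection, mirroring (not reusing) the logic above:

    // Row = AEABI routine, column = alignment variant (1/4/8 bytes).
    static const char *aeabiName(unsigned Call, unsigned Align) {
      static const char *Names[4][3] = {
          {"__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"},
          {"__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8"},
          {"__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"},
          {"__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"}};
      unsigned Variant = (Align % 8 == 0) ? 2 : (Align % 4 == 0) ? 1 : 0;
      return Names[Call][Variant];
    }

For example, a memset of constant 0 with 4-byte alignment selects aeabiName(3, 4) == "__aeabi_memclr4", and per the argument-building code above its call takes only (ptr, size).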
+
SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
@@ -32,7 +140,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -41,10 +150,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
- return SDValue();
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
uint64_t SizeVal = ConstantSize->getZExtValue();
if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
- return SDValue();
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
unsigned BytesLeft = SizeVal & 3;
unsigned NumMemOps = SizeVal >> 2;
@@ -66,7 +177,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
+ DAG.getConstant(SrcOff, dl, MVT::i32)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
false, false, 0);
TFOps[i] = Loads[i].getValue(1);
@@ -79,7 +190,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
+ DAG.getConstant(DstOff, dl, MVT::i32)),
DstPtrInfo.getWithOffset(DstOff),
isVolatile, false, 0);
DstOff += VTSize;
@@ -107,7 +218,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
+ DAG.getConstant(SrcOff, dl, MVT::i32)),
SrcPtrInfo.getWithOffset(SrcOff),
false, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
@@ -131,7 +242,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
+ DAG.getConstant(DstOff, dl, MVT::i32)),
DstPtrInfo.getWithOffset(DstOff), false, false, 0);
++i;
DstOff += VTSize;
@@ -141,59 +252,26 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
makeArrayRef(TFOps, i));
}
-// Adjust parameters for memset, EABI uses format (ptr, size, value),
-// GNU library uses (ptr, value, size)
-// See RTABI section 4.3.4
+
+SDValue ARMSelectionDAGInfo::
+EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMMOVE);
+}
+
+
SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
- // Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
- Subtarget.isTargetWindows())
- return SDValue();
-
- const ARMTargetLowering &TLI =
- *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- // First argument: data pointer
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- // Second argument: buffer size
- Entry.Node = Size;
- Entry.Ty = IntPtrTy;
- Entry.isSExt = false;
- Args.push_back(Entry);
-
- // Extend or truncate the argument to be an i32 value for the call.
- if (Src.getValueType().bitsGT(MVT::i32))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
- else
- Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
-
- // Third argument: value to fill
- Entry.Node = Src;
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.isSExt = true;
- Args.push_back(Entry);
-
- // Emit __eabi_memset call
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
-
- std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
- return CallResult.second;
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMSET);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 94b98e6..1db190f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -48,6 +48,13 @@ public:
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
+ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
// Adjust parameters for memset, see RTABI section 4.3.4
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
@@ -55,6 +62,12 @@ public:
SDValue Op3, unsigned Align,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
+
+ SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ RTLIB::Libcall LC) const;
};
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 311afe9..f20318d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -88,56 +88,6 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
-static std::string computeDataLayout(ARMSubtarget &ST) {
- std::string Ret = "";
-
- if (ST.isLittle())
- // Little endian.
- Ret += "e";
- else
- // Big endian.
- Ret += "E";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
-
- // Pointers are 32 bits and aligned to 32 bits.
- Ret += "-p:32:32";
-
- // ABIs other than APCS have 64 bit integers with natural alignment.
- if (!ST.isAPCS_ABI())
- Ret += "-i64:64";
-
- // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
- // bits, others to 64 bits. We always try to align to 64 bits.
- if (ST.isAPCS_ABI())
- Ret += "-f64:32:64";
-
- // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
- // to 64. We always ty to give them natural alignment.
- if (ST.isAPCS_ABI())
- Ret += "-v64:32:64-v128:32:128";
- else
- Ret += "-v128:64:128";
-
- // Try to align aggregates to 32 bits (the default is 64 bits, which has no
- // particular hardware support on 32-bit ARM).
- Ret += "-a:0:32";
-
- // Integer registers are 32 bits.
- Ret += "-n32";
-
- // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
- // aligned everywhere else.
- if (ST.isTargetNaCl())
- Ret += "-S128";
- else if (ST.isAAPCS_ABI())
- Ret += "-S64";
- else
- Ret += "-S32";
-
- return Ret;
-}
-
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -147,23 +97,31 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
+ StringRef FS) {
+ ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
+ if (STI.isThumb1Only())
+ return (ARMFrameLowering *)new Thumb1FrameLowering(STI);
+
+ return new ARMFrameLowering(STI);
+}
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const ARMBaseTargetMachine &TM,
- bool IsLittle)
+ const std::string &FS,
+ const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
TargetTriple(TT), Options(TM.Options), TM(TM),
- DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
- TSInfo(DL),
+ TSInfo(*TM.getDataLayout()),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
+ // At this point initializeSubtargetDependencies has been called so
+ // we can query directly.
InstrInfo(isThumb1Only()
? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM),
- FrameLowering(!isThumb1Only()
- ? new ARMFrameLowering(*this)
- : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
+ TLInfo(TM, *this) {}
void ARMSubtarget::initializeEnvironment() {
HasV4TOps = false;
@@ -171,9 +129,11 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TEOps = false;
HasV6Ops = false;
HasV6MOps = false;
+ HasV6KOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
+ HasV8_1aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -185,6 +145,7 @@ void ARMSubtarget::initializeEnvironment() {
HasVMLxForwarding = false;
SlowFPBrcc = false;
InThumbMode = false;
+ UseSoftFloat = false;
HasThumb2 = false;
NoARM = false;
IsR9Reserved = ReserveR9;
@@ -230,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -293,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() ? true : false;
+ RestrictIT = hasV8Ops();
break;
case RestrictedIT:
RestrictIT = true;
@@ -304,8 +265,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
- uint64_t Bits = getFeatureBits();
- if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ const FeatureBitset &Bits = getFeatureBits();
+ if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
(Options.UnsafeFPMath || isTargetDarwin()))
UseNEONForSinglePrecisionFP = true;
}
@@ -390,6 +351,12 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return UseMovt && (isTargetWindows() ||
- !MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize));
+ !MF.getFunction()->hasFnAttribute(Attribute::MinSize));
+}
+
+bool ARMSubtarget::useFastISel() const {
+ // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
+ return TM.Options.EnableFastISel &&
+ ((isTargetMachO() && !isThumb1Only()) ||
+ (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
}
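Restating the new predicate as a truth function (a hypothetical standalone paraphrase, not the actual API):

    // Fast-isel requires EnableFastISel, then: MachO targets allow ARM and
    // Thumb2 but not Thumb1; Linux and NaCl targets allow ARM mode only.
    static bool wouldUseFastISel(bool Enable, bool MachO, bool Linux,
                                 bool NaCl, bool Thumb, bool Thumb1Only) {
      return Enable && ((MachO && !Thumb1Only) ||
                        (Linux && !Thumb) || (NaCl && !Thumb));
    }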
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index dbacd4d..77ceb08 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -56,16 +56,18 @@ protected:
ARMProcClassEnum ARMProcClass;
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
bool HasV6MOps;
+ bool HasV6KOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
+ bool HasV8_1aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -98,6 +100,9 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
+ /// UseSoftFloat - True if we're using software floating point features.
+ bool UseSoftFloat;
+
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
@@ -248,7 +253,6 @@ public:
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- const DataLayout *getDataLayout() const override { return &DL; }
const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -266,16 +270,17 @@ public:
}
private:
- const DataLayout DL;
ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
// Either Thumb1InstrInfo or Thumb2InstrInfo.
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
- // Either Thumb1FrameLowering or ARMFrameLowering.
- std::unique_ptr<ARMFrameLowering> FrameLowering;
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
+ ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
+
public:
void computeIssueWidth();
@@ -284,9 +289,11 @@ public:
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
bool hasV6MOps() const { return HasV6MOps; }
+ bool hasV6KOps() const { return HasV6KOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -310,7 +317,8 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
- return hasNEON() && UseNEONForSinglePrecisionFP; }
+ return hasNEON() && UseNEONForSinglePrecisionFP;
+ }
bool hasDivide() const { return HasHardwareDivide; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
@@ -388,6 +396,7 @@ public:
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
+ bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
@@ -441,6 +450,8 @@ public:
/// symbol.
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+ /// True if fast-isel is used.
+ bool useFastISel() const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 7a8181b..e794fb7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -14,10 +14,11 @@
#include "ARMFrameLowering.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
+#include "ARMTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -36,6 +37,16 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
" to make use of cmpxchg flow-based information"),
cl::init(true));
+static cl::opt<bool>
+EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
+ cl::desc("Enable ARM load/store optimization pass"),
+ cl::init(true));
+
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("arm-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -104,6 +115,60 @@ computeTargetABI(const Triple &TT, StringRef CPU,
return TargetABI;
}
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
+ bool isLittle) {
+ const Triple Triple(TT);
+ auto ABI = computeTargetABI(Triple, CPU, Options);
+ std::string Ret = "";
+
+ if (isLittle)
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += DataLayout::getManglingComponent(Triple);
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+ // ABIs other than APCS have 64-bit integers with natural alignment.
+ if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-i64:64";
+
+ // We have 64-bit floats. The APCS ABI requires them to be aligned to 32
+ // bits, others to 64 bits. We always try to align to 64 bits.
+ if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-f64:32:64";
+
+ // We have 128- and 64-bit vectors. The APCS ABI aligns them to 32 bits, others
+ // to 64. We always try to give them natural alignment.
+ if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
+ Ret += "-v64:32:64-v128:32:128";
+ else
+ Ret += "-v128:64:128";
+
+ // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+ // particular hardware support on 32-bit ARM).
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+ // The stack is 128-bit aligned on NaCl, 64-bit aligned on AAPCS, and 32-bit
+ // aligned everywhere else.
+ if (Triple.isOSNaCl())
+ Ret += "-S128";
+ else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
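As a concrete illustration, and assuming DataLayout::getManglingComponent yields "-m:e" for ELF triples, the function produces strings such as:

    // AAPCS, little-endian ELF:
    //   e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64
    // APCS, little-endian ELF:
    //   e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32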
+
/// TargetMachine ctor - Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
@@ -111,7 +176,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -126,11 +192,8 @@ ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -144,14 +207,15 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function before we can generate a subtarget. We also need to use
// it as a key for the subtarget since that can be the only difference
// between two functions.
- Attribute SFAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
- bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
- ? SFAttr.getValueAsString() == "true"
- : Options.UseSoftFloat;
-
- auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
- : "use-soft-float=false")];
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function, turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
+ auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
@@ -162,12 +226,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
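
The net effect of the hunk above is that the soft-float request is folded into the feature string itself, so the subtarget cache key no longer needs a separate use-soft-float component. A self-contained sketch of that keying scheme, with an int standing in for the ARMSubtarget:

#include <cassert>
#include <map>
#include <memory>
#include <string>

int main() {
  std::string CPU = "cortex-a9";  // assumed function attributes
  std::string FS = "+neon";
  bool SoftFloat = true;          // use-soft-float="true" on the function
  if (SoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";
  assert(FS == "+neon,+soft-float");

  // CPU + FS is now the whole cache key; soft-float functions get their
  // own subtarget because their feature string differs.
  std::map<std::string, std::unique_ptr<int>> SubtargetMap;
  auto &I = SubtargetMap[CPU + FS];
  if (!I)
    I = std::make_unique<int>(0);  // stands in for the ARMSubtarget
  return 0;
}
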
-void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our ARM pass. This
- // allows the ARM pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createARMTargetTransformInfoPass(this));
+TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); });
}
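
Sketched with standard types only, the shape of the new hook is: rather than registering two ImmutablePasses up front, the target returns one factory that is invoked per function, so the resulting TTI can reflect per-function attributes (target-cpu, target-features, use-soft-float). Names here are illustrative, not LLVM API:

#include <cassert>
#include <functional>
#include <string>

struct SketchTTI { std::string Fn; };

static std::function<SketchTTI(const std::string &)> getTargetIRAnalysis() {
  // One callback, one fresh result per queried function.
  return [](const std::string &F) { return SketchTTI{F}; };
}

int main() {
  auto Analysis = getTargetIRAnalysis();
  assert(Analysis("foo").Fn == "foo");
  return 0;
}
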
@@ -278,8 +339,15 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
+ // FIXME: This is using the Thumb1-only constant value for the
+ // maximal global offset for merging globals. We may want to look
+ // into using the old value of 4095 for non-Thumb1 code based on
+ // the TargetMachine, but this starts to become tricky when doing
+ // code gen per function.
+ addPass(createGlobalMergePass(TM, 127));
return false;
}
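
EnableGlobalMerge is a cl::boolOrDefault, so the condition above is a tri-state: an explicit -arm-global-merge forces the pass on, an explicit =false keeps it off, and leaving it unset defers to the optimization level (Aggressive, i.e. -O3). A standalone restatement of that predicate:

#include <cassert>

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

static bool shouldRunGlobalMerge(BoolOrDefault Opt, bool Aggressive) {
  // Mirrors the addPreISel condition above.
  return (Aggressive && Opt == BOU_UNSET) || Opt == BOU_TRUE;
}

int main() {
  assert(shouldRunGlobalMerge(BOU_UNSET, /*Aggressive=*/true));
  assert(!shouldRunGlobalMerge(BOU_UNSET, /*Aggressive=*/false));
  assert(shouldRunGlobalMerge(BOU_TRUE, /*Aggressive=*/false));
  assert(!shouldRunGlobalMerge(BOU_FALSE, /*Aggressive=*/true));
  return 0;
}
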
@@ -287,32 +355,30 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
- const ARMSubtarget *Subtarget = &getARMSubtarget();
- if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
TM->Options.EnableFastISel)
addPass(createARMGlobalBaseRegPass());
return false;
}
void ARMPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
- // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
- // enabled when NEON is available.
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
- getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
- addPass(createA15SDOptimizerPass());
+
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
+
+ if (!DisableA15SDOptimization)
+ addPass(createA15SDOptimizerPass());
}
}
void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
- addPass(createARMLoadStoreOptimizationPass());
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass());
- if (getARMSubtarget().hasNEON())
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -320,27 +386,25 @@ void ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- // in v8, IfConversion depends on Thumb instruction widths
- if (getARMSubtarget().restrictIT() &&
- !getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ // In v8, IfConversion depends on Thumb instruction widths.
+ if (getARMSubtarget().restrictIT())
+ addPass(createThumb2SizeReductionPass());
+ if (!getARMSubtarget().isThumb1Only())
addPass(&IfConverterID);
- }
}
- if (getARMSubtarget().isThumb2())
- addPass(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
}
void ARMPassConfig::addPreEmitPass() {
- if (getARMSubtarget().isThumb2()) {
- if (!getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
- // Constant island pass work on unbundled instructions.
+ // Constant island pass works on unbundled instructions.
+ if (getARMSubtarget().isThumb2())
addPass(&UnpackMachineBundlesID);
- }
- addPass(createARMOptimizeBarriersPass());
+ // Don't optimize barriers at -O0.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createARMOptimizeBarriersPass());
+
addPass(createARMConstantIslandPass());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 18cf5fa..20ca97b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -44,11 +44,12 @@ public:
bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ bool isLittleEndian() const { return isLittle; }
- /// \brief Register ARM analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ /// \brief Get the TargetIRAnalysis for this target.
+ TargetIRAnalysis getTargetIRAnalysis() override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index 48238bf..80f03c6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetObjectFile.h"
-#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -27,7 +27,8 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+ bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI ==
+ ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -36,10 +37,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
}
AttributesSection =
- getContext().getELFSection(".ARM.attributes",
- ELF::SHT_ARM_ATTRIBUTES,
- 0,
- SectionKind::getMetadata());
+ getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ec834e8..4e1b371 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,17 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// ARM target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "ARM.h"
-#include "ARMTargetMachine.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "ARMTargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -24,132 +15,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeARMTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
- const ARMBaseTargetMachine *TM;
- const ARMSubtarget *ST;
- const ARMTargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- ARMTTI(const ARMBaseTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeARMTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- using TargetTransformInfo::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-
- /// @}
-
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 16;
- return 0;
- }
-
- if (ST->isThumb1Only())
- return 8;
- return 13;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector) {
- if (ST->hasNEON())
- return 128;
- return 0;
- }
-
- return 32;
- }
-
- unsigned getMaxInterleaveFactor() const override {
- // These are out of order CPUs:
- if (ST->isCortexA15() || ST->isSwift())
- return 2;
- return 1;
- }
-
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
-
- unsigned getAddressComputationCost(Type *Val,
- bool IsComplex) const override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
- OperandValueKind Op2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
- "ARM Target Transform Info", true, true, false)
-char ARMTTI::ID = 0;
-
-ImmutablePass *
-llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
- return new ARMTTI(TM);
-}
-
-
-unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -181,8 +47,7 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return 3;
}
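
The body of getIntImmCost is mostly elided from this hunk, but what it prices is whether an immediate fits ARM's encodings. As a rough, self-contained model of the key check (an assumption about the general shape, not a copy of the elided code): a "modified immediate" is an 8-bit value rotated right by an even amount, and anything encodable that way costs one instruction.

#include <cassert>
#include <cstdint>

static bool isARMModImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t R = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if ((R & ~0xFFu) == 0)
      return true;
  }
  return false;
}

int main() {
  assert(isARMModImm(255));          // fits in 8 bits directly
  assert(isARMModImm(0xFF000000u));  // 0xFF rotated right by 8
  assert(!isARMModImm(0x12345678u)); // needs movw/movt or a load
  return 0;
}
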
-unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
+unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -206,7 +71,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
EVT DstTy = TLI->getValueType(Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
@@ -377,11 +242,11 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) const {
+unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on Swift.
if (ST->isSwift() &&
@@ -397,11 +262,11 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getVectorElementType()->isIntegerTy())
return 3;
- return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON, a vector select gets lowered to vbsl.
@@ -431,10 +296,10 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return LT.first;
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -449,13 +314,32 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 1;
}
-unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
+unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+ // Use similar logic that's in ARMISelLowering:
+ // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
+ // to VFP.
+
+ if (ST->hasVFP2() && !ST->isThumb1Only()) {
+ if (Ty->isFloatTy()) {
+ return TargetTransformInfo::TCC_Basic;
+ }
+
+ if (Ty->isDoubleTy()) {
+ return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
+ TargetTransformInfo::TCC_Basic;
+ }
+ }
+
+ return TargetTransformInfo::TCC_Expensive;
+}
+
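
A standalone restatement of the getFPOpCost logic just added, with the subtarget flags reduced to plain bools for illustration. The TCC values mirror TargetTransformInfo's cost constants in this LLVM version (TCC_Basic = 1, TCC_Expensive = 4); treat the exact numbers as an assumption.

#include <cassert>

enum Cost { TCC_Basic = 1, TCC_Expensive = 4 };

static Cost fpOpCost(bool HasVFP2, bool Thumb1Only, bool FPOnlySP,
                     bool IsDouble) {
  if (HasVFP2 && !Thumb1Only) {
    if (!IsDouble)
      return TCC_Basic;                           // f32 is native on VFP2
    return FPOnlySP ? TCC_Expensive : TCC_Basic;  // f64 on single-prec FPUs
  }
  return TCC_Expensive;  // no usable FPU: expect library calls
}

int main() {
  // e.g. a single-precision-only FPU doing double arithmetic is expensive.
  assert(fpOpCost(true, false, true, true) == TCC_Expensive);
  assert(fpOpCost(true, false, false, true) == TCC_Basic);
  assert(fpOpCost(false, true, false, false) == TCC_Expensive);
  return 0;
}
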
+unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
- if (Kind != SK_Reverse && Kind != SK_Alternate)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- if (Kind == SK_Reverse) {
+ if (Kind == TTI::SK_Reverse) {
static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
@@ -473,11 +357,11 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
return LT.first * NEONShuffleTbl[Idx].Cost;
}
- if (Kind == SK_Alternate) {
+ if (Kind == TTI::SK_Alternate) {
static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
@@ -499,16 +383,16 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
int Idx =
CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
return LT.first * NEONAltShuffleTbl[Idx].Cost;
}
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
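
The shuffle costs above all flow through the same CostTableLookup pattern: pair a simple value type with an instruction count, scan linearly, and fall back to the base implementation on a miss (-1). A minimal, self-contained sketch of that pattern, with hypothetical enum values standing in for MVTs:

#include <cassert>

enum SimpleVT { v8i8, v2i32, v16i8, v4i32 };
struct CostTblEntry { SimpleVT Ty; unsigned Cost; };

static int costTableLookup(const CostTblEntry *Tbl, unsigned Len,
                           SimpleVT Ty) {
  for (unsigned I = 0; I != Len; ++I)
    if (Tbl[I].Ty == Ty)
      return static_cast<int>(I);
  return -1;  // caller falls back to BaseT
}

int main() {
  // Reverse shuffle: one vrev within a doubleword, vrev+vext for a quad.
  static const CostTblEntry NEONShuffleTbl[] = {
      {v8i8, 1}, {v2i32, 1}, {v16i8, 2}, {v4i32, 2}};
  int Idx = costTableLookup(NEONShuffleTbl, 4, v4i32);
  assert(Idx != -1 && NEONShuffleTbl[Idx].Cost == 2);
  assert(costTableLookup(NEONShuffleTbl, 4, v8i8) == 0);
  return 0;
}
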
-unsigned ARMTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned ARMTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -564,8 +448,8 @@ unsigned ARMTTI::getArithmeticInstrCost(
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
- unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
+ unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem we are facing is that SROA
// creates a sequence of shift, AND, and OR instructions to construct values.
@@ -581,8 +465,9 @@ unsigned ARMTTI::getArithmeticInstrCost(
return Cost;
}
-unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
if (Src->isVectorTy() && Alignment != 16 &&
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
new file mode 100644
index 0000000..9479d76
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -0,0 +1,134 @@
+//===-- ARMTargetTransformInfo.h - ARM specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a TargetTransformInfo::Concept conforming object
+/// specific to the ARM target machine. It uses the target's detailed
+/// information to provide more precise answers to certain TTI queries,
+/// while letting the target-independent and default TTI implementations
+/// handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
+ typedef BasicTTIImplBase<ARMTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const ARMSubtarget *getST() const { return ST; }
+ const ARMTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ ARMTTIImpl(const ARMTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ ARMTTIImpl(ARMTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ ARMTTIImpl &operator=(const ARMTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ ARMTTIImpl &operator=(ARMTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 13;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaxInterleaveFactor(unsigned VF) {
+ // These are out-of-order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getAddressComputationCost(Type *Val, bool IsComplex);
+
+ unsigned getFPOpCost(Type *Ty);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 56de9d2..30c7d62 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,10 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMFPUName.h"
#include "ARMFeatures.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMArchName.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,6 +37,7 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -244,37 +243,40 @@ class ARMAsmParser : public MCTargetAsmParser {
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
- return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ return STI.getFeatureBits()[ARM::ModeThumb];
}
bool isThumbOne() const {
- return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2];
}
bool isThumbTwo() const {
- return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2);
+ return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2];
}
bool hasThumb() const {
- return STI.getFeatureBits() & ARM::HasV4TOps;
+ return STI.getFeatureBits()[ARM::HasV4TOps];
}
bool hasV6Ops() const {
- return STI.getFeatureBits() & ARM::HasV6Ops;
+ return STI.getFeatureBits()[ARM::HasV6Ops];
}
bool hasV6MOps() const {
- return STI.getFeatureBits() & ARM::HasV6MOps;
+ return STI.getFeatureBits()[ARM::HasV6MOps];
}
bool hasV7Ops() const {
- return STI.getFeatureBits() & ARM::HasV7Ops;
+ return STI.getFeatureBits()[ARM::HasV7Ops];
}
bool hasV8Ops() const {
- return STI.getFeatureBits() & ARM::HasV8Ops;
+ return STI.getFeatureBits()[ARM::HasV8Ops];
}
bool hasARM() const {
- return !(STI.getFeatureBits() & ARM::FeatureNoARM);
+ return !STI.getFeatureBits()[ARM::FeatureNoARM];
}
bool hasThumb2DSP() const {
- return STI.getFeatureBits() & ARM::FeatureDSPThumb2;
+ return STI.getFeatureBits()[ARM::FeatureDSPThumb2];
}
bool hasD16() const {
- return STI.getFeatureBits() & ARM::FeatureD16;
+ return STI.getFeatureBits()[ARM::FeatureD16];
+ }
+ bool hasV8_1aOps() const {
+ return STI.getFeatureBits()[ARM::HasV8_1aOps];
}
void SwitchMode() {
@@ -282,7 +284,7 @@ class ARMAsmParser : public MCTargetAsmParser {
setAvailableFeatures(FB);
}
bool isMClass() const {
- return STI.getFeatureBits() & ARM::FeatureMClass;
+ return STI.getFeatureBits()[ARM::FeatureMClass];
}
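
The hunks above replace the old bitmask test, `STI.getFeatureBits() & ARM::Flag` on a 64-bit integer, with indexed access, `STI.getFeatureBits()[ARM::Flag]`: the feature bits became a std::bitset-like type (FeatureBitset), presumably so targets can grow past 64 features. A self-contained sketch of the new query style using std::bitset:

#include <bitset>
#include <cassert>

enum Feature { ModeThumb, FeatureThumb2, FeatureNoARM, NumFeatures };

int main() {
  std::bitset<NumFeatures> Bits;  // stand-in for MC's FeatureBitset
  Bits.set(ModeThumb);
  bool IsThumb = Bits[ModeThumb];                     // new-style query
  bool IsThumbOne = IsThumb && !Bits[FeatureThumb2];  // was (Bits & Mask) == 0
  assert(IsThumb && IsThumbOne);
  return 0;
}
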
/// @name Auto-generated Match Functions
@@ -342,10 +344,10 @@ public:
};
- ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser,
+ ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
- MCAsmParserExtension::Initialize(_Parser);
+ : STI(STI), MII(MII), UC(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
MRI = getContext().getRegisterInfo();
@@ -1747,62 +1749,62 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
- Inst.addOperand(MCOperand::CreateReg(RegNum));
+ Inst.addOperand(MCOperand::createReg(RegNum));
}
void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ Inst.addOperand(MCOperand::createImm(getCoproc()));
}
void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ Inst.addOperand(MCOperand::createImm(getCoproc()));
}
void addCoprocOptionOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val));
+ Inst.addOperand(MCOperand::createImm(CoprocOption.Val));
}
void addITMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(ITMask.Mask));
+ Inst.addOperand(MCOperand::createImm(ITMask.Mask));
}
void addITCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
}
void addCCOutOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
assert(isRegShiftedReg() &&
"addRegShiftedRegOperands() on non-RegShiftedReg!");
- Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
- Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
- Inst.addOperand(MCOperand::CreateImm(
+ Inst.addOperand(MCOperand::createReg(RegShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::createReg(RegShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::createImm(
ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm)));
}
@@ -1810,16 +1812,16 @@ public:
assert(N == 2 && "Invalid number of operands!");
assert(isRegShiftedImm() &&
"addRegShiftedImmOperands() on non-RegShiftedImm!");
- Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
+ Inst.addOperand(MCOperand::createReg(RegShiftedImm.SrcReg));
// Shift of #32 is encoded as 0 where permitted
unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm);
- Inst.addOperand(MCOperand::CreateImm(
+ Inst.addOperand(MCOperand::createImm(
ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm)));
}
void addShifterImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) |
+ Inst.addOperand(MCOperand::createImm((ShifterImm.isASR << 5) |
ShifterImm.Imm));
}
@@ -1828,7 +1830,7 @@ public:
const SmallVectorImpl<unsigned> &RegList = getRegList();
for (SmallVectorImpl<unsigned>::const_iterator
I = RegList.begin(), E = RegList.end(); I != E; ++I)
- Inst.addOperand(MCOperand::CreateReg(*I));
+ Inst.addOperand(MCOperand::createReg(*I));
}
void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
@@ -1842,7 +1844,7 @@ public:
void addRotImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Encoded as val>>3. The printer handles display as 8, 16, 24.
- Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3));
+ Inst.addOperand(MCOperand::createImm(RotImm.Imm >> 3));
}
void addModImmOperands(MCInst &Inst, unsigned N) const {
@@ -1852,21 +1854,21 @@ public:
if (isImm())
return addImmOperands(Inst, N);
- Inst.addOperand(MCOperand::CreateImm(ModImm.Bits | (ModImm.Rot << 7)));
+ Inst.addOperand(MCOperand::createImm(ModImm.Bits | (ModImm.Rot << 7)));
}
void addModImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue());
- Inst.addOperand(MCOperand::CreateImm(Enc));
+ Inst.addOperand(MCOperand::createImm(Enc));
}
void addModImmNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue());
- Inst.addOperand(MCOperand::CreateImm(Enc));
+ Inst.addOperand(MCOperand::createImm(Enc));
}
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
@@ -1877,7 +1879,7 @@ public:
// Make a 32-bit mask w/ the referenced bits clear and all other bits set.
uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >>
(32 - (lsb + width)));
- Inst.addOperand(MCOperand::CreateImm(Mask));
+ Inst.addOperand(MCOperand::createImm(Mask));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -1888,20 +1890,20 @@ public:
void addFBits16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(16 - CE->getValue()));
}
void addFBits32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(32 - CE->getValue()));
}
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addImm8s4Operands(MCInst &Inst, unsigned N) const {
@@ -1909,7 +1911,7 @@ public:
// FIXME: We really want to scale the value here, but the LDRD/STRD
// instructions don't encode operands that way yet.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
@@ -1917,7 +1919,7 @@ public:
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
}
void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
@@ -1925,7 +1927,7 @@ public:
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4)));
+ Inst.addOperand(MCOperand::createImm(-(CE->getValue() / 4)));
}
void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
@@ -1933,7 +1935,7 @@ public:
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() / 4));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
@@ -1941,7 +1943,7 @@ public:
// The constant encodes as the immediate-1, and we store in the instruction
// the bits as encoded, so subtract off one here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
}
void addImm1_32Operands(MCInst &Inst, unsigned N) const {
@@ -1949,7 +1951,7 @@ public:
// The constant encodes as the immediate-1, and we store in the instruction
// the bits as encoded, so subtract off one here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() - 1));
}
void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
@@ -1958,7 +1960,7 @@ public:
// zero.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Imm = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
+ Inst.addOperand(MCOperand::createImm((Imm == 32 ? 0 : Imm)));
}
void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
@@ -1967,7 +1969,7 @@ public:
// the instruction as well.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
int Val = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
+ Inst.addOperand(MCOperand::createImm(Val == 32 ? 0 : Val));
}
void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
@@ -1975,7 +1977,7 @@ public:
// The operand is actually a t2_so_imm, but we have its bitwise
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(~CE->getValue()));
}
void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
@@ -1983,7 +1985,7 @@ public:
// The operand is actually a t2_so_imm, but we have its
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(-CE->getValue()));
}
void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
@@ -1991,18 +1993,18 @@ public:
// The operand is actually an imm0_4095, but we have its
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(-CE->getValue()));
}
void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() >> 2));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() >> 2));
return;
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
assert(SR && "Unknown value type!");
- Inst.addOperand(MCOperand::CreateExpr(SR));
+ Inst.addOperand(MCOperand::createExpr(SR));
}
void addThumbMemPCOperands(MCInst &Inst, unsigned N) const {
@@ -2010,40 +2012,40 @@ public:
if (isImm()) {
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (CE) {
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
return;
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
assert(SR && "Unknown value type!");
- Inst.addOperand(MCOperand::CreateExpr(SR));
+ Inst.addOperand(MCOperand::createExpr(SR));
return;
}
assert(isMem() && "Unknown value type!");
assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!");
- Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue()));
+ Inst.addOperand(MCOperand::createImm(Memory.OffsetImm->getValue()));
}
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getMemBarrierOpt())));
}
void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getInstSyncBarrierOpt())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getInstSyncBarrierOpt())));
}
void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
}
void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int32_t Imm = Memory.OffsetImm->getValue();
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
@@ -2053,19 +2055,19 @@ public:
// If we have an immediate that's not a constant, treat it as a label
// reference needing a fixup.
if (!isa<MCConstantExpr>(getImm())) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
return;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
int Val = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Memory.Alignment));
}
void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
@@ -2127,9 +2129,9 @@ public:
Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
Memory.ShiftImm, Memory.ShiftType);
}
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const {
@@ -2142,8 +2144,8 @@ public:
if (Val == INT32_MIN) Val = 0;
if (Val < 0) Val = -Val;
Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift);
- Inst.addOperand(MCOperand::CreateReg(0));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
@@ -2152,9 +2154,9 @@ public:
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
if (isImm()) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
- Inst.addOperand(MCOperand::CreateReg(0));
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(0));
return;
}
@@ -2170,9 +2172,9 @@ public:
// here.
Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0);
}
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addAM3OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2180,8 +2182,8 @@ public:
if (Kind == k_PostIndexRegister) {
int32_t Val =
ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0);
- Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
return;
}
@@ -2193,8 +2195,8 @@ public:
if (Val == INT32_MIN) Val = 0;
if (Val < 0) Val = -Val;
Val = ARM_AM::getAM3Opc(AddSub, Val);
- Inst.addOperand(MCOperand::CreateReg(0));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
@@ -2203,8 +2205,8 @@ public:
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
if (isImm()) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
return;
}
@@ -2215,8 +2217,8 @@ public:
if (Val == INT32_MIN) Val = 0;
if (Val < 0) Val = -Val;
Val = ARM_AM::getAM5Opc(AddSub, Val);
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2225,29 +2227,29 @@ public:
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
if (isImm()) {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
return;
}
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// The lower two bits are always zero and as such are not encoded.
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2263,14 +2265,14 @@ public:
// If this is an immediate, it's a label reference.
if (isImm()) {
addExpr(Inst, getImm());
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
return;
}
// Otherwise, it's a normal memory reg+offset.
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2278,26 +2280,26 @@ public:
// If this is an immediate, it's a label reference.
if (isImm()) {
addExpr(Inst, getImm());
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
return;
}
// Otherwise, it's a normal memory reg+offset.
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemTBBOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
}
void addMemTBHOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
}
void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2305,50 +2307,50 @@ public:
unsigned Val =
ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
Memory.ShiftImm, Memory.ShiftType);
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
- Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createImm(Memory.ShiftImm));
}
void addMemThumbRROperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
}
void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0;
- Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
}
void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const {
@@ -2359,7 +2361,7 @@ public:
bool isAdd = Imm >= 0;
if (Imm == INT32_MIN) Imm = 0;
Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8;
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const {
@@ -2371,65 +2373,65 @@ public:
if (Imm == INT32_MIN) Imm = 0;
// Immediate is scaled by 4.
Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8;
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addPostIdxRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
- Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd));
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::createImm(PostIdxReg.isAdd));
}
void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum));
+ Inst.addOperand(MCOperand::createReg(PostIdxReg.RegNum));
// The sign, shift type, and shift amount are encoded in a single operand
// using the AM2 encoding helpers.
ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub;
unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm,
PostIdxReg.ShiftTy);
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
}
void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask())));
}
void addBankedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getBankedReg())));
}
void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ Inst.addOperand(MCOperand::createImm(unsigned(getProcIFlags())));
}
void addVecListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
}
void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
+ Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::createImm(VectorList.LaneIndex));
}
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndex16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addVectorIndex32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
@@ -2437,7 +2439,7 @@ public:
// The immediate encodes the type of constant as well as the value.
// Mask in that this is an i8 splat.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00));
+ Inst.addOperand(MCOperand::createImm(CE->getValue() | 0xe00));
}
void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
@@ -2446,7 +2448,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi16splat(Value);
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const {
@@ -2455,7 +2457,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff);
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
@@ -2464,7 +2466,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi32splat(Value);
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const {
@@ -2473,7 +2475,7 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
Value = ARM_AM::encodeNEONi32splat(~Value);
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const {
@@ -2487,7 +2489,7 @@ public:
"always must be replaced with VMOVv8i8 or VMOVv16i8.");
unsigned B = ((~Value) & 0xff);
B |= 0xe00; // cmode = 0b1110
- Inst.addOperand(MCOperand::CreateImm(B));
+ Inst.addOperand(MCOperand::createImm(B));
}
void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -2500,7 +2502,7 @@ public:
Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
else if (Value > 0xffffff)
Value = (Value >> 24) | 0x600;
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const {
@@ -2514,7 +2516,7 @@ public:
"always must be replaced with VMOVv8i8 or VMOVv16i8.");
unsigned B = Value & 0xff;
B |= 0xe00; // cmode = 0b1110
- Inst.addOperand(MCOperand::CreateImm(B));
+ Inst.addOperand(MCOperand::createImm(B));
}
void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -2527,7 +2529,7 @@ public:
Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
else if (Value > 0xffffff)
Value = (Value >> 24) | 0x600;
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
}
void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
@@ -2539,7 +2541,7 @@ public:
for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
Imm |= (Value & 1) << i;
}
- Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00));
+ Inst.addOperand(MCOperand::createImm(Imm | 0x1e00));
}
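
The i64 splat handler gathers one bit per byte: for a well-formed i64 splat constant every byte is 0x00 or 0xFF, so bit 0 of each byte reconstructs an 8-bit mask, which the code then tags with 0x1e00. A self-contained sketch of the visible loop:

    #include <cassert>
    #include <cstdint>

    // Bit i of the result is bit 0 of byte i of Value; for a valid i64
    // splat each byte is 0x00 or 0xFF, so this recovers the byte mask.
    static uint32_t gatherByteMask(uint64_t Value) {
      uint32_t Imm = 0;
      for (unsigned i = 0; i < 8; ++i, Value >>= 8)
        Imm |= (Value & 1) << i;
      return Imm | 0x1e00; // cmode/op tag used by the splat encoding
    }

    int main() {
      assert(gatherByteMask(0x00FF00FF00FF00FFULL) == (0x55 | 0x1e00));
      assert(gatherByteMask(0xFFFFFFFF00000000ULL) == (0xF0 | 0x1e00));
      return 0;
    }
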
void print(raw_ostream &OS) const override;
@@ -2868,7 +2870,7 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<banked reg: " << getBankedReg() << ">";
break;
case k_Immediate:
- getImm()->print(OS);
+ OS << *getImm();
break;
case k_MemBarrierOpt:
OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">";
@@ -4424,11 +4426,6 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
if (CE) {
// Immediate must fit within 32-bits
Imm1 = CE->getValue();
- if (Imm1 < INT32_MIN || Imm1 > UINT32_MAX) {
- Error(Sx1, "immediate operand must be representable with 32 bits");
- return MatchOperand_ParseFail;
- }
-
int Enc = ARM_AM::getSOImmVal(Imm1);
if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) {
// We have a match!
@@ -5420,47 +5417,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
/// inclusion of carry set or predication code operands.
//
// FIXME: It would be nice to autogen this.
-void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
- bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
- if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ CanAcceptCarrySet =
+ Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
- Mnemonic == "add" || Mnemonic == "adc" ||
- Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
- Mnemonic == "orr" || Mnemonic == "mvn" ||
- Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
- Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
- Mnemonic == "vfm" || Mnemonic == "vfnm" ||
- (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
- Mnemonic == "mla" || Mnemonic == "smlal" ||
- Mnemonic == "umlal" || Mnemonic == "umull"))) {
- CanAcceptCarrySet = true;
- } else
- CanAcceptCarrySet = false;
+ Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" ||
+ Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" ||
+ Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" ||
+ Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" ||
+ Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" ||
+ (!isThumb() &&
+ (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" ||
+ Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"));
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
- Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
+ Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
- Mnemonic.startswith("vsel") ||
- Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
- Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
- Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" ||
+ Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" ||
+ Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" ||
+ Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
+ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
// Some instructions are only predicable in Thumb mode
- CanAcceptPredicationCode
- = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
+ CanAcceptPredicationCode =
+ Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
- Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
- Mnemonic != "stc2" && Mnemonic != "stc2l" &&
- !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
+ Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" &&
+ Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") &&
+ !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
if (hasV6MOps())
CanAcceptPredicationCode = Mnemonic != "movs";
@@ -6155,6 +6149,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands can't be identical");
return false;
}
+ case ARM::t2BXJ: {
+ const unsigned RmReg = Inst.getOperand(0).getReg();
+ // Rm = SP is no longer unpredictable in v8-A
+ if (RmReg == ARM::SP && !hasV8Ops())
+ return Error(Operands[2]->getStartLoc(),
+ "r13 (SP) is an unpredictable operand to BXJ");
+ return false;
+ }
case ARM::STRD: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
@@ -6703,8 +6705,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -6721,8 +6723,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::createReg(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -6741,13 +6743,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Immediate (mod_imm) will be in its encoded form, we must unencode it
// before passing it to the ADR instruction.
unsigned Enc = Inst.getOperand(2).getImm();
- TmpInst.addOperand(MCOperand::CreateImm(
+ TmpInst.addOperand(MCOperand::createImm(
ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7)));
} else {
// Turn PC-relative expression into absolute expression.
// Reading PC provides the start of the current instruction + 8 and
// the transform to adr is biased by that.
- MCSymbol *Dot = getContext().CreateTempSymbol();
+ MCSymbol *Dot = getContext().createTempSymbol();
Out.EmitLabel(Dot);
const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot,
@@ -6758,7 +6760,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
getContext());
const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr,
getContext());
- TmpInst.addOperand(MCOperand::CreateExpr(FixupAddr));
+ TmpInst.addOperand(MCOperand::createExpr(FixupAddr));
}
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
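
The mod_imm unencoding above is the standard ARM modified-immediate scheme: an 8-bit payload in bits 0-7, rotated right by twice the 4-bit field in bits 8-11. That is exactly what rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7) computes, since (Enc & 0xF00) >> 7 is the rotate field already doubled. A minimal standalone sketch:

    #include <cassert>
    #include <cstdint>

    // Rotate right that stays well-defined for Rot == 0 (avoids a 32-bit shift).
    static uint32_t rotr32(uint32_t V, unsigned Rot) {
      Rot &= 31;
      return Rot ? (V >> Rot) | (V << (32 - Rot)) : V;
    }

    // Decode an ARM modified immediate: 8-bit payload, rotation = 2 * rot field.
    static uint32_t decodeModImm(uint32_t Enc) {
      return rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7);
    }

    int main() {
      assert(decodeModImm(0x0FF) == 0xFF);       // rot field 0: value as-is
      assert(decodeModImm(0x3FF) == 0xFC000003); // 0xFF rotated right by 6
      return 0;
    }
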
@@ -6824,7 +6826,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
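
Throughout these VLD/VST pseudo expansions, the extra list registers are synthesized as BaseReg + k * Spacing, where Spacing (an out-parameter of the parser-local getRealVLDOpcode helper) is, by convention, 1 for a consecutive register list and 2 for the even/odd-spaced forms. A hedged sketch of the recurring pattern, using the MCInst/MCOperand API the diff itself migrates to (the helper below is illustrative, not part of the parser):

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Append NumRegs list registers starting at BaseReg, stepping by
    // Spacing (1 = d0,d1,... ; 2 = d0,d2,... for the spaced variants).
    static void addSpacedRegs(MCInst &MI, unsigned BaseReg, unsigned NumRegs,
                              unsigned Spacing) {
      for (unsigned K = 0; K != NumRegs; ++K)
        MI.addOperand(MCOperand::createReg(BaseReg + K * Spacing));
    }

In the actual expansions the operands are interleaved with Rn, alignment, and writeback operands, so they are added one by one rather than through a loop like this.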
@@ -6848,9 +6850,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
@@ -6874,11 +6876,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
@@ -6898,7 +6900,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -6920,9 +6922,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -6944,11 +6946,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -6970,13 +6972,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7016,7 +7018,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7038,9 +7040,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7062,11 +7064,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7108,14 +7110,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
@@ -7135,18 +7137,18 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
@@ -7166,22 +7168,22 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(4)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(5)); // CondCode
@@ -7202,7 +7204,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7222,14 +7224,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7249,18 +7251,18 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7280,22 +7282,22 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7334,12 +7336,12 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7359,16 +7361,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7388,20 +7390,20 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(2)); // Rn
TmpInst.addOperand(Inst.getOperand(3)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // lane
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
@@ -7421,9 +7423,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
@@ -7443,14 +7445,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -7467,9 +7469,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
@@ -7492,9 +7494,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
@@ -7514,14 +7516,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -7538,9 +7540,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
@@ -7563,11 +7565,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
@@ -7587,16 +7589,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -7613,11 +7615,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
@@ -7640,11 +7642,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
@@ -7664,16 +7666,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
@@ -7690,11 +7692,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
unsigned Spacing;
TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
@@ -7719,9 +7721,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -7741,11 +7743,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -7767,9 +7769,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // alignment
TmpInst.addOperand(Inst.getOperand(3)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
TmpInst.addOperand(Inst.getOperand(5));
@@ -7790,11 +7792,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -7814,13 +7816,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
TmpInst.addOperand(Inst.getOperand(2)); // alignment
- TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(MCOperand::createReg(0)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
@@ -7842,11 +7844,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // alignment
TmpInst.addOperand(Inst.getOperand(3)); // Rm
TmpInst.addOperand(Inst.getOperand(0)); // Vd
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 2));
- TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ TmpInst.addOperand(MCOperand::createReg(Inst.getOperand(0).getReg() +
Spacing * 3));
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
TmpInst.addOperand(Inst.getOperand(5));
@@ -7910,14 +7912,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
if (isNarrow)
- TmpInst.addOperand(MCOperand::CreateReg(
+ TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // Rm
TmpInst.addOperand(Inst.getOperand(4)); // CondCode
TmpInst.addOperand(Inst.getOperand(5));
if (!isNarrow)
- TmpInst.addOperand(MCOperand::CreateReg(
+ TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
Inst = TmpInst;
return true;
@@ -7947,15 +7949,15 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
if (isNarrow)
- TmpInst.addOperand(MCOperand::CreateReg(
+ TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
if (newOpc != ARM::t2RRX)
- TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(MCOperand::createImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
if (!isNarrow)
- TmpInst.addOperand(MCOperand::CreateReg(
+ TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
Inst = TmpInst;
return true;
@@ -7979,7 +7981,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0)); // Rd
TmpInst.addOperand(Inst.getOperand(1)); // Rn
TmpInst.addOperand(Inst.getOperand(2)); // Rm
- TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
TmpInst.addOperand(Inst.getOperand(5)); // cc_out
@@ -8010,7 +8012,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0)); // Rd
TmpInst.addOperand(Inst.getOperand(1)); // Rn
if (Opc == ARM::MOVsi)
- TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
TmpInst.addOperand(Inst.getOperand(5)); // cc_out
@@ -8023,7 +8025,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(ARM::MOVsi);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(MCOperand::createImm(Shifter)); // Shift value and ty
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4)); // cc_out
@@ -8040,7 +8042,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(4)); // Rt
TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(MCOperand::createImm(4));
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -8056,7 +8058,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(4)); // Rt
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(MCOperand::createImm(-4));
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -8072,8 +8074,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(4)); // Rt
TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(MCOperand::createReg(0)); // am2offset
+ TmpInst.addOperand(MCOperand::createImm(4));
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -8090,7 +8092,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
TmpInst.addOperand(Inst.getOperand(4)); // Rt
TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
- TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(MCOperand::createImm(-4));
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
@@ -8103,7 +8105,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2ADDri);
- Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
break;
case ARM::t2SUBri12:
// If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
@@ -8112,7 +8114,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2SUBri);
- Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
@@ -8185,7 +8187,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// same, we need to use the 32-bit encoding if it's available.
if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) {
Inst.setOpcode(ARM::t2ADDrr);
- Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ Inst.addOperand(MCOperand::createReg(0)); // cc_out
return true;
}
break;
@@ -8238,7 +8240,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// the writeback tied operand.
if (hasWritebackToken)
Inst.insert(Inst.begin(),
- MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ MCOperand::createReg(Inst.getOperand(0).getReg()));
return true;
}
break;
@@ -8267,8 +8269,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
assert (isThumbTwo());
Inst.setOpcode(ARM::t2LDMIA_UPD);
// Add the base register and writeback operands.
- Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
- Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
return true;
}
case ARM::tPUSH: {
@@ -8278,8 +8280,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
assert (isThumbTwo());
Inst.setOpcode(ARM::t2STMDB_UPD);
// Add the base register and writeback operands.
- Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
- Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP));
return true;
}
case ARM::t2MOVi: {
@@ -8894,7 +8896,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
}
MCSymbol *Func =
- getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier());
+ getParser().getContext().getOrCreateSymbol(Tok.getIdentifier());
getParser().getStreamer().EmitThumbFunc(Func);
Parser.Lex(); // Consume the identifier token.
return false;
@@ -9010,7 +9012,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
- if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) {
+ if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) {
Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
return false;
}
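
The reworked check deliberately tolerates redefining a .req alias to the same register: std::map::insert never overwrites an existing entry, and the returned iterator exposes whatever mapping is already stored, so only a conflicting redefinition trips the error. A small standalone illustration of that idiom:

    #include <cassert>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, unsigned> RegisterReqs;
      RegisterReqs.insert({"foo", 3});

      // Same name, same register: insert() is a no-op and the existing
      // mapping matches, so this is not flagged as a redefinition.
      assert(RegisterReqs.insert({"foo", 3}).first->second == 3);

      // Same name, different register: the stored value still reads 3,
      // which is what the parser's `!= Reg` comparison catches.
      assert(RegisterReqs.insert({"foo", 4}).first->second != 4);
      return 0;
    }
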
@@ -9037,15 +9039,9 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = StringSwitch<unsigned>(Arch)
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
- .Case(NAME, ARM::ID)
-#define ARM_ARCH_ALIAS(NAME, ID) \
- .Case(NAME, ARM::ID)
-#include "MCTargetDesc/ARMArchName.def"
- .Default(ARM::INVALID_ARCH);
+ unsigned ID = ARMTargetParser::parseArch(Arch);
- if (ID == ARM::INVALID_ARCH) {
+ if (ID == ARM::AK_INVALID) {
Error(L, "Unknown arch name");
return false;
}
@@ -9182,8 +9178,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// see: http://llvm.org/bugs/show_bug.cgi?id=20757
STI.InitMCProcessorInfo(CPU, "");
STI.InitCPUSchedModel(CPU);
- unsigned FB = ComputeAvailableFeatures(STI.getFeatureBits());
- setAvailableFeatures(FB);
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
}
@@ -9192,66 +9187,65 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// tools/clang/lib/Driver/Tools.cpp
static const struct {
const unsigned ID;
- const uint64_t Enabled;
- const uint64_t Disabled;
+ const FeatureBitset Enabled;
+ const FeatureBitset Disabled;
} FPUs[] = {
- {/* ID */ ARM::VFP,
- /* Enabled */ ARM::FeatureVFP2,
- /* Disabled */ ARM::FeatureNEON},
- {/* ID */ ARM::VFPV2,
- /* Enabled */ ARM::FeatureVFP2,
- /* Disabled */ ARM::FeatureNEON},
- {/* ID */ ARM::VFPV3,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3,
- /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
- {/* ID */ ARM::VFPV3_D16,
- /* Enable */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
- /* Disabled */ ARM::FeatureNEON},
- {/* ID */ ARM::VFPV4,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
- /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
- {/* ID */ ARM::VFPV4_D16,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureD16,
- /* Disabled */ ARM::FeatureNEON},
- {/* ID */ ARM::FPV5_D16,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureFPARMv8 | ARM::FeatureD16,
- /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto},
- {/* ID */ ARM::FP_ARMV8,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureFPARMv8,
- /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16},
- {/* ID */ ARM::NEON,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON,
- /* Disabled */ ARM::FeatureD16},
- {/* ID */ ARM::NEON_VFPV4,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureNEON,
- /* Disabled */ ARM::FeatureD16},
- {/* ID */ ARM::NEON_FP_ARMV8,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureFPARMv8 | ARM::FeatureNEON,
- /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16},
- {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8,
- /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
- ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
- /* Disabled */ ARM::FeatureD16},
- {ARM::SOFTVFP, 0, 0},
+ {/* ID */ ARM::FK_VFP,
+ /* Enabled */ {ARM::FeatureVFP2},
+ /* Disabled */ {ARM::FeatureNEON}},
+ {/* ID */ ARM::FK_VFPV2,
+ /* Enabled */ {ARM::FeatureVFP2},
+ /* Disabled */ {ARM::FeatureNEON}},
+ {/* ID */ ARM::FK_VFPV3,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3},
+ /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
+ {/* ID */ ARM::FK_VFPV3_D16,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureD16},
+ /* Disabled */ {ARM::FeatureNEON}},
+ {/* ID */ ARM::FK_VFPV4,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4},
+ /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
+ {/* ID */ ARM::FK_VFPV4_D16,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureD16},
+ /* Disabled */ {ARM::FeatureNEON}},
+ {/* ID */ ARM::FK_FPV5_D16,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureFPARMv8, ARM::FeatureD16},
+ /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto}},
+ {/* ID */ ARM::FK_FP_ARMV8,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureFPARMv8},
+ /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto, ARM::FeatureD16}},
+ {/* ID */ ARM::FK_NEON,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON},
+ /* Disabled */ {ARM::FeatureD16}},
+ {/* ID */ ARM::FK_NEON_VFPV4,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureNEON},
+ /* Disabled */ {ARM::FeatureD16}},
+ {/* ID */ ARM::FK_NEON_FP_ARMV8,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureFPARMv8, ARM::FeatureNEON},
+ /* Disabled */ {ARM::FeatureCrypto, ARM::FeatureD16}},
+ {/* ID */ ARM::FK_CRYPTO_NEON_FP_ARMV8,
+ /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
+ ARM::FeatureFPARMv8, ARM::FeatureNEON,
+ ARM::FeatureCrypto},
+ /* Disabled */ {ARM::FeatureD16}},
+ {ARM::FK_SOFTVFP, {}, {}},
};
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = StringSwitch<unsigned>(FPU)
-#define ARM_FPU_NAME(NAME, ID) .Case(NAME, ARM::ID)
-#include "ARMFPUName.def"
- .Default(ARM::INVALID_FPU);
+ unsigned ID = ARMTargetParser::parseFPU(FPU);
- if (ID == ARM::INVALID_FPU) {
- Error(L, "Unknown FPU name");
+ if (ID == ARM::FK_INVALID) {
+ Error(FPUNameLoc, "Unknown FPU name");
return false;
}
@@ -9261,8 +9255,8 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
// Need to toggle features that should be on but are off and that
// should off but are on.
- uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
- (Entry.Disabled & STI.getFeatureBits());
+ FeatureBitset Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
+ (Entry.Disabled & STI.getFeatureBits());
setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
break;
}
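
The Toggle computation is the usual "flip exactly the wrong bits" mask: bits that should be on but are off (Enabled & ~Current) plus bits that should be off but are on (Disabled & Current); the same shape reappears below in parseDirectiveArchExtension. A sketch with std::bitset standing in for FeatureBitset:

    #include <bitset>
    #include <cassert>

    int main() {
      using Bits = std::bitset<8>;
      Bits Current("00001111");
      Bits Enabled("00110000");  // must end up set
      Bits Disabled("00000011"); // must end up clear

      Bits Toggle = (Enabled & ~Current) | (Disabled & Current);
      Bits After  = Current ^ Toggle;   // ToggleFeature flips these bits

      assert((After & Enabled) == Enabled); // required bits now on
      assert((After & Disabled).none());    // forbidden bits now off
      return 0;
    }
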
@@ -9369,7 +9363,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
StringRef Name(Parser.getTok().getIdentifier());
Parser.Lex();
- MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name);
+ MCSymbol *PR = getParser().getContext().getOrCreateSymbol(Name);
getTargetStreamer().emitPersonality(PR);
return false;
}
@@ -9902,17 +9896,9 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
SMLoc ArchLoc = Parser.getTok().getLoc();
getLexer().Lex();
- unsigned ID = StringSwitch<unsigned>(Arch)
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
- .Case(NAME, ARM::ID)
-#define ARM_ARCH_ALIAS(NAME, ID) \
- .Case(NAME, ARM::ID)
-#include "MCTargetDesc/ARMArchName.def"
-#undef ARM_ARCH_NAME
-#undef ARM_ARCH_ALIAS
- .Default(ARM::INVALID_ARCH);
-
- if (ID == ARM::INVALID_ARCH) {
+ unsigned ID = ARMTargetParser::parseArch(Arch);
+
+ if (ID == ARM::AK_INVALID) {
Error(ArchLoc, "unknown architecture '" + Arch + "'");
Parser.eatToEndOfStatement();
return false;
@@ -9978,7 +9964,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
}
Lex();
- MCSymbol *Alias = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Alias = getContext().getOrCreateSymbol(Name);
getTargetStreamer().emitThumbSet(Alias, Value);
return false;
}
@@ -9999,30 +9985,30 @@ extern "C" void LLVMInitializeARMAsmParser() {
static const struct {
const char *Name;
const unsigned ArchCheck;
- const uint64_t Features;
+ const FeatureBitset Features;
} Extensions[] = {
- { "crc", Feature_HasV8, ARM::FeatureCRC },
+ { "crc", Feature_HasV8, {ARM::FeatureCRC} },
{ "crypto", Feature_HasV8,
- ARM::FeatureCrypto | ARM::FeatureNEON | ARM::FeatureFPARMv8 },
- { "fp", Feature_HasV8, ARM::FeatureFPARMv8 },
+ {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ { "fp", Feature_HasV8, {ARM::FeatureFPARMv8} },
{ "idiv", Feature_HasV7 | Feature_IsNotMClass,
- ARM::FeatureHWDiv | ARM::FeatureHWDivARM },
+ {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
// FIXME: iWMMXT not supported
- { "iwmmxt", Feature_None, 0 },
+ { "iwmmxt", Feature_None, {} },
// FIXME: iWMMXT2 not supported
- { "iwmmxt2", Feature_None, 0 },
+ { "iwmmxt2", Feature_None, {} },
// FIXME: Maverick not supported
- { "maverick", Feature_None, 0 },
- { "mp", Feature_HasV7 | Feature_IsNotMClass, ARM::FeatureMP },
+ { "maverick", Feature_None, {} },
+ { "mp", Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
// FIXME: ARMv6-m OS Extensions feature not checked
- { "os", Feature_None, 0 },
+ { "os", Feature_None, {} },
// FIXME: Also available in ARMv6-K
- { "sec", Feature_HasV7, ARM::FeatureTrustZone },
- { "simd", Feature_HasV8, ARM::FeatureNEON | ARM::FeatureFPARMv8 },
+ { "sec", Feature_HasV7, {ARM::FeatureTrustZone} },
+ { "simd", Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
// FIXME: Only available in A-class, isel not predicated
- { "virt", Feature_HasV7, ARM::FeatureVirtualization },
+ { "virt", Feature_HasV7, {ARM::FeatureVirtualization} },
// FIXME: xscale not supported
- { "xscale", Feature_None, 0 },
+ { "xscale", Feature_None, {} },
};
/// parseDirectiveArchExtension
@@ -10050,7 +10036,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
if (Extension.Name != Name)
continue;
- if (!Extension.Features)
+ if (Extension.Features.none())
report_fatal_error("unsupported architectural extension: " + Name);
if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
@@ -10059,9 +10045,10 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
- uint64_t ToggleFeatures = EnableFeature
- ? (~STI.getFeatureBits() & Extension.Features)
- : ( STI.getFeatureBits() & Extension.Features);
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~STI.getFeatureBits() & Extension.Features)
+ : ( STI.getFeatureBits() & Extension.Features);
+
uint64_t Features =
ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
setAvailableFeatures(Features);
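The toggle mask selects exactly the bits that must flip: when enabling, the extension's bits that are still clear; when disabling, the ones currently set. A worked miniature, assuming FeatureBitset inherits std::bitset's operators (illustrative indices again):

    llvm::FeatureBitset Have = {1};    // say, CRC already enabled
    llvm::FeatureBitset Ext  = {2, 3}; // say, NEON + FPARMv8
    bool Enable = true;
    llvm::FeatureBitset Toggle = Enable ? (~Have & Ext) : (Have & Ext);
    // Enable:  Toggle == {2, 3} -> both flipped on by ToggleFeature().
    // Disable: Toggle == {}     -> nothing set, so nothing to flip off.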
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 51faf69..097ec04 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -91,7 +91,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() {}
+ ~ARMDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +106,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() {}
+ ~ThumbDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
@@ -431,7 +435,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &CS) const {
CommentStream = &CS;
- assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
+ assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
"Asked to disassemble an ARM instruction but Subtarget is in Thumb "
"mode!");
@@ -578,12 +582,12 @@ static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
if (I == MI.end()) break;
if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) {
if (i > 0 && OpInfo[i-1].isPredicate()) continue;
- MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
return;
}
}
- MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR));
+ MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
}
// Most Thumb instructions don't have explicit predicates in the
@@ -642,22 +646,22 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
for (unsigned i = 0; i < NumOps; ++i, ++I) {
if (I == MI.end()) break;
if (OpInfo[i].isPredicate()) {
- I = MI.insert(I, MCOperand::CreateImm(CC));
+ I = MI.insert(I, MCOperand::createImm(CC));
++I;
if (CC == ARMCC::AL)
- MI.insert(I, MCOperand::CreateReg(0));
+ MI.insert(I, MCOperand::createReg(0));
else
- MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ MI.insert(I, MCOperand::createReg(ARM::CPSR));
return S;
}
}
- I = MI.insert(I, MCOperand::CreateImm(CC));
+ I = MI.insert(I, MCOperand::createImm(CC));
++I;
if (CC == ARMCC::AL)
- MI.insert(I, MCOperand::CreateReg(0));
+ MI.insert(I, MCOperand::createReg(0));
else
- MI.insert(I, MCOperand::CreateReg(ARM::CPSR));
+ MI.insert(I, MCOperand::createReg(ARM::CPSR));
return S;
}
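Both insertion paths above follow the MC convention for predicable ARM instructions: the predicate is an operand pair, an immediate condition code followed by a cc_in register that is CPSR for a real condition and register 0 for AL (0xE). A sketch using only the public MCInst API; the CPSR register number is passed in because the real enumerator is tablegen'erated:

    #include "llvm/MC/MCInst.h"
    // Append the <cond, cc_in> pair the way AddThumbPredicate does.
    static void appendPredicate(llvm::MCInst &MI, unsigned CC, unsigned CPSR) {
      MI.addOperand(llvm::MCOperand::createImm(CC));
      MI.addOperand(llvm::MCOperand::createReg(CC == 0xE /*AL*/ ? 0 : CPSR));
    }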
@@ -696,7 +700,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &CS) const {
CommentStream = &CS;
- assert((STI.getFeatureBits() & ARM::ModeThumb) &&
+ assert(STI.getFeatureBits()[ARM::ModeThumb] &&
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
@@ -890,7 +894,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = GPRDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -914,7 +918,7 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo == 15)
{
- Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV));
+ Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV));
return MCDisassembler::Success;
}
@@ -945,7 +949,7 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
S = MCDisassembler::SoftFail;
unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
- Inst.addOperand(MCOperand::CreateReg(RegisterPair));
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
return S;
}
@@ -975,7 +979,7 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
}
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -1005,7 +1009,7 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = SPRDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -1022,15 +1026,16 @@ static const uint16_t DPRDecoderTable[] = {
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- bool hasD16 = featureBits & ARM::FeatureD16;
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasD16 = featureBits[ARM::FeatureD16];
if (RegNo > 31 || (hasD16 && RegNo > 15))
return MCDisassembler::Fail;
unsigned Register = DPRDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
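This hunk is the template for a migration repeated through the rest of the file: ARM::Feature* enumerators are now bit indices into a FeatureBitset rather than bit values in a uint64_t, so the old mask-AND would silently test the wrong thing and the lookup becomes an indexed read:

    // Before: bool hasD16 = featureBits & ARM::FeatureD16;  // mask AND
    // After:
    const llvm::FeatureBitset &FB =
        ((const llvm::MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
    bool hasD16 = FB[ARM::FeatureD16]; // the enumerator is a position, not a power of two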
@@ -1064,7 +1069,7 @@ static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
RegNo >>= 1;
unsigned Register = QPRDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -1083,7 +1088,7 @@ static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = DPairDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -1106,7 +1111,7 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Register = DPairSpacedDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -1116,20 +1121,20 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
// AL predicate is not allowed on Thumb1 branches.
if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
if (Val == ARMCC::AL) {
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
} else
- Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ Inst.addOperand(MCOperand::createReg(ARM::CPSR));
return MCDisassembler::Success;
}
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val)
- Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ Inst.addOperand(MCOperand::createReg(ARM::CPSR));
else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
return MCDisassembler::Success;
}
@@ -1165,7 +1170,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
Shift = ARM_AM::rrx;
unsigned Op = Shift | (imm << 3);
- Inst.addOperand(MCOperand::CreateImm(Op));
+ Inst.addOperand(MCOperand::createImm(Op));
return S;
}
@@ -1200,7 +1205,7 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
break;
}
- Inst.addOperand(MCOperand::CreateImm(Shift));
+ Inst.addOperand(MCOperand::createImm(Shift));
return S;
}
@@ -1314,7 +1319,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
uint32_t lsb_mask = (1U << lsb) - 1;
- Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask)));
+ Inst.addOperand(MCOperand::createImm(~(msb_mask ^ lsb_mask)));
return S;
}
@@ -1369,13 +1374,13 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
break;
}
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- if ((featureBits & ARM::HasV8Ops) && (coproc != 14))
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+ if (featureBits[ARM::HasV8Ops] && (coproc != 14))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(coproc));
- Inst.addOperand(MCOperand::CreateImm(CRd));
+ Inst.addOperand(MCOperand::createImm(coproc));
+ Inst.addOperand(MCOperand::createImm(CRd));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1413,7 +1418,7 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
case ARM::STC_PRE:
case ARM::STCL_PRE:
imm = ARM_AM::getAM5Opc(U ? ARM_AM::add : ARM_AM::sub, imm);
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
break;
case ARM::t2LDC2_POST:
case ARM::t2LDC2L_POST:
@@ -1436,7 +1441,7 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
default:
// The 'option' variant doesn't encode 'U' in the immediate since
// the immediate is unsigned [0,255].
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
break;
}
@@ -1560,11 +1565,11 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
Opc = ARM_AM::rrx;
unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
} else {
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode);
- Inst.addOperand(MCOperand::CreateImm(tmp));
+ Inst.addOperand(MCOperand::createImm(tmp));
}
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
@@ -1611,7 +1616,7 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp);
else
shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp);
- Inst.addOperand(MCOperand::CreateImm(shift));
+ Inst.addOperand(MCOperand::createImm(shift));
return S;
}
@@ -1794,12 +1799,12 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (type) {
- Inst.addOperand(MCOperand::CreateReg(0));
- Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(U | (imm << 4) | Rm));
} else {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(U));
+ Inst.addOperand(MCOperand::createImm(U));
}
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
@@ -1830,7 +1835,7 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
break;
}
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(mode));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1932,7 +1937,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
return MCDisassembler::Fail;
Inst.addOperand(
- MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
+ MCOperand::createImm(fieldFromInstruction(Insn, 0, 4)));
return S;
}
@@ -1976,22 +1981,22 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
if (imod && M) {
Inst.setOpcode(ARM::CPS3p);
- Inst.addOperand(MCOperand::CreateImm(imod));
- Inst.addOperand(MCOperand::CreateImm(iflags));
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ Inst.addOperand(MCOperand::createImm(mode));
} else if (imod && !M) {
Inst.setOpcode(ARM::CPS2p);
- Inst.addOperand(MCOperand::CreateImm(imod));
- Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
if (mode) S = MCDisassembler::SoftFail;
} else if (!imod && M) {
Inst.setOpcode(ARM::CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(mode));
if (iflags) S = MCDisassembler::SoftFail;
} else {
// imod == '00' && M == '0' --> UNPREDICTABLE
Inst.setOpcode(ARM::CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(mode));
S = MCDisassembler::SoftFail;
}
@@ -2016,17 +2021,17 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
if (imod && M) {
Inst.setOpcode(ARM::t2CPS3p);
- Inst.addOperand(MCOperand::CreateImm(imod));
- Inst.addOperand(MCOperand::CreateImm(iflags));
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
+ Inst.addOperand(MCOperand::createImm(mode));
} else if (imod && !M) {
Inst.setOpcode(ARM::t2CPS2p);
- Inst.addOperand(MCOperand::CreateImm(imod));
- Inst.addOperand(MCOperand::CreateImm(iflags));
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(iflags));
if (mode) S = MCDisassembler::SoftFail;
} else if (!imod && M) {
Inst.setOpcode(ARM::t2CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
+ Inst.addOperand(MCOperand::createImm(mode));
if (iflags) S = MCDisassembler::SoftFail;
} else {
// imod == '00' && M == '0' --> this is a HINT instruction
@@ -2034,7 +2039,7 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
// HINT is defined only for immediates in [0..4]
if (imm > 4) return MCDisassembler::Fail;
Inst.setOpcode(ARM::t2HINT);
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
}
return S;
@@ -2059,7 +2064,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -2083,7 +2088,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2119,6 +2124,55 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Pred == 0xF)
+ return DecodeSETPANInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Imm = fieldFromInstruction(Insn, 9, 1);
+
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
+
+ if (!FeatureBits[ARM::HasV8_1aOps] ||
+ !FeatureBits[ARM::HasV8Ops])
+ return MCDisassembler::Fail;
+
+ // This decoder can be reached from DecodeTSTInstruction, which does not
+ // check that the full encoding is valid.
+ if (fieldFromInstruction(Insn, 20, 12) != 0xf11 ||
+ fieldFromInstruction(Insn, 4, 4) != 0)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 10, 10) != 0 ||
+ fieldFromInstruction(Insn, 0, 4) != 0)
+ S = MCDisassembler::SoftFail;
+
+ Inst.setOpcode(ARM::SETPAN);
+ Inst.addOperand(MCOperand::createImm(Imm));
+
+ return S;
+}
+
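fieldFromInstruction(Insn, Start, Width) extracts bits [Start+Width-1:Start], and SETPAN sits in the TST encoding space distinguished by the 0xF (unconditional) predicate, which is why DecodeTSTInstruction forwards there before validating its own operands. A worked extraction on an illustrative word, not a blessed encoding:

    #include <cstdint>
    uint32_t Insn = 0xF1100200;
    unsigned Pred = (Insn >> 28) & 0xF; // fieldFromInstruction(Insn, 28, 4) == 0xF
    unsigned Imm  = (Insn >> 9)  & 0x1; // fieldFromInstruction(Insn, 9, 1)  == 1
    // Pred == 0xF routes decoding to DecodeSETPANInstruction with PAN == 1.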
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2132,7 +2186,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
if (!add) imm *= -1;
if (imm == 0 && !add) imm = INT32_MIN;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
if (Rn == 15)
tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);
@@ -2151,9 +2205,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
return MCDisassembler::Fail;
if (U)
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5Opc(ARM_AM::add, imm)));
else
- Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm)));
return S;
}
@@ -2185,7 +2239,7 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
int imm32 = SignExtend32<25>(tmp << 1);
if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(imm32));
+ Inst.addOperand(MCOperand::createImm(imm32));
return Status;
}
@@ -2203,13 +2257,13 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
imm |= fieldFromInstruction(Insn, 24, 1) << 1;
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
return S;
}
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2227,9 +2281,9 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
if (!align)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else
- Inst.addOperand(MCOperand::CreateImm(4 << align));
+ Inst.addOperand(MCOperand::createImm(4 << align));
return S;
}
@@ -2423,7 +2477,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
case ARM::VLD2b8wb_register:
case ARM::VLD2b16wb_register:
case ARM::VLD2b32wb_register:
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
break;
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
@@ -2458,7 +2512,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
//
// The fixed offset encodes as Rm == 0xd, so we check for that.
if (Rm == 0xd) {
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
break;
}
// Fall through to handle the register offset variant.
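The Rm field of these NEON post-increment encodings is three-valued, and MC models the fixed-increment form with register 0 ("no register"). In miniature:

    // Rm decoding for VLDn/VSTn writeback, as the comments above describe:
    static const char *rmKind(unsigned Rm) {
      if (Rm == 0xF) return "no writeback:       [Rn]";
      if (Rm == 0xD) return "fixed increment:    [Rn]!  (reg 0 in MC)";
      return "register increment: [Rn], Rm";
    }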
@@ -2624,7 +2678,7 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
case ARM::VST2b32wb_register:
if (Rm == 0xF)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
@@ -2653,7 +2707,7 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
default:
if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
else if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2865,7 +2919,7 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
// The fixed offset post-increment encodes Rm == 0xd. The no-writeback
// variant encodes Rm == 0xf. Anything else is a register offset post-
@@ -2911,11 +2965,11 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
}
if (Rm != 0xF)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xD && Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2948,10 +3002,10 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
else if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3000,10 +3054,10 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
else if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3034,7 +3088,7 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
switch (Inst.getOpcode()) {
case ARM::VORRiv4i16:
@@ -3072,32 +3126,32 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(8 << size));
+ Inst.addOperand(MCOperand::createImm(8 << size));
return S;
}
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ Inst.addOperand(MCOperand::createImm(8 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ Inst.addOperand(MCOperand::createImm(16 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ Inst.addOperand(MCOperand::createImm(32 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ Inst.addOperand(MCOperand::createImm(64 - Val));
return MCDisassembler::Success;
}
@@ -3153,11 +3207,11 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
case ARM::tADR:
break; // tADR does not explicitly represent the PC as an operand.
case ARM::tADDrSPi:
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
break;
}
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3165,7 +3219,7 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1)));
return MCDisassembler::Success;
}
@@ -3173,7 +3227,7 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
}
@@ -3181,7 +3235,7 @@ static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ Inst.addOperand(MCOperand::createImm(Val << 1));
return MCDisassembler::Success;
}
@@ -3209,7 +3263,7 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3218,7 +3272,7 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned imm = Val << 2;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder);
return MCDisassembler::Success;
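The expression encodes the Thumb literal-load rule: the PC reads as the instruction address plus 4, rounded down to a word boundary, which clearing bit 1 achieves for halfword-aligned addresses. One worked value:

    // A load at 0x1002 with imm == 8:
    //   base   = (0x1002 & ~2u) + 4 = 0x1004
    //   target = base + 8           = 0x100C
    uint64_t Target = (0x1002ull & ~2ull) + 4 + 8; // == 0x100C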
@@ -3226,8 +3280,8 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createImm(Val));
return MCDisassembler::Success;
}
@@ -3255,7 +3309,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
return MCDisassembler::Fail;
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3267,10 +3321,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- bool hasMP = featureBits & ARM::FeatureMP;
- bool hasV7Ops = featureBits & ARM::HasV7Ops;
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
if (Rn == 15) {
switch (Inst.getOpcode()) {
@@ -3353,10 +3408,11 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
imm |= (Rn << 9);
unsigned add = fieldFromInstruction(Insn, 9, 1);
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- bool hasMP = featureBits & ARM::FeatureMP;
- bool hasV7Ops = featureBits & ARM::HasV7Ops;
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
if (Rn == 15) {
switch (Inst.getOpcode()) {
@@ -3433,10 +3489,11 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 12);
imm |= (Rn << 13);
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- bool hasMP = (featureBits & ARM::FeatureMP);
- bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasMP = featureBits[ARM::FeatureMP];
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
if (Rn == 15) {
switch (Inst.getOpcode()) {
@@ -3550,9 +3607,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
unsigned U = fieldFromInstruction(Insn, 23, 1);
int imm = fieldFromInstruction(Insn, 0, 12);
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ bool hasV7Ops = featureBits[ARM::HasV7Ops];
if (Rt == 15) {
switch (Inst.getOpcode()) {
@@ -3589,7 +3647,7 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
else
imm = -imm;
}
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3597,12 +3655,12 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0)
- Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+ Inst.addOperand(MCOperand::createImm(INT32_MIN));
else {
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm * 4));
+ Inst.addOperand(MCOperand::createImm(imm * 4));
}
return MCDisassembler::Success;
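Bit 8 of Val is the add/subtract flag, the low byte the magnitude, and the result is scaled by 4; Val == 0 encodes "#-0", kept distinct from "#+0" via the INT32_MIN sentinel. The rule as a standalone sketch:

    #include <cstdint>
    static int decodeImm8S4(unsigned Val) {
      if (Val == 0) return INT32_MIN;    // the "#-0" case
      int imm = Val & 0xFF;
      if (!(Val & 0x100)) imm = -imm;    // bit 8 clear -> subtract
      return imm * 4;                    // 0x105 -> +20, 0x005 -> -20
    }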
@@ -3633,7 +3691,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3645,7 +3703,7 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
imm = INT32_MIN;
else if (!(Val & 0x100))
imm *= -1;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return MCDisassembler::Success;
}
@@ -3778,7 +3836,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
return S;
}
@@ -3788,9 +3846,9 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imm = fieldFromInstruction(Insn, 0, 7);
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createImm(imm));
return MCDisassembler::Success;
}
@@ -3805,14 +3863,14 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
return MCDisassembler::Fail;
} else if (Inst.getOpcode() == ARM::tADDspr) {
unsigned Rm = fieldFromInstruction(Insn, 3, 4);
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
- Inst.addOperand(MCOperand::CreateReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
+ Inst.addOperand(MCOperand::createReg(ARM::SP));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
}
@@ -3825,8 +3883,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction(Insn, 0, 3);
- Inst.addOperand(MCOperand::CreateImm(imod));
- Inst.addOperand(MCOperand::CreateImm(flags));
+ Inst.addOperand(MCOperand::createImm(imod));
+ Inst.addOperand(MCOperand::createImm(flags));
return MCDisassembler::Success;
}
@@ -3839,7 +3897,7 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(add));
+ Inst.addOperand(MCOperand::createImm(add));
return S;
}
@@ -3864,7 +3922,7 @@ static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
if (!tryAddingSymbolicOperand(Address,
(Address & ~2u) + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(imm32));
+ Inst.addOperand(MCOperand::createImm(imm32));
return MCDisassembler::Success;
}
@@ -3873,12 +3931,13 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
- uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- if ((featureBits & ARM::HasV8Ops) && !(Val == 14 || Val == 15))
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if (featureBits[ARM::HasV8Ops] && !(Val == 14 || Val == 15))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
return MCDisassembler::Success;
}
@@ -3949,16 +4008,16 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
unsigned imm = fieldFromInstruction(Val, 0, 8);
switch (byte) {
case 0:
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
break;
case 1:
- Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm));
+ Inst.addOperand(MCOperand::createImm((imm << 16) | imm));
break;
case 2:
- Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8)));
+ Inst.addOperand(MCOperand::createImm((imm << 24) | (imm << 8)));
break;
case 3:
- Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) |
+ Inst.addOperand(MCOperand::createImm((imm << 24) | (imm << 16) |
(imm << 8) | imm));
break;
}
@@ -3966,7 +4025,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
unsigned rot = fieldFromInstruction(Val, 7, 5);
unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
- Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::createImm(imm));
}
return MCDisassembler::Success;
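The else branch is the Thumb2 "modified immediate": a 7-bit payload with an implicit leading 1, rotated right by a 5-bit amount, while the byte cases above cover the rotation-free replication patterns. One concrete value:

    // Val = 0x400 selects the rotate form with a zero payload:
    unsigned unrot = (0x400 & 0x7F) | 0x80;               // 0x80
    unsigned rot   = (0x400 >> 7) & 0x1F;                 // 8
    unsigned imm   = (unrot >> rot) | (unrot << ((32 - rot) & 31));
    // imm == 0x80000000: bit 7 rotated right by 8 lands at bit 31.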
@@ -3977,7 +4036,7 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<9>(Val << 1)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
return MCDisassembler::Success;
}
@@ -4000,7 +4059,7 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
true, 4, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(imm32));
+ Inst.addOperand(MCOperand::createImm(imm32));
return MCDisassembler::Success;
}
@@ -4009,7 +4068,7 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
if (Val & ~0xf)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
return MCDisassembler::Success;
}
@@ -4018,16 +4077,17 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
if (Val & ~0xf)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
- .getFeatureBits();
- if (FeatureBits & ARM::FeatureMClass) {
+ const FeatureBitset &FeatureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if (FeatureBits[ARM::FeatureMClass]) {
unsigned ValLow = Val & 0xff;
// Validate the SYSm value first.
@@ -4047,7 +4107,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
case 17: // basepri
case 18: // basepri_max
case 19: // faultmask
- if (!(FeatureBits & ARM::HasV7Ops))
+ if (!(FeatureBits[ARM::HasV7Ops]))
// Values basepri, basepri_max and faultmask are only valid for v7m.
return MCDisassembler::Fail;
break;
@@ -4057,7 +4117,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
if (Inst.getOpcode() == ARM::t2MSR_M) {
unsigned Mask = fieldFromInstruction(Val, 10, 2);
- if (!(FeatureBits & ARM::HasV7Ops)) {
+ if (!(FeatureBits[ARM::HasV7Ops])) {
// The ARMv6-M MSR bits {11-10} can only be 0b10; other values are
// unpredictable.
if (Mask != 2)
@@ -4071,7 +4131,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
// indicates the move for the GE{3:0} bits; the mask{0} bit can be set
// only if the processor includes the DSP extension.
if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
- (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1)))
+ (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1)))
S = MCDisassembler::SoftFail;
}
}
@@ -4080,7 +4140,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
if (Val == 0)
return MCDisassembler::Fail;
}
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
return S;
}
@@ -4103,7 +4163,7 @@ static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
return MCDisassembler::SoftFail;
}
- Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::createImm(Val));
return MCDisassembler::Success;
}
@@ -4307,18 +4367,18 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4372,18 +4432,18 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4438,20 +4498,20 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4501,20 +4561,20 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4570,13 +4630,13 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -4585,7 +4645,7 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4633,13 +4693,13 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -4648,7 +4708,7 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4713,13 +4773,13 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -4730,7 +4790,7 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4785,13 +4845,13 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
}
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(align));
+ Inst.addOperand(MCOperand::createImm(align));
if (Rm != 0xF) {
if (Rm != 0xD) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
} else
- Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::createReg(0));
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -4802,7 +4862,7 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(index));
+ Inst.addOperand(MCOperand::createImm(index));
return S;
}
@@ -4873,8 +4933,8 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
if (mask == 0x0)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(pred));
- Inst.addOperand(MCOperand::CreateImm(mask));
+ Inst.addOperand(MCOperand::createImm(pred));
+ Inst.addOperand(MCOperand::createImm(mask));
return S;
}
@@ -4960,7 +5020,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
Val |= fieldFromInstruction(Insn, 12, 3) << 8;
Val |= fieldFromInstruction(Insn, 26, 1) << 11;
Val |= sign1 << 12;
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<13>(Val)));
return MCDisassembler::Success;
}
@@ -4971,8 +5031,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
DecodeStatus S = MCDisassembler::Success;
// Shift of "asr #32" is not allowed in Thumb2 mode.
- if (Val == 0x20) S = MCDisassembler::SoftFail;
- Inst.addOperand(MCOperand::CreateImm(Val));
+ if (Val == 0x20) S = MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Val));
return S;
}
@@ -5028,7 +5088,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(64 - imm));
+ Inst.addOperand(MCOperand::createImm(64 - imm));
return S;
}
@@ -5058,7 +5118,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(64 - imm));
+ Inst.addOperand(MCOperand::createImm(64 - imm));
return S;
}
@@ -5107,13 +5167,13 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
if (Rt == Rt2)
S = MCDisassembler::SoftFail;
- Inst.addOperand(MCOperand::CreateImm(cop));
- Inst.addOperand(MCOperand::CreateImm(opc1));
+ Inst.addOperand(MCOperand::createImm(cop));
+ Inst.addOperand(MCOperand::createImm(opc1));
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(CRm));
+ Inst.addOperand(MCOperand::createImm(CRm));
return S;
}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 16eea33..2d36c30 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) {
/// Prints the shift value with an immediate value.
static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
- unsigned ShImm, bool UseMarkup) {
+ unsigned ShImm, bool UseMarkup) {
if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
return;
O << ", ";
- assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
O << getShiftOpcStr(ShOpc);
if (ShOpc != ARM_AM::rrx) {
@@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
}
}
-ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << markup("<reg:")
- << getRegisterName(RegNo)
- << markup(">");
+ OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
- switch(Opcode) {
+ switch (Opcode) {
// Check for HINT instructions w/ canonical names.
case ARM::HINT:
case ARM::tHINT:
case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
- case 0: O << "\tnop"; break;
- case 1: O << "\tyield"; break;
- case 2: O << "\twfe"; break;
- case 3: O << "\twfi"; break;
- case 4: O << "\tsev"; break;
+ case 0:
+ O << "\tnop";
+ break;
+ case 1:
+ O << "\tyield";
+ break;
+ case 2:
+ O << "\twfe";
+ break;
+ case 3:
+ O << "\twfi";
+ break;
+ case 4:
+ O << "\tsev";
+ break;
case 5:
- if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ if (STI.getFeatureBits()[ARM::HasV8Ops]) {
O << "\tsevl";
break;
} // Fallthrough for non-v8
default:
// Anything else should just print normally.
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
return;
}
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
if (Opcode == ARM::t2HINT)
O << ".w";
printAnnotation(O, Annot);
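The constructor hunk above is the printer half of a wider MC change: rather than freezing one feature mask per printer at construction, every print routine now receives the MCSubtargetInfo of the code being printed, so a single printer instance can serve functions with differing subtargets. The entry point as this patch leaves it:

    // New signature; callers (e.g. the asm streamer) pass their current
    // STI with every instruction instead of relying on cached features.
    void ARMInstPrinter::printInst(const llvm::MCInst *MI, llvm::raw_ostream &O,
                                   llvm::StringRef Annot,
                                   const llvm::MCSubtargetInfo &STI);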
@@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO3 = MI->getOperand(3);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
- printSBitModifierOperand(MI, 6, O);
- printPredicateOperand(MI, 4, O);
+ printSBitModifierOperand(MI, 6, STI, O);
+ printPredicateOperand(MI, 4, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO2 = MI->getOperand(2);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
- printSBitModifierOperand(MI, 5, O);
- printPredicateOperand(MI, 3, O);
+ printSBitModifierOperand(MI, 5, STI, O);
+ printPredicateOperand(MI, 3, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- O << ", "
- << markup("<imm:")
- << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
- << markup(">");
+ O << ", " << markup("<imm:") << "#"
+ << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2STMDB_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(3).getImm() == -4) {
O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
+ printPredicateOperand(MI, 4, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(1).getReg());
O << "}";
@@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2LDMIA_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(4).getImm() == 4) {
O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
+ printPredicateOperand(MI, 5, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(0).getReg());
O << "}";
@@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VSTMDDB_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VLDMDIA_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tldm";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
O << '\t';
printRegName(O, BaseReg);
- if (Writeback) O << "!";
+ if (Writeback)
+ O << "!";
O << ", ";
- printRegisterList(MI, 3, O);
+ printRegisterList(MI, 3, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// GPRs. However, when decoding them, the two GPRs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- case ARM::LDREXD: case ARM::STREXD:
- case ARM::LDAEXD: case ARM::STLEXD: {
- const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ case ARM::LDREXD:
+ case ARM::STREXD:
+ case ARM::LDAEXD:
+ case ARM::STLEXD: {
+ const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
@@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (isStore)
NewMI.addOperand(MI->getOperand(0));
- NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
- &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewReg = MCOperand::createReg(MRI.getMatchingSuperReg(
+ Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
NewMI.addOperand(NewReg);
// Copy the remaining operands into NewMI.
- for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
NewMI.addOperand(MI->getOperand(i));
- printInstruction(&NewMI, O);
+ printInstruction(&NewMI, STI, O);
return;
}
break;
}
- // B9.3.3 ERET (Thumb)
- // For a target that has Virtualization Extensions, ERET is the preferred
- // disassembly of SUBS PC, LR, #0
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
case ARM::t2SUBS_PC_LR: {
- if (MI->getNumOperands() == 3 &&
- MI->getOperand(0).isImm() &&
+ if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() &&
MI->getOperand(0).getImm() == 0 &&
- (getAvailableFeatures() & ARM::FeatureVirtualization)) {
+ STI.getFeatureBits()[ARM::FeatureVirtualization]) {
O << "\teret";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- O << markup("<imm:")
- << '#' << formatImm(Op.getImm())
- << markup(">");
+ O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr()) {
@@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
+ O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
} else {
- O << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
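[Editor's note] markup() here is the MCInstPrinter helper that wraps operands in machine-readable tags when markup output is enabled and returns an empty string otherwise, so the collapsed one-line forms in this hunk are purely formatting changes. For example, the same statement prints either <imm:#-4> or #-4 depending on that setting:

  O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");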
@@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
// REG REG 0,SH_OPC - e.g. R5, ROR R3
// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
printRegName(O, MO1.getReg());
@@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
printRegName(O, MO1.getReg());
@@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
-
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
- O << ", "
- << markup("<imm:")
- << "#"
+ O << ", " << markup("<imm:") << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm())
- << markup(">");
+ << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
}
O << "]" << markup(">");
return;
@@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
#ifndef NDEBUG
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
- assert(IdxMode != ARMII::IndexModePost &&
- "Should be pre or offset index op");
+ assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
#endif
- printAM2PreOrOffsetIndexOp(MI, Op, O);
+ printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
}
void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
<< markup(">");
return;
}
@@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
raw_ostream &O,
bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << '[';
printRegName(O, MO1.getReg());
@@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
return;
}
- //If the op is sub we have to print the immediate even if it is 0
+ // If the op is sub we have to print the immediate even if it is 0
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(op)
- << ImmOffs
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
<< markup(">");
}
O << ']' << markup(">");
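[Editor's sketch] The AM3 operand packs an 8-bit offset and an add/sub bit into a single immediate; the helpers used above decode it roughly as follows (a paraphrase of ARMAddressingModes.h — the exact bit positions are this sketch's assumption):

  unsigned char Offset = AM3Opc & 0xFF;                // ARM_AM::getAM3Offset
  ARM_AM::AddrOpc Op =
      ((AM3Opc >> 8) & 1) ? ARM_AM::sub : ARM_AM::add; // ARM_AM::getAM3Op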
@@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (MO1.getReg()) {
O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
@@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
<< markup(">");
}
-void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
<< markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << (MO2.getImm() ? "" : "-");
printRegName(O, MO1.getReg());
}
-void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
<< markup(">");
}
-
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
- .getImm());
+ ARM_AM::AMSubMode Mode =
+ ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm());
O << ARM_AM::getAMSubModeStr(Mode);
}
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -637,22 +634,19 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
- unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs * 4
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
+ << ImmOffs * 4 << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
O << markup("<mem:") << "[";
@@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.getReg() == 0)
@@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = countTrailingZeros(v);
- int32_t width = (32 - countLeadingZeros (v)) - lsb;
+ int32_t width = (32 - countLeadingZeros(v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << markup("<imm:") << '#' << lsb << markup(">")
- << ", "
- << markup("<imm:") << '#' << width << markup(">");
+ O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
+ << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
+ O << ARM_MB::MemBOptToString(val, STI.getFeatureBits()[ARM::HasV8Ops]);
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
O << ARM_ISB::InstSyncBOptToString(val);
}
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
if (isASR) {
- O << ", asr "
- << markup("<imm:")
- << "#" << (Amt == 0 ? 32 : Amt)
- << markup(">");
- }
- else if (Amt) {
- O << ", lsl "
- << markup("<imm:")
- << "#" << Amt
+ O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
<< markup(">");
+ } else if (Amt) {
+ O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
}
}
void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
@@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
// A shift amount of 32 is encoded as 0.
@@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
- if (i != OpNum) O << ", ";
+ if (i != OpNum)
+ O << ", ";
printRegName(O, MI->getOperand(i).getReg());
}
O << "}";
}
void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
@@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
}
-
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
if (Op.getImm())
@@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
O << ARM_PROC::IModToString(Op.getImm());
}
void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned IFlags = Op.getImm();
- for (int i=2; i >= 0; --i)
+ for (int i = 2; i >= 0; --i)
if (IFlags & (1 << i))
O << ARM_PROC::IFlagsToString(1 << i);
@@ -792,60 +790,114 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
- uint64_t FeatureBits = getAvailableFeatures();
+ const FeatureBitset &FeatureBits = STI.getFeatureBits();
- if (FeatureBits & ARM::FeatureMClass) {
+ if (FeatureBits[ARM::FeatureMClass]) {
unsigned SYSm = Op.getImm();
unsigned Opcode = MI->getOpcode();
// For writes, handle extended mask bits if the DSP extension is present.
- if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) {
+ if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) {
switch (SYSm) {
- case 0x400: O << "apsr_g"; return;
- case 0xc00: O << "apsr_nzcvqg"; return;
- case 0x401: O << "iapsr_g"; return;
- case 0xc01: O << "iapsr_nzcvqg"; return;
- case 0x402: O << "eapsr_g"; return;
- case 0xc02: O << "eapsr_nzcvqg"; return;
- case 0x403: O << "xpsr_g"; return;
- case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 0x400:
+ O << "apsr_g";
+ return;
+ case 0xc00:
+ O << "apsr_nzcvqg";
+ return;
+ case 0x401:
+ O << "iapsr_g";
+ return;
+ case 0xc01:
+ O << "iapsr_nzcvqg";
+ return;
+ case 0x402:
+ O << "eapsr_g";
+ return;
+ case 0xc02:
+ O << "eapsr_nzcvqg";
+ return;
+ case 0x403:
+ O << "xpsr_g";
+ return;
+ case 0xc03:
+ O << "xpsr_nzcvqg";
+ return;
}
}
// Handle the basic 8-bit mask.
SYSm &= 0xff;
- if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) {
+    if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::HasV7Ops]) {
// ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
// alias for MSR APSR_nzcvq.
switch (SYSm) {
- case 0: O << "apsr_nzcvq"; return;
- case 1: O << "iapsr_nzcvq"; return;
- case 2: O << "eapsr_nzcvq"; return;
- case 3: O << "xpsr_nzcvq"; return;
+ case 0:
+ O << "apsr_nzcvq";
+ return;
+ case 1:
+ O << "iapsr_nzcvq";
+ return;
+ case 2:
+ O << "eapsr_nzcvq";
+ return;
+ case 3:
+ O << "xpsr_nzcvq";
+ return;
}
}
switch (SYSm) {
- default: llvm_unreachable("Unexpected mask value!");
- case 0: O << "apsr"; return;
- case 1: O << "iapsr"; return;
- case 2: O << "eapsr"; return;
- case 3: O << "xpsr"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 0:
+ O << "apsr";
+ return;
+ case 1:
+ O << "iapsr";
+ return;
+ case 2:
+ O << "eapsr";
+ return;
+ case 3:
+ O << "xpsr";
+ return;
+ case 5:
+ O << "ipsr";
+ return;
+ case 6:
+ O << "epsr";
+ return;
+ case 7:
+ O << "iepsr";
+ return;
+ case 8:
+ O << "msp";
+ return;
+ case 9:
+ O << "psp";
+ return;
+ case 16:
+ O << "primask";
+ return;
+ case 17:
+ O << "basepri";
+ return;
+ case 18:
+ O << "basepri_max";
+ return;
+ case 19:
+ O << "faultmask";
+ return;
+ case 20:
+ O << "control";
+ return;
}
}
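[Editor's note] The reflowed switch above maps the low byte of SYSm to the M-profile special registers. Written as a hypothetical lookup table (not part of the patch), the mapping is:

  static const char *const MClassSpecialRegs[21] = {
      "apsr",  "iapsr", "eapsr", "xpsr",  nullptr, "ipsr",   "epsr",
      "iepsr", "msp",   "psp",   nullptr, nullptr, nullptr,  nullptr,
      nullptr, nullptr, "primask", "basepri", "basepri_max",
      "faultmask", "control"}; // index = SYSm & 0xff; nullptr = unreachable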
@@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: llvm_unreachable("Unexpected mask value!");
- case 4: O << "g"; return;
- case 8: O << "nzcvq"; return;
- case 12: O << "nzcvqg"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 4:
+ O << "g";
+ return;
+ case 8:
+ O << "nzcvq";
+ return;
+ case 12:
+ O << "nzcvqg";
+ return;
}
}
@@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (Mask) {
O << '_';
- if (Mask & 8) O << 'f';
- if (Mask & 4) O << 's';
- if (Mask & 2) O << 'x';
- if (Mask & 1) O << 'c';
+ if (Mask & 8)
+ O << 'f';
+ if (Mask & 4)
+ O << 's';
+ if (Mask & 2)
+ O << 'x';
+ if (Mask & 1)
+ O << 'c';
}
}
void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint32_t Banked = MI->getOperand(OpNum).getImm();
uint32_t R = (Banked & 0x20) >> 5;
@@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
if (R) {
O << "SPSR_";
- switch(SysM) {
- case 0x0e: O << "fiq"; return;
- case 0x10: O << "irq"; return;
- case 0x12: O << "svc"; return;
- case 0x14: O << "abt"; return;
- case 0x16: O << "und"; return;
- case 0x1c: O << "mon"; return;
- case 0x1e: O << "hyp"; return;
- default: llvm_unreachable("Invalid banked SPSR register");
+ switch (SysM) {
+ case 0x0e:
+ O << "fiq";
+ return;
+ case 0x10:
+ O << "irq";
+ return;
+ case 0x12:
+ O << "svc";
+ return;
+ case 0x14:
+ O << "abt";
+ return;
+ case 0x16:
+ O << "und";
+ return;
+ case 0x1c:
+ O << "mon";
+ return;
+ case 0x1e:
+ O << "hyp";
+ return;
+ default:
+ llvm_unreachable("Invalid banked SPSR register");
}
}
assert(!R && "should have dealt with SPSR regs");
const char *RegNames[] = {
- "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
- "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
- "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und",
- "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"
- };
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr",
+ "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq",
+ "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt",
+ "sp_abt", "lr_und", "sp_und", "", "", "", "",
+ "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"};
const char *Name = RegNames[SysM];
assert(Name[0] && "invalid banked register operand");
@@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
// Handle the undefined 15 CC value here for printing so we don't abort().
@@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
O << ARMCondCodeToString(CC);
}
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNum).getReg()) {
assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
@@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "p" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "c" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{" << MI->getOperand(OpNum).getImm() << "}";
}
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
-template<unsigned scale>
+template <unsigned scale>
void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isExpr()) {
@@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << markup("<imm:")
- << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
<< markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
<< markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
@@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
}
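[Editor's sketch] The body of printThumbITMask elided between these hunks prints one 't' or 'e' per IT-block slot: each mask bit above the trailing-zero sentinel that matches the low bit of the first condition is a "then", otherwise an "else". A reconstruction, not verbatim source:

  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
    bool T = ((Mask >> Pos) & 1) == CondBit0;
    O << (T ? 't' : 'e');
  }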
void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
unsigned Scale) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (unsigned ImmOffs = MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(ImmOffs * Scale)
+ O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
<< markup(">");
}
O << "]" << markup(">");
@@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1);
}
void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2);
}
void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
@@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
// REG 0 0 - e.g. R5
// REG IMM, SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
unsigned Reg = MO1.getReg();
printRegName(O, Reg);
@@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
- }
- else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(MO2.getImm() * 4)
+ O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
<< markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
O << ", " << markup("<imm:");
@@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
O << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
@@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1266,71 +1342,61 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl "
- << markup("<imm:")
- << "#" << ShAmt
- << markup(">");
+ O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << markup("<imm:")
- << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
<< markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << markup("<imm:")
- << "#0x";
+ O << markup("<imm:") << "#0x";
O.write_hex(Val);
O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm(Imm + 1)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror "
- << markup("<imm:")
- << "#";
- switch (Imm) {
- default: assert (0 && "illegal ror immediate!");
- case 1: O << "8"; break;
- case 2: O << "16"; break;
- case 3: O << "24"; break;
- }
- O << markup(">");
+ assert(Imm <= 3 && "illegal ror immediate!");
+ O << ", ror " << markup("<imm:") << "#" << 8 * Imm << markup(">");
}
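[Editor's note] The rewrite above replaces a three-way switch with arithmetic: the 2-bit rotate immediate selects a multiple of 8, so 8 * Imm reproduces the removed cases exactly. As a hypothetical helper:

  static unsigned rotImmToAmount(unsigned Imm) {
    assert(Imm >= 1 && Imm <= 3 && "illegal ror immediate!");
    return 8 * Imm; // 1 -> ror #8, 2 -> ror #16, 3 -> ror #24
  }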
void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
MCOperand Op = MI->getOperand(OpNum);
// Support for fixups (MCFixup)
if (Op.isExpr())
- return printOperand(MI, OpNum, O);
+ return printOperand(MI, OpNum, STI, O);
unsigned Bits = Op.getImm() & 0xFF;
unsigned Rot = (Op.getImm() & 0xF00) >> 7;
- bool PrintUnsigned = false;
- switch (MI->getOpcode()){
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()) {
case ARM::MOVi:
// Movs to PC should be treated unsigned
PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
@@ -1354,36 +1420,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
}
// Explicit #bits, #rot implied
- O << "#"
- << markup("<imm:")
- << Bits
- << markup(">")
- << ", #"
- << markup("<imm:")
- << Rot
- << markup(">");
+ O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
+ << Rot << markup(">");
}
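[Editor's sketch] An ARM modified immediate is an 8-bit value rotated right by an even amount; note that the ">> 7" above already doubles the 4-bit rotate field, so Rot is the final rotation. The value the operand encodes could be materialized as follows (illustrative, hypothetical helper):

  uint32_t materializeModImm(uint32_t Bits, unsigned Rot) {
    return (Bits >> Rot) | (Bits << ((32 - Rot) & 31)); // rotr32(Bits, Rot)
  }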
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 16 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 32 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1391,7 +1451,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
@@ -1402,8 +1463,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
O << "}";
}
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1416,6 +1477,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1430,6 +1492,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1447,6 +1510,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1455,6 +1519,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1468,6 +1533,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1482,8 +1548,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
}
void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1498,9 +1565,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1511,24 +1578,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
- O << "[], ";
+ O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 2);
O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 4);
O << "[]}";
}
-void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1545,6 +1612,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1558,9 +1626,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
O << "}";
}
-void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index f179e01..3927c9f 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -24,146 +24,207 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
- void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
- void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <unsigned scale>
- void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O, unsigned Scale);
void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
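[Editor's note] With the header change above, the subtarget is no longer baked into the printer at construction; it travels with every call instead. A hypothetical caller after this patch:

  ARMInstPrinter Printer(MAI, MII, MRI);          // no STI argument any more
  Printer.printInst(&Inst, OS, /*Annot=*/"", STI);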
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def
deleted file mode 100644
index 9f007a0..0000000
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- ARMArchName.def - List of the ARM arch names ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the list of the supported ARM architecture names,
-// i.e. the supported value for -march= option.
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
-
-#ifndef ARM_ARCH_NAME
-#error "You must define ARM_ARCH_NAME before including ARMArchName.def"
-#endif
-
-// ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH)
-ARM_ARCH_NAME("armv2", ARMV2, "2", v4)
-ARM_ARCH_NAME("armv2a", ARMV2A, "2A", v4)
-ARM_ARCH_NAME("armv3", ARMV3, "3", v4)
-ARM_ARCH_NAME("armv3m", ARMV3M, "3M", v4)
-ARM_ARCH_NAME("armv4", ARMV4, "4", v4)
-ARM_ARCH_NAME("armv4t", ARMV4T, "4T", v4T)
-ARM_ARCH_NAME("armv5", ARMV5, "5", v5T)
-ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T)
-ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE)
-ARM_ARCH_NAME("armv6", ARMV6, "6", v6)
-ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6)
-ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2)
-ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ)
-ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ)
-ARM_ARCH_NAME("armv6-m", ARMV6M, "6-M", v6_M)
-ARM_ARCH_NAME("armv7", ARMV7, "7", v7)
-ARM_ARCH_NAME("armv7-a", ARMV7A, "7-A", v7)
-ARM_ARCH_ALIAS("armv7a", ARMV7A)
-ARM_ARCH_NAME("armv7-r", ARMV7R, "7-R", v7)
-ARM_ARCH_ALIAS("armv7r", ARMV7R)
-ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7)
-ARM_ARCH_ALIAS("armv7m", ARMV7M)
-ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8)
-ARM_ARCH_ALIAS("armv8a", ARMV8A)
-ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE)
-ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE)
-
-#undef ARM_ARCH_NAME
-#undef ARM_ARCH_ALIAS
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h
deleted file mode 100644
index bc05673..0000000
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- ARMArchName.h - List of the ARM arch names --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
-#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
-
-namespace llvm {
-namespace ARM {
-
-enum ArchKind {
- INVALID_ARCH = 0
-
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) , ID
-#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
-#include "ARMArchName.def"
-};
-
-} // namespace ARM
-} // namespace llvm
-
-#endif
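The two deleted files implemented the classic X-macro ".def" idiom: one authoritative list of architectures, expanded under different macro definitions to generate an enum in one place and name tables in another. The list is not lost; later hunks in this patch switch the ELF streamer over to the equivalent tables in llvm/Support/TargetParser. A self-contained sketch of the idiom, with the list inlined instead of kept in a separate .def include and with illustrative entries:

```cpp
#include <cstdio>

#define ARCH_LIST(X)  \
  X("armv4t", V4T)    \
  X("armv6t2", V6T2)  \
  X("armv7-a", V7A)

enum ArchKind {
  INVALID_ARCH = 0,
#define AS_ENUM(NAME, ID) ID,
  ARCH_LIST(AS_ENUM) // first expansion: the enumerators
#undef AS_ENUM
};

static const char *archName(ArchKind K) {
  switch (K) {
#define AS_CASE(NAME, ID) \
  case ID:                \
    return NAME;
  ARCH_LIST(AS_CASE)     // second expansion: the string table
#undef AS_CASE
  default:
    return "invalid";
  }
}

int main() { std::printf("%s\n", archName(V7A)); } // prints armv7-a
```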
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 0b2e3b0..6c1f789 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
}
} // end anonymous namespace
-static unsigned getRelaxedOpcode(unsigned Op) {
+unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
+ bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
+
switch (Op) {
default:
return Op;
case ARM::tBcc:
- return ARM::t2Bcc;
+ return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
case ARM::tLDRpci:
- return ARM::t2LDRpci;
+ return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
case ARM::tADR:
- return ARM::t2ADR;
+ return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
case ARM::tB:
- return ARM::t2B;
+ return HasThumb2 ? (unsigned)ARM::t2B : Op;
case ARM::tCBZ:
return ARM::tHINT;
case ARM::tCBNZ:
@@ -236,9 +238,9 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
if ((Inst.getOpcode() == ARM::tCBZ || Inst.getOpcode() == ARM::tCBNZ) &&
RelaxedOp == ARM::tHINT) {
Res.setOpcode(RelaxedOp);
- Res.addOperand(MCOperand::CreateImm(0));
- Res.addOperand(MCOperand::CreateImm(14));
- Res.addOperand(MCOperand::CreateReg(0));
+ Res.addOperand(MCOperand::createImm(0));
+ Res.addOperand(MCOperand::createImm(14));
+ Res.addOperand(MCOperand::createReg(0));
return;
}
@@ -371,7 +373,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
if (Ctx && Value >= 4096)
- Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -392,7 +394,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
opc = 2; // 0b0010
}
if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
- Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -541,7 +543,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
if (Ctx && Value >= 256)
- Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -560,7 +562,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
if (Ctx && Value >= 256)
- Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
if (A) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
if (Asm.isThumbFunc(&Sym))
Value |= 1;
}
@@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the basic blocks of the same function. Thus, we would like to resolve
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
const MCSymbolData &SymData = Asm.getSymbolData(Sym);
IsResolved = (SymData.getFragment() == DF);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index f4f1082..4e60372 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -33,7 +33,7 @@ public:
return ARM::NumTargetFixupKinds;
}
- bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; }
+ bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
@@ -47,6 +47,8 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 3bd7ab7..ebef789 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -23,7 +23,7 @@ public:
HasDataInCodeSupport = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 4efd325..263c4c4 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -18,7 +18,7 @@ public:
ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMELFObjectWriter(OS, OSABI, isLittle());
}
};
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 33be347..f2c4358 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
: ARMAsmBackend(T, Triple, true) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
}
};
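All three ARM object-file backends (Mach-O, ELF, PE/COFF) now create their object writers over a raw_pwrite_stream instead of a plain raw_ostream. Object writers routinely go back and patch bytes they emitted earlier, such as section sizes and header fields that are only known at the end, and that requires positioned writes. A toy illustration of the capability (illustrative type, not LLVM's stream hierarchy):

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>

struct PWriteBuffer {
  std::string Data;
  void write(const char *P, size_t N) { Data.append(P, N); }
  // The extra capability: patch bytes at an absolute offset after the fact.
  void pwrite(const char *P, size_t N, size_t Off) {
    std::memcpy(&Data[Off], P, N);
  }
};

int main() {
  PWriteBuffer OS;
  OS.write("size:????;body", 14); // emit with a placeholder
  OS.pwrite("0004", 4, 5);        // back-patch once the size is known
  std::cout << OS.Data << "\n";   // size:0004;body
}
```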
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a821a6b..f4fedee 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -32,7 +32,7 @@ namespace {
public:
ARMELFObjectWriter(uint8_t OSABI);
- virtual ~ARMELFObjectWriter();
+ ~ARMELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("Unimplemented");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
} else {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
default: llvm_unreachable("unsupported Modifier");
@@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return Type;
}
-MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
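In the ELF relocation writer, `llvm_unreachable` on an unknown fixup kind is replaced with `report_fatal_error`. The distinction matters: llvm_unreachable asserts a compiler invariant and is undefined behavior in release builds, whereas hand-written assembly can legitimately request a relocation the backend does not support, so it must be a reportable error. The dead `return ELF::R_ARM_NONE;` after the call presumably placates compilers that do not propagate the noreturn attribute. A compressed sketch of the distinction, with stand-in names:

```cpp
#include <cstdio>
#include <cstdlib>

[[noreturn]] static void reportFatal(const char *Msg) {
  std::fprintf(stderr, "fatal error: %s\n", Msg);
  std::exit(1);
}

// Stand-in for GetRelocTypeInner: fixup kind 0 plays the role of FK_Data_4.
static unsigned relocTypeFor(unsigned FixupKind) {
  switch (FixupKind) {
  case 0:
    return 2; // e.g. R_ARM_ABS32
  default:
    // Reachable from user input (an unsupported relocation request), so it
    // must be a reported error, not an unreachable-code assertion.
    reportFatal("unsupported relocation on symbol");
  }
}

int main() { return relocTypeFor(0) == 2 ? 0 : 1; }
```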
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 99b5c62..0eb5a81 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMArchName.h"
-#include "ARMFPUName.h"
#include "ARMRegisterInfo.h"
#include "ARMUnwindOpAsm.h"
#include "llvm/ADT/StringExtras.h"
@@ -40,6 +38,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
@@ -55,56 +54,6 @@ static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
}
-static const char *GetFPUName(unsigned ID) {
- switch (ID) {
- default:
- llvm_unreachable("Unknown FPU kind");
- break;
-#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
-#include "ARMFPUName.def"
- }
- return nullptr;
-}
-
-static const char *GetArchName(unsigned ID) {
- switch (ID) {
- default:
- llvm_unreachable("Unknown ARCH kind");
- break;
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
- case ARM::ID: return NAME;
-#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
-#include "ARMArchName.def"
- }
- return nullptr;
-}
-
-static const char *GetArchDefaultCPUName(unsigned ID) {
- switch (ID) {
- default:
- llvm_unreachable("Unknown ARCH kind");
- break;
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
- case ARM::ID: return DEFAULT_CPU_NAME;
-#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
-#include "ARMArchName.def"
- }
- return nullptr;
-}
-
-static unsigned GetArchDefaultCPUArch(unsigned ID) {
- switch (ID) {
- default:
- llvm_unreachable("Unknown ARCH kind");
- break;
-#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
- case ARM::ID: return ARMBuildAttrs::DEFAULT_CPU_ARCH;
-#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
-#include "ARMArchName.def"
- }
- return 0;
-}
-
namespace {
class ARMELFStreamer;
@@ -134,6 +83,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StrinValue) override;
void emitArch(unsigned Arch) override;
+ void emitArchExtension(unsigned ArchExt) override;
void emitObjectArch(unsigned Arch) override;
void emitFPU(unsigned FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
@@ -247,13 +197,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
OS << "\n";
}
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
- OS << "\t.arch\t" << GetArchName(Arch) << "\n";
+ OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n";
+}
+void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
+ OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n";
}
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
- OS << "\t.object_arch\t" << GetArchName(Arch) << '\n';
+ OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n';
}
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
- OS << "\t.fpu\t" << GetFPUName(FPU) << "\n";
+ OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n";
}
void ARMTargetAsmStreamer::finishAttributeSection() {
}
@@ -270,7 +223,7 @@ void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
OS << "\t.inst";
if (Suffix)
OS << "." << Suffix;
- OS << "\t0x" << utohexstr(Inst) << "\n";
+ OS << "\t0x" << Twine::utohexstr(Inst) << "\n";
}
void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
@@ -279,7 +232,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
for (SmallVectorImpl<uint8_t>::const_iterator OCI = Opcodes.begin(),
OCE = Opcodes.end();
OCI != OCE; ++OCI)
- OS << ", 0x" << utohexstr(*OCI);
+ OS << ", 0x" << Twine::utohexstr(*OCI);
OS << '\n';
}
@@ -322,7 +275,7 @@ private:
unsigned EmittedArch;
SmallVector<AttributeItem, 64> Contents;
- const MCSection *AttributeSection;
+ MCSection *AttributeSection;
AttributeItem *getAttributeItem(unsigned Attribute) {
for (size_t i = 0; i < Contents.size(); ++i)
@@ -433,8 +386,8 @@ private:
public:
ARMTargetELFStreamer(MCStreamer &S)
- : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
- Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH),
+ : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
+ Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID),
AttributeSection(nullptr) {}
};
@@ -454,7 +407,7 @@ class ARMELFStreamer : public MCELFStreamer {
public:
friend class ARMTargetELFStreamer;
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
@@ -478,8 +431,7 @@ public:
void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override {
+ void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
@@ -555,7 +507,7 @@ public:
const SMLoc &Loc) override {
if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4))
- getContext().FatalError(Loc, "relocated expression must be 32-bit");
+ getContext().reportFatalError(Loc, "relocated expression must be 32-bit");
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
@@ -607,11 +559,11 @@ private:
}
void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().CreateTempSymbol();
+ MCSymbol *Start = getContext().createTempSymbol();
EmitLabel(Start);
MCSymbol *Symbol =
- getContext().GetOrCreateSymbol(Name + "." +
+ getContext().getOrCreateSymbol(Name + "." +
Twine(MappingSymbolCounter++));
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -735,69 +687,78 @@ void ARMTargetELFStreamer::emitObjectArch(unsigned Value) {
void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
- setAttributeItem(CPU_name, GetArchDefaultCPUName(Arch), false);
- if (EmittedArch == ARM::INVALID_ARCH)
- setAttributeItem(CPU_arch, GetArchDefaultCPUArch(Arch), false);
+ setAttributeItem(CPU_name,
+ ARMTargetParser::getArchDefaultCPUName(Arch),
+ false);
+
+ if (EmittedArch == ARM::AK_INVALID)
+ setAttributeItem(CPU_arch,
+ ARMTargetParser::getArchDefaultCPUArch(Arch),
+ false);
else
- setAttributeItem(CPU_arch, GetArchDefaultCPUArch(EmittedArch), false);
+ setAttributeItem(CPU_arch,
+ ARMTargetParser::getArchDefaultCPUArch(EmittedArch),
+ false);
switch (Arch) {
- case ARM::ARMV2:
- case ARM::ARMV2A:
- case ARM::ARMV3:
- case ARM::ARMV3M:
- case ARM::ARMV4:
- case ARM::ARMV5:
+ case ARM::AK_ARMV2:
+ case ARM::AK_ARMV2A:
+ case ARM::AK_ARMV3:
+ case ARM::AK_ARMV3M:
+ case ARM::AK_ARMV4:
+ case ARM::AK_ARMV5:
setAttributeItem(ARM_ISA_use, Allowed, false);
break;
- case ARM::ARMV4T:
- case ARM::ARMV5T:
- case ARM::ARMV5TE:
- case ARM::ARMV6:
- case ARM::ARMV6J:
+ case ARM::AK_ARMV4T:
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6J:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
- case ARM::ARMV6T2:
+ case ARM::AK_ARMV6T2:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
- case ARM::ARMV6Z:
- case ARM::ARMV6ZK:
+ case ARM::AK_ARMV6K:
+ case ARM::AK_ARMV6Z:
+ case ARM::AK_ARMV6ZK:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(Virtualization_use, AllowTZ, false);
break;
- case ARM::ARMV6M:
+ case ARM::AK_ARMV6M:
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
- case ARM::ARMV7:
+ case ARM::AK_ARMV7:
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
- case ARM::ARMV7A:
+ case ARM::AK_ARMV7A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
- case ARM::ARMV7R:
+ case ARM::AK_ARMV7R:
setAttributeItem(CPU_arch_profile, RealTimeProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
- case ARM::ARMV7M:
+ case ARM::AK_ARMV7M:
setAttributeItem(CPU_arch_profile, MicroControllerProfile, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
- case ARM::ARMV8A:
+ case ARM::AK_ARMV8A:
+ case ARM::AK_ARMV8_1A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -805,13 +766,13 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(Virtualization_use, AllowTZVirtualization, false);
break;
- case ARM::IWMMXT:
+ case ARM::AK_IWMMXT:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(WMMX_arch, AllowWMMXv1, false);
break;
- case ARM::IWMMXT2:
+ case ARM::AK_IWMMXT2:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(WMMX_arch, AllowWMMXv2, false);
@@ -827,38 +788,38 @@ void ARMTargetELFStreamer::emitFPU(unsigned Value) {
}
void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
switch (FPU) {
- case ARM::VFP:
- case ARM::VFPV2:
+ case ARM::FK_VFP:
+ case ARM::FK_VFPV2:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv2,
/* OverwriteExisting= */ false);
break;
- case ARM::VFPV3:
+ case ARM::FK_VFPV3:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3A,
/* OverwriteExisting= */ false);
break;
- case ARM::VFPV3_D16:
+ case ARM::FK_VFPV3_D16:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3B,
/* OverwriteExisting= */ false);
break;
- case ARM::VFPV4:
+ case ARM::FK_VFPV4:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv4A,
/* OverwriteExisting= */ false);
break;
- case ARM::VFPV4_D16:
+ case ARM::FK_VFPV4_D16:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv4B,
/* OverwriteExisting= */ false);
break;
- case ARM::FP_ARMV8:
+ case ARM::FK_FP_ARMV8:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8A,
/* OverwriteExisting= */ false);
@@ -866,13 +827,13 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
// FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
// it uses the FP_ARMV8_D16 build attribute.
- case ARM::FPV5_D16:
+ case ARM::FK_FPV5_D16:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8B,
/* OverwriteExisting= */ false);
break;
- case ARM::NEON:
+ case ARM::FK_NEON:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3A,
/* OverwriteExisting= */ false);
@@ -881,7 +842,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
- case ARM::NEON_VFPV4:
+ case ARM::FK_NEON_VFPV4:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv4A,
/* OverwriteExisting= */ false);
@@ -890,17 +851,16 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
- case ARM::NEON_FP_ARMV8:
- case ARM::CRYPTO_NEON_FP_ARMV8:
+ case ARM::FK_NEON_FP_ARMV8:
+ case ARM::FK_CRYPTO_NEON_FP_ARMV8:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8A,
/* OverwriteExisting= */ false);
- setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8,
- /* OverwriteExisting= */ false);
+ // 'Advanced_SIMD_arch' must not be emitted here; it is emitted within
+ // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a().
break;
- case ARM::SOFTVFP:
+ case ARM::FK_SOFTVFP:
break;
default:
@@ -941,10 +901,10 @@ void ARMTargetELFStreamer::finishAttributeSection() {
// ]+
// ]*
- if (FPU != ARM::INVALID_FPU)
+ if (FPU != ARM::FK_INVALID)
emitFPUDefaultAttributes();
- if (Arch != ARM::INVALID_ARCH)
+ if (Arch != ARM::AK_INVALID)
emitArchDefaultAttributes();
if (Contents.empty())
@@ -958,11 +918,8 @@ void ARMTargetELFStreamer::finishAttributeSection() {
if (AttributeSection) {
Streamer.SwitchSection(AttributeSection);
} else {
- AttributeSection =
- Streamer.getContext().getELFSection(".ARM.attributes",
- ELF::SHT_ARM_ATTRIBUTES,
- 0,
- SectionKind::getMetadata());
+ AttributeSection = Streamer.getContext().getELFSection(
+ ".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
Streamer.SwitchSection(AttributeSection);
// Format version
@@ -1007,7 +964,7 @@ void ARMTargetELFStreamer::finishAttributeSection() {
}
Contents.clear();
- FPU = ARM::INVALID_FPU;
+ FPU = ARM::FK_INVALID;
}
void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
@@ -1067,14 +1024,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
// Get .ARM.extab or .ARM.exidx section
- const MCSectionELF *EHSection = nullptr;
- if (const MCSymbol *Group = FnSection.getGroup()) {
- EHSection = getContext().getELFSection(
- EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
- FnSection.getEntrySize(), Group->getName());
- } else {
- EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
- }
+ const MCSymbol *Group = FnSection.getGroup();
+ if (Group)
+ Flags |= ELF::SHF_GROUP;
+ MCSectionELF *EHSection =
+ getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
+ FnSection.getUniqueID(), nullptr, &FnSection);
+
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
@@ -1099,7 +1055,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
- Frag->getFixups().push_back(MCFixup::Create(Frag->getContents().size(), Expr,
+ Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr,
Kind));
}
@@ -1121,7 +1077,7 @@ void ARMELFStreamer::Reset() {
void ARMELFStreamer::emitFnStart() {
assert(FnStart == nullptr);
- FnStart = getContext().CreateTempSymbol();
+ FnStart = getContext().createTempSymbol();
EmitLabel(FnStart);
}
@@ -1180,14 +1136,14 @@ void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
// Add the R_ARM_NONE fixup at the same position
void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
- const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name);
const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
visitUsedExpr(*PersonalityRef);
MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
PersonalityRef,
MCFixup::getKindForSize(4, false)));
}
@@ -1224,7 +1180,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
// Create .ARM.extab label for offset in .ARM.exidx
assert(!ExTab);
- ExTab = getContext().CreateTempSymbol();
+ ExTab = getContext().createTempSymbol();
EmitLabel(ExTab);
// Emit personality
@@ -1347,27 +1303,30 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
- return S;
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm);
+}
+
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
+ return new ARMTargetStreamer(S);
}
-MCStreamer *createARMNullStreamer(MCContext &Ctx) {
- MCStreamer *S = llvm::createNullStreamer(Ctx);
- new ARMTargetStreamer(*S);
- return S;
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new ARMTargetELFStreamer(S);
+ return new ARMTargetStreamer(S);
}
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool IsThumb) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
- new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
// the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
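The ARMELFStreamer.cpp diff above bundles two refactorings. The local GetFPUName/GetArchName/GetArchDefaultCPU* switch tables, generated from the now-deleted .def files, are dropped in favor of ARMTargetParser and the ARM::AK_*/ARM::FK_* enums from llvm/Support/TargetParser.h. Separately, streamer construction is inverted: instead of each target building a complete MCStreamer and attaching its target streamer behind the scenes, common code creates the streamer and calls small registered factories (createARMTargetAsmStreamer, createARMNullTargetStreamer, createARMObjectTargetStreamer) that merely wrap it. A sketch of such a factory registry, with illustrative types:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Streamer { /* generic emission state lives here */ };

struct TargetStreamer {
  virtual ~TargetStreamer() = default;
};

struct ARMTargetStreamer : TargetStreamer {
  explicit ARMTargetStreamer(Streamer &) {}
};

using Factory = std::function<std::unique_ptr<TargetStreamer>(Streamer &)>;
static std::map<std::string, Factory> Registry;

int main() {
  // The target registers a small wrapper factory...
  Registry["arm"] = [](Streamer &S) {
    return std::unique_ptr<TargetStreamer>(new ARMTargetStreamer(S));
  };
  // ...and common code, which now owns streamer construction, invokes it.
  Streamer S;
  auto TS = Registry["arm"](S);
  std::cout << (TS ? "created ARM target streamer\n" : "no factory\n");
}
```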
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 66a1618..caa8736 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// Exceptions handling
switch (TheTriple.getOS()) {
+ case Triple::Bitrig:
case Triple::NetBSD:
ExceptionsType = ExceptionHandling::DwarfCFI;
break;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 7320f40..84bb092 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -37,8 +37,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ void operator=(const ARMMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -48,13 +48,13 @@ public:
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
- ~ARMMCCodeEmitter() {}
+ ~ARMMCCodeEmitter() override {}
bool isThumb(const MCSubtargetInfo &STI) const {
- return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ return STI.getFeatureBits()[ARM::ModeThumb];
}
bool isThumb2(const MCSubtargetInfo &STI) const {
- return isThumb(STI) && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
}
bool isTargetMachO(const MCSubtargetInfo &STI) const {
Triple TT(STI.getTargetTriple());
@@ -287,7 +287,7 @@ public:
// See ARMELFObjectWriter::ExplicitRelSym and
// ARMELFObjectWriter::GetRelocTypeInner for more details.
MCFixupKind Kind = MCFixupKind(FK_Data_4);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
return 0;
}
@@ -318,7 +318,7 @@ public:
// See ARMELFObjectWriter::ExplicitRelSym and
// ARMELFObjectWriter::GetRelocTypeInner for more details.
MCFixupKind Kind = MCFixupKind(FK_Data_4);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
return 0;
}
@@ -432,7 +432,7 @@ public:
}
}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
@@ -441,14 +441,12 @@ public:
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
@@ -597,7 +595,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected branch target type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
// All of the information is in the fixup.
return 0;
@@ -902,7 +900,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
else
Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
@@ -981,7 +979,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else
@@ -1060,7 +1058,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
break;
}
- Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, E, Kind, MI.getLoc()));
return 0;
}
// If the expression doesn't have :upper16: or :lower16: on it,
@@ -1196,7 +1194,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
return (Rn << 9) | (1 << 13);
@@ -1278,7 +1276,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
else
Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
@@ -1668,7 +1666,7 @@ getShiftRight64Imm(const MCInst &MI, unsigned Op,
}
void ARMMCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
// Pseudo instructions don't get encoded.
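ARMMCCodeEmitter.cpp picks up several LLVM-wide cleanups from this merge: the LLVM_DELETED_FUNCTION macro gives way to plain C++11 `= delete`, method and factory names are lower-cased per the coding standards (EncodeInstruction to encodeInstruction, MCFixup::Create to MCFixup::create), and the code-emitter creation functions drop their MCSubtargetInfo parameter because encodeInstruction already receives the STI per instruction. The `= delete` idiom, for reference; pre-C++11 the macro reduced to the weaker private-and-undefined trick:

```cpp
class Emitter {
  Emitter(const Emitter &) = delete;
  void operator=(const Emitter &) = delete;

public:
  Emitter() = default;
};

int main() {
  Emitter E;       // fine: default construction
  // Emitter F(E); // error: use of deleted copy constructor
  (void)E;
}
```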
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 68d32b2..5b90de3 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -10,6 +10,7 @@
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
#define DEBUG_TYPE "armmcexpr"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 06bf6c9..a52abe7 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -26,8 +26,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
@@ -62,8 +62,8 @@ public:
const MCFixup *Fixup) const override {
return false;
}
- void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override {
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
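The constructor rename in ARMMCExpr.h is not purely cosmetic: identifiers like `_Kind`, an underscore followed by an uppercase letter, are reserved to the implementation in C++. The replacement deliberately lets the parameters shadow the members and relies on the member-initializer list, where the name before the parentheses always denotes the member and the name inside denotes the parameter. A minimal sketch:

```cpp
class TaggedExpr {
  const int Kind;
  const char *Expr;

public:
  // In the mem-initializer list the name outside the parentheses is always
  // the member and the name inside is the parameter, so shadowing is safe.
  explicit TaggedExpr(int Kind, const char *Expr) : Kind(Kind), Expr(Expr) {}
  int kind() const { return Kind; }
  const char *expr() const { return Expr; }
};

int main() { return TaggedExpr(1, "x").kind() == 1 ? 0 : 1; }
```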
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index a6310e5..30deba9 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
std::string &Info) {
- if (STI.getFeatureBits() & llvm::ARM::HasV7Ops &&
+ if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
(MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
// Checks for the deprecated CP15ISB encoding:
@@ -65,7 +65,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
std::string &Info) {
- if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && MI.getOperand(1).isImm() &&
+ if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() != 8) {
Info = "applying IT instruction to more than one subsequent instruction is "
"deprecated";
@@ -77,7 +77,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
std::string &Info) {
- assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
@@ -94,7 +94,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
std::string &Info) {
- assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) &&
+ assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
"cannot predicate thumb instructions");
assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
@@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
break;
+ case Triple::ARMSubArch_v8_1a:
+ if (NoCPU)
+ // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
+ ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8.1a";
+ break;
case Triple::ARMSubArch_v7m:
isThumb = true;
if (NoCPU)
@@ -166,7 +177,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
if (NoCPU)
// v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
// FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
else
// Use CPU to figure out the exact features.
ARMArchFeature = "+v7";
@@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
case Triple::ARMSubArch_v6t2:
ARMArchFeature = "+v6t2";
break;
+ case Triple::ARMSubArch_v6k:
+ ARMArchFeature = "+v6k";
+ break;
case Triple::ARMSubArch_v6m:
isThumb = true;
if (NoCPU)
@@ -241,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -291,41 +305,31 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ T.getArch() == Triple::thumb);
+}
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO: {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
- new ARMTargetStreamer(*S);
- return S;
- }
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
- case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- TheTriple.getArch() == Triple::thumb);
- }
+static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
-static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, MII, MRI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI);
return nullptr;
}
@@ -379,61 +383,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
- createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
- createARMMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget,
- ARM_MC::createARMMCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget,
- createARMMCInstrAnalysis);
+ for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
+ &TheThumbBETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createARMObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer);
+
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget,
- createARMBEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget,
- createARMBEMCCodeEmitter);
+ for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
+ for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
@@ -442,40 +438,4 @@ extern "C" void LLVMInitializeARMTargetMC() {
createThumbLEAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
createThumbBEAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
-
- // Register the null streamer.
- TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget,
- createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget,
- createARMMCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget,
- createARMMCRelocationInfo);
}
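ARMMCTargetDesc.cpp sees three notable changes. ParseARMTriple learns the v8.1a and v6k sub-architectures, and in passing fixes a feature-string bug for v7em, where `t2xtpk` was missing its `+` prefix and was therefore silently ignored. More visibly, LLVMInitializeARMTargetMC collapses four near-identical registration sequences into a single loop over the target variants, which is also where the new per-object-format streamer and target-streamer factories get hooked up. A sketch of the loop-over-targets cleanup against a stand-in registry:

```cpp
#include <iostream>
#include <string>

struct Target { std::string Name; };

static void registerMCAsmInfo(Target &T) {
  std::cout << "asminfo: " << T.Name << "\n";
}
static void registerMCInstrInfo(Target &T) {
  std::cout << "instrinfo: " << T.Name << "\n";
}

int main() {
  Target ARMLE{"armle"}, ARMBE{"armbe"}, ThumbLE{"thumble"}, ThumbBE{"thumbbe"};
  // One loop replaces four copies of the same registration sequence.
  for (Target *T : {&ARMLE, &ARMBE, &ThumbLE, &ThumbBE}) {
    registerMCAsmInfo(*T);
    registerMCInstrInfo(*T);
  }
}
```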
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index a6c20d5..24ca567 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -29,9 +29,12 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCStreamer;
class MCRelocationInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheARMLETarget, TheThumbLETarget;
extern Target TheARMBETarget, TheThumbBETarget;
@@ -39,28 +42,26 @@ extern Target TheARMBETarget, TheThumbBETarget;
namespace ARM_MC {
std::string ParseARMTriple(StringRef TT, StringRef CPU);
- /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+ /// do not need to go through TargetRegistry.
MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
-
-MCStreamer *createARMNullStreamer(MCContext &Ctx);
+MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -79,26 +80,26 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
-/// will generate a PE/COFF object file.
+// Construct a PE/COFF machine code streamer which will generate a PE/COFF
+// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
-/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
-MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+/// Construct an ELF object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian);
-/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer.
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
+/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 7da5003..9755330 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
@@ -44,9 +45,8 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
bool requiresExternRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
- const MCFragment &Fragment,
- unsigned RelocType, const MCSymbolData *SD,
- uint64_t FixedValue);
+ const MCFragment &Fragment, unsigned RelocType,
+ const MCSymbol &S, uint64_t FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -54,10 +54,10 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) override;
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override;
};
}
@@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_br:
return false;
// Handle 24-bit branch kinds.
@@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
- // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
- Log2Size = llvm::Log2_32(2);
- return true;
-
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
@@ -160,27 +155,27 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint32_t Value2 = 0;
uint64_t SecAddr =
- Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
FixedValue += SecAddr;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_HALF_SECTDIFF;
- Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -232,7 +227,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -243,7 +238,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
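The Mach-O writer hunks that follow continue a broader MC cleanup in this merge: the MCSymbolData side table is being retired, so queries such as getSymbolAddress and doesSymbolRequireExternRelocation now take the MCSymbol directly instead of a looked-up MCSymbolData, and addRelocation gains a leading symbol parameter (nullptr for scattered relocations). A toy illustration of removing a side-table indirection, with illustrative types only:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

struct Section { uint64_t Address; };

struct Symbol {
  std::string Name;
  Section *Parent; // previously reached via a parallel symbol-data table
  const Section &getSection() const { return *Parent; }
};

// Old style looked the symbol up in a side map first; the new style
// queries the symbol object itself.
static uint64_t symbolAddress(const Symbol &S, uint64_t Offset) {
  return S.getSection().Address + Offset;
}

int main() {
  Section Text{0x1000};
  Symbol S{"_foo", &Text};
  std::cout << std::hex << symbolAddress(S, 0x20) << "\n"; // prints 1020
}
```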
@@ -263,12 +258,13 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
- uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
@@ -277,13 +273,13 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_SECTDIFF;
- Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -297,7 +293,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
MachO::any_relocation_info MRE;
@@ -307,17 +303,17 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
}
bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCFragment &Fragment,
unsigned RelocType,
- const MCSymbolData *SD,
+ const MCSymbol &S,
uint64_t FixedValue) {
// Most cases can be identified purely from the symbol.
- if (Writer->doesSymbolRequireExternRelocation(SD))
+ if (Writer->doesSymbolRequireExternRelocation(S))
return true;
int64_t Value = (int64_t)FixedValue; // The displacement is signed.
int64_t Range;
@@ -339,9 +335,7 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
// BL/BLX also use external relocations when an internal relocation
// would result in the target being out of range. This gives the linker
// enough information to generate a branch island.
- const MCSectionData &SymSD = Asm.getSectionData(
- SD->getSymbol().getSection());
- Value += Writer->getSectionAddress(&SymSD);
+ Value += Writer->getSectionAddress(&S.getSection());
Value -= Writer->getSectionAddress(Fragment.getParent());
// If the resultant value would be out of range for an internal relocation,
// use an external instead.
@@ -351,11 +345,10 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
}
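To make the decision above concrete, here is a minimal sketch of the final comparison, assuming Range holds the fixup-specific signed limit computed in the preceding switch (the helper name is illustrative, not from the tree):

static bool outOfInternalRange(int64_t Value, int64_t Range) {
  // Displacements outside the signed window [-(Range + 1), Range] cannot be
  // expressed as an internal relocation, so the writer goes external and
  // lets the linker insert a branch island.
  return Value > Range || Value < -(Range + 1);
}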
void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
+ MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
@@ -365,7 +358,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// relocation type for the fixup kind. This happens when it's a fixup that's
// expected to always be resolvable at assembly time and not have any
// relocations needed.
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation on symbol");
// If this is a difference or a defined symbol plus an offset, then we need a
@@ -381,9 +374,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Get the symbol data, if any.
- const MCSymbolData *SD = nullptr;
+ const MCSymbol *A = nullptr;
if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ A = &Target.getSymA()->getSymbol();
// FIXME: For other platforms, we need to use scattered relocations for
// internal relocations with offsets. If this is an internal relocation with
@@ -393,7 +386,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
uint32_t Offset = Target.getConstant();
if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
Offset += 1 << Log2Size;
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, RelocType, Log2Size,
FixedValue);
@@ -401,8 +394,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbol *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// FIXME!
@@ -410,32 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
"not yet implemented");
} else {
// Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
+ if (A->isVariable()) {
int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
- Res, Layout, Writer->getSectionAddressMap())) {
+ if (A->getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
}
// Check whether we need an external or internal relocation.
- if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD,
+ if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, *A,
FixedValue)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ RelSymbol = A;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->getSymbol().isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
} else {
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD = Asm.getSectionData(
- SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&SymSD);
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
}
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
@@ -447,11 +438,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
@@ -476,15 +464,14 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(MachO::ARM_RELOC_PAIR << 28));
- Writer->addRelocation(Fragment->getParent(), MREPair);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MREPair);
}
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
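The word packed above follows the Mach-O relocation_info layout (r_symbolnum:24, r_pcrel:1, r_length:2, r_extern:1, r_type:4). A minimal sketch of the packing; note that with the new API the extern bit (bit 27) is implied by handing RelSymbol to addRelocation rather than OR'd in here:

static uint32_t packRWord1(unsigned Index, bool IsPCRel, unsigned Log2Size,
                           unsigned Type) {
  // Index fills bits 0-23, the PC-relative flag bit 24, the log2 size
  // bits 25-26, and the relocation type bits 28-31.
  return (Index << 0) | (unsigned(IsPCRel) << 24) | (Log2Size << 25) |
         (Type << 28);
}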
-MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
CPUType,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 8acd7af..b680db5 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -63,6 +63,7 @@ void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {}
void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 593fe34..173cc93 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
  // opcode when r4 is not in the .save directive.
// Compute the consecutive registers from r4 to r11.
- uint32_t Range = 0;
- uint32_t Mask = (1u << 4);
- for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
- if ((RegSave & Bit) == 0u)
- break;
- ++Range;
- Mask |= Bit;
- }
+ uint32_t Mask = RegSave & 0xff0u;
+ uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ // Mask off non-consecutive registers. Keep r4.
+ Mask &= ~(0xffffffe0u << Range);
  // Emit this opcode when the mask covers every register.
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
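A standalone sketch of the rewritten computation, using a portable stand-in for llvm::countTrailingOnes (the names below are illustrative, not from the tree):

#include <cstdint>

static unsigned countTrailingOnes32(uint32_t V) {
  unsigned N = 0;
  for (; V & 1; V >>= 1)
    ++N;
  return N;
}

int main() {
  uint32_t RegSave = 0x00f0u;                      // .save {r4-r7}
  uint32_t Mask = RegSave & 0xff0u;                // keep only r4-r11
  uint32_t Range = countTrailingOnes32(Mask >> 5); // consecutive regs past r4
  Mask &= ~(0xffffffe0u << Range);                 // drop non-consecutive bits
  return (Range == 3 && Mask == 0x00f0u) ? 0 : 1;  // r5-r7 follow r4
}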
@@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
/// Emit unwind opcodes for .vsave directives
void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
- size_t i = 32;
-
- while (i > 16) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 16 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
+  // We only have 4 bits to save the offset in the opcode, so look at the lower
+ // and upper 16 bits separately.
+ for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+ while (Regs) {
+ // Now look for a run of set bits. Remember the MSB and LSB of the run.
+ auto RangeMSB = 32 - countLeadingZeros(Regs);
+ auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLSB = RangeMSB - RangeLen;
+
+ int Opcode = RangeLSB >= 16
+ ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+ : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+ EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+ // Zero out bits we're done with.
+ Regs &= ~(-1u << RangeLSB);
}
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
- ((i - 16) << 4) | Range);
- }
-
- while (i > 0) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 0 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
- }
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) |
- Range);
}
}
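The loop above scans each run of set bits from the most-significant end. A self-contained sketch with portable stand-ins for the MathExtras helpers (clz32 and clo32 are illustrative names):

#include <cstdint>
#include <cstdio>

static unsigned clz32(uint32_t V) { // countLeadingZeros stand-in, V != 0
  unsigned N = 0;
  for (; !(V & 0x80000000u); V <<= 1)
    ++N;
  return N;
}

static unsigned clo32(uint32_t V) { // countLeadingOnes stand-in
  unsigned N = 0;
  for (; V & 0x80000000u; V <<= 1)
    ++N;
  return N;
}

int main() {
  uint32_t Regs = 0x00f00c00u; // example mask: d20-d23 and d10-d11
  while (Regs) {
    unsigned MSB = 32 - clz32(Regs);          // one past the run's top bit
    unsigned Len = clo32(Regs << (32 - MSB)); // length of the run
    unsigned LSB = MSB - Len;
    std::printf("run: d%u-d%u\n", LSB, MSB - 1);
    Regs &= ~(~0u << LSB);                    // clear the handled run
  }
  return 0;
}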
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index d31f1f4..166c04b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
@@ -23,17 +26,19 @@ public:
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- virtual ~ARMWinCOFFObjectWriter() { }
+ ~ARMWinCOFFObjectWriter() override {}
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection) const override;
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
bool recordRelocation(const MCFixup &) const override;
};
unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
- bool IsCrossSection) const {
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT &&
"AArch64 support not yet implemented");
@@ -41,7 +46,10 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
switch (static_cast<unsigned>(Fixup.getKind())) {
- default: llvm_unreachable("unsupported relocation type");
+ default: {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+ }
case FK_Data_4:
switch (Modifier) {
case MCSymbolRefExpr::VK_COFF_IMGREL32:
@@ -74,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
}
namespace llvm {
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b344ced..b993b1b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -16,8 +16,8 @@ namespace {
class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
public:
ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitThumbFunc(MCSymbol *Symbol) override;
@@ -37,10 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-namespace llvm {
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS) {
- return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
-}
+MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
}
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 35fe9b3..ed2deea 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -381,7 +381,10 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
- const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+  // Only run this pass for Cortex-A9.
+ if (!STI->isCortexA9())
+ return false;
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index e464671..df73554 100644
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
Target llvm::TheARMLETarget, llvm::TheARMBETarget;
Target llvm::TheThumbLETarget, llvm::TheThumbBETarget;
-extern "C" void LLVMInitializeARMTargetInfo() {
+extern "C" void LLVMInitializeARMTargetInfo() {
RegisterTarget<Triple::arm, /*HasJIT=*/true>
X(TheARMLETarget, "arm", "ARM");
RegisterTarget<Triple::armeb, /*HasJIT=*/true>
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 13f9358..77cd890 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -41,7 +41,7 @@ static void
emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
+ const ThumbRegisterInfo &MRI,
int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
@@ -52,9 +52,9 @@ void Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -82,23 +82,20 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
@@ -331,20 +328,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
@@ -466,8 +459,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
DebugLoc DL;
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -506,7 +498,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index b785b28..31d5732 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -16,7 +16,7 @@
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -27,7 +27,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 8ea912e..028119c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,16 +22,15 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tMOVr);
- NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
- NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
- NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::createReg(ARM::R8));
+ NopInst.addOperand(MCOperand::createReg(ARM::R8));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
}
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
@@ -44,7 +43,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// Need to check the arch.
MachineFunction &MF = *MBB.getParent();
- const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
+ const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class Thumb1InstrInfo : public ARMBaseInstrInfo {
- Thumb1RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
@@ -36,7 +36,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index fdcb522..b62ae2e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -90,6 +90,19 @@ static void TrackDefUses(MachineInstr *MI,
}
}
+/// Clear kill flags for any uses in the given set. This will likely
+/// conservatively remove more kill flags than are necessary, but removing them
+/// is safer than incorrect kill flags remaining on instructions.
+static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || !MO->isKill())
+ continue;
+ if (!Uses.count(MO->getReg()))
+ continue;
+ MO->setIsKill(false);
+ }
+}
+
static bool isCopy(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
@@ -222,6 +235,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
--MBBI;
MBB.remove(NMI);
MBB.insert(InsertPos, NMI);
+ ClearKillFlags(MI, Uses);
++NumMovedInsts;
continue;
}
@@ -253,12 +267,14 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
}
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ if (!STI.isThumb2())
+ return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = static_cast<const Thumb2InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- TRI = TM.getSubtargetImpl()->getRegisterInfo();
- restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ TRI = STI.getRegisterInfo();
+ restrictIT = STI.restrictIT();
if (!AFI->isThumbFunction())
return false;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 91973e1..dc74f4e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,15 +30,14 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tHINT);
- NopInst.addOperand(MCOperand::CreateImm(0));
- NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::createImm(0));
+ NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::createReg(0));
}
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
@@ -257,14 +256,19 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (Fits) {
if (isSub) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
- .addReg(BaseReg, RegState::Kill)
+ .addReg(BaseReg)
.addReg(DestReg, RegState::Kill)
.addImm((unsigned)Pred).addReg(PredReg).addReg(0)
.setMIFlags(MIFlags);
} else {
+      // Here we know that DestReg is not SP, but we do not
+      // know anything about BaseReg. t2ADDrr is an invalid
+      // instruction if SP is used as the second argument, but
+      // is fine if SP is the first argument. To be sure we
+      // do not generate an invalid encoding, put BaseReg first.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
+ .addReg(BaseReg)
.addReg(DestReg, RegState::Kill)
- .addReg(BaseReg, RegState::Kill)
.addImm((unsigned)Pred).addReg(PredReg).addReg(0)
.setMIFlags(MIFlags);
}
@@ -574,13 +578,10 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
- NumBits = 8;
- // MCInst operand has already scaled value.
+ NumBits = 10; // 8 bits scaled by 4
+    // The MCInst operand expects an already-scaled value.
Scale = 1;
- if (Offset < 0) {
- isSub = true;
- Offset = -Offset;
- }
+ assert((Offset & 3) == 0 && "Can't encode this offset!");
} else {
llvm_unreachable("Unsupported addressing mode!");
}
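The arithmetic behind this change: the encoding carries an 8-bit immediate that the hardware scales by 4, so the representable byte offset spans 10 bits with the low two bits forced to zero, which is exactly NumBits = 10 with Scale = 1 plus the alignment assertion. A hedged sketch of the resulting fit check (helper name is illustrative):

static bool fitsAddrModeT2i8s4(int Offset) {
  // 255 * 4 = 1020 is the largest encodable magnitude; the sign travels
  // separately in the add/subtract bit.
  return (Offset & 3) == 0 && Offset >= -1020 && Offset <= 1020;
}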
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
- Thumb2RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
@@ -60,7 +60,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
-}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
- Thumb2RegisterInfo(const ARMSubtarget &STI);
-
- /// emitLoadConstPool - Emits a load from constpool to materialize the
- /// specified immediate.
- void
- emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
- unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c51eb8b..0ab1ff9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -332,9 +333,7 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
static bool VerifyLowRegs(MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
- bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
- Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
- Opc == ARM::t2LDMDB_UPD);
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
bool isLROk = (Opc == ARM::t2STMDB_UPD);
bool isSPOk = isPCOk || isLROk;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -412,16 +411,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
HasShift = true;
OpNum = 4;
break;
- case ARM::t2LDMIA:
- case ARM::t2LDMDB: {
+ case ARM::t2LDMIA: {
unsigned BaseReg = MI->getOperand(0).getReg();
- if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
- return false;
+ assert(isARMLowRegister(BaseReg));
// For the non-writeback version (this one), the base register must be
// one of the registers being loaded.
bool isOK = false;
- for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
if (MI->getOperand(i).getReg() == BaseReg) {
isOK = true;
break;
@@ -445,7 +442,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
break;
}
case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
OpNum = 0;
@@ -469,9 +465,11 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned OffsetReg = 0;
bool OffsetKill = false;
+ bool OffsetInternal = false;
if (HasShift) {
OffsetReg = MI->getOperand(2).getReg();
OffsetKill = MI->getOperand(2).isKill();
+ OffsetInternal = MI->getOperand(2).isInternalRead();
if (MI->getOperand(3).getImm())
// Thumb1 addressing mode doesn't support shift.
@@ -501,7 +499,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
if (HasOffReg)
- MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
+ getInternalReadRegState(OffsetInternal));
}
// Transfer the rest of operands.
@@ -570,7 +569,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- if (MI->mayLoad() || MI->mayStore())
+ if (MI->mayLoadOrStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -1001,17 +1000,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
}
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const Thumb2InstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- STI = &TM.getSubtarget<ARMSubtarget>();
+ STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (STI->isThumb1Only() || STI->prefers32BitThumb())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size?
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
- OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
- MinimizeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 928c8e3..b5f9d7e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -38,40 +38,36 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
return &ARM::tGPRRegClass;
- return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
return &ARM::tGPRRegClass;
}
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- assert((isARMLowRegister(DestReg) ||
- isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only()) {
+ assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+ return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+ }
+ return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+}
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
return Opcode;
}
-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+ "This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
@@ -386,14 +420,14 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
return Offset == 0;
}
-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MI.getParent()
- ->getParent()
- ->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo());
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -408,18 +442,21 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ unsigned Reg) const {
+
+ const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
// Thumb1 can't use the emergency spill slot on the stack because
// ldr/str immediate offsets must be positive, and if we're referencing
  // off the frame pointer (if, for example, there are alloca() calls in
  // the function), the offset will be negative. Use R12 instead since that's
  // a call-clobbered register that we know won't be used in Thumb1 mode.
- const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -457,16 +494,19 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
return true;
}
-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+ RS);
+
+ unsigned VReg = 0;
+ const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -477,8 +517,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MF.getFrameInfo()->getStackSize() + SPAdj;
if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
- "Unexpected");
+ assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
if (!hasBasePointer(MF)) {
@@ -494,10 +533,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// when !hasReservedCallFrame().
#ifndef NDEBUG
if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
- assert(MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->hasReservedCallFrame(MF) &&
+ assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool, Thumb2 tracks
+// ARMBaseRegisterInfo; Thumb1 overrides the functions below.
//
//===----------------------------------------------------------------------===//
@@ -22,12 +23,13 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMSubtarget &STI);
+ ThumbRegisterInfo();
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.h b/contrib/llvm/lib/Target/BPF/BPF.h
index 86acffb..4a0cb20 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFPUName.h
+++ b/contrib/llvm/lib/Target/BPF/BPF.h
@@ -1,4 +1,4 @@
-//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===//
+//===-- BPF.h - Top-level interface for BPF representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,20 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
-#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
+#ifndef LLVM_LIB_TARGET_BPF_BPF_H
+#define LLVM_LIB_TARGET_BPF_BPF_H
-namespace llvm {
-namespace ARM {
-
-enum FPUKind {
- INVALID_FPU = 0
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
-#define ARM_FPU_NAME(NAME, ID) , ID
-#include "ARMFPUName.def"
-};
+namespace llvm {
+class BPFTargetMachine;
-} // namespace ARM
-} // namespace llvm
+FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
+}
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPF.td b/contrib/llvm/lib/Target/BPF/BPF.td
new file mode 100644
index 0000000..a4ce90a
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPF.td
@@ -0,0 +1,31 @@
+//===-- BPF.td - Describe the BPF Target Machine -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "BPFRegisterInfo.td"
+include "BPFCallingConv.td"
+include "BPFInstrInfo.td"
+
+def BPFInstrInfo : InstrInfo;
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+def BPFInstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def BPF : Target {
+ let InstructionSet = BPFInstrInfo;
+ let AssemblyWriters = [BPFInstPrinter];
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
new file mode 100644
index 0000000..3237596
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- BPFAsmPrinter.cpp - BPF LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the BPF assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFMCInstLower.h"
+#include "BPFTargetMachine.h"
+#include "InstPrinter/BPFInstPrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class BPFAsmPrinter : public AsmPrinter {
+public:
+ explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+ const char *getPassName() const override { return "BPF Assembly Printer"; }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
+ void EmitInstruction(const MachineInstr *MI) override;
+};
+}
+
+void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << BPFInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *getSymbol(MO.getGlobal());
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+
+ BPFMCInstLower MCInstLowering(OutContext, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeBPFAsmPrinter() {
+ RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFTarget);
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFCallingConv.td b/contrib/llvm/lib/Target/BPF/BPFCallingConv.td
new file mode 100644
index 0000000..8cec6fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFCallingConv.td
@@ -0,0 +1,29 @@
+//===-- BPFCallingConv.td - Calling Conventions BPF --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the BPF architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// BPF 64-bit C return-value convention.
+def RetCC_BPF64 : CallingConv<[CCIfType<[i64], CCAssignToReg<[R0]>>]>;
+
+// BPF 64-bit C Calling convention.
+def CC_BPF64 : CallingConv<[
+ // Promote i8/i16/i32 args to i64
+ CCIfType<[ i8, i16, i32 ], CCPromoteToType<i64>>,
+
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i64], CCAssignToReg<[ R1, R2, R3, R4, R5 ]>>,
+
+ // Could be assigned to the stack in 8-byte aligned units, but unsupported
+ CCAssignToStack<8, 8>
+]>;
+
+def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>;
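A hedged illustration of CC_BPF64 in effect: narrow integer arguments widen to i64 and the first five arguments take R1-R5 in order, so in the hypothetical declaration below a maps to R1 through e in R5, while a sixth argument would need the 8-byte stack slots the backend does not support.

long callee(char a, short b, int c, long d, long e); // a->R1 ... e->R5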
diff --git a/contrib/llvm/lib/Target/BPF/BPFFrameLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.cpp
new file mode 100644
index 0000000..54c5ece
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.cpp
@@ -0,0 +1,40 @@
+//===-- BPFFrameLowering.cpp - BPF Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the BPF implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFFrameLowering.h"
+#include "BPFInstrInfo.h"
+#include "BPFSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool BPFFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
+
+void BPFFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+void BPFFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+void BPFFrameLowering::processFunctionBeforeCalleeSavedScan(
+ MachineFunction &MF, RegScavenger *RS) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MRI.setPhysRegUnused(BPF::R6);
+ MRI.setPhysRegUnused(BPF::R7);
+ MRI.setPhysRegUnused(BPF::R8);
+ MRI.setPhysRegUnused(BPF::R9);
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h
new file mode 100644
index 0000000..3b9fc44
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFFrameLowering.h
@@ -0,0 +1,41 @@
+//===-- BPFFrameLowering.h - Define frame lowering for BPF -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements BPF-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H
+#define LLVM_LIB_TARGET_BPF_BPFFRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class BPFSubtarget;
+
+class BPFFrameLowering : public TargetFrameLowering {
+public:
+ explicit BPFFrameLowering(const BPFSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {}
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override {
+ MBB.erase(MI);
+ }
+};
+}
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
new file mode 100644
index 0000000..d9e654c
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -0,0 +1,160 @@
+//===-- BPFISelDAGToDAG.cpp - A dag to dag inst selector for BPF ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for BPF,
+// converting from a legalized dag to a BPF dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFRegisterInfo.h"
+#include "BPFSubtarget.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-isel"
+
+// Instruction Selector Implementation
+namespace {
+
+class BPFDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit BPFDAGToDAGISel(BPFTargetMachine &TM) : SelectionDAGISel(TM) {}
+
+ const char *getPassName() const override {
+ return "BPF DAG->DAG Pattern Instruction Selection";
+ }
+
+private:
+// Include the pieces autogenerated from the target description.
+#include "BPFGenDAGISel.inc"
+
+ SDNode *Select(SDNode *N) override;
+
+ // Complex Pattern for address selection.
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+};
+}
+
+// ComplexPattern used on BPF Load/Store instructions
+bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+  // If Address is a FrameIndex, get the corresponding TargetFrameIndex.
+ SDLoc DL(Addr);
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false;
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    // isBaseWithConstantOffset() guarantees that operand 1 is a constant.
+    ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
+    if (isInt<32>(CN->getSExtValue())) {
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64);
+ return true;
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
+ return true;
+}
+
+SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+    return nullptr;
+ }
+
+  // Instruction selection not handled by the auto-generated tablegen
+  // patterns is done here.
+ switch (Opcode) {
+ default: break;
+
+ case ISD::UNDEF: {
+ errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n';
+ report_fatal_error("shouldn't see UNDEF during Select");
+ break;
+ }
+
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::bpf_load_byte:
+ case Intrinsic::bpf_load_half:
+ case Intrinsic::bpf_load_word: {
+ SDLoc DL(Node);
+ SDValue Chain = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue Skb = Node->getOperand(2);
+ SDValue N3 = Node->getOperand(3);
+
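+      // ld_abs/ld_ind implicitly read the packet pointer from R6 (see the
+      // Uses = [R6] constraint on LOAD_ABS/LOAD_IND), so pin the skb operand
+      // to R6 before selection.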
+ SDValue R6Reg = CurDAG->getRegister(BPF::R6, MVT::i64);
+ Chain = CurDAG->getCopyToReg(Chain, DL, R6Reg, Skb, SDValue());
+ Node = CurDAG->UpdateNodeOperands(Node, Chain, N1, R6Reg, N3);
+ break;
+ }
+ }
+ break;
+ }
+
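+  // Frame addresses are materialized as a MOV_rr of the TargetFrameIndex; the
+  // FrameReg + offset rewrite happens later in eliminateFrameIndex().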
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = BPF::MOV_rr;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI);
+ return CurDAG->getMachineNode(Opc, SDLoc(Node), VT, TFI);
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == nullptr || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
+ return ResNode;
+}
+
+FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) {
+ return new BPFDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
new file mode 100644
index 0000000..38c56bb
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -0,0 +1,640 @@
+//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that BPF uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFISelLowering.h"
+#include "BPF.h"
+#include "BPFTargetMachine.h"
+#include "BPFSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-lower"
+
+namespace {
+
+// Diagnostic information for unimplemented or unsupported feature reporting.
+class DiagnosticInfoUnsupported : public DiagnosticInfo {
+private:
+ // Debug location where this diagnostic is triggered.
+ DebugLoc DLoc;
+ const Twine &Description;
+ const Function &Fn;
+ SDValue Value;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc,
+ SDValue Value)
+ : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()),
+ Description(Desc), Fn(Fn), Value(Value) {}
+
+ void print(DiagnosticPrinter &DP) const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+
+ if (DLoc) {
+ auto DIL = DLoc.get();
+ StringRef Filename = DIL->getFilename();
+ unsigned Line = DIL->getLine();
+ unsigned Column = DIL->getColumn();
+ OS << Filename << ':' << Line << ':' << Column << ' ';
+ }
+
+ OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n'
+ << Description;
+ if (Value)
+ Value->print(OS);
+ OS << '\n';
+ OS.flush();
+ DP << Str;
+ }
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+}
+
+BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
+ const BPFSubtarget &STI)
+ : TargetLowering(TM) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i64, &BPF::GPRRegClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ setStackPointerRegisterToSaveRestore(BPF::R11);
+
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+
+ // no UNDEF allowed
+ setOperationAction(ISD::UNDEF, MVT::i64, Expand);
+
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTTZ, MVT::i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+
+  // Extended load operations for i1 types must be promoted; BPF has no
+  // sign-extending loads, so sign-extended loads are expanded instead.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ }
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Function alignments (log2)
+ setMinFunctionAlignment(3);
+ setPrefFunctionAlignment(3);
+
+  // Expand memcpy/memmove/memset inline so that the kernel sees the copies
+  // as explicit load/store instructions.
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 128;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 128;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128;
+}
+
+SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC:
+ return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operand");
+ }
+}
+
+// Calling Convention Implementation
+#include "BPFGenCallingConv.inc"
+
+SDValue BPFTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ break;
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_BPF64);
+
+ for (auto &VA : ArgLocs) {
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ EVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT().SimpleTy) {
+ default: {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << RegVT.getSimpleVT().SimpleTy << '\n';
+      llvm_unreachable("unhandled argument type");
+ }
+ case MVT::i64:
+ unsigned VReg = RegInfo.createVirtualRegister(&BPF::GPRRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
+
+ // If this is an 8/16/32-bit value, it is really passed promoted to 64
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
+
+ InVals.push_back(ArgValue);
+ }
+ } else {
+ DiagnosticInfoUnsupported Err(DL, *MF.getFunction(),
+ "defined with too many args", SDValue());
+ DAG.getContext()->diagnose(Err);
+ }
+ }
+
+ if (IsVarArg || MF.getFunction()->hasStructRetAttr()) {
+ DiagnosticInfoUnsupported Err(
+ DL, *MF.getFunction(),
+ "functions with VarArgs or StructRet are not supported", SDValue());
+ DAG.getContext()->diagnose(Err);
+ }
+
+ return Chain;
+}
+
+SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ auto &Outs = CLI.Outs;
+ auto &OutVals = CLI.OutVals;
+ auto &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // BPF target does not support tail call optimization.
+ IsTailCall = false;
+
+ switch (CallConv) {
+ default:
+ report_fatal_error("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ break;
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_BPF64);
+
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ if (Outs.size() >= 6) {
+ DiagnosticInfoUnsupported Err(CLI.DL, *MF.getFunction(),
+ "too many args to ", Callee);
+ DAG.getContext()->diagnose(Err);
+ }
+
+ for (auto &Arg : Outs) {
+ ISD::ArgFlagsTy Flags = Arg.Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ DiagnosticInfoUnsupported Err(CLI.DL, *MF.getFunction(),
+ "pass by value not supported ", Callee);
+ DAG.getContext()->diagnose(Err);
+ }
+
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), CLI.DL);
+
+ SmallVector<std::pair<unsigned, SDValue>, 5> RegsToPass;
+
+ // Walk arg assignments
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Push arguments into RegsToPass vector
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ else
+ llvm_unreachable("call arg pass bug");
+ }
+
+ SDValue InFlag;
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ for (auto &Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy(),
+ G->getOffset(), 0);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), 0);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (auto &Reg : RegsToPass)
+ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(
+ Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true),
+ DAG.getConstant(0, CLI.DL, getPointerTy(), true), InFlag, CLI.DL);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG,
+ InVals);
+}
+
+SDValue
+BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const {
+
+  // CCValAssign - represents the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+
+ if (MF.getFunction()->getReturnType()->isAggregateType()) {
+ DiagnosticInfoUnsupported Err(DL, *MF.getFunction(),
+ "only integer returns supported", SDValue());
+ DAG.getContext()->diagnose(Err);
+ }
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_BPF64);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);
+
+    // Glue the copies together so that all emitted copies stay adjacent
+    // and cannot be scheduled apart.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ unsigned Opc = BPFISD::RET_FLAG;
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(Opc, DL, MVT::Other, RetOps);
+}
+
+SDValue BPFTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+
+ if (Ins.size() >= 2) {
+ DiagnosticInfoUnsupported Err(DL, *MF.getFunction(),
+ "only small returns supported", SDValue());
+ DAG.getContext()->diagnose(Err);
+ }
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_BPF64);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (auto &Val : RVLocs) {
+ Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
+ Val.getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
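+// The backend only defines jump instructions for EQ/NE/GT/GE (signed and
+// unsigned), so LT/LE conditions are rewritten by swapping the operands.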
+static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ CC = ISD::getSetCCSwappedOperands(CC);
+ std::swap(LHS, RHS);
+ break;
+ }
+}
+
+SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ SDLoc DL(Op);
+
+ NegateCC(LHS, RHS, CC);
+
+ return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
+ DAG.getConstant(CC, DL, MVT::i64), Dest);
+}
+
+SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDLoc DL(Op);
+
+ NegateCC(LHS, RHS, CC);
+
+ SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i64);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
+
+ return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
+}
+
+const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((BPFISD::NodeType)Opcode) {
+ case BPFISD::FIRST_NUMBER:
+ break;
+ case BPFISD::RET_FLAG:
+ return "BPFISD::RET_FLAG";
+ case BPFISD::CALL:
+ return "BPFISD::CALL";
+ case BPFISD::SELECT_CC:
+ return "BPFISD::SELECT_CC";
+ case BPFISD::BR_CC:
+ return "BPFISD::BR_CC";
+ case BPFISD::Wrapper:
+ return "BPFISD::Wrapper";
+ }
+ return nullptr;
+}
+
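+// Wrap global addresses in BPFISD::Wrapper so that instruction selection can
+// match them with the LD_imm64 pattern.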
+SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);
+
+ return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
+}
+
+MachineBasicBlock *
+BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // ThisMBB:
+ // ...
+ // TrueVal = ...
+ // jmp_XX r1, r2 goto Copy1MBB
+ // fallthrough --> Copy0MBB
+ MachineBasicBlock *ThisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, Copy0MBB);
+ F->insert(I, Copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ Copy1MBB->splice(Copy1MBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(Copy0MBB);
+ BB->addSuccessor(Copy1MBB);
+
+ // Insert Branch if Flag
+ unsigned LHS = MI->getOperand(1).getReg();
+ unsigned RHS = MI->getOperand(2).getReg();
+ int CC = MI->getOperand(3).getImm();
+ switch (CC) {
+ case ISD::SETGT:
+ BuildMI(BB, DL, TII.get(BPF::JSGT_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ case ISD::SETUGT:
+ BuildMI(BB, DL, TII.get(BPF::JUGT_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ case ISD::SETGE:
+ BuildMI(BB, DL, TII.get(BPF::JSGE_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ case ISD::SETUGE:
+ BuildMI(BB, DL, TII.get(BPF::JUGE_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ case ISD::SETEQ:
+ BuildMI(BB, DL, TII.get(BPF::JEQ_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ case ISD::SETNE:
+ BuildMI(BB, DL, TII.get(BPF::JNE_rr))
+ .addReg(LHS)
+ .addReg(RHS)
+ .addMBB(Copy1MBB);
+ break;
+ default:
+ report_fatal_error("unimplemented select CondCode " + Twine(CC));
+ }
+
+ // Copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to Copy1MBB
+ BB = Copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(Copy1MBB);
+
+ // Copy1MBB:
+ // %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
+ // ...
+ BB = Copy1MBB;
+ BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(5).getReg())
+ .addMBB(Copy0MBB)
+ .addReg(MI->getOperand(4).getReg())
+ .addMBB(ThisMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
new file mode 100644
index 0000000..ec71dca
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -0,0 +1,90 @@
+//===-- BPFISelLowering.h - BPF DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that BPF uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFISELLOWERING_H
+#define LLVM_LIB_TARGET_BPF_BPFISELLOWERING_H
+
+#include "BPF.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class BPFSubtarget;
+namespace BPFISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ RET_FLAG,
+ CALL,
+ SELECT_CC,
+ BR_CC,
+ Wrapper
+};
+}
+
+class BPFTargetLowering : public TargetLowering {
+public:
+ explicit BPFTargetLowering(const TargetMachine &TM, const BPFSubtarget &STI);
+
+ // Provide custom lowering hooks for some operations.
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+ // This method returns the name of a target specific DAG node.
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
+
+private:
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ // Lower the result values of a call, copying them out of physregs into vregs
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+  // Lower a call into a CALLSEQ_START - BPFISD::CALL - CALLSEQ_END chain
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ // Lower incoming arguments, copy physregs into vregs
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+ SelectionDAG &DAG) const override;
+
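+  // Prefer 8-byte operations when expanding memcpy/memset inline; smaller
+  // copies fall back to 32-bit accesses.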
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const override {
+ return Size >= 8 ? MVT::i64 : MVT::i32;
+ }
+
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override {
+ return true;
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrFormats.td b/contrib/llvm/lib/Target/BPF/BPFInstrFormats.td
new file mode 100644
index 0000000..53f3ad6
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -0,0 +1,33 @@
+//===-- BPFInstrFormats.td - BPF Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstBPF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ let Size = 8;
+
+ let Namespace = "BPF";
+ let DecoderNamespace = "BPF";
+
+ bits<3> BPFClass;
+ let Inst{58-56} = BPFClass;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBPF<outs, ins, asmstr, pattern> {
+ let Inst{63-0} = 0;
+ let isPseudo = 1;
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
new file mode 100644
index 0000000..28bd0ec
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -0,0 +1,168 @@
+//===-- BPFInstrInfo.cpp - BPF Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the BPF implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFSubtarget.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "BPFGenInstrInfo.inc"
+
+using namespace llvm;
+
+BPFInstrInfo::BPFInstrInfo()
+ : BPFGenInstrInfo(BPF::ADJCALLSTACKDOWN, BPF::ADJCALLSTACKUP) {}
+
+void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BPF::GPRRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(BPF::MOV_rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool IsKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ if (RC == &BPF::GPRRegClass)
+ BuildMI(MBB, I, DL, get(BPF::STD))
+ .addReg(SrcReg, getKillRegState(IsKill))
+ .addFrameIndex(FI)
+ .addImm(0);
+ else
+ llvm_unreachable("Can't store this register to stack slot");
+}
+
+void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ if (RC == &BPF::GPRRegClass)
+ BuildMI(MBB, I, DL, get(BPF::LDD), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ llvm_unreachable("Can't load this register from stack slot");
+}
+
+bool BPFInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == BPF::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+      // If the block has any instructions after the JMP, delete them.
+      while (std::next(I) != MBB.end())
+        std::next(I)->eraseFromParent();
+      Cond.clear();
+      FBB = nullptr;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+        TBB = nullptr;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+      // TBB is used to indicate the unconditional branch destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ // Cannot handle conditional branches
+ return true;
+ }
+
+ return false;
+}
+
+unsigned BPFInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (Cond.empty()) {
+ // Unconditional branch
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(BPF::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ llvm_unreachable("Unexpected conditional branch");
+}
+
+unsigned BPFInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != BPF::JMP)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
new file mode 100644
index 0000000..4056c2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -0,0 +1,60 @@
+//===-- BPFInstrInfo.h - BPF Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the BPF implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFINSTRINFO_H
+#define LLVM_LIB_TARGET_BPF_BPFINSTRINFO_H
+
+#include "BPFRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "BPFGenInstrInfo.inc"
+
+namespace llvm {
+
+class BPFInstrInfo : public BPFGenInstrInfo {
+ const BPFRegisterInfo RI;
+
+public:
+ BPFInstrInfo();
+
+ const BPFRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td
new file mode 100644
index 0000000..26b2cfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -0,0 +1,560 @@
+//===-- BPFInstrInfo.td - Target Description for BPF Target ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the BPF instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "BPFInstrFormats.td"
+
+// Instruction Operands and Patterns
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_BPFCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>;
+def SDT_BPFCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_BPFCall : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_BPFSetFlag : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>]>;
+def SDT_BPFSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 4>,
+ SDTCisSameAs<4, 5>]>;
+def SDT_BPFBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
+ SDTCisVT<3, OtherVT>]>;
+def SDT_BPFWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+
+def BPFcall : SDNode<"BPFISD::CALL", SDT_BPFCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def BPFretflag : SDNode<"BPFISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>;
+
+def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC, [SDNPInGlue]>;
+def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i64>;
+
+def u64imm : Operand<i64> {
+ let PrintMethod = "printImm64Operand";
+}
+
+def i64immSExt32 : PatLeaf<(imm),
+ [{return isInt<32>(N->getSExtValue()); }]>;
+
+// Addressing modes.
+def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>;
+
+// Address operands
+def MEMri : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let EncoderMethod = "getMemoryOpValue";
+ let MIOperandInfo = (ops GPR, i16imm);
+}
+
+// Condition code predicates, used when pattern matching jump instructions
+def BPF_CC_EQ : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETEQ);}]>;
+def BPF_CC_NE : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETNE);}]>;
+def BPF_CC_GE : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETGE);}]>;
+def BPF_CC_GT : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETGT);}]>;
+def BPF_CC_GTU : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETUGT);}]>;
+def BPF_CC_GEU : PatLeaf<(imm),
+ [{return (N->getZExtValue() == ISD::SETUGE);}]>;
+
+// jump instructions
+class JMP_RR<bits<4> Opc, string OpcodeStr, PatLeaf Cond>
+ : InstBPF<(outs), (ins GPR:$dst, GPR:$src, brtarget:$BrDst),
+ !strconcat(OpcodeStr, "\t$dst, $src goto $BrDst"),
+ [(BPFbrcc i64:$dst, i64:$src, Cond, bb:$BrDst)]> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<4> src;
+ bits<16> BrDst;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{55-52} = src;
+ let Inst{51-48} = dst;
+ let Inst{47-32} = BrDst;
+
+ let op = Opc;
+ let BPFSrc = 1;
+ let BPFClass = 5; // BPF_JMP
+}
+
+class JMP_RI<bits<4> Opc, string OpcodeStr, PatLeaf Cond>
+ : InstBPF<(outs), (ins GPR:$dst, i64imm:$imm, brtarget:$BrDst),
+ !strconcat(OpcodeStr, "i\t$dst, $imm goto $BrDst"),
+ [(BPFbrcc i64:$dst, i64immSExt32:$imm, Cond, bb:$BrDst)]> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<16> BrDst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{47-32} = BrDst;
+ let Inst{31-0} = imm;
+
+ let op = Opc;
+ let BPFSrc = 0;
+ let BPFClass = 5; // BPF_JMP
+}
+
+multiclass J<bits<4> Opc, string OpcodeStr, PatLeaf Cond> {
+ def _rr : JMP_RR<Opc, OpcodeStr, Cond>;
+ def _ri : JMP_RI<Opc, OpcodeStr, Cond>;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in {
+// cmp+goto instructions
+defm JEQ : J<0x1, "jeq", BPF_CC_EQ>;
+defm JUGT : J<0x2, "jgt", BPF_CC_GTU>;
+defm JUGE : J<0x3, "jge", BPF_CC_GEU>;
+defm JNE : J<0x5, "jne", BPF_CC_NE>;
+defm JSGT : J<0x6, "jsgt", BPF_CC_GT>;
+defm JSGE : J<0x7, "jsge", BPF_CC_GE>;
+}
+
+// ALU instructions
+class ALU_RI<bits<4> Opc, string OpcodeStr, SDNode OpNode>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src2, i64imm:$imm),
+ !strconcat(OpcodeStr, "i\t$dst, $imm"),
+ [(set GPR:$dst, (OpNode GPR:$src2, i64immSExt32:$imm))]> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{31-0} = imm;
+
+ let op = Opc;
+ let BPFSrc = 0;
+ let BPFClass = 7; // BPF_ALU64
+}
+
+class ALU_RR<bits<4> Opc, string OpcodeStr, SDNode OpNode>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src2, GPR:$src),
+ !strconcat(OpcodeStr, "\t$dst, $src"),
+ [(set GPR:$dst, (OpNode i64:$src2, i64:$src))]> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<4> src;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{55-52} = src;
+ let Inst{51-48} = dst;
+
+ let op = Opc;
+ let BPFSrc = 1;
+ let BPFClass = 7; // BPF_ALU64
+}
+
+multiclass ALU<bits<4> Opc, string OpcodeStr, SDNode OpNode> {
+ def _rr : ALU_RR<Opc, OpcodeStr, OpNode>;
+ def _ri : ALU_RI<Opc, OpcodeStr, OpNode>;
+}
+
+let Constraints = "$dst = $src2" in {
+let isAsCheapAsAMove = 1 in {
+ defm ADD : ALU<0x0, "add", add>;
+ defm SUB : ALU<0x1, "sub", sub>;
+ defm OR : ALU<0x4, "or", or>;
+ defm AND : ALU<0x5, "and", and>;
+ defm SLL : ALU<0x6, "sll", shl>;
+ defm SRL : ALU<0x7, "srl", srl>;
+ defm XOR : ALU<0xa, "xor", xor>;
+ defm SRA : ALU<0xc, "sra", sra>;
+}
+ defm MUL : ALU<0x2, "mul", mul>;
+ defm DIV : ALU<0x3, "div", udiv>;
+}
+
+class MOV_RR<string OpcodeStr>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src),
+ !strconcat(OpcodeStr, "\t$dst, $src"),
+ []> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<4> src;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{55-52} = src;
+ let Inst{51-48} = dst;
+
+ let op = 0xb; // BPF_MOV
+ let BPFSrc = 1; // BPF_X
+ let BPFClass = 7; // BPF_ALU64
+}
+
+class MOV_RI<string OpcodeStr>
+ : InstBPF<(outs GPR:$dst), (ins i64imm:$imm),
+ !strconcat(OpcodeStr, "\t$dst, $imm"),
+ [(set GPR:$dst, (i64 i64immSExt32:$imm))]> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{31-0} = imm;
+
+ let op = 0xb; // BPF_MOV
+ let BPFSrc = 0; // BPF_K
+ let BPFClass = 7; // BPF_ALU64
+}
+
+class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
+ : InstBPF<(outs GPR:$dst), (ins u64imm:$imm),
+ !strconcat(OpcodeStr, "\t$dst, $imm"),
+ [(set GPR:$dst, (i64 imm:$imm))]> {
+
+ bits<3> mode;
+ bits<2> size;
+ bits<4> dst;
+ bits<64> imm;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = dst;
+ let Inst{55-52} = Pseudo;
+ let Inst{47-32} = 0;
+ let Inst{31-0} = imm{31-0};
+
+ let mode = 0; // BPF_IMM
+ let size = 3; // BPF_DW
+ let BPFClass = 0; // BPF_LD
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def LD_imm64 : LD_IMM64<0, "ld_64">;
+def MOV_rr : MOV_RR<"mov">;
+def MOV_ri : MOV_RI<"mov">;
+}
+
+def LD_pseudo
+ : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),
+ "ld_pseudo\t$dst, $pseudo, $imm",
+ [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> {
+
+ bits<3> mode;
+ bits<2> size;
+ bits<4> dst;
+ bits<64> imm;
+ bits<4> pseudo;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = dst;
+ let Inst{55-52} = pseudo;
+ let Inst{47-32} = 0;
+ let Inst{31-0} = imm{31-0};
+
+ let mode = 0; // BPF_IMM
+ let size = 3; // BPF_DW
+ let BPFClass = 0; // BPF_LD
+}
+
+// STORE instructions
+class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs), (ins GPR:$src, MEMri:$addr),
+ !strconcat(OpcodeStr, "\t$addr, $src"), Pattern> {
+ bits<3> mode;
+ bits<2> size;
+ bits<4> src;
+ bits<20> addr;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = src;
+ let Inst{47-32} = addr{15-0}; // offset
+
+ let mode = 3; // BPF_MEM
+ let size = SizeOp;
+ let BPFClass = 3; // BPF_STX
+}
+
+class STOREi64<bits<2> Opc, string OpcodeStr, PatFrag OpNode>
+ : STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>;
+
+def STW : STOREi64<0x0, "stw", truncstorei32>;
+def STH : STOREi64<0x1, "sth", truncstorei16>;
+def STB : STOREi64<0x2, "stb", truncstorei8>;
+def STD : STOREi64<0x3, "std", store>;
+
+// LOAD instructions
+class LOAD<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs GPR:$dst), (ins MEMri:$addr),
+ !strconcat(OpcodeStr, "\t$dst, $addr"), Pattern> {
+ bits<3> mode;
+ bits<2> size;
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = dst;
+ let Inst{55-52} = addr{19-16};
+ let Inst{47-32} = addr{15-0};
+
+ let mode = 3; // BPF_MEM
+ let size = SizeOp;
+ let BPFClass = 1; // BPF_LDX
+}
+
+class LOADi64<bits<2> SizeOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+
+def LDW : LOADi64<0x0, "ldw", zextloadi32>;
+def LDH : LOADi64<0x1, "ldh", zextloadi16>;
+def LDB : LOADi64<0x2, "ldb", zextloadi8>;
+def LDD : LOADi64<0x3, "ldd", load>;
+
+class BRANCH<bits<4> Opc, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs), (ins brtarget:$BrDst),
+ !strconcat(OpcodeStr, "\t$BrDst"), Pattern> {
+ bits<4> op;
+ bits<16> BrDst;
+ bits<1> BPFSrc;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{47-32} = BrDst;
+
+ let op = Opc;
+ let BPFSrc = 0;
+ let BPFClass = 5; // BPF_JMP
+}
+
+class CALL<string OpcodeStr>
+ : InstBPF<(outs), (ins calltarget:$BrDst),
+ !strconcat(OpcodeStr, "\t$BrDst"), []> {
+ bits<4> op;
+ bits<32> BrDst;
+ bits<1> BPFSrc;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{31-0} = BrDst;
+
+ let op = 8; // BPF_CALL
+ let BPFSrc = 0;
+ let BPFClass = 5; // BPF_JMP
+}
+
+// Jump always
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 0, isBarrier = 1 in {
+ def JMP : BRANCH<0x0, "jmp", [(br bb:$BrDst)]>;
+}
+
+// Jump and link
+let isCall = 1, hasDelaySlot = 0, Uses = [R11],
+ // Potentially clobbered registers
+ Defs = [R0, R1, R2, R3, R4, R5] in {
+ def JAL : CALL<"call">;
+}
+
+class NOP_I<string OpcodeStr>
+ : InstBPF<(outs), (ins i32imm:$imm),
+ !strconcat(OpcodeStr, "\t$imm"), []> {
+ // mov r0, r0 == nop
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<4> src;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{55-52} = src;
+ let Inst{51-48} = dst;
+
+ let op = 0xb; // BPF_MOV
+ let BPFSrc = 1; // BPF_X
+ let BPFClass = 7; // BPF_ALU64
+ let src = 0; // R0
+ let dst = 0; // R0
+}
+
+let hasSideEffects = 0 in
+ def NOP : NOP_I<"nop">;
+
+class RET<string OpcodeStr>
+ : InstBPF<(outs), (ins),
+              OpcodeStr, [(BPFretflag)]> {
+ bits<4> op;
+
+ let Inst{63-60} = op;
+ let Inst{59} = 0;
+ let Inst{31-0} = 0;
+
+ let op = 9; // BPF_EXIT
+ let BPFClass = 5; // BPF_JMP
+}
+
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 0, isBarrier = 1,
+ isNotDuplicable = 1 in {
+ def RET : RET<"ret">;
+}
+
+// ADJCALLSTACKDOWN/UP pseudo insns
+let Defs = [R11], Uses = [R11] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ "#ADJCALLSTACKDOWN $amt",
+ [(BPFcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ "#ADJCALLSTACKUP $amt1 $amt2",
+ [(BPFcallseq_end timm:$amt1, timm:$amt2)]>;
+}
+
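+// The Select pseudo is expanded into a compare-and-branch diamond by
+// BPFTargetLowering::EmitInstrWithCustomInserter().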
+let usesCustomInserter = 1 in {
+ def Select : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, i64imm:$imm, GPR:$src, GPR:$src2),
+ "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2",
+ [(set i64:$dst,
+ (BPFselectcc i64:$lhs, i64:$rhs, (i64 imm:$imm), i64:$src, i64:$src2))]>;
+}
+
+// load 64-bit global addr into register
+def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
+
+// 0xffffFFFF doesn't fit into simm32, optimize common case
+def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
+ (SRL_ri (SLL_ri (i64 GPR:$src), 32), 32)>;
+
+// Calls
+def : Pat<(BPFcall tglobaladdr:$dst), (JAL tglobaladdr:$dst)>;
+def : Pat<(BPFcall imm:$dst), (JAL imm:$dst)>;
+
+// Loads
+def : Pat<(extloadi8 ADDRri:$src), (i64 (LDB ADDRri:$src))>;
+def : Pat<(extloadi16 ADDRri:$src), (i64 (LDH ADDRri:$src))>;
+def : Pat<(extloadi32 ADDRri:$src), (i64 (LDW ADDRri:$src))>;
+
+// Atomics
+class XADD<bits<2> SizeOp, string OpcodeStr, PatFrag OpNode>
+ : InstBPF<(outs GPR:$dst), (ins MEMri:$addr, GPR:$val),
+ !strconcat(OpcodeStr, "\t$dst, $addr, $val"),
+ [(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
+ bits<3> mode;
+ bits<2> size;
+ bits<4> src;
+ bits<20> addr;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = src;
+ let Inst{47-32} = addr{15-0}; // offset
+
+ let mode = 6; // BPF_XADD
+ let size = SizeOp;
+ let BPFClass = 3; // BPF_STX
+}
+
+let Constraints = "$dst = $val" in {
+def XADD32 : XADD<0, "xadd32", atomic_load_add_32>;
+def XADD64 : XADD<3, "xadd64", atomic_load_add_64>;
+// undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
+// undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
+}
+
+// bswap16, bswap32, bswap64
+class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src),
+ !strconcat(OpcodeStr, "\t$dst"),
+ Pattern> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{31-0} = imm;
+
+ let op = 0xd; // BPF_END
+ let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target)
+ let BPFClass = 4; // BPF_ALU
+ let imm = SizeOp;
+}
+
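+// With only 64-bit registers, bswap16/bswap32 are matched as a full 64-bit
+// byte swap followed by a logical shift right of 48/32 bits.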
+let Constraints = "$dst = $src" in {
+def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+def BSWAP64 : BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>;
+}
+
+let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
+ hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in {
+class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
+ : InstBPF<(outs), (ins GPR:$skb, i64imm:$imm),
+ !strconcat(OpcodeStr, "\tr0, $skb.data + $imm"),
+ [(set R0, (OpNode GPR:$skb, i64immSExt32:$imm))]> {
+ bits<3> mode;
+ bits<2> size;
+ bits<32> imm;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{31-0} = imm;
+
+ let mode = 1; // BPF_ABS
+ let size = SizeOp;
+ let BPFClass = 0; // BPF_LD
+}
+
+class LOAD_IND<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
+ : InstBPF<(outs), (ins GPR:$skb, GPR:$val),
+ !strconcat(OpcodeStr, "\tr0, $skb.data + $val"),
+ [(set R0, (OpNode GPR:$skb, GPR:$val))]> {
+ bits<3> mode;
+ bits<2> size;
+ bits<4> val;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{55-52} = val;
+
+ let mode = 2; // BPF_IND
+ let size = SizeOp;
+ let BPFClass = 0; // BPF_LD
+}
+}
+
+def LD_ABS_B : LOAD_ABS<2, "ldabs_b", int_bpf_load_byte>;
+def LD_ABS_H : LOAD_ABS<1, "ldabs_h", int_bpf_load_half>;
+def LD_ABS_W : LOAD_ABS<0, "ldabs_w", int_bpf_load_word>;
+
+def LD_IND_B : LOAD_IND<2, "ldind_b", int_bpf_load_byte>;
+def LD_IND_H : LOAD_IND<1, "ldind_h", int_bpf_load_half>;
+def LD_IND_W : LOAD_IND<0, "ldind_w", int_bpf_load_word>;
diff --git a/contrib/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.cpp
new file mode 100644
index 0000000..d608afb
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -0,0 +1,77 @@
+//=-- BPFMCInstLower.cpp - Convert BPF MachineInstr to an MCInst ------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower BPF MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFMCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *
+BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ return Printer.getSymbol(MO.getGlobal());
+}
+
+MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ llvm_unreachable("unknown symbol op");
+
+ return MCOperand::createExpr(Expr);
+}
+
+void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ continue;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h
new file mode 100644
index 0000000..054e894
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- BPFMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFMCINSTLOWER_H
+#define LLVM_LIB_TARGET_BPF_BPFMCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+class MachineInstr;
+class MachineModuleInfoMachO;
+class MachineOperand;
+class Mangler;
+
+// BPFMCInstLower - This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY BPFMCInstLower {
+ MCContext &Ctx;
+
+ AsmPrinter &Printer;
+
+public:
+ BPFMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
new file mode 100644
index 0000000..8f885c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -0,0 +1,88 @@
+//===-- BPFRegisterInfo.cpp - BPF Register Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the BPF implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFRegisterInfo.h"
+#include "BPFSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BPFGenRegisterInfo.inc"
+using namespace llvm;
+
+BPFRegisterInfo::BPFRegisterInfo()
+ : BPFGenRegisterInfo(BPF::R0) {}
+
+const MCPhysReg *
+BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_SaveList;
+}
+
+BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+  Reserved.set(BPF::R10); // R10 is the read-only frame pointer
+  Reserved.set(BPF::R11); // R11 is the pseudo stack pointer
+ return Reserved;
+}
+
+void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = getFrameRegister(MF);
+ int FrameIndex = MI.getOperand(i).getIndex();
+
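+  // A MOV_rr whose source is a FrameIndex materializes a stack object's
+  // address: point the operand at the frame register here, then add the
+  // object's offset with the ADD_ri built below.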
+ if (MI.getOpcode() == BPF::MOV_rr) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ unsigned reg = MI.getOperand(i - 1).getReg();
+ BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
+ .addReg(reg)
+ .addImm(Offset);
+ return;
+ }
+
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i + 1).getImm();
+
+ if (!isInt<32>(Offset))
+ llvm_unreachable("bug in frame offset");
+
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i + 1).ChangeToImmediate(Offset);
+}
+
+unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return BPF::R10;
+}
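
To make the frame-index rewrite concrete, here is a small self-contained sketch of the offset folding in the common (non-MOV_rr) path; the helper name and the numbers are invented for illustration:

    #include <cassert>
    #include <cstdint>

    // Fold a stack object's frame offset into the instruction's existing
    // immediate, as eliminateFrameIndex does above; the frame index operand
    // itself is then replaced by the frame register R10.
    int64_t foldFrameOffset(int64_t ObjectOffset, int64_t ExistingImm) {
      int64_t Offset = ObjectOffset + ExistingImm;
      assert(Offset >= INT32_MIN && Offset <= INT32_MAX && "bug in frame offset");
      return Offset; // e.g. object at -8 plus immediate 4 yields -4(r10)
    }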
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
new file mode 100644
index 0000000..7072dd0
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -0,0 +1,40 @@
+//===-- BPFRegisterInfo.h - BPF Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the BPF implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFREGISTERINFO_H
+#define LLVM_LIB_TARGET_BPF_BPFREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "BPFGenRegisterInfo.inc"
+
+namespace llvm {
+
+struct BPFRegisterInfo : public BPFGenRegisterInfo {
+
+ BPFRegisterInfo();
+
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.td b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.td
new file mode 100644
index 0000000..c8e24f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.td
@@ -0,0 +1,41 @@
+//===-- BPFRegisterInfo.td - BPF Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the BPF register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 64-bit integer registers
+class Ri<bits<16> Enc, string n> : Register<n> {
+ let Namespace = "BPF";
+ let HWEncoding = Enc;
+}
+
+// Integer registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+
+// Register classes.
+def GPR : RegisterClass<"BPF", [i64], 64, (add R1, R2, R3, R4, R5,
+ R6, R7, R8, R9, // callee saved
+ R0, // return value
+ R11, // stack ptr
+ R10 // frame ptr
+ )>;
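
The HWEncoding values declared here are what the MC layer later reads back when encoding instructions. A minimal sketch, assuming the generated BPFGenRegisterInfo tables are linked in (the function name is hypothetical):

    #include "MCTargetDesc/BPFMCTargetDesc.h"
    #include "llvm/MC/MCRegisterInfo.h"

    // R10 was declared above with HWEncoding 10, so this returns 10.
    unsigned frameRegEncoding(const llvm::MCRegisterInfo &MRI) {
      return MRI.getEncodingValue(llvm::BPF::R10);
    }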
diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp
new file mode 100644
index 0000000..7f7a262
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -0,0 +1,31 @@
+//===-- BPFSubtarget.cpp - BPF Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BPF specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFSubtarget.h"
+#include "BPF.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BPFGenSubtargetInfo.inc"
+
+void BPFSubtarget::anchor() {}
+
+BPFSubtarget::BPFSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
+ TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.h b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h
new file mode 100644
index 0000000..347cffd
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -0,0 +1,64 @@
+//===-- BPFSubtarget.h - Define Subtarget for the BPF -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the BPF specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFSUBTARGET_H
+#define LLVM_LIB_TARGET_BPF_BPFSUBTARGET_H
+
+#include "BPFFrameLowering.h"
+#include "BPFISelLowering.h"
+#include "BPFInstrInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "BPFGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class BPFSubtarget : public BPFGenSubtargetInfo {
+ virtual void anchor();
+ BPFInstrInfo InstrInfo;
+ BPFFrameLowering FrameLowering;
+ BPFTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+public:
+ // This constructor initializes the data members to match that
+ // of the specified triple.
+ BPFSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM);
+
+  // ParseSubtargetFeatures - Parses the feature string, setting the specified
+  // subtarget options. The definition of this function is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const BPFFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const BPFTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const TargetRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
new file mode 100644
index 0000000..9487427
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -0,0 +1,69 @@
+//===-- BPFTargetMachine.cpp - Define TargetMachine for BPF ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about BPF target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeBPFTarget() {
+ // Register the target.
+ RegisterTargetMachine<BPFTargetMachine> X(TheBPFTarget);
+}
+
+// DataLayout --> little-endian, 64-bit pointers/ABI/alignment.
+// The stack is always 8-byte aligned. In the function prologue, the stack
+// frame is created by decrementing the stack pointer; once decremented, all
+// references use a positive offset from the stack/frame pointer.
+BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS,
+ Options, RM, CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ Subtarget(TT, CPU, FS, *this) {
+ initAsmInfo();
+}
+namespace {
+// BPF Code Generator Pass Configuration Options.
+class BPFPassConfig : public TargetPassConfig {
+public:
+ BPFPassConfig(BPFTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ BPFTargetMachine &getBPFTargetMachine() const {
+ return getTM<BPFTargetMachine>();
+ }
+
+ bool addInstSelector() override;
+};
+}
+
+TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new BPFPassConfig(this, PM);
+}
+
+// Install an instruction selector pass that uses the SelectionDAG
+// to generate BPF code.
+bool BPFPassConfig::addInstSelector() {
+ addPass(createBPFISelDag(getBPFTargetMachine()));
+
+ return false;
+}
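
The data layout string passed to LLVMTargetMachine above is terse; a hedged decoding using the standard DataLayout API (the helper is illustrative):

    #include "llvm/IR/DataLayout.h"

    // "e"       little-endian
    // "m:e"     ELF-style name mangling
    // "p:64:64" 64-bit pointers with 64-bit alignment
    // "i64:64"  i64 aligned to 64 bits
    // "n32:64"  native integer widths of 32 and 64 bits
    // "S128"    natural stack alignment of 128 bits
    bool layoutLooksRight() {
      llvm::DataLayout DL("e-m:e-p:64:64-i64:64-n32:64-S128");
      return DL.isLittleEndian() && DL.getPointerSizeInBits() == 64;
    }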
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h
new file mode 100644
index 0000000..6aeafb9
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- BPFTargetMachine.h - Define TargetMachine for BPF ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the BPF specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETMACHINE_H
+#define LLVM_LIB_TARGET_BPF_BPFTARGETMACHINE_H
+
+#include "BPFSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class BPFTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ BPFSubtarget Subtarget;
+
+public:
+ BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const BPFSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
new file mode 100644
index 0000000..552288b
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -0,0 +1,88 @@
+//===-- BPFInstPrinter.cpp - Convert BPF MCInst to asm syntax -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a BPF MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#include "BPFGenAsmWriter.inc"
+
+void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &O) {
+#ifndef NDEBUG
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ else
+ SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+ assert(SRE && "Unexpected MCExpr type.");
+
+ MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+ assert(Kind == MCSymbolRefExpr::VK_None);
+#endif
+ O << *Expr;
+}
+
+void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "Expected an expression");
+ printExpr(Op.getExpr(), O);
+ }
+}
+
+void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &RegOp = MI->getOperand(OpNo);
+ const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+ // offset
+ if (OffsetOp.isImm())
+ O << formatDec(OffsetOp.getImm());
+ else
+ assert(0 && "Expected an immediate");
+
+ // register
+ assert(RegOp.isReg() && "Register operand not a register");
+ O << '(' << getRegisterName(RegOp.getReg()) << ')';
+}
+
+void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << (uint64_t)Op.getImm();
+ else
+ O << Op;
+}
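
printMemOperand renders the conventional offset(register) form, e.g. "-8(r1)". A standalone sketch of the same formatting (helper name invented):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"

    // Reproduce the "offset(reg)" rendering used above, e.g. "-8(r1)".
    void printMemRef(llvm::raw_ostream &OS, llvm::StringRef Reg, int64_t Off) {
      OS << Off << '(' << Reg << ')';
    }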
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
new file mode 100644
index 0000000..adcaff6
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -0,0 +1,42 @@
+//===-- BPFInstPrinter.h - Convert BPF MCInst to asm syntax -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a BPF MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
+#define LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+class MCOperand;
+
+class BPFInstPrinter : public MCInstPrinter {
+public:
+ BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = nullptr);
+ void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+ const char *Modifier = nullptr);
+ void printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
new file mode 100644
index 0000000..48f34e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -0,0 +1,85 @@
+//===-- BPFAsmBackend.cpp - BPF Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class BPFAsmBackend : public MCAsmBackend {
+public:
+ BPFAsmBackend() : MCAsmBackend() {}
+ ~BPFAsmBackend() override {}
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ // No instruction requires relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override { return 1; }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 8) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 8)
+ OW->Write64(0x15000000);
+
+ return true;
+}
+
+void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+
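+  // Section-relative fixups are handled as ELF relocations (see
+  // BPFELFObjectWriter); nothing is patched into the instruction here.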
+ if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
+ assert(Value == 0);
+ return;
+ }
+ assert(Fixup.getKind() == FK_PCRel_2);
+ Value = (uint16_t)((Value - 8) / 8);
+ Data[Fixup.getOffset() + 2] = Value & 0xFF;
+ Data[Fixup.getOffset() + 3] = Value >> 8;
+}
+
+MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createBPFELFObjectWriter(OS, 0);
+}
+}
+
+MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU) {
+ return new BPFAsmBackend();
+}
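
The FK_PCRel_2 arithmetic above turns a byte distance into an instruction count: BPF branch offsets are measured in 8-byte instructions, relative to the next instruction. A worked sketch (function name illustrative):

    #include <cstdint>

    // Mirrors applyFixup's branch adjustment: a target 24 bytes past the
    // branch encodes as (24 - 8) / 8 = 2 instructions.
    uint16_t encodeBranchOffset(uint64_t ByteDistance) {
      return static_cast<uint16_t>((ByteDistance - 8) / 8);
    }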
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
new file mode 100644
index 0000000..a5562c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -0,0 +1,53 @@
+//===-- BPFELFObjectWriter.cpp - BPF ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class BPFELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ BPFELFObjectWriter(uint8_t OSABI);
+
+ ~BPFELFObjectWriter() override;
+
+protected:
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+};
+}
+
+BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE,
+ /*HasRelocationAddend*/ false) {}
+
+BPFELFObjectWriter::~BPFELFObjectWriter() {}
+
+unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+  // Determine the type of the relocation.
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_SecRel_8:
+ return ELF::R_X86_64_64;
+ case FK_SecRel_4:
+ return ELF::R_X86_64_PC32;
+ }
+}
+
+MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
new file mode 100644
index 0000000..ab61ae7
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -0,0 +1,36 @@
+//===-- BPFMCAsmInfo.h - BPF asm properties ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BPFMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+class Target;
+
+class BPFMCAsmInfo : public MCAsmInfo {
+public:
+ explicit BPFMCAsmInfo(StringRef TT) {
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+
+ UsesELFSectionDirectiveForBSS = true;
+ HasSingleParameterDotFile = false;
+ HasDotTypeDotSizeDirective = false;
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
new file mode 100644
index 0000000..ba8a874
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -0,0 +1,165 @@
+//===-- BPFMCCodeEmitter.cpp - Convert BPF code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BPFMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+class BPFMCCodeEmitter : public MCCodeEmitter {
+ BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
+ void operator=(const BPFMCCodeEmitter &) = delete;
+ const MCRegisterInfo &MRI;
+
+public:
+ BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {}
+
+ ~BPFMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+  // getMachineOpValue - Return the binary encoding of an operand. If the
+  // machine operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+}
+
+MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new BPFMCCodeEmitter(MRI);
+}
+
+unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ const MCExpr *Expr = MO.getExpr();
+
+ assert(Expr->getKind() == MCExpr::SymbolRef);
+
+ if (MI.getOpcode() == BPF::JAL)
+ // func call name
+ Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_4));
+ else if (MI.getOpcode() == BPF::LD_imm64)
+ Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+ else
+ // bb label
+ Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
+
+ return 0;
+}
+
+// Emit one byte through the output stream
+void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) {
+ OS << (char)C;
+ ++CurByte;
+}
+
+// Emit a series of bytes (little endian)
+void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+}
+
+// Emit a series of bytes (big endian)
+void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (int i = (Size - 1) * 8; i >= 0; i -= 8)
+ EmitByte((Val >> i) & 255, CurByte, OS);
+}
+
+void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Opcode = MI.getOpcode();
+ // Keep track of the current byte being emitted
+ unsigned CurByte = 0;
+
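+  // LD_imm64/LD_pseudo occupy two 8-byte slots: the low 32 immediate bits
+  // travel in the first slot, the high 32 bits in the second.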
+ if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
+ uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+ EmitByte(Value >> 56, CurByte, OS);
+ EmitByte(((Value >> 48) & 0xff), CurByte, OS);
+ EmitLEConstant(0, 2, CurByte, OS);
+ EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);
+
+ const MCOperand &MO = MI.getOperand(1);
+ uint64_t Imm = MO.isImm() ? MO.getImm() : 0;
+ EmitByte(0, CurByte, OS);
+ EmitByte(0, CurByte, OS);
+ EmitLEConstant(0, 2, CurByte, OS);
+ EmitLEConstant(Imm >> 32, 4, CurByte, OS);
+ } else {
+ // Get instruction encoding and emit it
+ uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+ EmitByte(Value >> 56, CurByte, OS);
+ EmitByte((Value >> 48) & 0xff, CurByte, OS);
+ EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS);
+ EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS);
+ }
+}
+
+// Encode BPF Memory Operand
+uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Encoding;
+ const MCOperand Op1 = MI.getOperand(1);
+  assert(Op1.isReg() && "First operand is not a register.");
+ Encoding = MRI.getEncodingValue(Op1.getReg());
+ Encoding <<= 16;
+ MCOperand Op2 = MI.getOperand(2);
+  assert(Op2.isImm() && "Second operand is not an immediate.");
+ Encoding |= Op2.getImm() & 0xffff;
+ return Encoding;
+}
+
+#include "BPFGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
new file mode 100644
index 0000000..c4cf4b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -0,0 +1,110 @@
+//===-- BPFMCTargetDesc.cpp - BPF Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides BPF specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFMCTargetDesc.h"
+#include "BPFMCAsmInfo.h"
+#include "InstPrinter/BPFInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BPFGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BPFGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BPFGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createBPFMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitBPFMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitBPFMCRegisterInfo(X, BPF::R11 /* RAReg doesn't exist */);
+ return X;
+}
+
+static MCSubtargetInfo *createBPFMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitBPFMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->initMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createBPFMCStreamer(const Triple &T,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+}
+
+static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return new BPFInstPrinter(MAI, MII, MRI);
+ return 0;
+}
+
+extern "C" void LLVMInitializeBPFTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<BPFMCAsmInfo> X(TheBPFTarget);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheBPFTarget, createBPFMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheBPFTarget, createBPFMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheBPFTarget, createBPFMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheBPFTarget,
+ createBPFMCSubtargetInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget,
+ llvm::createBPFMCCodeEmitter);
+
+ // Register the ASM Backend
+ TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
new file mode 100644
index 0000000..ce08b7c
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -0,0 +1,59 @@
+//===-- BPFMCTargetDesc.h - BPF Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides BPF specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+class raw_pwrite_stream;
+
+extern Target TheBPFTarget;
+
+MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+
+MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
+}
+
+// Defines symbolic names for BPF registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "BPFGenRegisterInfo.inc"
+
+// Defines symbolic names for the BPF instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "BPFGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BPFGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/DIContext.cpp b/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 20bc01f..87716e6 100644
--- a/contrib/llvm/lib/DebugInfo/DIContext.cpp
+++ b/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- DIContext.cpp -----------------------------------------------------===//
+//===-- BPFTargetInfo.cpp - BPF Target Implementation ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARFContext.h"
+#include "BPF.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-DIContext::~DIContext() {}
+Target llvm::TheBPFTarget;
-DIContext *DIContext::getDWARFContext(const object::ObjectFile &Obj) {
- return new DWARFContextInMemory(Obj);
+extern "C" void LLVMInitializeBPFTargetInfo() {
+ RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF");
}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index f610fbb..f1a7127 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -15,6 +15,7 @@
#include "CPPTargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -22,12 +23,12 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -91,6 +92,7 @@ namespace {
/// CppWriter - This class is the main chunk of code that converts an LLVM
/// module to a C++ translation unit.
class CppWriter : public ModulePass {
+ std::unique_ptr<formatted_raw_ostream> OutOwner;
formatted_raw_ostream &Out;
const Module *TheModule;
uint64_t uniqueNum;
@@ -105,8 +107,9 @@ namespace {
public:
static char ID;
- explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o)
+ : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0),
+ is_inline(false), indent_level(0) {}
const char *getPassName() const override { return "C++ backend"; }
@@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) {
// initializers.
if (GenerationType != GenFunction) {
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (const auto &GV : gvs) {
+ for (auto *GV : gvs) {
if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV))
printVariableBody(Var);
}
@@ -1942,7 +1945,6 @@ void CppWriter::printModuleBody() {
void CppWriter::printProgram(const std::string& fname,
const std::string& mName) {
Out << "#include <llvm/Pass.h>\n";
- Out << "#include <llvm/PassManager.h>\n";
Out << "#include <llvm/ADT/SmallVector.h>\n";
Out << "#include <llvm/Analysis/Verifier.h>\n";
@@ -1956,6 +1958,7 @@ void CppWriter::printProgram(const std::string& fname,
Out << "#include <llvm/IR/InlineAsm.h>\n";
Out << "#include <llvm/IR/Instructions.h>\n";
Out << "#include <llvm/IR/LLVMContext.h>\n";
+ Out << "#include <llvm/IR/LegacyPassManager.h>\n";
Out << "#include <llvm/IR/Module.h>\n";
Out << "#include <llvm/Support/FormattedStream.h>\n";
Out << "#include <llvm/Support/MathExtras.h>\n";
@@ -1981,7 +1984,8 @@ void CppWriter::printModule(const std::string& fname,
printEscapedString(mName);
Out << "\", getGlobalContext());";
if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayoutStr()
+ << "\");";
}
if (!TheModule->getTargetTriple().empty()) {
nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
@@ -2145,13 +2149,12 @@ char CppWriter::ID = 0;
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- PM.add(new CppWriter(o));
+bool CPPTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile)
+ return true;
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(o);
+ PM.add(new CppWriter(std::move(FOut)));
return false;
}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
index 4bae7f8..02d705e 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,21 +22,14 @@ namespace llvm {
class formatted_raw_ostream;
-class CPPSubtarget : public TargetSubtargetInfo {
-};
-
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
-private:
- CPPSubtarget Subtarget;
+ CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
+ : TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
- const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
+ bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) override;
diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index 096dc73..f88d822 100644
--- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -20,8 +20,8 @@ static bool CppBackend_TripleMatchQuality(Triple::ArchType Arch) {
return false;
}
-extern "C" void LLVMInitializeCppBackendTargetInfo() {
- TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
+extern "C" void LLVMInitializeCppBackendTargetInfo() {
+ TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp",
"C++ backend",
&CppBackend_TripleMatchQuality);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 44f9d93..a60d1e4 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@@ -51,6 +51,8 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, void const *Decoder);
static const uint16_t IntRegDecoderTable[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
@@ -67,7 +69,7 @@ Hexagon::P2, Hexagon::P3 };
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
const uint16_t Table[], size_t Size) {
if (RegNo < Size) {
- Inst.addOperand(MCOperand::CreateReg(Table[RegNo]));
+ Inst.addOperand(MCOperand::createReg(Table[RegNo]));
return MCDisassembler::Success;
}
else
@@ -81,7 +83,7 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -101,7 +103,31 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = CtrlRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/, void const *Decoder) {
+ static const uint16_t CtrlReg64DecoderTable[] = {
+ Hexagon::C1_0, Hexagon::NoRegister,
+ Hexagon::C3_2, Hexagon::NoRegister,
+ Hexagon::NoRegister, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister,
+ Hexagon::C9_8, Hexagon::NoRegister,
+ Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister,
+ Hexagon::UPC, Hexagon::NoRegister
+ };
+
+ if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -118,7 +144,7 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
default:
return MCDisassembler::Fail;
}
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -143,7 +169,7 @@ static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Register = PredRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
@@ -176,6 +202,6 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Remove parse bits.
insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK);
DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
- HexagonMCInst::AppendImplicitOperands(MI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MI);
return Result;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index 64ae69c..dfe79f9 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -21,26 +21,23 @@
namespace llvm {
class FunctionPass;
- class ModulePass;
- class TargetMachine;
- class MachineInstr;
- class HexagonMCInst;
class HexagonAsmPrinter;
class HexagonTargetMachine;
+ class MachineInstr;
+ class MCInst;
+ class ModulePass;
class raw_ostream;
+ class TargetMachine;
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer();
- FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM);
- FunctionPass *createHexagonSplitConst32AndConst64(
- const HexagonTargetMachine &TM);
- FunctionPass *createHexagonExpandPredSpillCode(
- const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonFixupHwLoops();
@@ -58,7 +55,7 @@ namespace llvm {
TargetAsmBackend *createHexagonAsmBackend(const Target &,
const std::string &);
*/
- void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
+ void HexagonLowerToMC(MachineInstr const *MI, MCInst &MCI,
HexagonAsmPrinter &AP);
} // end namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 2068c6d..53a687c 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -21,31 +21,17 @@ include "llvm/Target/Target.td"
// Hexagon Subtarget features.
//===----------------------------------------------------------------------===//
-// Hexagon Archtectures
-def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
- "Hexagon v2">;
-def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
- "Hexagon v3">;
-def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
- "Hexagon v4">;
-def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
- "Hexagon v5">;
+// Hexagon Architectures
+def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
+def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
-def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
-def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
-def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
-def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
-def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
-def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -182,14 +168,6 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
-def getRegShlForm : InstrMapping {
- let FilterClass = "ImmRegShl";
- let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
- let ColFields = ["InputType"];
- let KeyCol = ["imm"];
- let ValueCols = [["reg"]];
-}
-
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -210,8 +188,10 @@ class Proc<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features>
: ProcessorModel<Name, Model, Features>;
-def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
-def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
+def : Proc<"hexagonv4", HexagonModelV4,
+ [ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4,
+ [ArchV4, ArchV5]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 50f2eca..e9491ba 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -19,7 +19,7 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -61,6 +61,10 @@ static cl::opt<bool> AlignCalls(
"hexagon-align-calls", cl::Hidden, cl::init(true),
cl::desc("Insert falign after call instruction for Hexagon target"));
+HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
+
void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
@@ -195,44 +199,29 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned Size = BundleMIs.size();
assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
- HexagonMCInst MCI;
+ MCInst MCI;
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
- HexagonMCInst::AppendImplicitOperands(MCI);
- MCI.setPacketBegin(Index == 0);
- MCI.setPacketEnd(Index == (Size - 1));
- EmitToStreamer(OutStreamer, MCI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MCI);
+ HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0);
+ HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1));
+ EmitToStreamer(*OutStreamer, MCI);
}
}
else {
- HexagonMCInst MCI;
+ MCInst MCI;
HexagonLowerToMC(MI, MCI, *this);
- HexagonMCInst::AppendImplicitOperands(MCI);
+ HexagonMCInstrInfo::AppendImplicitOperands(MCI);
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setPacketBegin(true);
- MCI.setPacketEnd(true);
+ HexagonMCInstrInfo::setPacketBegin(MCI, true);
+ HexagonMCInstrInfo::setPacketEnd(MCI, true);
}
- EmitToStreamer(OutStreamer, MCI);
+ EmitToStreamer(*OutStreamer, MCI);
}
return;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- if (SyntaxVariant == 0)
- return(new HexagonInstPrinter(MAI, MII, MRI));
- else
- return nullptr;
-}
-
extern "C" void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
- createHexagonMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 5f4c162..792fc8b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -25,9 +25,12 @@ namespace llvm {
const HexagonSubtarget *Subtarget;
public:
- explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ explicit HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ bool runOnMachineFunction(MachineFunction &Fn) override {
+ Subtarget = &Fn.getSubtarget<HexagonSubtarget>();
+ return AsmPrinter::runOnMachineFunction(Fn);
}
const char *getPassName() const override {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 307adad..703e691 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -37,15 +37,11 @@ namespace {
class HexagonCFGOptimizer : public MachineFunctionPass {
private:
- const HexagonTargetMachine& QTM;
- const HexagonSubtarget &QST;
-
void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
public:
static char ID;
- HexagonCFGOptimizer(const HexagonTargetMachine& TM)
- : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ HexagonCFGOptimizer() : MachineFunctionPass(ID) {
initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -72,7 +68,8 @@ static bool IsUnconditionalJump(int Opc) {
void
HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
MachineBasicBlock* NewTarget) {
- const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII =
+ MI->getParent()->getParent()->getSubtarget().getInstrInfo();
int NewOpcode = 0;
switch(MI->getOpcode()) {
case Hexagon::J2_jumpt:
@@ -95,13 +92,12 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
llvm_unreachable("Cannot handle this case");
}
- MI->setDesc(QII->get(NewOpcode));
+ MI->setDesc(TII->get(NewOpcode));
MI->getOperand(1).setMBB(NewTarget);
}
bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
-
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
@@ -248,6 +244,6 @@ void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) {
CALL_ONCE_INITIALIZATION(initializePassOnce)
}
-FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) {
- return new HexagonCFGOptimizer(TM);
+FunctionPass *llvm::createHexagonCFGOptimizer() {
+ return new HexagonCFGOptimizer();
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp
deleted file mode 100644
index 8d78409..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Hexagon_CCState class, used for lowering and
-// implementing calling conventions. Adapted from the machine independent
-// version of the class (CCState) but this handles calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-
-#include "HexagonCallingConvLower.h"
-#include "Hexagon.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-using namespace llvm;
-
-Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
- const TargetMachine &tm,
- SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &c)
- : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) {
- // No stack is used.
- StackOffset = 0;
-
- UsedRegs.resize(
- (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32);
-}
-
-// HandleByVal - Allocate a stack slot large enough to pass an argument by
-// value. The size and alignment information of the argument is encoded in its
-// parameter attribute.
-void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign,
- ISD::ArgFlagsTy ArgFlags) {
- unsigned Align = ArgFlags.getByValAlign();
- unsigned Size = ArgFlags.getByValSize();
- if (MinSize > (int)Size)
- Size = MinSize;
- if (MinAlign > (int)Align)
- Align = MinAlign;
- unsigned Offset = AllocateStack(Size, Align);
-
- addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
- LocVT.getSimpleVT(), LocInfo));
-}
-
-/// MarkAllocated - Mark a register and all of its aliases as allocated.
-void Hexagon_CCState::MarkAllocated(unsigned Reg) {
- const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
- for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
- UsedRegs[*AI/32] |= 1 << (*AI&31);
-}
-
-/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
-/// incorporating info about the formals into this state.
-void
-Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
- &Ins,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
- unsigned NumArgs = Ins.size();
- unsigned i = 0;
-
- // If the function returns a small struct in registers, skip
- // over the first (dummy) argument.
- if (SretValueInRegs != 0) {
- ++i;
- }
-
-
- for (; i != NumArgs; ++i) {
- EVT ArgVT = Ins[i].VT;
- ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
- dbgs() << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
-/// incorporating info about the result values into this state.
-void
-Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
-
- // For Hexagon, Return small structures in registers.
- if (SretValueInRegs != 0) {
- if (SretValueInRegs <= 32) {
- unsigned Reg = Hexagon::R0;
- addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
- CCValAssign::Full));
- return;
- }
- if (SretValueInRegs <= 64) {
- unsigned Reg = Hexagon::D0;
- addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
- CCValAssign::Full));
- return;
- }
- }
-
-
- // Determine which register each value should be copied into.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
- dbgs() << "Return operand #" << i << " has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-
-/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
-/// about the passed values into this state.
-void
-Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- Hexagon_CCAssignFn Fn,
- int NonVarArgsParams,
- unsigned SretValueSize) {
- unsigned NumOps = Outs.size();
-
- unsigned i = 0;
- // If the called function returns a small struct in registers, skip
- // the first actual parameter. We do not want to pass a pointer to
- // the stack location.
- if (SretValueSize != 0) {
- ++i;
- }
-
- for (; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].VT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
- NonVarArgsParams, i+1, false)) {
- dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallOperands - Same as above except it takes vectors of types
-/// and argument flags.
-void
-Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
- Hexagon_CCAssignFn Fn) {
- unsigned NumOps = ArgVTs.size();
- for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = ArgVTs[i];
- ISD::ArgFlagsTy ArgFlags = Flags[i];
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
- false)) {
- dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
-/// incorporating info about the passed values into this state.
-void
-Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn,
- unsigned SretValueInRegs) {
-
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT VT = Ins[i].VT;
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
- dbgs() << "Call result #" << i << " has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
- }
-}
-
-/// AnalyzeCallResult - Same as above except it's specialized for calls which
-/// produce a single value.
-void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
- if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
- false)) {
- dbgs() << "Call result has unhandled type "
- << VT.getEVTString() << "\n";
- abort();
- }
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h
deleted file mode 100644
index 738ed1a..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ /dev/null
@@ -1,187 +0,0 @@
-//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Hexagon_CCState class, used for lowering
-// and implementing calling conventions. Adapted from the target independent
-// version but this handles calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
-#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-
-//
-// Need to handle varargs.
-//
-namespace llvm {
- class TargetRegisterInfo;
- class TargetMachine;
- class Hexagon_CCState;
- class SDNode;
- struct EVT;
-
-/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
-/// State to reflect the change.
-typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem);
-
-
-/// CCState - This class holds information needed while lowering arguments and
-/// return values. It captures which registers are already assigned and which
-/// stack slots are used. It provides accessors to allocate these values.
-class Hexagon_CCState {
- CallingConv::ID CallingConv;
- bool IsVarArg;
- const TargetMachine &TM;
- SmallVectorImpl<CCValAssign> &Locs;
- LLVMContext &Context;
-
- unsigned StackOffset;
- SmallVector<uint32_t, 16> UsedRegs;
-public:
- Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &c);
-
- void addLoc(const CCValAssign &V) {
- Locs.push_back(V);
- }
-
- LLVMContext &getContext() const { return Context; }
- const TargetMachine &getTarget() const { return TM; }
- unsigned getCallingConv() const { return CallingConv; }
- bool isVarArg() const { return IsVarArg; }
-
- unsigned getNextStackOffset() const { return StackOffset; }
-
- /// isAllocated - Return true if the specified register (or an alias) is
- /// allocated.
- bool isAllocated(unsigned Reg) const {
- return UsedRegs[Reg/32] & (1 << (Reg&31));
- }
-
- /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
- /// incorporating info about the formals into this state.
- void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
- /// incorporating info about the result values into this state.
- void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
- /// about the passed values into this state.
- void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- Hexagon_CCAssignFn Fn, int NonVarArgsParams,
- unsigned SretValueSize);
-
- /// AnalyzeCallOperands - Same as above except it takes vectors of types
- /// and argument flags.
- void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
- Hexagon_CCAssignFn Fn);
-
- /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
- /// incorporating info about the passed values into this state.
- void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
-
- /// AnalyzeCallResult - Same as above except it's specialized for calls which
- /// produce a single value.
- void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
-
- /// getFirstUnallocated - Return the first unallocated register in the set, or
- /// NumRegs if they are all allocated.
- unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
- for (unsigned i = 0; i != NumRegs; ++i)
- if (!isAllocated(Regs[i]))
- return i;
- return NumRegs;
- }
-
- /// AllocateReg - Attempt to allocate one register. If it is not available,
- /// return zero. Otherwise, return the register, marking it and any aliases
- /// as allocated.
- unsigned AllocateReg(unsigned Reg) {
- if (isAllocated(Reg)) return 0;
- MarkAllocated(Reg);
- return Reg;
- }
-
- /// Version of AllocateReg with extra register to be shadowed.
- unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
- if (isAllocated(Reg)) return 0;
- MarkAllocated(Reg);
- MarkAllocated(ShadowReg);
- return Reg;
- }
-
- /// AllocateReg - Attempt to allocate one of the specified registers. If none
- /// are available, return zero. Otherwise, return the first one available,
- /// marking it and any aliases as allocated.
- unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
- unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
- if (FirstUnalloc == NumRegs)
- return 0; // Didn't find the reg.
-
- // Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc];
- MarkAllocated(Reg);
- return Reg;
- }
-
- /// Version of AllocateReg with list of registers to be shadowed.
- unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
- unsigned NumRegs) {
- unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
- if (FirstUnalloc == NumRegs)
- return 0; // Didn't find the reg.
-
- // Mark the register and any aliases as allocated.
- unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
- MarkAllocated(Reg);
- MarkAllocated(ShadowReg);
- return Reg;
- }
-
- /// AllocateStack - Allocate a chunk of stack space with the specified size
- /// and alignment.
- unsigned AllocateStack(unsigned Size, unsigned Align) {
- assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
- StackOffset = ((StackOffset + Align-1) & ~(Align-1));
- unsigned Result = StackOffset;
- StackOffset += Size;
- return Result;
- }
-
- // HandleByVal - Allocate a stack slot large enough to pass an argument by
- // value. The size and alignment information of the argument is encoded in its
- // parameter attribute.
- void HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
-
-private:
- /// MarkAllocated - Mark a register and all of its aliases as allocated.
- void MarkAllocated(unsigned Reg);
-};
-
-
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 1883ad8..1d6455c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -116,38 +116,34 @@ static bool isCombinableInstType(MachineInstr *MI,
switch(MI->getOpcode()) {
case Hexagon::A2_tfr: {
// A COPY instruction can be combined if its arguments are IntRegs (32bit).
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg());
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DestReg = Op0.getReg();
+ unsigned SrcReg = Op1.getReg();
return Hexagon::IntRegsRegClass.contains(DestReg) &&
- Hexagon::IntRegsRegClass.contains(SrcReg);
+ Hexagon::IntRegsRegClass.contains(SrcReg);
}
case Hexagon::A2_tfrsi: {
// A transfer-immediate can be combined if its argument is a signed 8bit
// value.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- unsigned DestReg = MI->getOperand(0).getReg();
-
- // Only combine constant extended TFRI if we are in aggressive mode.
- return Hexagon::IntRegsRegClass.contains(DestReg) &&
- (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm()));
- }
-
- case Hexagon::TFRI_V4: {
- if (!ShouldCombineAggressively)
- return false;
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isGlobal());
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+ unsigned DestReg = Op0.getReg();
// Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
// workaround for an ABI bug that prevents GOT relocations on combine
// instructions
- if (MI->getOperand(1).getTargetFlags() != HexagonII::MO_NO_FLAG)
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
return false;
- unsigned DestReg = MI->getOperand(0).getReg();
- return Hexagon::IntRegsRegClass.contains(DestReg);
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
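+  // For example, #100 fits isInt<8> and is combinable in any mode, while
+  // #300 needs a constant extender and requires aggressive mode.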
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ (ShouldCombineAggressively || NotExt);
}
default:
@@ -157,13 +153,14 @@ static bool isCombinableInstType(MachineInstr *MI,
return false;
}
-static bool isGreaterThan8BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::A2_tfrsi &&
- !isInt<8>(I->getOperand(1).getImm());
-}
-static bool isGreaterThan6BitTFRI(MachineInstr *I) {
- return I->getOpcode() == Hexagon::A2_tfrsi &&
- !isUInt<6>(I->getOperand(1).getImm());
+template <unsigned N>
+static bool isGreaterThanNBitTFRI(const MachineInstr *I) {
+ if (I->getOpcode() == Hexagon::TFRI64_V4 ||
+ I->getOpcode() == Hexagon::A2_tfrsi) {
+ const MachineOperand &Op = I->getOperand(1);
+ return !Op.isImm() || !isInt<N>(Op.getImm());
+ }
+ return false;
}
/// areCombinableOperations - Returns true if the two instructions can be merged
@@ -171,31 +168,17 @@ static bool isGreaterThan6BitTFRI(MachineInstr *I) {
static bool areCombinableOperations(const TargetRegisterInfo *TRI,
MachineInstr *HighRegInst,
MachineInstr *LowRegInst) {
- assert((HighRegInst->getOpcode() == Hexagon::A2_tfr ||
- HighRegInst->getOpcode() == Hexagon::A2_tfrsi ||
- HighRegInst->getOpcode() == Hexagon::TFRI_V4) &&
- (LowRegInst->getOpcode() == Hexagon::A2_tfr ||
- LowRegInst->getOpcode() == Hexagon::A2_tfrsi ||
- LowRegInst->getOpcode() == Hexagon::TFRI_V4) &&
+ unsigned HiOpc = HighRegInst->getOpcode();
+ unsigned LoOpc = LowRegInst->getOpcode();
+ (void)HiOpc; // Fix compiler warning
+ (void)LoOpc; // Fix compiler warning
+ assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
+ (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
"Assume individual instructions are of a combinable type");
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(TRI);
-
- // V4 added some combine variations (mixed immediate and register source
- // operands), if we are on < V4 we can only combine 2 register-to-register
- // moves and 2 immediate-to-register moves. We also don't have
- // constant-extenders.
- if (!QRI->Subtarget.hasV4TOps())
- return HighRegInst->getOpcode() == LowRegInst->getOpcode() &&
- !isGreaterThan8BitTFRI(HighRegInst) &&
- !isGreaterThan6BitTFRI(LowRegInst);
-
// There is no combine of two constant extended values.
- if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 ||
- isGreaterThan8BitTFRI(HighRegInst)) &&
- (LowRegInst->getOpcode() == Hexagon::TFRI_V4 ||
- isGreaterThan6BitTFRI(LowRegInst)))
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) &&
+ isGreaterThanNBitTFRI<6>(LowRegInst))
return false;
return true;
@@ -222,10 +205,14 @@ static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
unsigned DestReg,
const TargetRegisterInfo *TRI) {
return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
- I->modifiesRegister(DestReg, TRI) ||
- I->readsRegister(DestReg, TRI) ||
- I->hasUnmodeledSideEffects() ||
- I->isInlineAsm() || I->isDebugValue();
+ I->modifiesRegister(DestReg, TRI) ||
+ I->readsRegister(DestReg, TRI) ||
+ I->hasUnmodeledSideEffects() ||
+ I->isInlineAsm() || I->isDebugValue();
+}
+
+static unsigned UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : 0;
}
/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
@@ -235,9 +222,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
unsigned I1DestReg,
unsigned I2DestReg,
bool &DoInsertAtI1) {
-
- bool IsImmUseReg = I2->getOperand(1).isImm() || I2->getOperand(1).isGlobal();
- unsigned I2UseReg = IsImmUseReg ? 0 : I2->getOperand(1).getReg();
+ unsigned I2UseReg = UseReg(I2->getOperand(1));
// It is not safe to move I1 and I2 into one combine if I2 has a true
// dependence on I1.
@@ -301,8 +286,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
// At O3 we got better results (dhrystone) by being more conservative here.
if (!ShouldCombineAggressively)
End = std::next(MachineBasicBlock::iterator(I2));
- IsImmUseReg = I1->getOperand(1).isImm() || I1->getOperand(1).isGlobal();
- unsigned I1UseReg = IsImmUseReg ? 0 : I1->getOperand(1).getReg();
+ unsigned I1UseReg = UseReg(I1->getOperand(1));
// Track killed operands. If we move across an instruction that kills our
// operand, we need to update the kill information on the moved I1. It kills
// the operand now.
@@ -418,7 +402,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
// Get target info.
TRI = MF.getSubtarget().getRegisterInfo();
- TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
// Combine aggressively (for code size)
ShouldCombineAggressively =
@@ -561,7 +545,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
- // Handle globals.
+ // Handle globals.
if (HiOperand.isGlobal()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
@@ -577,17 +561,64 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
return;
}
- // Handle constant extended immediates.
- if (!isInt<8>(HiOperand.getImm())) {
- assert(isInt<8>(LoOperand.getImm()));
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
.addImm(LoOperand.getImm());
return;
}
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+  // First preference is given to the Hexagon::A2_combineii instruction,
+  // since it can also encode the U6 range of Hexagon::A4_combineii.
+ // In this instruction, HiOperand is const extended, if required.
+ if (isInt<8>(LoOperand.getImm())) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addImm(LoOperand.getImm());
+ return;
+ }
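+
+  // For example, combine(#300, #5) is still emitted as A2_combineii (with
+  // #300 constant-extended), since the low immediate #5 fits in s8; only
+  // when the low immediate does not fit do we fall through to A4_combineii.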
- if (!isUInt<6>(LoOperand.getImm())) {
- assert(isInt<8>(HiOperand.getImm()));
+ // In this instruction, LoOperand is const extended, if required.
+ if (isInt<8>(HiOperand.getImm())) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
.addImm(HiOperand.getImm())
.addImm(LoOperand.getImm());
@@ -611,7 +642,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
- // Handle global.
+ // Handle globals.
if (HiOperand.isGlobal()) {
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
.addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
@@ -619,6 +650,29 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
.addReg(LoReg, LoRegKillFlag);
return;
}
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
// Insert new combine instruction.
// DoubleRegDest = combine #HiImm, LoReg
BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
@@ -644,6 +698,29 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
LoOperand.getTargetFlags());
return;
}
+ // Handle block addresses.
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle jump tables.
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle constant pools.
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, #LoImm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
new file mode 100644
index 0000000..37ed173
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -0,0 +1,1348 @@
+// Replace mux instructions with the corresponding legal instructions.
+// It is meant to work post-SSA, but still on virtual registers. It was
+// originally placed between register coalescing and machine instruction
+// scheduler.
+// At this point in the optimization sequence, live interval analysis has
+// already been performed, and the live intervals should be preserved. A large part
+// of the code deals with preserving the liveness information.
+//
+// Liveness tracking aside, the main functionality of this pass is divided
+// into two steps. The first step is to replace an instruction
+// vreg0 = C2_mux vreg0, vreg1, vreg2
+// with a pair of conditional transfers
+// vreg0 = A2_tfrt vreg0, vreg1
+// vreg0 = A2_tfrf vreg0, vreg2
+// The intent is that execution of this pass could be stopped after this
+// step, and the code generated so far would still be functionally correct.
+//
+// If the uses of the source values vreg1 and vreg2 are kills, and their
+// definitions are predicable, then in the second step, the conditional
+// transfers will then be rewritten as predicated instructions. E.g.
+// vreg0 = A2_or vreg1, vreg2
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// will be rewritten as
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// This replacement has two variants: "up" and "down". Consider this case:
+// vreg0 = A2_or vreg1, vreg2
+// ... [intervening instructions] ...
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// variant "up":
+// vreg3 = A2_port vreg99, vreg1, vreg2
+// ... [intervening instructions, vreg0->vreg3] ...
+// [deleted]
+// variant "down":
+// [deleted]
+// ... [intervening instructions] ...
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// Both, one, or none of these variants may be valid, and checks are made
+// to rule out inapplicable variants.
+//
+// As an additional optimization, before either of the two steps above is
+// executed, the pass attempts to coalesce the target register with one of
+// the source registers, e.g. given an instruction
+// vreg3 = C2_mux vreg0, vreg1, vreg2
+// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
+// the instruction would then be (for example)
+// vreg3 = C2_mux vreg0, vreg3, vreg2
+// and, under certain circumstances, this could result in only one predicated
+// instruction:
+// vreg3 = A2_tfrf vreg0, vreg2
+//
+
+#define DEBUG_TYPE "expand-condsets"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
+static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
+
+namespace llvm {
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ FunctionPass *createHexagonExpandCondsets();
+}
+
+namespace {
+ class HexagonExpandCondsets : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandCondsets() :
+ MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
+ LIS(0), CoaLimitActive(false),
+ TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
+ if (OptCoaLimit.getPosition())
+ CoaLimitActive = true, CoaLimit = OptCoaLimit;
+ if (OptTfrLimit.getPosition())
+ TfrLimitActive = true, TfrLimit = OptTfrLimit;
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Expand Condsets";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ const HexagonInstrInfo *HII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ bool CoaLimitActive, TfrLimitActive;
+ unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter;
+
+ struct RegisterRef {
+ RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
+ Sub(Op.getSubReg()) {}
+ RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+ bool operator== (RegisterRef RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (RegisterRef RR) const { return !operator==(RR); }
+ unsigned Reg, Sub;
+ };
+
+ typedef DenseMap<unsigned,unsigned> ReferenceMap;
+ enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
+ enum { Exec_Then = 0x10, Exec_Else = 0x20 };
+ unsigned getMaskForSub(unsigned Sub);
+ bool isCondset(const MachineInstr *MI);
+
+ void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
+ bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
+
+ LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S);
+ LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S);
+ void makeDefined(unsigned Reg, SlotIndex S, bool SetDef);
+ void makeUndead(unsigned Reg, SlotIndex S);
+ void shrinkToUses(unsigned Reg, LiveInterval &LI);
+ void updateKillFlags(unsigned Reg, LiveInterval &LI);
+ void terminateSegment(LiveInterval::iterator LT, SlotIndex S,
+ LiveInterval &LI);
+ void addInstrToLiveness(MachineInstr *MI);
+ void removeInstrFromLiveness(MachineInstr *MI);
+
+ unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
+ MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR,
+ unsigned DstSR, const MachineOperand &PredOp, bool Cond);
+ bool split(MachineInstr *MI);
+ bool splitInBlock(MachineBasicBlock &B);
+
+ bool isPredicable(MachineInstr *MI);
+ MachineInstr *getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
+ bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses);
+ bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown);
+ void predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond);
+ void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
+ bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last);
+ bool predicate(MachineInstr *TfrI, bool Cond);
+ bool predicateInBlock(MachineBasicBlock &B);
+
+ void postprocessUndefImplicitUses(MachineBasicBlock &B);
+ void removeImplicitUses(MachineInstr *MI);
+ void removeImplicitUses(MachineBasicBlock &B);
+
+ bool isIntReg(RegisterRef RR, unsigned &BW);
+ bool isIntraBlocks(LiveInterval &LI);
+ bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
+ bool coalesceSegments(MachineFunction &MF);
+ };
+}
+
+char HexagonExpandCondsets::ID = 0;
+
+
+unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
+ switch (Sub) {
+ case Hexagon::subreg_loreg:
+ return Sub_Low;
+ case Hexagon::subreg_hireg:
+ return Sub_High;
+ case Hexagon::NoSubRegister:
+ return Sub_None;
+ }
+ llvm_unreachable("Invalid subregister");
+}
+
+
+bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::MUX64_rr:
+      return true;
+ }
+ return false;
+}
+
+
+void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ Map.insert(std::make_pair(RR.Reg, Mask));
+ else
+ F->second |= Mask;
+}
+
+
+bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ return false;
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ if (Mask & F->second)
+ return true;
+ return false;
+}
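+
+// For example, a use of the low subregister under the "then" condition is
+// recorded with mask (Sub_Low | Exec_Then) == 0x11; a later query for the
+// whole register under the same condition uses (Sub_None | Exec_Then)
+// == 0x13, which overlaps 0x11, so the reference is reported.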
+
+
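+/// Find the first segment in LI that starts at or after S; return LI.end()
+/// if there is no such segment.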
+LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI,
+ SlotIndex S) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->start >= S)
+ return I;
+ }
+ return LI.end();
+}
+
+
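+/// Find the last segment in LI that ends at or before S; return LI.end()
+/// if every segment extends past S.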
+LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI,
+ SlotIndex S) {
+ LiveInterval::iterator P = LI.end();
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->end > S)
+ return P;
+ P = I;
+ }
+ return P;
+}
+
+
+/// Find the implicit use of register Reg in slot index S, and make sure
+/// that the "defined" flag is set to SetDef. While the mux expansion is
+/// going on, predicated instructions will have implicit uses of the
+/// registers that are being defined. This is to keep any preceding
+/// definitions live. If there is no preceding definition, the implicit
+/// use will be marked as "undef", otherwise it will be "defined". This
+/// function is used to update the flag.
+void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S,
+ bool SetDef) {
+ if (!S.isRegister())
+ return;
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ assert(MI && "Expecting instruction");
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ bool IsDef = !Op.isUndef();
+ if (Op.isImplicit() && IsDef != SetDef)
+ Op.setIsUndef(!SetDef);
+ }
+}
+
+
+void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) {
+ // If S is a block boundary, then there can still be a dead def reaching
+ // this point. Instead of traversing the CFG, queue start points of all
+ // live segments that begin with a register, and end at a block boundary.
+ // This may "resurrect" some truly dead definitions, but doing so is
+ // harmless.
+ SmallVector<MachineInstr*,8> Defs;
+ if (S.isBlock()) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isRegister() || !I->end.isBlock())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(I->start);
+ Defs.push_back(MI);
+ }
+ } else if (S.isRegister()) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ Defs.push_back(MI);
+ }
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ MachineInstr *MI = Defs[i];
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ }
+}
+
+
+/// Shrink the segments in the live interval for a given register to the last
+/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this
+/// function will not mark any definitions of Reg as dead. The reason for this
+/// is that this function is used while a MUX instruction is being expanded,
+/// or while a conditional copy is undergoing predication. During these
+/// processes, there may be defs present in the instruction sequence that have
+/// not yet been removed, or there may be missing uses that have not yet been
+/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible,
+/// but since it does not extend any intervals that are too short, we need to
+/// pre-emptively extend them here in anticipation of further changes.
+void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) {
+ SmallVector<MachineInstr*,4> Deads;
+ LIS->shrinkToUses(&LI, &Deads);
+ // Need to undo the deadification made by "shrinkToUses". It's easier to
+ // do it here, since we have a list of all instructions that were just
+ // marked as dead.
+ for (unsigned i = 0, n = Deads.size(); i < n; ++i) {
+ MachineInstr *MI = Deads[i];
+ // Clear the "dead" flag.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ // Extend the live segment to the beginning of the next one.
+ LiveInterval::iterator End = LI.end();
+ SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval::iterator T = LI.FindSegmentContaining(S);
+ assert(T != End);
+ LiveInterval::iterator N = std::next(T);
+ if (N != End)
+ T->end = N->start;
+ else
+ T->end = LIS->getMBBEndIdx(MI->getParent());
+ }
+ updateKillFlags(Reg, LI);
+}
+
+
+/// Given an updated live interval LI for register Reg, update the kill flags
+/// in instructions using Reg to reflect the liveness changes.
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) {
+ MRI->clearKillFlags(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ SlotIndex EX = I->end;
+ if (!EX.isRegister())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(EX);
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ // Only set the kill flag on the first encountered use of Reg in this
+ // instruction.
+ Op.setIsKill(true);
+ break;
+ }
+ }
+}
+
+
+/// When adding a new instruction to liveness, the newly added definition
+/// will start a new live segment. This may happen at a position that falls
+/// within an existing live segment. In such case that live segment needs to
+/// be truncated to make room for the new segment. Ultimately, the truncation
+/// will occur at the last use, but for now the segment can be terminated
+/// right at the place where the new segment will start. The segments will be
+/// shrunk-to-uses later.
+void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT,
+ SlotIndex S, LiveInterval &LI) {
+ // Terminate the live segment pointed to by LT within a live interval LI.
+ if (LT == LI.end())
+ return;
+
+ VNInfo *OldVN = LT->valno;
+ SlotIndex EX = LT->end;
+ LT->end = S;
+ // If LT does not end at a block boundary, the termination is done.
+ if (!EX.isBlock())
+ return;
+
+ // If LT ended at a block boundary, it's possible that its value number
+  // is picked up at the beginning of other blocks. Create a new value number
+ // and change such blocks to use it instead.
+ VNInfo *NewVN = 0;
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isBlock() || I->valno != OldVN)
+ continue;
+ // Generate on-demand a new value number that is defined by the
+ // block beginning (i.e. -phi).
+ if (!NewVN)
+ NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator());
+ I->valno = NewVN;
+ }
+}
+
+
+/// Add the specified instruction to live intervals. This function is used
+/// to update the live intervals while the program code is being changed.
+/// Neither the expansion of a MUX, nor the predication are atomic, and this
+/// function is used to update the live intervals while these transformations
+/// are being done.
+void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI)
+ : LIS->getInstructionIndex(MI);
+ DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI);
+
+ MX = MX.getRegSlot();
+ bool Predicated = HII->isPredicated(MI);
+ MachineBasicBlock *MB = MI->getParent();
+
+ // Strip all implicit uses from predicated instructions. They will be
+ // added again, according to the updated information.
+ if (Predicated)
+ removeImplicitUses(MI);
+
+ // For each def in MI we need to insert a new live segment starting at MX
+ // into the interval. If there already exists a live segment in the interval
+ // that contains MX, we need to terminate it at MX.
+ SmallVector<RegisterRef,2> Defs;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Defs.push_back(RegisterRef(Op));
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ unsigned DefR = Defs[i].Reg;
+ LiveInterval &LID = LIS->getInterval(DefR);
+ DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // If MX falls inside of an existing live segment, terminate it.
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ if (LT != LID.end())
+ terminateSegment(LT, MX, LID);
+ DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+ // Create a new segment starting from MX.
+ LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+ SlotIndex EX;
+ VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+ if (N == LID.end()) {
+ // There is no live segment after MX. End this segment at the end of
+ // the block.
+ EX = LIS->getMBBEndIdx(MB);
+ } else {
+ // If the next segment starts at the block boundary, end the new segment
+ // at the boundary of the preceding block (i.e. the previous index).
+ // Otherwise, end the segment at the beginning of the next segment. In
+ // either case it will be "shrunk-to-uses" later.
+ EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+ }
+ if (Predicated) {
+ // Predicated instruction will have an implicit use of the defined
+ // register. This is necessary so that this definition will not make
+ // any previous definitions dead. If there are no previous live
+ // segments, still add the implicit use, but make it "undef".
+ // Because of the implicit use, the preceding definition is not
+      // dead. Mark it as such (if necessary).
+ MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+ ImpUse.setSubReg(Defs[i].Sub);
+ bool Undef = false;
+ if (P == LID.end())
+ Undef = true;
+ else {
+ // If the previous segment extends to the end of the previous block,
+ // the end index may actually be the beginning of this block. If
+ // the previous segment ends at a block boundary, move it back by one,
+ // to get the proper block for it.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+ Undef = true;
+ }
+ if (!Undef) {
+ makeUndead(DefR, P->valno->def);
+ // We are adding a live use, so extend the previous segment to
+ // include it.
+ P->end = MX;
+ } else {
+ ImpUse.setIsUndef(true);
+ }
+
+ if (!MI->readsRegister(DefR))
+ MI->addOperand(ImpUse);
+ if (N != LID.end())
+ makeDefined(DefR, N->start, true);
+ }
+ LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+ LID.addSegment(NR);
+ DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+ shrinkToUses(DefR, LID);
+ DEBUG(dbgs() << "updated imp-uses: " << *MI);
+ LID.verify();
+ }
+
+ // For each use in MI:
+ // - If there is no live segment that contains MX for the used register,
+ // extend the previous one. Ignore implicit uses.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+ continue;
+ unsigned UseR = Op.getReg();
+ LiveInterval &LIU = LIS->getInterval(UseR);
+ // Find the last segment P that starts before MX.
+ LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+ if (P == LIU.end())
+ P = prevSegment(LIU, MX);
+
+ assert(P != LIU.end() && "MI uses undefined register?");
+ SlotIndex EX = P->end;
+ // If P contains MX, there is not much to do.
+ if (EX > MX) {
+ Op.setIsKill(false);
+ continue;
+ }
+ // Otherwise, extend P to "next(MX)".
+ P->end = MX.getNextIndex();
+ Op.setIsKill(true);
+ // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX))
+ KI->clearRegisterKills(UseR, nullptr);
+ shrinkToUses(UseR, LIU);
+ LIU.verify();
+ }
+}
+
+
+/// Update the live interval information to reflect the removal of the given
+/// instruction from the program. As with "addInstrToLiveness", this function
+/// is called while the program code is being changed.
+void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot();
+ DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI);
+
+ // For each def in MI:
+ // If MI starts a live segment, merge this segment with the previous segment.
+ //
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned DefR = Op.getReg();
+ LiveInterval &LID = LIS->getInterval(DefR);
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ assert(LT != LID.end() && "Expecting live segments");
+ DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ if (LT->start != MX)
+ continue;
+
+ VNInfo *MVN = LT->valno;
+ if (LT != LID.begin()) {
+ // If the current live segment is not the first, the task is easy. If
+ // the previous segment continues into the current block, extend it to
+ // the end of the current one, and merge the value numbers.
+ // Otherwise, remove the current segment, and make the end of it "undef".
+ LiveInterval::iterator P = std::prev(LT);
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *MB = MI->getParent();
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) {
+ makeDefined(DefR, LT->end, false);
+ LID.removeSegment(*LT);
+ } else {
+ // Make the segments adjacent, so that merge-vn can also merge the
+ // segments.
+ P->end = LT->start;
+ makeUndead(DefR, P->valno->def);
+ LID.MergeValueNumberInto(MVN, P->valno);
+ }
+ } else {
+ LiveInterval::iterator N = std::next(LT);
+ LiveInterval::iterator RmB = LT, RmE = N;
+ while (N != LID.end()) {
+ // Iterate until the first register-based definition is found
+ // (i.e. skip all block-boundary entries).
+ LiveInterval::iterator Next = std::next(N);
+ if (N->start.isRegister()) {
+ makeDefined(DefR, N->start, false);
+ break;
+ }
+ if (N->end.isRegister()) {
+ makeDefined(DefR, N->end, false);
+ RmE = Next;
+ break;
+ }
+ RmE = Next;
+ N = Next;
+ }
+ // Erase the segments in one shot to avoid invalidating iterators.
+ LID.segments.erase(RmB, RmE);
+ }
+
+ bool VNUsed = false;
+ for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) {
+ if (I->valno != MVN)
+ continue;
+ VNUsed = true;
+ break;
+ }
+ if (!VNUsed)
+ MVN->markUnused();
+
+ DEBUG(dbgs() << "new interval: ");
+ if (!LID.empty()) {
+ DEBUG(dbgs() << LID << "\n");
+ LID.verify();
+ } else {
+ DEBUG(dbgs() << "<empty>\n");
+ LIS->removeInterval(DefR);
+ }
+ }
+
+ // For uses there is nothing to do. The intervals will be updated via
+ // shrinkToUses.
+ SmallVector<unsigned,4> Uses;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.push_back(R);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+ LiveInterval &LI = LIS->getInterval(Uses[i]);
+ shrinkToUses(Uses[i], LI);
+ }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+ bool Cond) {
+ using namespace Hexagon;
+ if (SO.isReg()) {
+ unsigned PhysR;
+ RegisterRef RS = SO;
+ if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+ const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+ PhysR = RS.Reg;
+ }
+ unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+ switch (RC->getSize()) {
+ case 4:
+ return Cond ? A2_tfrt : A2_tfrf;
+ case 8:
+ return Cond ? A2_tfrpt : A2_tfrpf;
+ }
+ llvm_unreachable("Invalid register operand");
+ }
+ if (SO.isImm() || SO.isFPImm())
+ return Cond ? C2_cmoveit : C2_cmoveif;
+ llvm_unreachable("Unexpected source operand");
+}
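+
+// For example, a 32-bit register source with Cond==true selects A2_tfrt, a
+// 64-bit register pair selects A2_tfrpt, and an immediate source with
+// Cond==false selects C2_cmoveif.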
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+ unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+ MachineInstr *MI = SrcOp.getParent();
+ MachineBasicBlock &B = *MI->getParent();
+ MachineBasicBlock::iterator At = MI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Don't avoid identity copies here (i.e. if the source and the destination
+ // are the same registers). It is actually better to generate them here,
+ // since this would cause the copy to potentially be predicated in the next
+  // step. The predication step will remove such a copy if it cannot be
+  // predicated.
+
+ unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+ MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, RegState::Define, DstSR)
+ .addOperand(PredOp)
+ .addOperand(SrcOp);
+ // We don't want any kills yet.
+ TfrI->clearKillInfo();
+ DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+ return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+ if (TfrLimitActive) {
+ if (TfrCounter >= TfrLimit)
+ return false;
+ TfrCounter++;
+ }
+ DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+ << ": " << *MI);
+ MachineOperand &MD = MI->getOperand(0); // Definition
+ MachineOperand &MP = MI->getOperand(1); // Predicate register
+ assert(MD.isDef());
+ unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+  // First, create the two individual conditional transfers, and add each
+ // of them to the live intervals information. Do that first and then remove
+ // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true))
+ addInstrToLiveness(TfrT);
+ if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false))
+ addInstrToLiveness(TfrF);
+ removeInstrFromLiveness(MI);
+
+ return true;
+}
+
+
+/// Split all MUX instructions in the given block into pairs of conditional
+/// transfers.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ if (isCondset(I))
+ Changed |= split(I);
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+ if (HII->isPredicated(MI) || !HII->isPredicable(MI))
+ return false;
+ if (MI->hasUnmodeledSideEffects() || MI->mayStore())
+ return false;
+ // Reject instructions with multiple defs (e.g. post-increment loads).
+ bool HasDef = false;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ if (HasDef)
+ return false;
+ HasDef = true;
+ }
+ for (auto &Mo : MI->memoperands())
+ if (Mo->isVolatile())
+ return false;
+ return true;
+}
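+
+// For example, a post-increment load defines both the loaded value and the
+// updated address register, so the multiple-def check above rejects it.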
+
+
+/// Find the reaching definition for a predicated use of RD. RD is used
+/// under the conditions given by PredR and Cond, and this function will ignore
+/// definitions that set RD under the opposite conditions.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+ MachineBasicBlock &B = *UseIt->getParent();
+ MachineBasicBlock::iterator I = UseIt, S = B.begin();
+ if (I == S)
+ return 0;
+
+ bool PredValid = true;
+ do {
+ --I;
+ MachineInstr *MI = &*I;
+ // Check if this instruction can be ignored, i.e. if it is predicated
+ // on the complementary condition.
+ if (PredValid && HII->isPredicated(MI)) {
+ if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+ }
+
+ // Check the defs. If the PredR is defined, invalidate it. If RD is
+ // defined, return the instruction or 0, depending on the circumstances.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ RegisterRef RR = Op;
+ if (RR.Reg == PredR) {
+ PredValid = false;
+ continue;
+ }
+ if (RR.Reg != RD.Reg)
+ continue;
+ // If the "Reg" part agrees, there is still the subregister to check.
+ // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+ // not vreg1 (w/o subregisters).
+ if (RR.Sub == RD.Sub)
+ return MI;
+ if (RR.Sub == 0 || RD.Sub == 0)
+ return 0;
+ // We have different subregisters, so we can continue looking.
+ }
+ } while (I != S);
+
+ return 0;
+}
+
+
+/// Check if the instruction MI can be safely moved over a set of instructions
+/// whose side-effects (in terms of register defs and uses) are expressed in
+/// the maps Defs and Uses. These maps reflect the conditional defs and uses
+/// that depend on the same predicate register to allow moving instructions
+/// over instructions predicated on the opposite condition.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+ ReferenceMap &Uses) {
+ // In order to be able to safely move MI over instructions that define
+ // "Defs" and use "Uses", no def operand from MI can be defined or used
+ // and no use operand can be defined.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ RegisterRef RR = Op;
+ // For physical register we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value
+ // before the actual register rewriting (from virtual to physical).
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ // No redefs for any operand.
+ if (isRefInMap(RR, Defs, Exec_Then))
+ return false;
+ // For defs, there cannot be uses.
+ if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then))
+ return false;
+ }
+ return true;
+}
+
+
+/// Check if the instruction accessing memory (TheI) can be moved to the
+/// location ToI.
+bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI,
+ bool IsDown) {
+ bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore();
+ if (!IsLoad && !IsStore)
+ return true;
+ if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
+ return true;
+ if (TheI->hasUnmodeledSideEffects())
+ return false;
+
+ MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
+ MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
+ bool Ordered = TheI->hasOrderedMemoryRef();
+
+ // Search for aliased memory reference in (StartI, EndI).
+ for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+ bool L = MI->mayLoad(), S = MI->mayStore();
+ if (!L && !S)
+ continue;
+ if (Ordered && MI->hasOrderedMemoryRef())
+ return false;
+
+ bool Conflict = (L && IsStore) || S;
+ if (Conflict)
+ return false;
+ }
+ return true;
+}
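+
+// For example, a load may move across other loads, but any intervening store
+// blocks the move, as does a second ordered memory reference when TheI
+// itself is ordered.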
+
+
+/// Generate a predicated version of MI (where the condition is given via
+/// PredR and Cond) at the point indicated by Where.
+void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) {
+ // The problem with updating live intervals is that we can move one def
+ // past another def. In particular, this can happen when moving an A2_tfrt
+ // over an A2_tfrf defining the same register. From the point of view of
+ // live intervals, these two instructions are two separate definitions,
+ // and each one starts another live segment. LiveIntervals's "handleMove"
+ // does not allow such moves, so we need to handle it ourselves. To avoid
+ // invalidating liveness data while we are using it, the move will be
+ // implemented in 4 steps: (1) add a clone of the instruction MI at the
+ // target location, (2) update liveness, (3) delete the old instruction,
+ // and (4) update liveness again.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
+ unsigned Opc = MI->getOpcode();
+ unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
+ MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
+ unsigned Ox = 0, NP = MI->getNumOperands();
+ // Skip all defs from MI first.
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isDef())
+ break;
+ Ox++;
+ }
+ // Add the new def, then the predicate register, then the rest of the
+ // operands.
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub);
+ MB.addReg(PredR);
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isImplicit())
+ MB.addOperand(MO);
+ Ox++;
+ }
+
+ MachineFunction &MF = *B.getParent();
+ MachineInstr::mmo_iterator I = MI->memoperands_begin();
+ unsigned NR = std::distance(I, MI->memoperands_end());
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
+ for (unsigned i = 0; i < NR; ++i)
+ MemRefs[i] = *I++;
+ MB.setMemRefs(MemRefs, MemRefs+NR);
+
+ MachineInstr *NewI = MB;
+ NewI->clearKillInfo();
+ addInstrToLiveness(NewI);
+}
+
+
+/// In the range [First, Last], rename all references to the "old" register RO
+/// to the "new" register RN, but only in instructions predicated on the given
+/// condition.
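+/// For example (illustrative), with RO = vreg2, RN = vreg0 and Cond = true,
+/// a use of vreg2 in an instruction predicated on p0 being true is rewritten
+/// to use vreg0, while instructions predicated on !p0, or not predicated at
+/// all, are left untouched.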
+void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
+ unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last) {
+ MachineBasicBlock::iterator End = std::next(Last);
+ for (MachineBasicBlock::iterator I = First; I != End; ++I) {
+ MachineInstr *MI = &*I;
+ // Do not touch instructions that are not predicated, or are predicated
+ // on the opposite condition.
+ if (!HII->isPredicated(MI))
+ continue;
+ if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || RO != RegisterRef(Op))
+ continue;
+ Op.setReg(RN.Reg);
+ Op.setSubReg(RN.Sub);
+ // In practice, this isn't supposed to see any defs.
+ assert(!Op.isDef() && "Not expecting a def");
+ }
+ }
+}
+
+
+/// For a given conditional copy, predicate the definition of the source of
+/// the copy under the given condition (using the same predicate register as
+/// the copy).
+bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
+ // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
+ unsigned Opc = TfrI->getOpcode();
+ (void)Opc;
+ assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
+ DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << *TfrI);
+
+ MachineOperand &MD = TfrI->getOperand(0);
+ MachineOperand &MP = TfrI->getOperand(1);
+ MachineOperand &MS = TfrI->getOperand(2);
+ // The source operand should be a <kill>. This is not strictly necessary,
+ // but it makes things a lot simpler. Otherwise, we would need to rename
+ // some registers, which would complicate the transformation considerably.
+ if (!MS.isKill())
+ return false;
+
+ RegisterRef RT(MS);
+ unsigned PredR = MP.getReg();
+ MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
+ if (!DefI || !isPredicable(DefI))
+ return false;
+
+ DEBUG(dbgs() << "Source def: " << *DefI);
+
+ // Collect the information about registers defined and used between the
+ // DefI and the TfrI.
+ // Map: reg -> bitmask of subregs
+ ReferenceMap Uses, Defs;
+ MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
+
+ // Check if the predicate register is valid between DefI and TfrI.
+ // If it is, we can then ignore instructions predicated on the negated
+ // conditions when collecting def and use information.
+ bool PredValid = true;
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ if (!I->modifiesRegister(PredR, 0))
+ continue;
+ PredValid = false;
+ break;
+ }
+
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ MachineInstr *MI = &*I;
+ // If this instruction is predicated on the same register, it could
+ // potentially be ignored.
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers, on the other hand, do not have subregisters in this
+ // sense---their super- and subregisters are other physical registers,
+ // and we are not checking that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (one that does execute together
+ // with TfrI), we will not be able to move DefI down. If there is a use
+ // that executes when TfrI's condition is false, then RT must be available
+ // unconditionally (it cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
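+ // Schematically, continuing the situation above: "up" emits the predicated
+ // version of DefI (RD = DefI_cPt ...) right after DefI, while "down" emits
+ // it at the position of TfrI; the original DefI and TfrI are then removed.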
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ unsigned Opc = I->getOpcode();
+ if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
+ bool Done = predicate(I, (Opc == Hexagon::A2_tfrt));
+ if (!Done) {
+ // If we didn't predicate I, we may need to remove it in case it is
+ // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1.
+ if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2)))
+ removeInstrFromLiveness(I);
+ }
+ Changed |= Done;
+ }
+ }
+ return Changed;
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) {
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit())
+ MI->RemoveOperand(i-1);
+ }
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) {
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (HII->isPredicated(MI))
+ removeImplicitUses(MI);
+ }
+}
+
+
+void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) {
+ // Implicit uses that are "undef" are only meaningful (outside of the
+ // internals of this pass) when the instruction defines a subregister,
+ // and the implicit-undef use applies to the defined register. In such
+ // cases, the proper way to record the information in the IR is to mark
+ // the definition as "undef", which will be interpreted as "read-undef".
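+ // For example (illustrative registers), an instruction of the form
+ //   vreg1:loreg = instr_cPt ..., <imp-use,undef> vreg1
+ // has the implicit use removed, and the def marked instead:
+ //   vreg1:loreg<undef> = instr_cPt ...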
+ typedef SmallSet<unsigned,2> RegisterSet;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ RegisterSet Undefs;
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) {
+ MI->RemoveOperand(i-1);
+ Undefs.insert(MO.getReg());
+ }
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || !Op.getSubReg())
+ continue;
+ if (Undefs.count(Op.getReg()))
+ Op.setIsUndef(true);
+ }
+ }
+}
+
+
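+/// Check if RR refers to a 32-bit or 64-bit integer register, and if so,
+/// set BW to the width of the access in bits. A subregister reference into
+/// a double register (e.g. vreg1:loreg) is treated as 32-bit.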
+bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BW = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ BW = (RR.Sub != 0) ? 32 : 64;
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &LR = *I;
+ // Range must start at a register...
+ if (!LR.start.isRegister())
+ return false;
+ // ...and end in a register or in a dead slot.
+ if (!LR.end.isRegister() && !LR.end.isDead())
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
+ if (CoaLimitActive) {
+ if (CoaCounter >= CoaLimit)
+ return false;
+ CoaCounter++;
+ }
+ unsigned BW1, BW2;
+ if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
+ return false;
+ if (MRI->isLiveIn(R1.Reg))
+ return false;
+ if (MRI->isLiveIn(R2.Reg))
+ return false;
+
+ LiveInterval &L1 = LIS->getInterval(R1.Reg);
+ LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ bool Overlap = L1.overlaps(L2);
+
+ DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ if (R1.Sub || R2.Sub)
+ return false;
+ if (Overlap)
+ return false;
+
+ // Coalescing could have a negative impact on scheduling, so try to limit
+ // it to a reasonable extent. Only consider coalescing segments when one
+ // of them does not cross basic block boundaries.
+ if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
+ return false;
+
+ MRI->replaceRegWith(R2.Reg, R1.Reg);
+
+ // Move all live segments from L2 to L1.
+ typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap;
+ ValueInfoMap VM;
+ for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
+ VNInfo *NewVN, *OldVN = I->valno;
+ ValueInfoMap::iterator F = VM.find(OldVN);
+ if (F == VM.end()) {
+ NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ VM.insert(std::make_pair(OldVN, NewVN));
+ } else {
+ NewVN = F->second;
+ }
+ L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ }
+ while (L2.begin() != L2.end())
+ L2.removeSegment(*L2.begin());
+
+ updateKillFlags(R1.Reg, L1);
+ DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ L1.verify();
+
+ return true;
+}
+
+
+/// Attempt to coalesce one of the source registers of a MUX instruction
+/// with the destination register. This could lead to having only one
+/// predicated instruction in the end instead of two.
+bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
+ SmallVector<MachineInstr*,16> Condsets;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &B = *I;
+ for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
+ MachineInstr *MI = &*J;
+ if (!isCondset(MI))
+ continue;
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ Condsets.push_back(MI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
+ MachineInstr *CI = Condsets[i];
+ RegisterRef RD = CI->getOperand(0);
+ RegisterRef RP = CI->getOperand(1);
+ MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
+ bool Done = false;
+ // Consider this case:
+ // vreg1 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = C2_mux ..., vreg1, vreg2
+ // If vreg0 was coalesced with vreg1, we could end up with the following
+ // code:
+ // vreg0 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = A2_tfrf ..., vreg2
+ // which will later become:
+ // vreg0 = instr1 ...
+ // vreg0 = instr2_cNotPt ...
+ // i.e. there will be an unconditional definition (instr1) of vreg0
+ // followed by a conditional one. The output dependency was there before
+ // and is unavoidable, but if instr1 is predicable, we will no longer be
+ // able to predicate it here.
+ // To avoid this scenario, don't coalesce the destination register with
+ // a source register that is defined by a predicable instruction.
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+ // This is done mainly to make the live interval update simpler than it
+ // would be if we were trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index a2a847c..40059fb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -20,7 +20,6 @@
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -49,13 +48,9 @@ namespace llvm {
namespace {
class HexagonExpandPredSpillCode : public MachineFunctionPass {
- const HexagonTargetMachine& QTM;
- const HexagonSubtarget &QST;
-
public:
static char ID;
- HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ HexagonExpandPredSpillCode() : MachineFunctionPass(ID) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeHexagonExpandPredSpillCodePass(Registry);
}
@@ -72,7 +67,8 @@ char HexagonExpandPredSpillCode::ID = 0;
bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
- const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+ const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>();
+ const HexagonInstrInfo *TII = QST.getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -83,20 +79,177 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
++MII) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::STriw_pred) {
+ if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+ Opc == Hexagon::S2_storerh_pci_pseudo ||
+ Opc == Hexagon::S2_storeri_pci_pseudo ||
+ Opc == Hexagon::S2_storerd_pci_pseudo ||
+ Opc == Hexagon::S2_storerf_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pci_pseudo)
+ Opcode = Hexagon::S2_storerd_pci;
+ else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+ Opcode = Hexagon::S2_storeri_pci;
+ else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+ Opcode = Hexagon::S2_storerh_pci;
+ else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+ Opcode = Hexagon::S2_storerf_pci;
+ else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+ Opcode = Hexagon::S2_storerb_pci;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ MachineOperand &Op4 = MI->getOperand(4);
+ // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo circular store with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op4);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+ Opc == Hexagon::L2_loadri_pci_pseudo ||
+ Opc == Hexagon::L2_loadrh_pci_pseudo ||
+ Opc == Hexagon::L2_loadruh_pci_pseudo||
+ Opc == Hexagon::L2_loadrb_pci_pseudo ||
+ Opc == Hexagon::L2_loadrub_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+ Opcode = Hexagon::L2_loadrd_pci;
+ else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+ Opcode = Hexagon::L2_loadri_pci;
+ else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+ Opcode = Hexagon::L2_loadrh_pci;
+ else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+ Opcode = Hexagon::L2_loadruh_pci;
+ else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+ Opcode = Hexagon::L2_loadrb_pci;
+ else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+ Opcode = Hexagon::L2_loadrub_pci;
+ else
+ llvm_unreachable("wrong Opc");
+
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ MachineOperand &Op5 = MI->getOperand(5);
+ // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo circular load with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(Op5);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+ Opc == Hexagon::L2_loadri_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+ Opc == Hexagon::L2_loadruh_pbr_pseudo||
+ Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrd_pbr;
+ else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+ Opcode = Hexagon::L2_loadri_pbr;
+ else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrh_pbr;
+ else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadruh_pbr;
+ else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrb_pbr;
+ else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrub_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo bit-reversed load with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+ Opc == Hexagon::S2_storeri_pbr_pseudo ||
+ Opc == Hexagon::S2_storerh_pbr_pseudo ||
+ Opc == Hexagon::S2_storerb_pbr_pseudo ||
+ Opc == Hexagon::S2_storerf_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+ Opcode = Hexagon::S2_storerd_pbr;
+ else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+ Opcode = Hexagon::S2_storeri_pbr;
+ else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+ Opcode = Hexagon::S2_storerh_pbr;
+ else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+ Opcode = Hexagon::S2_storerf_pbr;
+ else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+ Opcode = Hexagon::S2_storerb_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo bit-reversed store with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
- assert(
- FP ==
- QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(1).isImm() && "Not an offset");
int Offset = MI->getOperand(1).getImm();
int SrcReg = MI->getOperand(2).getReg();
assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
"Not a predicate register");
if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) {
- if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
@@ -110,7 +263,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
.addReg(HEXAGON_RESERVED_REG_1)
.addImm(0).addReg(HEXAGON_RESERVED_REG_2);
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
HEXAGON_RESERVED_REG_2).addReg(SrcReg);
@@ -135,14 +288,12 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
"Not a predicate register");
unsigned FP = MI->getOperand(1).getReg();
- assert(
- FP ==
- QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(2).isImm() && "Not an offset");
int Offset = MI->getOperand(2).getImm();
if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) {
- if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
@@ -157,7 +308,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
DstReg).addReg(HEXAGON_RESERVED_REG_2);
} else {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
HEXAGON_RESERVED_REG_2)
@@ -200,6 +351,6 @@ void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) {
}
FunctionPass*
-llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) {
- return new HexagonExpandPredSpillCode(TM);
+llvm::createHexagonExpandPredSpillCode() {
+ return new HexagonExpandPredSpillCode();
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index e8d8f14..3d786a9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -6,9 +6,8 @@
// License. See LICENSE.TXT for details.
//
// The loop start address in the LOOPn instruction is encoded as a distance
-// from the LOOPn instruction itself. If the start address is too far from
-// the LOOPn instruction, the loop needs to be set up manually, i.e. via
-// direct transfers to SAn and LCn.
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the instruction needs to use a constant extender.
// This pass will identify and convert such LOOPn instructions to a proper
// form.
//===----------------------------------------------------------------------===//
@@ -21,12 +20,15 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/PassSupport.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+static cl::opt<unsigned> MaxLoopRange(
+ "hexagon-loop-range", cl::Hidden, cl::init(200),
+ cl::desc("Restrict range of loopN instructions (testing only)"));
+
namespace llvm {
void initializeHexagonFixupHwLoopsPass(PassRegistry&);
}
@@ -52,20 +54,15 @@ namespace {
}
private:
- /// \brief Maximum distance between the loop instr and the basic block.
- /// Just an estimate.
- static const unsigned MAX_LOOP_DISTANCE = 200;
-
/// \brief Check the offset between each loop instruction and
/// the loop basic block to determine if we can use the LOOP instruction
/// or if we need to set the LC/SA registers explicitly.
bool fixupLoopInstrs(MachineFunction &MF);
- /// \brief Add the instruction to set the LC and SA registers explicitly.
- void convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS);
-
+ /// \brief Replace loop instruction with the constant extended
+ /// version if the loop label is too far from the loop instruction.
+ void useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII);
};
char HexagonFixupHwLoops::ID = 0;
@@ -78,20 +75,18 @@ FunctionPass *llvm::createHexagonFixupHwLoops() {
return new HexagonFixupHwLoops();
}
-
/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::J2_loop0r ||
- MI->getOpcode() == Hexagon::J2_loop0i;
+ MI->getOpcode() == Hexagon::J2_loop0i ||
+ MI->getOpcode() == Hexagon::J2_loop1r ||
+ MI->getOpcode() == Hexagon::J2_loop1i;
}
-
bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
- bool Changed = fixupLoopInstrs(MF);
- return Changed;
+ return fixupLoopInstrs(MF);
}
-
/// \brief For Hexagon, if the loop label is too far from the
/// loop instruction then we need to set the LC0 and SA0 registers
/// explicitly instead of using LOOP(start,count). This function
@@ -105,41 +100,49 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
// Offset of the current instruction from the start.
unsigned InstOffset = 0;
// Map each basic block to its first instruction.
- DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+ DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset;
+
+ const HexagonInstrInfo *HII =
+ static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
// First pass - compute the offset of each basic block.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- BlockToInstOffset[MBB] = InstOffset;
- InstOffset += (MBB->size() * 4);
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.getAlignment()) {
+ // Although we don't know the exact layout of the final code, we need
+ // to account for alignment padding somehow. This heuristic pads each
+ // aligned basic block according to the alignment value.
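+ // E.g. an alignment value of 4 (i.e. 1 << 4 = 16 bytes) gives
+ // ByteAlign = 15, so an offset of 20 is padded up to 32.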
+ int ByteAlign = (1u << MBB.getAlignment()) - 1;
+ InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
+ }
+
+ BlockToInstOffset[&MBB] = InstOffset;
+ for (const MachineInstr &MI : MBB)
+ InstOffset += HII->getSize(&MI);
}
- // Second pass - check each loop instruction to see if it needs to
- // be converted.
+ // Second pass - check each loop instruction to see if it needs to be
+ // converted.
InstOffset = 0;
bool Changed = false;
- RegScavenger RS;
-
- // Loop over all the basic blocks.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- InstOffset = BlockToInstOffset[MBB];
- RS.enterBasicBlock(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ InstOffset = BlockToInstOffset[&MBB];
// Loop over all the instructions.
- MachineBasicBlock::iterator MIE = MBB->end();
- MachineBasicBlock::iterator MII = MBB->begin();
+ MachineBasicBlock::iterator MII = MBB.begin();
+ MachineBasicBlock::iterator MIE = MBB.end();
while (MII != MIE) {
+ InstOffset += HII->getSize(&*MII);
+ if (MII->isDebugValue()) {
+ ++MII;
+ continue;
+ }
if (isHardwareLoop(MII)) {
- RS.forward(MII);
assert(MII->getOperand(0).isMBB() &&
"Expect a basic block as loop operand");
- int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- unsigned Dist = Sub > 0 ? Sub : -Sub;
- if (Dist > MAX_LOOP_DISTANCE) {
- // Convert to explicity setting LC0 and SA0.
- convertLoopInstr(MF, MII, RS);
- MII = MBB->erase(MII);
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ if ((unsigned)abs(diff) > MaxLoopRange) {
+ useExtLoopInstr(MF, MII);
+ MII = MBB.erase(MII);
Changed = true;
} else {
++MII;
@@ -147,39 +150,38 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
} else {
++MII;
}
- InstOffset += 4;
}
}
return Changed;
}
-
-/// \brief convert a loop instruction to a sequence of instructions that
-/// set the LC0 and SA0 register explicitly.
-void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS) {
+/// \brief Replace loop instructions with the constant extended version.
+void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
-
- // First, set the LC0 with the trip count.
- if (MII->getOperand(1).isReg()) {
- // Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
- .addReg(MII->getOperand(1).getReg());
- } else {
- // Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch)
- .addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0)
- .addReg(Scratch);
+ MachineInstrBuilder MIB;
+ unsigned newOp;
+ switch (MII->getOpcode()) {
+ case Hexagon::J2_loop0r:
+ newOp = Hexagon::J2_loop0rext;
+ break;
+ case Hexagon::J2_loop0i:
+ newOp = Hexagon::J2_loop0iext;
+ break;
+ case Hexagon::J2_loop1r:
+ newOp = Hexagon::J2_loop1rext;
+ break;
+ case Hexagon::J2_loop1i:
+ newOp = Hexagon::J2_loop1iext;
+ break;
+ default:
+ llvm_unreachable("Invalid Hardware Loop Instruction.");
}
- // Then, set the SA0 with the loop start address.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0)
- .addReg(Scratch);
+ MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
+
+ for (unsigned i = 0; i < MII->getNumOperands(); ++i)
+ MIB.addOperand(MII->getOperand(i));
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 9d1a527..0885a79 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,6 +8,8 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-pei"
+
#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
@@ -16,334 +18,1274 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+// Hexagon stack frame layout as defined by the ABI:
+//
+// Incoming arguments
+// passed via stack
+// |
+// |
+// SP during function's FP during function's |
+// +-- runtime (top of stack) runtime (bottom) --+ |
+// | | |
+// --++---------------------+------------------+-----------------++-+-------
+// | parameter area for | variable-size | fixed-size |LR| arg
+// | called functions | local objects | local objects |FP|
+// --+----------------------+------------------+-----------------+--+-------
+// <- size known -> <- size unknown -> <- size known ->
+//
+// Low address High address
+//
+// <--- stack growth
+//
+//
+// - In all circumstances, the outgoing function arguments are always
+// accessible using the SP, and the incoming arguments are accessible
+// using the FP.
+// - If the local objects are not aligned, they can always be accessed using
+// the FP.
+// - If there are no variable-sized objects, the local objects can always be
+// accessed using the SP, regardless of whether they are aligned or not.
+// (The alignment padding will be at the bottom of the stack (highest
+// address), and so the offset with respect to the SP will be known at
+// compile time.)
+//
+// The only complication occurs if there are both aligned local objects and
+// dynamically allocated (variable-sized) objects. The alignment pad will be
+// placed between the FP and the local objects, thus preventing the use of the
+// FP to access the local objects. At the same time, the variable-sized objects
+// will be between the SP and the local objects, thus introducing an unknown
+// distance from the SP to the locals.
+//
+// To avoid this problem, a new register is created that holds the aligned
+// address of the bottom of the stack, referred to in the sources as AP
+// (aligned pointer). The AP will be equal to "FP-p", where "p" is the
+// smallest pad that aligns AP to the required boundary (the maximum of the
+// alignments of all stack objects, fixed- and variable-sized). All local
+// objects[1] will
+// then use AP as the base pointer.
+// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
+// their name from being allocated at fixed locations on the stack, relative
+// to the FP. In the presence of dynamic allocation and local alignment, such
+// objects can only be accessed through the FP.
+//
+// Illustration of the AP:
+// FP --+
+// |
+// ---------------+---------------------+-----+-----------------------++-+--
+// Rest of the | Local stack objects | Pad | Fixed stack objects |LR|
+// stack frame | (aligned) | | (CSR, spills, etc.) |FP|
+// ---------------+---------------------+-----+-----------------+-----+--+--
+// |<-- Multiple of the -->|
+// stack alignment +-- AP
+//
+// The AP is set up at the beginning of the function. Since it is not a
+// dedicated (reserved) register, it needs to be kept live throughout the
+// function
+// to be available as the base register for local object accesses.
+// Normally, the address of a stack object is obtained by a pseudo-instruction
+// TFR_FI. To access local objects with the AP register present, a different
+// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
+// argument compared to TFR_FI: the first input register is the AP register.
+// This keeps the register live between its definition and its uses.
+
+// The AP register is originally set up using pseudo-instruction ALIGNA:
+// AP = ALIGNA A
+// where
+// A - required stack alignment
+// The alignment value must be the maximum of all alignments required by
+// any stack object.
+
+// The dynamic allocation uses a pseudo-instruction ALLOCA:
+// Rd = ALLOCA Rs, A
+// where
+// Rd - address of the allocated space
+// Rs - minimum size (the actual allocated size can be larger to
+// accommodate alignment)
+// A - required alignment
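+//
+// For illustration (register numbers arbitrary, operand details elided),
+// a function with a 32-byte aligned local object and a dynamic allocation
+// could contain:
+// vreg1 = ALIGNA 32 ; AP = aligned bottom of the stack
+// vreg2 = TFR_FIA vreg1, <fi#0>, ... ; address of an aligned local object
+// vreg3 = ALLOCA vreg4, 16 ; dynamic allocation, 16-byte aligned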
+
+
using namespace llvm;
-static cl::opt<bool> DisableDeallocRet(
- "disable-hexagon-dealloc-ret",
- cl::Hidden,
- cl::desc("Disable Dealloc Return for Hexagon target"));
+static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
+ cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- // Get the number of bytes to allocate from the FrameInfo.
- unsigned FrameSize = MFI->getStackSize();
+static cl::opt<int> NumberScavengerSlots("number-scavenger-slots",
+ cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
+ cl::ZeroOrMore);
- // Get the alignments provided by the target.
- unsigned TargetAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
+ cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
+ cl::init(6), cl::ZeroOrMore);
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
+ cl::Hidden, cl::desc("Specify Os spill func threshold"),
+ cl::init(1), cl::ZeroOrMore);
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable stack frame shrink wrapping"));
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
+static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
+ "shrink-wraps"));
- // Make sure the frame is aligned.
- FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+namespace {
+ /// Map a register pair Reg to the subregister that has the greater "number",
+ /// e.g. D3 (aka R7:6) will be mapped to R7, etc.
+ unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
+ bool hireg = true) {
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
+ return Reg;
- // Update frame info.
- MFI->setStackSize(FrameSize);
+ unsigned RegNo = 0;
+ for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
+ if (hireg) {
+ if (*SubRegs > RegNo)
+ RegNo = *SubRegs;
+ } else {
+ if (!RegNo || *SubRegs < RegNo)
+ RegNo = *SubRegs;
+ }
+ }
+ return RegNo;
+ }
+
+ /// Returns the callee saved register with the largest id in the vector.
+ unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo &TRI) {
+ assert(Hexagon::R1 > 0 &&
+ "Assume physical registers are encoded as positive integers");
+ if (CSI.empty())
+ return 0;
+
+ unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
+ for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
+ unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
+ if (Reg > Max)
+ Max = Reg;
+ }
+ return Max;
+ }
+
+ /// Checks if the basic block contains any instruction that needs a stack
+ /// frame to be already in place.
+ bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) {
+ for (auto &I : MBB) {
+ const MachineInstr *MI = &I;
+ if (MI->isCall())
+ return true;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::ALLOCA:
+ case Hexagon::ALIGNA:
+ return true;
+ default:
+ break;
+ }
+ // Check individual operands.
+ for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ // While the presence of a frame index does not prove that a stack
+ // frame will be required, all frame indexes should be within
+ // allocframe/deallocframe. Otherwise, the code that translates a frame
+ // index into an offset would have to be aware of the placement of
+ // the frame creation/destruction instructions.
+ if (Mo->isFI())
+ return true;
+ if (!Mo->isReg())
+ continue;
+ unsigned R = Mo->getReg();
+ // Virtual registers will need scavenging, which then may require
+ // a stack slot.
+ if (TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ if (CSR[R])
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// Returns true if MBB contains a machine instruction that indicates a
+ /// tail call.
+ bool hasTailCall(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ unsigned RetOpc = I->getOpcode();
+ return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
+ }
+
+ /// Returns true if MBB contains an instruction that returns.
+ bool hasReturn(const MachineBasicBlock &MBB) {
+ for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
+ if (I->isReturn())
+ return true;
+ return false;
+ }
+}
+
+
+/// Implements shrink-wrapping of the stack frame. By default, the stack
+/// frame is created in the function entry block and is cleaned up in every
+/// block
+/// that returns. This function finds alternate blocks: one for the frame
+/// setup (prolog) and one for the cleanup (epilog).
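+/// For example, if only one late block in a chain of if-then blocks needs a
+/// stack frame, the prolog can be placed in the nearest common dominator of
+/// all frame-requiring blocks, and the epilog in their nearest common
+/// post-dominator, instead of the entry block and every return block.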
+void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
+ MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
+ static unsigned ShrinkCounter = 0;
+
+ if (ShrinkLimit.getPosition()) {
+ if (ShrinkCounter >= ShrinkLimit)
+ return;
+ ShrinkCounter++;
+ }
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ MachineDominatorTree MDT;
+ MDT.runOnMachineFunction(MF);
+ MachinePostDominatorTree MPT;
+ MPT.runOnMachineFunction(MF);
+
+ typedef DenseMap<unsigned,unsigned> UnsignedMap;
+ UnsignedMap RPO;
+ typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
+ RPOTType RPOT(&MF);
+ unsigned RPON = 0;
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ RPO[(*I)->getNumber()] = RPON++;
+
+ // Don't process functions that have loops, at least for now. Placement
+ // of prolog and epilog must take loop structure into account. For
+ // simplicity, don't do it right now.
+ for (auto &I : MF) {
+ unsigned BN = RPO[I.getNumber()];
+ for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
+ // If found a back-edge, return.
+ if (RPO[(*SI)->getNumber()] <= BN)
+ return;
+ }
+ }
+
+ // Collect the set of blocks that need a stack frame to execute. Scan
+ // each block for uses/defs of callee-saved registers, calls, etc.
+ SmallVector<MachineBasicBlock*,16> SFBlocks;
+ BitVector CSR(Hexagon::NUM_TARGET_REGS);
+ for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
+ CSR[*P] = true;
+
+ for (auto &I : MF)
+ if (needsStackFrame(I, CSR))
+ SFBlocks.push_back(&I);
+
+ DEBUG({
+ dbgs() << "Blocks needing SF: {";
+ for (auto &B : SFBlocks)
+ dbgs() << " BB#" << B->getNumber();
+ dbgs() << " }\n";
+ });
+ // No frame needed?
+ if (SFBlocks.empty())
+ return;
+
+ // Pick a common dominator and a common post-dominator.
+ MachineBasicBlock *DomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
+ if (!DomB)
+ break;
+ }
+ MachineBasicBlock *PDomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
+ if (!PDomB)
+ break;
+ }
+ DEBUG({
+ dbgs() << "Computed dom block: BB#";
+ if (DomB) dbgs() << DomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << ", computed pdom block: BB#";
+ if (PDomB) dbgs() << PDomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << "\n";
+ });
+ if (!DomB || !PDomB)
+ return;
+
+ // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
+ if (!MDT.dominates(DomB, PDomB)) {
+ DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
+ return;
+ }
+ if (!MPT.dominates(PDomB, DomB)) {
+ DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
+ return;
+ }
+
+ // Finally, everything seems right.
+ PrologB = DomB;
+ EpilogB = PDomB;
+}
+
+/// Perform most of the PEI work here:
+/// - saving/restoring of the callee-saved registers,
+/// - stack frame creation and destruction.
+/// Normally, this work is distributed among various functions, but doing it
+/// in one place allows shrink-wrapping of the stack frame.
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
+ if (EnableShrinkWrapping)
+ findShrunkPrologEpilog(MF, PrologB, EpilogB);
+
+ insertCSRSpillsInBlock(*PrologB, CSI, HRI);
+ insertPrologueInBlock(*PrologB);
+
+ if (EpilogB) {
+ insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
+ insertEpilogueInBlock(*EpilogB);
+ } else {
+ for (auto &B : MF)
+ if (!B.empty() && B.back().isReturn())
+ insertCSRRestoresInBlock(B, CSI, HRI);
+
+ for (auto &B : MF)
+ if (!B.empty() && B.back().isReturn())
+ insertEpilogueInBlock(B);
+ }
}
-void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
MachineBasicBlock::iterator MBBI = MBB.begin();
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- determineFrameLayout(MF);
+ auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ DebugLoc dl;
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
+
+ // Calculate the total stack frame size.
// Get the number of bytes to allocate from the FrameInfo.
- int NumBytes = (int) MFI->getStackSize();
+ unsigned FrameSize = MFI->getStackSize();
+ // Round up the max call frame size to the max alignment on the stack.
+ unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign);
+ MFI->setMaxCallFrameSize(MaxCFA);
+
+ FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign);
+ MFI->setStackSize(FrameSize);
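+ // E.g. with local objects of 40 bytes, MaxCallFrameSize = 24 and
+ // MaxAlign = 16, MaxCFA becomes 32 and the total frame size 32 + 48 = 80.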
- // LLVM expects allocframe not to be the first instruction in the
- // basic block.
+ bool AlignStack = (MaxAlign > getStackAlignment());
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned NumBytes = MFI->getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned MaxCF = MFI->getMaxCallFrameSize();
MachineBasicBlock::iterator InsertPt = MBB.begin();
- //
- // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
- //
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
- const std::vector<MachineInstr*>& AdjustRegs =
- FuncInfo->getAllocaAdjustInsts();
- for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
- e = AdjustRegs.end();
- i != e; ++i) {
- MachineInstr* MI = *i;
- assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
- "Expected adjust alloca node");
+ auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
- MachineOperand& MO = MI->getOperand(2);
- assert(MO.isImm() && "Expected immediate");
- MO.setImm(MFI->getMaxCallFrameSize());
+ for (auto MI : AdjustRegs) {
+ assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
+ expandAlloca(MI, HII, SP, MaxCF);
+ MI->eraseFromParent();
}
//
- // Only insert ALLOCFRAME if we need to.
+ // Only insert ALLOCFRAME if we need to, or at -O0 for the debugger. We
+ // think this shouldn't be required, but we do it now because gcc does and
+ // gdb can't break at the start of the function without it. This will be
+ // removed if it turns out to be a gdb bug.
//
- if (hasFP(MF)) {
- // Check for overflow.
- // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
- const int ALLOCFRAME_MAX = 16384;
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-
- if (NumBytes >= ALLOCFRAME_MAX) {
- // Emit allocframe(#0).
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(0);
-
- // Subtract offset from frame pointer.
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
- HEXAGON_RESERVED_REG_1).addImm(NumBytes);
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::A2_sub),
- QRI->getStackRegister()).
- addReg(QRI->getStackRegister()).
- addReg(HEXAGON_RESERVED_REG_1);
- } else {
- BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(NumBytes);
- }
+ bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF))
+ return;
+
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const unsigned int ALLOCFRAME_MAX = 16384;
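+ // E.g. for NumBytes = 20000 (>= ALLOCFRAME_MAX), we emit allocframe(#0)
+ // and then subtract 20000 from SP via a caller-saved scratch register.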
+
+ // Create a dummy memory operand to prevent allocframe from being treated
+ // as a volatile memory reference.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ 4, 4);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+ // Subtract offset from frame pointer.
+ // We use a caller-saved non-parameter register for that.
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
+ CallerSavedReg).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(CallerSavedReg);
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
}
-}
-// Returns true if MBB has a machine instructions that indicates a tail call
-// in the block.
-bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+
+ if (needsFrameMoves) {
+ std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions();
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
+ unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
+
+ // CFA = FP + 8
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ FrameLabel, DwFPReg, -8));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // R31 (return addr) = CFA - #4
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ FrameLabel, DwRAReg, -4));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // R30 (frame ptr) = CFA - #8
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ FrameLabel, DwFPReg, -8));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
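+ // Taken together, these describe the standard frame: CFA = FP + 8, with
+ // LR (R31) saved at CFA - 4 and FP (R30) saved at CFA - 8.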
+
+ unsigned int regsToMove[] = {
+ Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
+ Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
+ Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
+ Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
+ Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10,
+ Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister
+ };
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) {
+ for (unsigned I = 0, E = CSI.size(); I < E; ++I) {
+ if (CSI[I].getReg() == regsToMove[i]) {
+ // Subtract 8 to make room for R30 and R31, which are added above.
+ int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8;
+
+ if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) {
+ unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ DwarfReg, Offset));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else {
+ // Split the double regs into subregs and generate the appropriate
+ // cfi_offsets. The only reason we split double regs is that llvm-mc
+ // does not understand paired registers for cfi_offset,
+ // e.g. .cfi_offset r1:0, -64.
+ unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI);
+ unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false);
+ unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+ unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+ unsigned HiCFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ HiDwarfReg, Offset+4));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(HiCFIIndex);
+ unsigned LoCFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel,
+ LoDwarfReg, Offset));
+ BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(LoCFIIndex);
+ }
+ break;
+ }
+ } // for CSI.size()
+ } // for regsToMove
+ } // needsFrameMoves
}
-void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
- DebugLoc dl = MBBI->getDebugLoc();
+void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
//
// Only insert deallocframe if we need to. Also at -O0. See comment
- // in emitPrologue above.
+ // in insertPrologueInBlock above.
//
- if (hasFP(MF) || MF.getTarget().getOptLevel() == CodeGenOpt::None) {
- MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
- MachineBasicBlock::iterator MBBI_end = MBB.end();
-
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- // Handle EH_RETURN.
- if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
- assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add),
- Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
- return;
- }
- // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
- // versions.
- if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() &&
- MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) {
- // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
- // instruction if we encounter it.
- MachineBasicBlock::iterator BeforeJMPR =
- MBB.begin() == MBBI ? MBBI : std::prev(MBBI);
- if (BeforeJMPR != MBBI &&
- BeforeJMPR->getOpcode() == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
- // Remove the JMPR node.
- MBB.erase(MBBI);
- return;
- }
+ if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None)
+ return;
- // Add dealloc_return.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return));
- // Transfer the function live-out registers.
- MIB->copyImplicitOps(*MBB.getParent(), &*MBBI);
- // Remove the JUMPR node.
- MBB.erase(MBBI);
- } else { // Add deallocframe for V2 and V3, and V4 tail calls.
- // Check for RESTORE_DEALLOC_BEFORE_TAILCALL_V4. We don't need an extra
- // DEALLOCFRAME instruction after it.
- MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
- MachineBasicBlock::iterator I =
- Term == MBB.begin() ? MBB.end() : std::prev(Term);
- if (I != MBB.end() &&
- I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
- return;
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ unsigned SP = HRI.getStackRegister();
+
+ MachineInstr *RetI = nullptr;
+ for (auto &I : MBB) {
+ if (!I.isReturn())
+ continue;
+ RetI = &I;
+ break;
+ }
+ unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe));
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (InsertPt != MBB.end())
+ DL = InsertPt->getDebugLoc();
+ else if (!MBB.empty())
+ DL = std::prev(MBB.end())->getDebugLoc();
+
+ // Handle EH_RETURN.
+ if (RetOpc == Hexagon::EH_RETURN_JMPR) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ .addReg(SP)
+ .addReg(Hexagon::R28);
+ return;
+ }
+
+ // Check for a RESTORE_DEALLOC_RET* tail call. Don't emit an extra
+ // deallocframe instruction if we encounter it.
+ if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
+ MachineBasicBlock::iterator It = RetI;
+ ++It;
+ // Delete all instructions after the RESTORE (except labels).
+ while (It != MBB.end()) {
+ if (!It->isLabel())
+ It = MBB.erase(It);
+ else
+ ++It;
}
+ return;
+ }
+
+ // It is possible that the restoring code is a call to a library function.
+ // All of the restore* functions include "deallocframe", so we need to make
+ // sure that we don't add an extra one.
+ bool NeedsDeallocframe = true;
+ if (!MBB.empty() && InsertPt != MBB.begin()) {
+ MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
+ unsigned COpc = PrevIt->getOpcode();
+ if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
+ NeedsDeallocframe = false;
+ }
+
+ if (!NeedsDeallocframe)
+ return;
+ // If the returning instruction is JMPret, replace it with dealloc_return,
+ // otherwise just add deallocframe. The function could be returning via a
+ // tail call.
+ if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ return;
}
+ unsigned NewOpc = Hexagon::L4_return;
+ MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ // Transfer the function live-out registers.
+ NewI->copyImplicitOps(MF, RetI);
+ MBB.erase(RetI);
}
+
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const HexagonMachineFunctionInfo *FuncInfo =
MF.getInfo<HexagonMachineFunctionInfo>();
- return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
- FuncInfo->hasClobberLR() );
+ return MFI->hasCalls() || MFI->getStackSize() > 0 ||
+ FuncInfo->hasClobberLR();
}
-static inline
-unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) {
- MCSuperRegIterator SRI(Reg, TRI);
- assert(SRI.isValid() && "Expected a superreg");
- unsigned SuperReg = *SRI;
- ++SRI;
- assert(!SRI.isValid() && "Expected exactly one superreg");
- return SuperReg;
-}
-bool
-HexagonFrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+enum SpillKind {
+ SK_ToMem,
+ SK_FromMem,
+ SK_FromMemTailcall
+};
- if (CSI.empty()) {
- return false;
+static const char *
+getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) {
+ const char * V4SpillToMemoryFunctions[] = {
+ "__save_r16_through_r17",
+ "__save_r16_through_r19",
+ "__save_r16_through_r21",
+ "__save_r16_through_r23",
+ "__save_r16_through_r25",
+ "__save_r16_through_r27" };
+
+ const char * V4SpillFromMemoryFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe",
+ "__restore_r16_through_r19_and_deallocframe",
+ "__restore_r16_through_r21_and_deallocframe",
+ "__restore_r16_through_r23_and_deallocframe",
+ "__restore_r16_through_r25_and_deallocframe",
+ "__restore_r16_through_r27_and_deallocframe" };
+
+ const char * V4SpillFromMemoryTailcallFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r19_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r21_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r23_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r25_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r27_and_deallocframe_before_tailcall"
+ };
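+ // For example, getSpillFunctionFor(Hexagon::R21, SK_ToMem) selects
+ // "__save_r16_through_r21", i.e. entry [2] of the corresponding table.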
+
+ const char **SpillFunc = nullptr;
+
+ switch(SpillType) {
+ case SK_ToMem:
+ SpillFunc = V4SpillToMemoryFunctions;
+ break;
+ case SK_FromMem:
+ SpillFunc = V4SpillFromMemoryFunctions;
+ break;
+ case SK_FromMemTailcall:
+ SpillFunc = V4SpillFromMemoryTailcallFunctions;
+ break;
+ }
+ assert(SpillFunc && "Unknown spill kind");
+
+ // Spill all callee-saved registers up to the highest register used.
+ switch (MaxReg) {
+ case Hexagon::R17:
+ return SpillFunc[0];
+ case Hexagon::R19:
+ return SpillFunc[1];
+ case Hexagon::R21:
+ return SpillFunc[2];
+ case Hexagon::R23:
+ return SpillFunc[3];
+ case Hexagon::R25:
+ return SpillFunc[4];
+ case Hexagon::R27:
+ return SpillFunc[5];
+ default:
+ llvm_unreachable("Unhandled maximum callee save register");
}
+ return nullptr;
+}
- // We can only schedule double loads if we spill contiguous callee-saved regs
- // For instance, we cannot schedule double-word loads if we spill r24,
- // r26, and r27.
- // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
- // above.
- bool ContiguousRegs = true;
+/// Adds all callee-saved registers up to MaxReg to the instruction.
+static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
+ unsigned MaxReg, bool IsDef) {
+ // Add the callee-saved registers as implicit operands (uses or defs,
+ // depending on IsDef).
+ for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) {
+ MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true);
+ Inst->addOperand(ImpUse);
+ }
+}
- for (unsigned i = 0; i < CSI.size(); ++i) {
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
+
+
+bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineBasicBlock::iterator MI = MBB.begin();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+ if (useSpillFunction(MF, CSI)) {
+ unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
+ const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem);
+ // Call spill function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ MachineInstr *SaveRegsCall =
+ BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
+ .addExternalSymbol(SpillFun);
+ // Add callee-saved registers as use.
+ addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
+ // Add live in registers.
+ for (unsigned I = 0; I < CSI.size(); ++I)
+ MBB.addLiveIn(CSI[I].getReg());
+ return true;
+ }
+
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
unsigned Reg = CSI[i].getReg();
+ // Add live-in registers. We treat the eh_return callee-saved registers
+ // r0-r3 specially: they are not really callee-saved registers, as they
+ // are not supposed to be killed.
+ bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
+ int FI = CSI[i].getFrameIdx();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+ if (IsKill)
+ MBB.addLiveIn(Reg);
+ }
+ return true;
+}
- //
- // Check if we can use a double-word store.
- //
- unsigned SuperReg = uniqueSuperReg(Reg, TRI);
- bool CanUseDblStore = false;
- const TargetRegisterClass* SuperRegClass = nullptr;
-
- if (ContiguousRegs && (i < CSI.size()-1)) {
- unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
- CanUseDblStore = (SuperRegNext == SuperReg);
- }
+bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- if (CanUseDblStore) {
- TII.storeRegToStackSlot(MBB, MI, SuperReg, true,
- CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg);
- ++i;
+ if (useRestoreFunction(MF, CSI)) {
+ bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
+ unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
+ SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
+ const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
+
+ // Call the restore function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
+ : MBB.getLastNonDebugInstr()->getDebugLoc();
+ MachineInstr *DeallocCall = nullptr;
+
+ if (HasTC) {
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+ DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
} else {
- // Cannot use a double-word store.
- ContiguousRegs = false;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
- TRI);
- MBB.addLiveIn(Reg);
+ // The block has a return.
+ MachineBasicBlock::iterator It = MBB.getFirstTerminator();
+ assert(It->isReturn() && std::next(It) == MBB.end());
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+ DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
+ // Transfer the function live-out registers.
+ DeallocCall->copyImplicitOps(MF, It);
}
+ addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
+ return true;
+ }
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ int FI = CSI[i].getFrameIdx();
+ TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
return true;
}
-bool HexagonFrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ unsigned Opc = MI.getOpcode();
+ (void)Opc; // Silence compiler warning.
+ assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
+ "Cannot handle this call frame pseudo instruction");
+ MBB.erase(I);
+}
+
+
+void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function uses an aligned stack and also has variable-sized stack
+ // objects, then we need to map all spill slots to fixed positions, so that
+ // they can be accessed through FP. Otherwise they would have to be accessed
+ // via AP, which may not be available at the particular place in the program.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool HasAlloca = MFI->hasVarSizedObjects();
+ bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment());
+
+ if (!HasAlloca || !HasAligna)
+ return;
+
+ unsigned LFS = MFI->getLocalFrameSize();
+ int Offset = -LFS;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
+ continue;
+ int S = MFI->getObjectSize(i);
+ LFS += S;
+ Offset -= S;
+ MFI->mapLocalFrameObject(i, Offset);
+ }
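+ // E.g. with an initial local frame size of 16 and spill slots of 4 and
+ // 8 bytes, the slots map to offsets -20 and -28, and LFS grows to 28.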
+
+ MFI->setLocalFrameSize(LFS);
+ unsigned A = MFI->getLocalFrameMaxAlign();
+ assert(A <= 8 && "Unexpected local frame alignment");
+ if (A == 0)
+ MFI->setLocalFrameMaxAlign(8);
+ MFI->setUseLocalStackAllocationBlock(true);
+}
- MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+/// Returns true if there are no caller-saved registers available.
+static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
+ const HexagonRegisterInfo &HRI) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF);
+ // Check for an unused caller-saved register.
+ for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
+ MCPhysReg FreeReg = *CallerSavedRegs;
+ if (MRI.isPhysRegUsed(FreeReg))
+ continue;
- if (CSI.empty()) {
+ // Check aliased register usage.
+ bool IsCurrentRegUsed = false;
+ for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
+ if (MRI.isPhysRegUsed(*AI)) {
+ IsCurrentRegUsed = true;
+ break;
+ }
+ if (IsCurrentRegUsed)
+ continue;
+
+ // Neither directly used nor used through an aliased register.
return false;
}
+ // All caller-saved registers are used.
+ return true;
+}
- // We can only schedule double loads if we spill contiguous callee-saved regs
- // For instance, we cannot schedule double-word loads if we spill r24,
- // r26, and r27.
- // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
- // above.
- bool ContiguousRegs = true;
- for (unsigned i = 0; i < CSI.size(); ++i) {
- unsigned Reg = CSI[i].getReg();
+/// Replaces the predicate spill code pseudo instructions by valid instructions.
+bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
+ const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool HasReplacedPseudoInst = false;
+ // Replace predicate spill pseudo instructions by real code.
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ MachineBasicBlock::iterator NextII;
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ MII = NextII) {
+ MachineInstr *MI = MII;
+ NextII = std::next(MII);
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ HasReplacedPseudoInst = true;
+ // STriw_pred FI, 0, SrcReg;
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ bool IsOrigSrcRegKilled = MI->getOperand(2).isKill();
+
+ assert(MI->getOperand(0).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+
+ // Insert transfer to general purpose register.
+ // VirtReg = C2_tfrpr SrcPredReg
+ BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr),
+ VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled));
+
+ // Change instruction to S2_storeri_io.
+ // S2_storeri_io FI, 0, VirtReg
+ MI->setDesc(HII.get(Hexagon::S2_storeri_io));
+ MI->getOperand(2).setReg(VirtReg);
+ MI->getOperand(2).setIsKill();
- //
- // Check if we can use a double-word load.
- //
- unsigned SuperReg = uniqueSuperReg(Reg, TRI);
- const TargetRegisterClass* SuperRegClass = nullptr;
- bool CanUseDblLoad = false;
- if (ContiguousRegs && (i < CSI.size()-1)) {
- unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
- SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg);
- CanUseDblLoad = (SuperRegNext == SuperReg);
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred FI, 0
+ MachineOperand &M0 = MI->getOperand(0);
+ if (M0.isDead()) {
+ MBB->erase(MII);
+ continue;
+ }
+
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ assert(MI->getOperand(1).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(DestReg) &&
+ "Not a predicate register");
+
+ // Change instruction to L2_loadri_io.
+ // VirtReg = L2_loadri_io FI, 0
+ MI->setDesc(HII.get(Hexagon::L2_loadri_io));
+ MI->getOperand(0).setReg(VirtReg);
+
+ // Insert transfer to general purpose register.
+ // DestReg = C2_tfrrp VirtReg
+ const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp);
+ BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg)
+ .addReg(VirtReg, getKillRegState(true));
+ HasReplacedPseudoInst = true;
+ }
}
+ }
+ return HasReplacedPseudoInst;
+}
- if (CanUseDblLoad) {
- TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(),
- SuperRegClass, TRI);
- MBB.addLiveIn(SuperReg);
- ++i;
- } else {
- // Cannot use a double-word load.
- ContiguousRegs = false;
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
- MBB.addLiveIn(Reg);
+void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan(
+ MachineFunction &MF, RegScavenger* RS) const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
+
+ // If we have a function containing __builtin_eh_return we want to spill and
+ // restore all callee saved registers. Pretend that they are used.
+ if (HasEHReturn) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs;
+ ++CSRegs)
+ if (!MRI.isPhysRegUsed(*CSRegs))
+ MRI.setPhysRegUsed(*CSRegs);
+ }
+
+ const TargetRegisterClass &RC = Hexagon::IntRegsRegClass;
+
+ // Replace predicate register pseudo spill code.
+ bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF);
+
+ // We need to reserve a spill slot if scavenging could potentially require
+ // spilling a scavenged register.
+ if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ for (int i=0; i < NumberScavengerSlots; i++)
+ RS->addScavengingFrameIndex(
+ MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment()));
+ }
+}
+
+
+#ifndef NDEBUG
+static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << ' ' << PrintReg(R, &TRI);
+ }
+ dbgs() << " }";
+}
+#endif
+
+
+bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
+ << MF.getFunction()->getName() << '\n');
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector SRegs(Hexagon::NUM_TARGET_REGS);
+
+ // Generate a set of unique, callee-saved registers (SRegs), where each
+ // register in the set is maximal in terms of sub-/super-register relation,
+ // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
+
+ // (1) For each callee-saved register, add that register and all of its
+ // sub-registers to SRegs.
+ DEBUG(dbgs() << "Initial CS registers: {");
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = true;
+ }
+ DEBUG(dbgs() << " }\n");
+ DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (2) For each reserved register, remove that register and all of its
+ // sub- and super-registers from SRegs.
+ BitVector Reserved = TRI->getReservedRegs(MF);
+ for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = false;
+ }
+ DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n");
+ DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (3) Collect all registers that have at least one sub-register in SRegs,
+ // and also have no sub-registers that are reserved. These will be the
+ // candidates for saving as a whole instead of their individual sub-registers.
+ // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
+ BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
+ TmpSup[*SR] = true;
+ }
+ for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
+ unsigned R = x;
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
+ if (!Reserved[*SR])
+ continue;
+ TmpSup[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
+
+ // (4) Include all super-registers found in (3) into SRegs.
+ SRegs |= TmpSup;
+ DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (5) For each register R in SRegs, if any super-register of R is in SRegs,
+ // remove R from SRegs.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
+ if (!SRegs[*SR])
+ continue;
+ SRegs[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // Now, for each register that has a fixed stack slot, create the stack
+ // object for it.
+ CSI.clear();
+
+ typedef TargetFrameLowering::SpillSlot SpillSlot;
+ unsigned NumFixed;
+ int MinOffset = 0; // CS offsets are negative.
+ const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
+ for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
+ if (!SRegs[S->Reg])
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
+ MinOffset = std::min(MinOffset, S->Offset);
+ CSI.push_back(CalleeSavedInfo(S->Reg, FI));
+ SRegs[S->Reg] = false;
+ }
+
+ // There can be some registers that don't have fixed slots. For example,
+ // we need to store R0-R3 in functions with exception handling. For each
+ // such register, create a non-fixed stack object.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
+ int Off = MinOffset - RC->getSize();
+ unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
+ assert(isPowerOf2_32(Align));
+ Off &= -Align;
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
+ MinOffset = std::min(MinOffset, Off);
+ CSI.push_back(CalleeSavedInfo(R, FI));
+ SRegs[R] = false;
+ }
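+ // E.g. with MinOffset = -48 and a 4-byte register, Off becomes -52
+ // (already 4-byte aligned), and MinOffset advances to -52.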
+
+ DEBUG({
+ dbgs() << "CS information: {";
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ int FI = CSI[i].getFrameIdx();
+ int Off = MFI->getObjectOffset(FI);
+ dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
+ if (Off >= 0)
+ dbgs() << '+';
+ dbgs() << Off;
}
+ dbgs() << " }\n";
+ });
+
+#ifndef NDEBUG
+ // Verify that all registers were handled.
+ bool MissedReg = false;
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << PrintReg(R, TRI) << ' ';
+ MissedReg = true;
}
+ if (MissedReg)
+ llvm_unreachable("...there are unhandled callee-saved registers!");
+#endif
+
return true;
}
-void HexagonFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
- // Hexagon_TODO: add code
- } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
- // Hexagon_TODO: add code
- } else {
- llvm_unreachable("Cannot handle this call frame pseudo instruction");
+void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
+ const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
+ MachineBasicBlock &MB = *AI->getParent();
+ DebugLoc DL = AI->getDebugLoc();
+ unsigned A = AI->getOperand(2).getImm();
+
+ // Have
+ // Rd = alloca Rs, #A
+ //
+ // If Rs and Rd are different registers, use this sequence:
+ // Rd = sub(r29, Rs)
+ // r29 = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = and(r29, #-A) ; if necessary
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
+ // otherwise, do
+ // Rd = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = Rd
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
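+ //
+ // For instance (a sketch), "Rd = alloca Rs, #16" with CF = 8 and
+ // Rs != Rd expands to:
+ //   Rd = sub(r29, Rs)
+ //   r29 = sub(r29, Rs)
+ //   Rd = and(Rd, #-16)
+ //   r29 = and(r29, #-16)
+ //   Rd = add(Rd, #8)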
+
+ MachineOperand &RdOp = AI->getOperand(0);
+ MachineOperand &RsOp = AI->getOperand(1);
+ unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
+
+ // Rd = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
+ .addReg(SP)
+ .addReg(Rs);
+ if (Rs != Rd) {
+ // r29 = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(Rs);
+ }
+ if (A > 8) {
+ // Rd = and(Rd, #-A)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
+ .addReg(Rd)
+ .addImm(-int64_t(A));
+ if (Rs != Rd)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(A));
+ }
+ if (Rs == Rd) {
+ // r29 = Rd
+ BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
+ .addReg(Rd);
+ }
+ if (CF > 0) {
+ // Rd = add(Rd, #CF)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
+ .addReg(Rd)
+ .addImm(CF);
}
- MBB.erase(I);
}
-int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- return MF.getFrameInfo()->getObjectOffset(FI);
+
+bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!MFI->hasVarSizedObjects())
+ return false;
+ unsigned MaxA = MFI->getMaxAlignment();
+ if (MaxA <= getStackAlignment())
+ return false;
+ return true;
+}
+
+
+MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
+ for (auto &B : MF)
+ for (auto &I : B)
+ if (I.getOpcode() == Hexagon::ALIGNA)
+ return &I;
+ return nullptr;
+}
+
+
+inline static bool isOptSize(const MachineFunction &MF) {
+ AttributeSet AF = MF.getFunction()->getAttributes();
+ return AF.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+}
+
+inline static bool isMinSize(const MachineFunction &MF) {
+ AttributeSet AF = MF.getFunction()->getAttributes();
+ return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+}
+
+
+/// Determine whether the callee-saved register saves and restores should
+/// be generated via inline code. If this function returns "true", inline
+/// code will be generated. If this function returns "false", additional
+/// checks are performed, which may still lead to inline code.
+bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
+ return true;
+ if (!isOptSize(MF) && !isMinSize(MF))
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
+ return true;
+
+ // Check if CSI only has double registers, and if the registers form
+ // a contiguous block starting from D8.
+ BitVector Regs(Hexagon::NUM_TARGET_REGS);
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ if (!Hexagon::DoubleRegsRegClass.contains(R))
+ return true;
+ Regs[R] = true;
+ }
+ int F = Regs.find_first();
+ if (F != Hexagon::D8)
+ return true;
+ while (F >= 0) {
+ int N = Regs.find_next(F);
+ if (N >= 0 && N != F+1)
+ return true;
+ F = N;
+ }
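+ // E.g. CSI = { D8, D9, D10 } falls through to the final "return false"
+ // (a spill function may be used), while { D9, D10 } or { D8, D10 }
+ // returns true and forces inline code.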
+
+ return false;
+}
+
+
+bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ if (NumCSI <= 1)
+ return false;
+
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
+}
+
+
+bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 2d6b457..89500cb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -15,35 +15,88 @@
namespace llvm {
-class HexagonFrameLowering : public TargetFrameLowering {
-private:
- void determineFrameLayout(MachineFunction &MF) const;
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
+class HexagonFrameLowering : public TargetFrameLowering {
public:
- explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {}
+ explicit HexagonFrameLowering()
+ : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {}
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ // All of the prolog/epilog functionality, including saving and restoring
+ // callee-saved registers, is handled in emitPrologue. This is to have the
+ // logic for shrink-wrapping in one place.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override {}
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
-
- void
- eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
- bool
- restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ bool targetHandlesStackFrameRounding() const override {
+ return true;
+ }
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
bool hasFP(const MachineFunction &MF) const override;
- bool hasTailCall(MachineBasicBlock &MBB) const;
+
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
+ const override {
+ static const SpillSlot Offsets[] = {
+ { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 },
+ { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 },
+ { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 },
+ { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 },
+ { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 },
+ { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 }
+ };
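+ // Note the pairing in the table above: D8 is R17:16, so it shares the
+ // -8 slot with R16; likewise D9..D13 pair with R19:18 through R27:26.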
+ NumEntries = array_lengthof(Offsets);
+ return Offsets;
+ }
+
+ bool assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
+ const override;
+
+ bool needsAligna(const MachineFunction &MF) const;
+ MachineInstr *getAlignaInstr(MachineFunction &MF) const;
+
+private:
+ typedef std::vector<CalleeSavedInfo> CSIVect;
+
+ void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII,
+ unsigned SP, unsigned CF) const;
+ void insertPrologueInBlock(MachineBasicBlock &MBB) const;
+ void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
+ bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+ bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+
+ void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const;
+ bool replacePredRegPseudoSpillCode(MachineFunction &MF) const;
+ bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const;
+
+ void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
+ MachineBasicBlock *&EpilogB) const;
+
+ bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
+ bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 637b0a0..db72899 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -21,14 +21,13 @@
// - Countable loops (w/ ind. var for a trip count)
// - Assumes loops are normalized by IndVarSimplify
// - Try inner-most loops first
-// - No nested hardware loops.
// - No function calls in loops.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallSet.h"
#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -49,9 +48,18 @@ using namespace llvm;
#define DEBUG_TYPE "hwloops"
#ifndef NDEBUG
-static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
+
+// Option to create preheader only for a specific function.
+static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
+ cl::init(""));
#endif
+// Option to create a preheader if one doesn't exist.
+static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
+ cl::Hidden, cl::init(true),
+ cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
namespace llvm {
@@ -64,9 +72,7 @@ namespace {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
MachineDominatorTree *MDT;
- const HexagonTargetMachine *TM;
const HexagonInstrInfo *TII;
- const HexagonRegisterInfo *TRI;
#ifndef NDEBUG
static int Counter;
#endif
@@ -89,14 +95,16 @@ namespace {
}
private:
+ typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
+
/// Kinds of comparisons in the compare instructions.
struct Comparison {
enum Kind {
EQ = 0x01,
NE = 0x02,
- L = 0x04, // Less-than property.
- G = 0x08, // Greater-than property.
- U = 0x40, // Unsigned property.
+ L = 0x04,
+ G = 0x08,
+ U = 0x40,
LTs = L,
LEs = L | EQ,
GTs = G,
@@ -113,6 +121,23 @@ namespace {
return (Kind)(Cmp ^ (L|G));
return Cmp;
}
+
+ static Kind getNegatedComparison(Kind Cmp) {
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)((Cmp ^ (L | G)) ^ EQ);
+ if ((Cmp & NE) || (Cmp & EQ))
+ return (Kind)(Cmp ^ (EQ | NE));
+ return (Kind)0;
+ }
+
+ static bool isSigned(Kind Cmp) {
+ return (Cmp & (L | G) && !(Cmp & U));
+ }
+
+ static bool isUnsigned(Kind Cmp) {
+ return (Cmp & U);
+ }
+
};
/// \brief Find the register that contains the loop controlling
@@ -130,6 +155,12 @@ namespace {
bool findInductionRegister(MachineLoop *L, unsigned &Reg,
int64_t &IVBump, MachineInstr *&IVOp) const;
+ /// \brief Return the comparison kind for the specified opcode.
+ Comparison::Kind getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *Endvalue,
+ int64_t IVBump) const;
+
/// \brief Analyze the statements in a loop to determine if the loop
/// has a computable trip count and, if so, return a value that represents
/// the trip count expression.
@@ -143,24 +174,22 @@ namespace {
/// If the trip count is not directly available (as an immediate value,
/// or a register), the function will attempt to insert computation of it
/// to the loop's preheader.
- CountValue *computeCount(MachineLoop *Loop,
- const MachineOperand *Start,
- const MachineOperand *End,
- unsigned IVReg,
- int64_t IVBump,
- Comparison::Kind Cmp) const;
+ CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
+ const MachineOperand *End, unsigned IVReg,
+ int64_t IVBump, Comparison::Kind Cmp) const;
/// \brief Return true if the instruction is not valid within a hardware
/// loop.
- bool isInvalidLoopOperation(const MachineInstr *MI) const;
+ bool isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const;
/// \brief Return true if the loop contains an instruction that inhibits
/// using the hardware loop.
- bool containsInvalidInstruction(MachineLoop *L) const;
+ bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
/// \brief Given a loop, check if we can convert it to a hardware loop.
/// If so, then perform the conversion and return true.
- bool convertToHardwareLoop(MachineLoop *L);
+ bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
/// \brief Return true if the instruction is now dead.
bool isDead(const MachineInstr *MI,
@@ -175,14 +204,44 @@ namespace {
/// defined. If the instructions are out of order, try to reorder them.
bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
- /// \brief Get the instruction that loads an immediate value into \p R,
- /// or 0 if such an instruction does not exist.
- MachineInstr *defWithImmediate(unsigned R);
+ /// \brief Return true if MO and MI pair is visited only once. If visited
+ /// more than once, this indicates there is recursion. In such a case,
+ /// return false.
+ bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the Phi may generate a value that may underflow,
+ /// or may wrap.
+ bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the induction variable may underflow an unsigned
+ /// value in the first iteration.
+ bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
+ const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Check if the given operand has a compile-time known constant
+ /// value. Return true if yes, and false otherwise. When returning true, set
+ /// Val to the corresponding constant value.
+ bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
+
+ /// \brief Check if the operand has a compile-time known constant value.
+ bool isImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ return checkForImmediate(MO, V);
+ }
- /// \brief Get the immediate value referenced to by \p MO, either for
- /// immediate operands, or for register operands, where the register
- /// was defined with an immediate value.
- int64_t getImmediate(MachineOperand &MO);
+ /// \brief Return the immediate for the specified operand.
+ int64_t getImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ if (!checkForImmediate(MO, V))
+ llvm_unreachable("Invalid operand");
+ return V;
+ }
/// \brief Reset the given machine operand to now refer to a new immediate
/// value. Assumes that the operand was already referencing an immediate
@@ -265,9 +324,7 @@ namespace {
return Contents.ImmVal;
}
- void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
- const TargetRegisterInfo *TRI =
- TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
if (isImm()) { OS << Contents.ImmVal; }
}
@@ -282,18 +339,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
"Hexagon Hardware Loops", false, false)
-
-/// \brief Returns true if the instruction is a hardware loop instruction.
-static bool isHardwareLoop(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::J2_loop0r ||
- MI->getOpcode() == Hexagon::J2_loop0i;
-}
-
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
-
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
@@ -302,22 +351,30 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
- TII = static_cast<const HexagonInstrInfo *>(
- TM->getSubtargetImpl()->getInstrInfo());
- TRI = static_cast<const HexagonRegisterInfo *>(
- TM->getSubtargetImpl()->getRegisterInfo());
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I) {
- MachineLoop *L = *I;
- if (!L->getParentLoop())
- Changed |= convertToHardwareLoop(L);
- }
+ for (auto &L : *MLI)
+ if (!L->getParentLoop()) {
+ bool L0Used = false;
+ bool L1Used = false;
+ Changed |= convertToHardwareLoop(L, L0Used, L1Used);
+ }
return Changed;
}
+/// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
+/// return the exiting block. Return 'null' when multiple exiting blocks are
+/// present.
+static MachineBasicBlock* getExitingBlock(MachineLoop *L) {
+ if (MachineBasicBlock *Latch = L->getLoopLatch()) {
+ if (L->isLoopExiting(Latch))
+ return Latch;
+ else
+ return L->getExitingBlock();
+ }
+ return nullptr;
+}
bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned &Reg,
@@ -327,7 +384,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Preheader = L->getLoopPreheader();
MachineBasicBlock *Latch = L->getLoopLatch();
- if (!Header || !Preheader || !Latch)
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!Header || !Preheader || !Latch || !ExitingBlock)
return false;
// This pair represents an induction register together with an immediate
@@ -357,15 +415,16 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned PhiOpReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
if (isAdd) {
- // If the register operand to the add is the PHI we're
- // looking at, this meets the induction pattern.
+ // If the register operand to the add is the PHI we're looking at, this
+ // meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
- if (MRI->getVRegDef(IndReg) == Phi) {
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
unsigned UpdReg = DI->getOperand(0).getReg();
- int64_t V = DI->getOperand(2).getImm();
IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
@@ -374,13 +433,13 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
SmallVector<MachineOperand,2> Cond;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
if (NotAnalyzed)
return false;
- unsigned CSz = Cond.size();
- assert (CSz == 1 || CSz == 2);
- unsigned PredR = Cond[CSz-1].getReg();
+ unsigned PredR, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
+ return false;
MachineInstr *PredI = MRI->getVRegDef(PredR);
if (!PredI->isCompare())
@@ -392,7 +451,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
CmpMask, CmpImm);
// Fail if the compare was not analyzed, or it's not comparing a register
// with an immediate value. Not checking the mask here, since we handle
- // the individual compare opcodes (including CMPb) later on.
+ // the individual compare opcodes (including A4_cmpb*) later on.
if (!CmpAnalyzed)
return false;
@@ -422,6 +481,44 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
return true;
}
+// Return the comparison kind for the specified opcode.
+HexagonHardwareLoops::Comparison::Kind
+HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *EndValue,
+ int64_t IVBump) const {
+ Comparison::Kind Cmp = (Comparison::Kind)0;
+ switch (CondOpc) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ Cmp = Comparison::EQ;
+ break;
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmpneqi:
+ Cmp = Comparison::NE;
+ break;
+ case Hexagon::C4_cmplte:
+ Cmp = Comparison::LEs;
+ break;
+ case Hexagon::C4_cmplteu:
+ Cmp = Comparison::LEu;
+ break;
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ Cmp = Comparison::GTu;
+ break;
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ Cmp = Comparison::GTs;
+ break;
+ default:
+ return (Comparison::Kind)0;
+ }
+ return Cmp;
+}
/// \brief Analyze the statements in a loop to determine if the loop has
/// a computable trip count and, if so, return a value that represents
@@ -431,7 +528,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
/// induction variable patterns that are used in the calculation for
/// the number of time the loop is executed.
CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
- SmallVectorImpl<MachineInstr *> &OldInsts) {
+ SmallVectorImpl<MachineInstr *> &OldInsts) {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
@@ -455,8 +552,8 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// Look for the cmp instruction to determine if we can get a useful trip
// count. The trip count can be either a register or an immediate. The
// location of the value depends upon the type (reg or imm).
- MachineBasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!ExitingBlock)
return nullptr;
unsigned IVReg = 0;
@@ -470,6 +567,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
MachineOperand *InitialValue = nullptr;
MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ MachineBasicBlock *Latch = L->getLoopLatch();
for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
if (MBB == Preheader)
@@ -482,7 +580,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
SmallVector<MachineOperand,2> Cond;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
if (NotAnalyzed)
return nullptr;
@@ -490,7 +588,18 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// TB must be non-null. If FB is also non-null, one of them must be
// the header. Otherwise, branch to TB could be exiting the loop, and
// the fall through can go to the header.
- assert (TB && "Latch block without a branch?");
+ assert (TB && "Exit block without a branch?");
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+ MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return nullptr;
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB : LFB;
+ }
assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
if (!TB || (FB && TB != Header && FB != Header))
return nullptr;
@@ -499,8 +608,10 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// to put imm(0), followed by P in the vector Cond.
// If TB is not the header, it means that the "not-taken" path must lead
// to the header.
- bool Negated = (Cond.size() > 1) ^ (TB != Header);
- unsigned PredReg = Cond[Cond.size()-1].getReg();
+ bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
+ unsigned PredReg, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
+ return nullptr;
MachineInstr *CondI = MRI->getVRegDef(PredReg);
unsigned CondOpc = CondI->getOpcode();
@@ -539,57 +650,13 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
if (!EndValue)
return nullptr;
- switch (CondOpc) {
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpeq:
- Cmp = !Negated ? Comparison::EQ : Comparison::NE;
- break;
- case Hexagon::C2_cmpgtui:
- case Hexagon::C2_cmpgtu:
- Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
- break;
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgt:
- Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
- break;
- // Very limited support for byte/halfword compares.
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPhEQri_V4: {
- if (IVBump != 1)
- return nullptr;
-
- int64_t InitV, EndV;
- // Since the comparisons are "ri", the EndValue should be an
- // immediate. Check it just in case.
- assert(EndValue->isImm() && "Unrecognized latch comparison");
- EndV = EndValue->getImm();
- // Allow InitialValue to be a register defined with an immediate.
- if (InitialValue->isReg()) {
- if (!defWithImmediate(InitialValue->getReg()))
- return nullptr;
- InitV = getImmediate(*InitialValue);
- } else {
- assert(InitialValue->isImm());
- InitV = InitialValue->getImm();
- }
- if (InitV >= EndV)
- return nullptr;
- if (CondOpc == Hexagon::CMPbEQri_V4) {
- if (!isInt<8>(InitV) || !isInt<8>(EndV))
- return nullptr;
- } else { // Hexagon::CMPhEQri_V4
- if (!isInt<16>(InitV) || !isInt<16>(EndV))
- return nullptr;
- }
- Cmp = !Negated ? Comparison::EQ : Comparison::NE;
- break;
- }
- default:
- return nullptr;
- }
-
+ Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
+ if (!Cmp)
+ return nullptr;
+ if (Negated)
+ Cmp = Comparison::getNegatedComparison(Cmp);
if (isSwapped)
- Cmp = Comparison::getSwappedComparison(Cmp);
+ Cmp = Comparison::getSwappedComparison(Cmp);
if (InitialValue->isReg()) {
unsigned R = InitialValue->getReg();
@@ -603,6 +670,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header))
return nullptr;
+ OldInsts.push_back(MRI->getVRegDef(R));
}
return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
@@ -626,32 +694,45 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If so, use the immediate value rather than the register.
if (Start->isReg()) {
const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
- if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi)
+ if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
Start = &StartValInstr->getOperand(1);
}
if (End->isReg()) {
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
- if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi)
+ if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
End = &EndValInstr->getOperand(1);
}
- assert (Start->isReg() || Start->isImm());
- assert (End->isReg() || End->isImm());
+ if (!Start->isReg() && !Start->isImm())
+ return nullptr;
+ if (!End->isReg() && !End->isImm())
+ return nullptr;
bool CmpLess = Cmp & Comparison::L;
bool CmpGreater = Cmp & Comparison::G;
bool CmpHasEqual = Cmp & Comparison::EQ;
// Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
- // If loop executes while iv is "less" with the iv value going down, then
- // the iv must wrap.
if (CmpLess && IVBump < 0)
+ // Loop going while iv is "less" with the iv value going down. Must wrap.
return nullptr;
- // If loop executes while iv is "greater" with the iv value going up, then
- // the iv must wrap.
+
if (CmpGreater && IVBump > 0)
+ // Loop going while iv is "greater" with the iv value going up. Must wrap.
return nullptr;
+ // Phis that may feed into the loop.
+ LoopFeederMap LoopFeederPhi;
+
+ // Check if the initial value may be zero and can be decremented in the first
+ // iteration. If the value is zero, the endloop instruction will not decrement
+ // the loop counter, so we shouldn't generate a hardware loop in this case.
+ if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
+ LoopFeederPhi))
+ return nullptr;
+
if (Start->isImm() && End->isImm()) {
// Both, start and end are immediates.
int64_t StartV = Start->getImm();
@@ -674,14 +755,16 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
if (CmpHasEqual)
Dist = Dist > 0 ? Dist+1 : Dist-1;
- // assert (CmpLess => Dist > 0);
- assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
- // assert (CmpGreater => Dist < 0);
- assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+ // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
+ // CmpGreater should imply Dist < 0. These conditions could actually
+ // fail, for example, in unreachable code (which may still appear to be
+ // reachable in the CFG).
+ if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
+ return nullptr;
// "Normalized" distance, i.e. with the bump set to +-1.
- int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
- : (-Dist + (-IVBump-1)) / (-IVBump);
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
+ : (-Dist + (-IVBump - 1)) / (-IVBump);
assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
uint64_t Count = Dist1;
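
For reference, the immediate/immediate trip-count math above is a ceiling
division of the iteration distance by the bump. A minimal standalone sketch of
that arithmetic (a hand-written illustration in plain C++, not part of the
patch; no LLVM dependencies):

    #include <cassert>
    #include <cstdint>

    // Illustrative reimplementation of the immediate/immediate trip-count
    // math: Dist is the signed distance from Start to End, IVBump the
    // nonzero per-iteration increment, CmpHasEqual whether the exit compare
    // includes equality (<= or >=), which adds one iteration.
    static uint64_t tripCount(int64_t StartV, int64_t EndV, int64_t IVBump,
                              bool CmpHasEqual) {
      int64_t Dist = EndV - StartV;
      if (CmpHasEqual)
        Dist = Dist > 0 ? Dist + 1 : Dist - 1;
      // Ceiling division with the bump normalized to +/-1.
      int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
                                   : (-Dist + (-IVBump - 1)) / (-IVBump);
      assert(Dist1 > 0 && "Distance and bump must have the same sign");
      return static_cast<uint64_t>(Dist1);
    }

    int main() {
      assert(tripCount(0, 10, 3, false) == 4);  // i = 0, 3, 6, 9
      assert(tripCount(10, 0, -2, true) == 6);  // i = 10, 8, 6, 4, 2, 0
    }
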
@@ -698,14 +781,15 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
   // Otherwise we'd need a general integer division.
- if (!isPowerOf2_64(abs64(IVBump)))
+ if (!isPowerOf2_64(std::abs(IVBump)))
return nullptr;
MachineBasicBlock *PH = Loop->getLoopPreheader();
assert (PH && "Should have a preheader by now");
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
- DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
- : DebugLoc();
+ DebugLoc DL;
+ if (InsertPos != PH->end())
+ DL = InsertPos->getDebugLoc();
// If Start is an immediate and End is a register, the trip count
// will be "reg - imm". Hexagon's "subtract immediate" instruction
@@ -782,23 +866,37 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
DistSR = End->getSubReg();
} else {
const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
- (RegToImm ? TII->get(Hexagon::SUB_ri) :
- TII->get(Hexagon::ADD_ri));
- unsigned SubR = MRI->createVirtualRegister(IntRC);
- MachineInstrBuilder SubIB =
- BuildMI(*PH, InsertPos, DL, SubD, SubR);
-
- if (RegToReg) {
- SubIB.addReg(End->getReg(), 0, End->getSubReg())
- .addReg(Start->getReg(), 0, Start->getSubReg());
- } else if (RegToImm) {
- SubIB.addImm(EndV)
- .addReg(Start->getReg(), 0, Start->getSubReg());
- } else { // ImmToReg
- SubIB.addReg(End->getReg(), 0, End->getSubReg())
- .addImm(-StartV);
+ (RegToImm ? TII->get(Hexagon::A2_subri) :
+ TII->get(Hexagon::A2_addi));
+ if (RegToReg || RegToImm) {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg)
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ else
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ DistR = SubR;
+ } else {
+ // If the loop has been unrolled, we should use the original loop count
+      // instead of recalculating the value. This avoids an additional
+      // 'Add' instruction.
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
+ EndValInstr->getOperand(2).getImm() == StartV) {
+ DistR = EndValInstr->getOperand(1).getReg();
+ } else {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ DistR = SubR;
+ }
}
- DistR = SubR;
DistSR = 0;
}
@@ -811,7 +909,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
} else {
// Generate CountR = ADD DistR, AdjVal
unsigned AddR = MRI->createVirtualRegister(IntRC);
- const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+ MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
BuildMI(*PH, InsertPos, DL, AddD, AddR)
.addReg(DistR, 0, DistSR)
.addImm(AdjV);
@@ -844,50 +942,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
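
When the endpoints are registers, computeCount() materializes the same
arithmetic as instructions: a subtract for the distance, an add for the
adjustment, and a shift in place of the division (which is why bumps that are
not powers of two bail out above). A rough scalar model follows; the
adjustment value and the final shift are assumptions standing in for what the
pass derives from the comparison kind, not code taken from the patch:

    #include <cassert>
    #include <cstdint>

    // Rough scalar model of the register-operand path in computeCount().
    // Adj and Log2Bump are illustrative parameters: the pass derives the
    // adjustment from the comparison kind and requires |IVBump| to be a
    // power of two so the division becomes a shift.
    static uint64_t countFromRegs(int64_t End, int64_t Start, int64_t Adj,
                                  unsigned Log2Bump) {
      int64_t Dist = End - Start;                          // A2_sub/A2_subri
      int64_t Adjusted = Dist + Adj;                       // A2_addi
      return static_cast<uint64_t>(Adjusted) >> Log2Bump;  // shift, not div
    }

    int main() {
      // for (i = 0; i < 16; i += 4) runs 4 times: (16 - 0 + 0) >> 2 == 4.
      assert(countFromRegs(16, 0, 0, 2) == 4);
    }
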
-
 /// \brief Return true if the operation is invalid within a hardware loop.
-bool HexagonHardwareLoops::isInvalidLoopOperation(
- const MachineInstr *MI) const {
+bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const {
- // call is not allowed because the callee may use a hardware loop
- if (MI->getDesc().isCall())
+  // A call is not allowed because the callee may use a hardware loop, except
+  // for the case when the call never returns.
+ if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
return true;
- // do not allow nested hardware loops
- if (isHardwareLoop(MI))
- return true;
-
- // check if the instruction defines a hardware loop register
+ // Check if the instruction defines a hardware loop register.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned R = MO.getReg();
- if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
- R == Hexagon::SA0 || R == Hexagon::SA1)
+ if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
+ R == Hexagon::LC1 || R == Hexagon::SA1))
+ return true;
+ if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
return true;
}
return false;
}
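
Hexagon provides two sets of hardware-loop registers, LC0/SA0 and LC1/SA1,
and the check above rejects any definition that would clobber a set the loop
still depends on: an inner loop must keep all four intact (it owns LC0/SA0
and may be nested inside an outer loop owning LC1/SA1), while an outer loop
only needs LC1/SA1. A tiny standalone model of that predicate (the enum and
function are illustrative, not LLVM types):

    #include <cassert>

    // Illustrative model of the hardware-loop register clobber check.
    enum HwLoopReg { LC0, SA0, LC1, SA1, OTHER };

    static bool defClobbersHwLoop(HwLoopReg R, bool IsInnerHWLoop) {
      if (IsInnerHWLoop)
        return R == LC0 || R == SA0 || R == LC1 || R == SA1;
      return R == LC1 || R == SA1;
    }

    int main() {
      assert(defClobbersHwLoop(LC0, /*IsInnerHWLoop=*/true));
      assert(!defClobbersHwLoop(LC0, /*IsInnerHWLoop=*/false));
      assert(defClobbersHwLoop(SA1, /*IsInnerHWLoop=*/false));
    }
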
-
-/// \brief - Return true if the loop contains an instruction that inhibits
-/// the use of the hardware loop function.
-bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop instruction.
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
+ bool IsInnerHWLoop) const {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI))
+ if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+ DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
return true;
+ }
}
}
return false;
}
-
/// \brief Returns true if the instruction is dead. This was essentially
/// copied from DeadMachineInstructionElim::isDead, but with special cases
/// for inline asm, physical registers and instructions with side effects
@@ -928,7 +1026,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
MachineOperand &Use = *J;
MachineInstr *UseMI = Use.getParent();
- // If the phi node has a user that is not MI, bail...
+ // If the phi node has a user that is not MI, bail.
if (MI != UseMI)
return false;
}
@@ -965,8 +1063,6 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
continue;
if (Use.isDebug())
UseMI->getOperand(0).setReg(0U);
- // This may also be a "instr -> phi -> instr" case which can
- // be removed too.
}
}
@@ -984,19 +1080,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
///
/// The code makes several assumptions about the representation of the loop
/// in llvm.
-bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
+ bool &RecL0used,
+ bool &RecL1used) {
// This is just for sanity.
assert(L->getHeader() && "Loop without a header?");
bool Changed = false;
+ bool L0Used = false;
+ bool L1Used = false;
+
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= convertToHardwareLoop(*I);
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
+ L0Used |= RecL0used;
+ L1Used |= RecL1used;
+ }
// If a nested loop has been converted, then we can't convert this loop.
- if (Changed)
+ if (Changed && L0Used && L1Used)
return Changed;
+ unsigned LOOP_i;
+ unsigned LOOP_r;
+ unsigned ENDLOOP;
+
+ // Flag used to track loopN instruction:
+  // 1 - Hardware loop is being generated for the innermost loop.
+ // 0 - Hardware loop is being generated for the outer loop.
+ unsigned IsInnerHWLoop = 1;
+
+ if (L0Used) {
+ LOOP_i = Hexagon::J2_loop1i;
+ LOOP_r = Hexagon::J2_loop1r;
+ ENDLOOP = Hexagon::ENDLOOP1;
+ IsInnerHWLoop = 0;
+ } else {
+ LOOP_i = Hexagon::J2_loop0i;
+ LOOP_r = Hexagon::J2_loop0r;
+ ENDLOOP = Hexagon::ENDLOOP0;
+ }
+
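
The selection above reads: if a nested loop already claimed the loop0
resources, emit the loop1 variants and treat this loop as the outer one. A
compact hand-written sketch (the struct and function are illustrative; only
the opcode names come from the patch):

    #include <cassert>
    #include <cstring>

    // Illustrative model of the loop0/loop1 opcode selection.
    struct HwLoopOpcodes { const char *LoopI, *LoopR, *EndLoop; bool Inner; };

    static HwLoopOpcodes pickOpcodes(bool L0Used) {
      if (L0Used)  // Inner resources taken: fall back to the outer set.
        return { "J2_loop1i", "J2_loop1r", "ENDLOOP1", false };
      return { "J2_loop0i", "J2_loop0r", "ENDLOOP0", true };
    }

    int main() {
      assert(pickOpcodes(false).Inner);
      assert(std::strcmp(pickOpcodes(true).EndLoop, "ENDLOOP1") == 0);
    }
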
#ifndef NDEBUG
// Stop trying after reaching the limit (if any).
int Limit = HWLoopLimit;
@@ -1008,14 +1132,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
#endif
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L))
+ if (containsInvalidInstruction(L, IsInnerHWLoop))
return false;
- // Is the induction variable bump feeding the latch condition?
- if (!fixupInductionVariable(L))
- return false;
-
- MachineBasicBlock *LastMBB = L->getExitingBlock();
+ MachineBasicBlock *LastMBB = getExitingBlock(L);
// Don't generate hw loop if the loop has more than one exit.
if (!LastMBB)
return false;
@@ -1024,16 +1144,19 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
if (LastI == LastMBB->end())
return false;
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
+ return false;
+
// Ensure the loop has a preheader: the loop instruction will be
// placed there.
- bool NewPreheader = false;
MachineBasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = createPreheaderForLoop(L);
if (!Preheader)
return false;
- NewPreheader = true;
}
+
MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
SmallVector<MachineInstr*, 2> OldInsts;
@@ -1048,31 +1171,30 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// so make sure that the register is actually defined at that point.
MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
MachineBasicBlock *BBDef = TCDef->getParent();
- if (!NewPreheader) {
- if (!MDT->dominates(BBDef, Preheader))
- return false;
- } else {
- // If we have just created a preheader, the dominator tree won't be
- // aware of it. Check if the definition of the register dominates
- // the header, but is not the header itself.
- if (!MDT->properlyDominates(BBDef, L->getHeader()))
- return false;
- }
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
}
// Determine the loop start.
- MachineBasicBlock *LoopStart = L->getTopBlock();
- if (L->getLoopLatch() != LastMBB) {
- // When the exit and latch are not the same, use the latch block as the
- // start.
- // The loop start address is used only after the 1st iteration, and the
- // loop latch may contains instrs. that need to be executed after the
- // first iteration.
- LoopStart = L->getLoopLatch();
- // Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart))
+ MachineBasicBlock *TopBlock = L->getTopBlock();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *LoopStart = 0;
+ if (ExitingBlock != L->getLoopLatch()) {
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand, 2> Cond;
+
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false))
+ return false;
+
+ if (L->contains(TB))
+ LoopStart = TB;
+ else if (L->contains(FB))
+ LoopStart = FB;
+ else
return false;
}
+ else
+ LoopStart = TopBlock;
// Convert the loop to a hardware loop.
DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
@@ -1086,8 +1208,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
.addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
- .addMBB(LoopStart)
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
.addReg(CountReg);
} else {
assert(TripCount->isImm() && "Expecting immediate value for trip count");
@@ -1095,14 +1216,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// if the immediate fits in the instructions. Otherwise, we need to
// create a new virtual register.
int64_t CountImm = TripCount->getImm();
- if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) {
+ if (!TII->isValidOffset(LOOP_i, CountImm)) {
unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
.addImm(CountImm);
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
.addMBB(LoopStart).addReg(CountReg);
} else
- BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i))
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
.addMBB(LoopStart).addImm(CountImm);
}
@@ -1116,8 +1237,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// Replace the loop branch with an endloop instruction.
DebugLoc LastIDL = LastI->getDebugLoc();
- BuildMI(*LastMBB, LastI, LastIDL,
- TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+ BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
@@ -1145,10 +1265,18 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
removeIfDead(OldInsts[i]);
++NumHWLoops;
+
+  // Set RecL1used and RecL0used only after the hardware loop has been
+  // successfully generated. Doing it earlier can cause the wrong loop
+  // instruction to be used.
+ if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+ RecL1used = true;
+ else
+ RecL0used = true;
+
return true;
}
-
bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
MachineInstr *CmpI) {
assert (BumpI != CmpI && "Bump and compare in the same instruction?");
@@ -1189,35 +1317,226 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
return FoundBump;
}
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstr only once. If we attempt to visit more than once, then
+/// there is recursion, and we return false.
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+ MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const {
+ if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+ // Ignore all BBs that form Loop.
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ if (A == MBB)
+ return false;
+ }
+ MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+ LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+ return true;
+ } else
+ // Already visited node.
+ return false;
+}
+
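
The map above is the standard visited-set trick: a register's definition is
analyzed only on its first visit, and a repeat visit signals a PHI cycle.
Stripped of the LLVM types, the idea is (illustrative sketch, using an int as
the register id):

    #include <cassert>
    #include <unordered_map>

    // Record each register the first time its definition is visited; a
    // second visit means we are walking a PHI cycle and must stop.
    struct Visited {
      std::unordered_map<int, bool> Seen;
      bool firstVisit(int Reg) { return Seen.insert({Reg, true}).second; }
    };

    int main() {
      Visited V;
      assert(V.firstVisit(7));   // first visit: analyze the definition
      assert(!V.firstVisit(7));  // repeat visit: cycle detected, stop
    }
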
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+ MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+ MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+ assert(Phi->isPHI() && "Expecting a Phi.");
+ // Walk through each Phi, and its used operands. Make sure that
+ // if there is recursion in Phi, we won't generate hardware loops.
+ for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+ if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+ if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+ Phi->getParent(), L, LoopFeederPhi))
+ return true;
+ return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in the
+/// first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+ const MachineOperand *InitVal, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const {
+ // Only check register values since they are unknown.
+ if (!InitVal->isReg())
+ return false;
+
+ if (!EndVal->isImm())
+ return false;
+
+ // A register value that is assigned an immediate is a known value, and it
+ // won't underflow in the first iteration.
+ int64_t Imm;
+ if (checkForImmediate(*InitVal, Imm))
+ return (EndVal->getImm() == Imm);
+
+ unsigned Reg = InitVal->getReg();
+
+ // We don't know the value of a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return true;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def)
+ return true;
-MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+ // If the initial value is a Phi or copy and the operands may not underflow,
+ // then the definition cannot be underflow either.
+ if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+ if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
+ EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+
+ // Iterate over the uses of the initial value. If the initial value is used
+ // in a compare, then we assume this is a range check that ensures the loop
+ // doesn't underflow. This is not an exact test and should be improved.
+ for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
+ E = MRI->use_instr_nodbg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpMask = 0, CmpValue = 0;
+
+ if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 2> Cond;
+ if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
+ continue;
+
+ Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
+ if (Cmp == 0)
+ continue;
+ if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
+ Cmp = Comparison::getNegatedComparison(Cmp);
+ if (CmpReg2 != 0 && CmpReg2 == Reg)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ // Signed underflow is undefined.
+ if (Comparison::isSigned(Cmp))
+ return false;
+
+    // Check if there is a comparison of the initial value. If the initial value
+ // is greater than or not equal to another value, then assume this is a
+ // range check.
+ if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
+ return false;
+ }
+
+ // OK - this is a hack that needs to be improved. We really need to analyze
+ // the instructions performed on the initial value. This works on the simplest
+ // cases only.
+ if (!Def->isCopy() && !Def->isPHI())
+ return false;
+
+ return true;
+}
+
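
For intuition, this is the shape of loop the analysis guards against (a
hand-written example, not taken from the patch): an unsigned do-while counter
that may start at zero wraps on its first decrement, so a hardware loop
seeded with that count would run on the order of 2^32 iterations.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t n = 0;  // in the real scenario, unknown at compile time
      uint32_t i = n, iters = 0;
      do {
        ++iters;
        --i;           // 0 - 1 wraps to 0xFFFFFFFF
      } while (i != 0 && iters < 5);  // cap added so the example terminates
      assert(iters == 5);  // without the cap, ~2^32 iterations
      return 0;
    }
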
+bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
+ int64_t &Val) const {
+ if (MO.isImm()) {
+ Val = MO.getImm();
+ return true;
+ }
+ if (!MO.isReg())
+ return false;
+
+ // MO is a register. Check whether it is defined as an immediate value,
+ // and if so, get the value of it in TV. That value will then need to be
+ // processed to handle potential subregisters in MO.
+ int64_t TV;
+
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return false;
MachineInstr *DI = MRI->getVRegDef(R);
unsigned DOpc = DI->getOpcode();
switch (DOpc) {
+ case TargetOpcode::COPY:
case Hexagon::A2_tfrsi:
case Hexagon::A2_tfrpi:
case Hexagon::CONST32_Int_Real:
- case Hexagon::CONST64_Int_Real:
- return DI;
- }
- return nullptr;
-}
+ case Hexagon::CONST64_Int_Real: {
+ // Call recursively to avoid an extra check whether operand(1) is
+ // indeed an immediate (it could be a global address, for example),
+ // plus we can handle COPY at the same time.
+ if (!checkForImmediate(DI->getOperand(1), TV))
+ return false;
+ break;
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S2 = DI->getOperand(2);
+ int64_t V1, V2;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
+ return false;
+ TV = V2 | (V1 << 32);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S3 = DI->getOperand(3);
+ int64_t V1, V3;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
+ return false;
+ unsigned Sub2 = DI->getOperand(2).getImm();
+ unsigned Sub4 = DI->getOperand(4).getImm();
+ if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
+ TV = V1 | (V3 << 32);
+ else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
+ TV = V3 | (V1 << 32);
+ else
+ llvm_unreachable("Unexpected form of REG_SEQUENCE");
+ break;
+ }
+ default:
+ return false;
+ }
-int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isReg());
- unsigned R = MO.getReg();
- MachineInstr *DI = defWithImmediate(R);
- assert(DI && "Need an immediate operand");
- // All currently supported "define-with-immediate" instructions have the
- // actual immediate value in the operand(1).
- int64_t v = DI->getOperand(1).getImm();
- return v;
+  // By now, we should have successfully obtained the immediate value defining
+ // the register referenced in MO. Handle a potential use of a subregister.
+ switch (MO.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Val = TV & 0xFFFFFFFFULL;
+ break;
+ case Hexagon::subreg_hireg:
+ Val = (TV >> 32) & 0xFFFFFFFFULL;
+ break;
+ default:
+ Val = TV;
+ break;
+ }
+ return true;
}
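
The subregister handling at the end mirrors reading a 64-bit register pair
through subreg_loreg or subreg_hireg. A standalone model (the enum is
illustrative; the masks and shift match the patch):

    #include <cassert>
    #include <cstdint>

    enum SubReg { NoSubReg, LoReg, HiReg };

    // A use through a subregister reads the corresponding 32-bit half of
    // the 64-bit value TV held by the pair register.
    static int64_t readThroughSubreg(int64_t TV, SubReg S) {
      switch (S) {
      case LoReg: return TV & 0xFFFFFFFFULL;
      case HiReg: return (TV >> 32) & 0xFFFFFFFFULL;
      default:    return TV;
      }
    }

    int main() {
      int64_t TV = (int64_t(0x12345678) << 32) | 0x9ABCDEF0;
      assert(readThroughSubreg(TV, LoReg) == 0x9ABCDEF0);
      assert(readThroughSubreg(TV, HiReg) == 0x12345678);
    }
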
-
void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
if (MO.isImm()) {
MO.setImm(Val);
@@ -1226,30 +1545,32 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
assert(MO.isReg());
unsigned R = MO.getReg();
- MachineInstr *DI = defWithImmediate(R);
- if (MRI->hasOneNonDBGUse(R)) {
- // If R has only one use, then just change its defining instruction to
- // the new immediate value.
- DI->getOperand(1).setImm(Val);
- return;
- }
+ MachineInstr *DI = MRI->getVRegDef(R);
const TargetRegisterClass *RC = MRI->getRegClass(R);
unsigned NewR = MRI->createVirtualRegister(RC);
MachineBasicBlock &B = *DI->getParent();
DebugLoc DL = DI->getDebugLoc();
- BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
- .addImm(Val);
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
MO.setReg(NewR);
}
+static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
+ // These two instructions are not extendable.
+ if (CmpOpc == Hexagon::A4_cmpbeqi)
+ return isUInt<8>(Imm);
+ if (CmpOpc == Hexagon::A4_cmpbgti)
+ return isInt<8>(Imm);
+ // The rest of the comparison-with-immediate instructions are extendable.
+ return true;
+}
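
The two predicates used above are LLVM's isUInt<8> and isInt<8> from
MathExtras.h. Reimplemented for illustration (assuming 0 < N < 64), they are
plain range checks: A4_cmpbeqi accepts an unsigned 8-bit immediate,
A4_cmpbgti a signed one, and neither can be constant-extended.

    #include <cassert>
    #include <cstdint>

    // Standalone equivalents of LLVM's isInt<N>/isUInt<N> (names changed
    // to avoid implying these are the LLVM templates).
    template <unsigned N> bool isIntN(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }
    template <unsigned N> bool isUIntN(int64_t x) {
      return x >= 0 && x < (INT64_C(1) << N);
    }

    int main() {
      assert(isUIntN<8>(255) && !isUIntN<8>(256) && !isUIntN<8>(-1));
      assert(isIntN<8>(-128) && isIntN<8>(127) && !isIntN<8>(128));
    }
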
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *Header = L->getHeader();
- MachineBasicBlock *Preheader = L->getLoopPreheader();
MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
- if (!Header || !Preheader || !Latch)
+ if (!(Header && Latch && ExitingBlock))
return false;
// These data structures follow the same concept as the corresponding
@@ -1277,15 +1598,16 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
unsigned PhiReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiReg);
unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
if (isAdd) {
// If the register operand to the add/sub is the PHI we are looking
// at, this meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
- if (MRI->getVRegDef(IndReg) == Phi) {
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
unsigned UpdReg = DI->getOperand(0).getReg();
- int64_t V = DI->getOperand(2).getImm();
IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
}
@@ -1298,17 +1620,38 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *TB = nullptr, *FB = nullptr;
SmallVector<MachineOperand,2> Cond;
// AnalyzeBranch returns true if it fails to analyze branch.
- bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
- if (NotAnalyzed)
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+ if (NotAnalyzed || Cond.empty())
return false;
- // Check if the latch branch is unconditional.
- if (Cond.empty())
- return false;
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+ MachineBasicBlock *LTB = 0, *LFB = 0;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return false;
- if (TB != Header && FB != Header)
- // The latch does not go back to the header. Not a latch we know and love.
- return false;
+ // Since latch is not the exiting block, the latch branch should be an
+ // unconditional branch to the loop header.
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB : LFB;
+ }
+ if (TB != Header) {
+ if (FB != Header) {
+ // The latch/exit block does not go back to the header.
+ return false;
+ }
+    // FB is the header (i.e., an unconditional jump to the loop header).
+    // In this case, LoopBody -> TB should not be a back edge; otherwise it
+    // could result in an infinite loop after conversion to hw_loop.
+ // This case can happen when the Latch has two jumps like this:
+ // Jmp_c OuterLoopHeader <-- TB
+ // Jmp InnerLoopHeader <-- FB
+ if (MDT->dominates(TB, FB))
+ return false;
+ }
// Expecting a predicate register as a condition. It won't be a hardware
// predicate register at this point yet, just a vreg.
@@ -1319,6 +1662,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (CSz != 1 && CSz != 2)
return false;
+ if (!Cond[CSz-1].isReg())
+ return false;
+
unsigned P = Cond[CSz-1].getReg();
MachineInstr *PredDef = MRI->getVRegDef(P);
@@ -1340,8 +1686,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (MO.isImplicit())
continue;
if (MO.isUse()) {
- unsigned R = MO.getReg();
- if (!defWithImmediate(R)) {
+ if (!isImmediate(MO)) {
CmpRegs.insert(MO.getReg());
continue;
}
@@ -1374,20 +1719,70 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// compared against an immediate, we can fix it.
const RegisterBump &RB = I->second;
if (CmpRegs.count(RB.first)) {
- if (!CmpImmOp)
+ if (!CmpImmOp) {
+ // If both operands to the compare instruction are registers, see if
+        // it can be changed to use the induction register as an operand.
+ MachineInstr *IndI = nullptr;
+ MachineInstr *nonIndI = nullptr;
+ MachineOperand *IndMO = nullptr;
+ MachineOperand *nonIndMO = nullptr;
+
+ for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(I->first)));
+ if (IndI)
+ return false;
+
+ IndI = MRI->getVRegDef(I->first);
+ IndMO = &MO;
+ } else if (MO.isReg()) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(MO.getReg())));
+ if (nonIndI)
+ return false;
+
+ nonIndI = MRI->getVRegDef(MO.getReg());
+ nonIndMO = &MO;
+ }
+ }
+ if (IndI && nonIndI &&
+ nonIndI->getOpcode() == Hexagon::A2_addi &&
+ nonIndI->getOperand(2).isImm() &&
+ nonIndI->getOperand(2).getImm() == - RB.second) {
+ bool Order = orderBumpCompare(IndI, PredDef);
+ if (Order) {
+ IndMO->setReg(I->first);
+ nonIndMO->setReg(nonIndI->getOperand(1).getReg());
+ return true;
+ }
+ }
return false;
+ }
+ // It is not valid to do this transformation on an unsigned comparison
+ // because it may underflow.
+ Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
+ if (!Cmp || Comparison::isUnsigned(Cmp))
+ return false;
+
+ // If the register is being compared against an immediate, try changing
+      // the compare instruction to use the induction register and adjust the
+ // immediate operand.
int64_t CmpImm = getImmediate(*CmpImmOp);
int64_t V = RB.second;
- if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
- return false;
- if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+ // Handle Overflow (64-bit).
+ if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
+ ((V < 0) && (CmpImm < INT64_MIN - V)))
return false;
CmpImm += V;
- // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
- // scenario, i.e. an 8-bit value.
- if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
- return false;
+      // Most comparisons of a register against an immediate value allow
+ // the immediate to be constant-extended. There are some exceptions
+ // though. Make sure the new combination will work.
+ if (CmpImmOp->isImm())
+ if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
+ return false;
// Make sure that the compare happens after the bump. Otherwise,
// after the fixup, the compare would use a yet-undefined register.
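
A note on the rewritten overflow guard a few lines up: the old form evaluated
CmpImm+V and then compared, which is undefined behavior in C++ when the
signed addition overflows; the new form tests the bounds before adding. A
standalone sketch of the guard:

    #include <cassert>
    #include <cstdint>

    // Overflow-safe signed addition test: check against INT64_MAX/INT64_MIN
    // before adding, since computing CmpImm + V first would be undefined
    // behavior on overflow.
    static bool addWouldOverflow(int64_t CmpImm, int64_t V) {
      return (V > 0 && CmpImm > INT64_MAX - V) ||
             (V < 0 && CmpImm < INT64_MIN - V);
    }

    int main() {
      assert(addWouldOverflow(INT64_MAX, 1));
      assert(addWouldOverflow(INT64_MIN, -1));
      assert(!addWouldOverflow(INT64_MAX - 5, 5));
    }
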
@@ -1411,19 +1806,27 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
return false;
}
-
/// \brief Create a preheader for a given loop.
MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
MachineLoop *L) {
if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
return TmpPH;
+ if (!HWCreatePreheader)
+ return nullptr;
+
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
MachineFunction *MF = Header->getParent();
DebugLoc DL;
- if (!Latch || Header->hasAddressTaken())
+#ifndef NDEBUG
+ if ((PHFn != "") && (PHFn != MF->getName()))
+ return nullptr;
+#endif
+
+ if (!Latch || !ExitingBlock || Header->hasAddressTaken())
return nullptr;
typedef MachineBasicBlock::instr_iterator instr_iterator;
@@ -1435,16 +1838,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
SmallVector<MachineOperand,2> Tmp1;
MachineBasicBlock *TB = nullptr, *FB = nullptr;
- if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
return nullptr;
for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
MachineBasicBlock *PB = *I;
- if (PB != Latch) {
- bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
- if (NotAnalyzed)
- return nullptr;
- }
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return nullptr;
}
MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
@@ -1453,7 +1854,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
// the loop latch. Any additional predecessors of the header should
- // join at the newly created preheader. Inspect all PHI nodes from the
+ // join at the newly created preheader. Inspect all PHI nodes from the
// header and create appropriate corresponding PHI nodes in the preheader.
for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
@@ -1473,11 +1874,14 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
// created PHI node in the preheader.
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
unsigned PredR = PN->getOperand(i).getReg();
+ unsigned PredRSub = PN->getOperand(i).getSubReg();
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
if (PredB == Latch)
continue;
- NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ MachineOperand MO = MachineOperand::CreateReg(PredR, false);
+ MO.setSubReg(PredRSub);
+ NewPN->addOperand(MO);
NewPN->addOperand(MachineOperand::CreateMBB(PredB));
}
@@ -1547,5 +1951,16 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
NewPH->addSuccessor(Header);
+ MachineLoop *ParentLoop = L->getParentLoop();
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
+
+ // Update the dominator information with the new preheader.
+ if (MDT) {
+ MachineDomTreeNode *HDom = MDT->getNode(Header);
+ MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
+ MDT->changeImmediateDominator(Header, NewPH);
+ }
+
return NewPH;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index ea3a177..7a213aa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -13,8 +13,11 @@
#include "Hexagon.h"
#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
@@ -45,51 +48,43 @@ namespace llvm {
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const HexagonSubtarget &Subtarget;
-
- // Keep a reference to HexagonTargetMachine.
- const HexagonTargetMachine& TM;
- DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+ const HexagonTargetMachine& HTM;
+ const HexagonSubtarget *HST;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(targetmachine, OptLevel),
- Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
- TM(targetmachine) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- bool hasNumUsesBelowThresGA(SDNode *N) const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ virtual void PreprocessISelDAG() override;
+ virtual void EmitFunctionEntryCode() override;
SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
- inline bool foldGlobalAddress(SDValue &N, SDValue &R);
- inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
- bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
- bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+ inline bool SelectAddrGA(SDValue &N, SDValue &R);
+ inline bool SelectAddrGP(SDValue &N, SDValue &R);
+ bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+ bool SelectAddrFI(SDValue &N, SDValue &R);
const char *getPassName() const override {
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
+ SDNode *SelectFrameIndex(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
-
SDNode *SelectLoad(SDNode *N);
SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -101,88 +96,98 @@ public:
SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
SDNode *SelectStore(SDNode *N);
SDNode *SelectSHL(SDNode *N);
- SDNode *SelectSelect(SDNode *N);
- SDNode *SelectTruncate(SDNode *N);
SDNode *SelectMul(SDNode *N);
SDNode *SelectZeroExtend(SDNode *N);
- SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
-
-// XformMskToBitPosU5Imm - Returns the bit position which
-// the single bit 32 bit mask represents.
-// Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
- int32_t bitPos;
- bitPos = Log2_32(Imm);
- assert(bitPos >= 0 && bitPos < 32 &&
- "Constant out of range for 32 BitPos Memops");
- return CurDAG->getTargetConstant(bitPos, MVT::i32);
-}
+ SDNode *SelectBitOp(SDNode *N);
+
+ // XformMskToBitPosU5Imm - Returns the bit position which
+ // the single bit 32 bit mask represents.
+ // Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, DL, MVT::i32);
+ }
-// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
+ // 16 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
-// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
+ // 8 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
-// Return true if there is exactly one bit set in V, i.e., if V is one of the
-// following integers: 2^0, 2^1, ..., 2^31.
-bool ImmIsSingleBit(uint32_t v) const {
- uint32_t c = CountPopulation_64(v);
- // Only return true if we counted 1 bit.
- return c == 1;
-}
+ // Return true if there is exactly one bit set in V, i.e., if V is one of the
+ // following integers: 2^0, 2^1, ..., 2^31.
+ bool ImmIsSingleBit(uint32_t v) const {
+ return isPowerOf2_32(v);
+ }
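
ImmIsSingleBit() is now a plain power-of-two test, and XformMskToBitPosU5Imm()
converts the accepted single-bit mask to its bit index with an integer log2
(isPowerOf2_32 and Log2_32 in LLVM). Equivalent standalone versions, for
illustration only:

    #include <cassert>
    #include <cstdint>

    // Exactly one bit set: nonzero, and clearing the lowest set bit
    // leaves zero.
    static bool isSingleBit(uint32_t v) { return v != 0 && (v & (v - 1)) == 0; }

    // Bit index of a single-bit mask, i.e. an integer log2.
    static unsigned bitPos(uint32_t mask) {
      assert(isSingleBit(mask) && "expected exactly one bit set");
      unsigned pos = 0;
      while (mask >>= 1)
        ++pos;
      return pos;
    }

    int main() {
      assert(isSingleBit(0x80) && !isSingleBit(0x81) && !isSingleBit(0));
      assert(bitPos(0x80) == 7);
    }
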
-// XformM5ToU5Imm - Return a target constant with the specified value, of type
-// i32 where the negative literal is transformed into a positive literal for
-// use in -= memops.
-inline SDValue XformM5ToU5Imm(signed Imm) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, MVT::i32);
-}
+ // XformM5ToU5Imm - Return a target constant with the specified value, of
+ // type i32 where the negative literal is transformed into a positive literal
+ // for use in -= memops.
+ inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
+ assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant( - Imm, DL, MVT::i32);
+ }
+ // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+ // [1..128], used in cmpb.gtu instructions.
+ inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8);
+ }
-// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
-// [1..128], used in cmpb.gtu instructions.
-inline SDValue XformU7ToU7M1Imm(signed Imm) {
- assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
-}
+ // XformS8ToS8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
-// XformS8ToS8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformSToSM1Imm(signed Imm) {
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformU8ToU8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
-// XformU8ToU8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformUToUM1Imm(unsigned Imm) {
- assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformSToSM2Imm - Return a target constant decremented by 2.
+ inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32);
+ }
+
+ // XformSToSM3Imm - Return a target constant decremented by 3.
+ inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32);
+ }
-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
-};
+ // Include the pieces autogenerated from the target description.
+ #include "HexagonGenDAGISel.inc"
+
+private:
+ bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
} // end anonymous namespace
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
+}
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -196,76 +201,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
}
-static bool IsS11_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}
-
-
-static bool IsS11_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}
-
-
-static bool IsS11_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}
-
-
-static bool IsS11_3_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}
-
-
-static bool IsU6_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}
-
-
-static bool IsU6_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}
-
-
-static bool IsU6_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}
-
-
 // Intrinsics that return a predicate.
static unsigned doesIntrinsicReturnPredicate(unsigned ID)
{
@@ -312,268 +247,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
}
}
-
-// Intrinsics that have predicate operands.
-static unsigned doesIntrinsicContainPredicate(unsigned ID)
-{
- switch (ID) {
- default:
- return 0;
- case Intrinsic::hexagon_C2_tfrpr:
- return Hexagon::C2_tfrpr;
- case Intrinsic::hexagon_C2_and:
- return Hexagon::C2_and;
- case Intrinsic::hexagon_C2_xor:
- return Hexagon::C2_xor;
- case Intrinsic::hexagon_C2_or:
- return Hexagon::C2_or;
- case Intrinsic::hexagon_C2_not:
- return Hexagon::C2_not;
- case Intrinsic::hexagon_C2_any8:
- return Hexagon::C2_any8;
- case Intrinsic::hexagon_C2_all8:
- return Hexagon::C2_all8;
- case Intrinsic::hexagon_C2_vitpack:
- return Hexagon::C2_vitpack;
- case Intrinsic::hexagon_C2_mask:
- return Hexagon::C2_mask;
- case Intrinsic::hexagon_C2_mux:
- return Hexagon::C2_mux;
-
- // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
- // that's how it's mapped in q6protos.h.
- case Intrinsic::hexagon_C2_muxir:
- return Hexagon::C2_muxri;
-
- // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
- // that's how it's mapped in q6protos.h.
- case Intrinsic::hexagon_C2_muxri:
- return Hexagon::C2_muxir;
-
- case Intrinsic::hexagon_C2_muxii:
- return Hexagon::C2_muxii;
- case Intrinsic::hexagon_C2_vmux:
- return Hexagon::VMUX_prr64;
- case Intrinsic::hexagon_S2_valignrb:
- return Hexagon::VALIGN_rrp;
- case Intrinsic::hexagon_S2_vsplicerb:
- return Hexagon::VSPLICE_rrp;
- }
-}
-
-
-static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
- if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
- return true;
- }
- if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
- return true;
- }
- if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
- return true;
- }
- if (MemType == MVT::i8 && isInt<11>(Offset)) {
- return true;
- }
- return false;
-}
-
-
-//
-// Try to lower loads of GlobalAdresses into base+offset loads. Custom
-// lowering for GlobalAddress nodes has already turned it into a
-// CONST32.
-//
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
- SDValue Chain = LD->getChain();
- SDNode* Const32 = LD->getBasePtr().getNode();
- unsigned Opcode = 0;
-
- if (Const32->getOpcode() == HexagonISD::CONST32 &&
- ISD::isNormalLoad(LD)) {
- SDValue Base = Const32->getOperand(0);
- EVT LoadedVT = LD->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
- // Figure out base + offset opcode
- if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
- else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
- else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
- else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
- else llvm_unreachable("unknown memory type");
-
- // Build indexed load.
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- MVT::Other,
- SDValue(NewBase,0),
- TargetConstOff,
- Chain);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(LD, Result);
- return Result;
- }
- }
-
- return SelectCode(LD);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
- MVT::Other, Base, TargetConst,
- Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
- SDValue(Result_1, 0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_2;
- }
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other, Base, TargetConst0,
+
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl,
- MVT::i64, SDValue(Result_1, 0));
- SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
- MVT::i32, Base, TargetConstVal,
- SDValue(Result_1, 1));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
+ SDValue(LD, 2) };
const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_3, 0),
- SDValue(Result_1, 1)
- };
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
return Result_2;
}
- return SelectCode(LD);
+
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::i32, MVT::Other, Base,
- TargetConstVal, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
- MVT::i64, MVT::Other,
- SDValue(Result_2,0),
- SDValue(Result_1,0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_3;
- }
- // Generate an indirect load.
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other,
- Base, TargetConst0, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
MVT::i64, MVT::Other,
- SDValue(Result_2,0),
+ TargetConst0,
SDValue(Result_1,0));
- // Add offset to base.
- SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
- Base, TargetConstVal,
- SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
- SDValue(Result_4, 0), // New address.
- SDValue(Result_1, 1)
- };
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
- return Result_3;
+ return Result_2;
}
- return SelectCode(LD);
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0), // Load value.
+ SDValue(Result_3, 0), // New address.
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
@@ -587,47 +373,45 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
EVT LoadedVT = LD->getMemoryVT();
unsigned Opcode = 0;
- // Check for zero ext loads.
- bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+ // Check for zero extended loads. Treat any-extend loads as zero extended
+ // loads.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
- Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
- Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
} else
llvm_unreachable("unknown memory type");
- // For zero ext i64 loads, we need to add combine instructions.
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::ZEXTLOAD) {
+ // For zero extended i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt)
return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
- }
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::SEXTLOAD) {
- // Handle sign ext i64 loads.
+ // Handle sign extended i64 loads.
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- }
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
MVT::i32, MVT::Other, Base,
@@ -646,13 +430,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
ReplaceUses(Froms, Tos, 3);
return Result;
} else {
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
MVT::Other, Base, TargetConst0,
Chain);
- SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, TargetConstVal,
SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -682,7 +466,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
if (AM != ISD::UNINDEXED) {
result = SelectIndexedLoad(LD, dl);
} else {
- result = SelectBaseOffsetLoad(LD, dl);
+ result = SelectCode(LD);
}
return result;
@@ -698,14 +482,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Get the constant value.
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
EVT StoredVT = ST->getMemoryVT();
+ EVT ValueVT = Value.getValueType();
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
- Chain};
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -715,6 +497,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
else llvm_unreachable("unknown memory type");
+ if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+ assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ dl, MVT::i32, Value);
+ }
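+    // E.g. a truncating store of an i64 value as i16: the stored bits live
+    // in the low word, so hand the i32 low subregister to the narrow store.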
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value,
+ Chain};
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other, Ops);
@@ -728,9 +517,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
}
// Note: Order of operands matches the def of instruction:
- // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // def S2_storerd_io
+ // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
// and it differs for POST_ST* for instance.
- SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value,
Chain};
unsigned Opcode = 0;
@@ -742,10 +532,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else llvm_unreachable("unknown memory type");
// Build regular store.
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   // Build split increment instruction.
- SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base,
TargetConstVal,
SDValue(Result_1, 0));
@@ -758,61 +548,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
return Result_2;
}
-
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
- SDLoc dl) {
- SDValue Chain = ST->getChain();
- SDNode* Const32 = ST->getBasePtr().getNode();
- SDValue Value = ST->getValue();
- unsigned Opcode = 0;
-
-  // Try to lower stores of GlobalAddresses into indexed stores. Custom
- // lowering for GlobalAddress nodes has already turned it into a
- // CONST32. Avoid truncating stores for the moment. Post-inc stores
- // do the same. Don't think there's a reason for it, so will file a
- // bug to fix.
- if ((Const32->getOpcode() == HexagonISD::CONST32) &&
- !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
- SDValue Base = Const32->getOperand(0);
- if (Base.getOpcode() == ISD::TargetGlobalAddress) {
- EVT StoredVT = ST->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
-
- // Figure out base + offset opcode
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
- else llvm_unreachable("unknown memory type");
-
- SDValue Ops[] = {SDValue(NewBase,0),
- CurDAG->getTargetConstant(Offset,PointerTy),
- Value, Chain};
- // build indexed store
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(ST, Result);
- return Result;
- }
- }
- }
-
- return SelectCode(ST);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -823,7 +558,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
return SelectIndexedStore(ST, dl);
}
- return SelectBaseOffsetStore(ST, dl);
+ return SelectCode(ST);
}
SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
@@ -864,7 +599,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
@@ -890,7 +625,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
MVT::Other,
LD->getBasePtr(), TargetConst0,
@@ -909,187 +644,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
return SelectCode(N);
}
-
-SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
- SDLoc dl(N);
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::SETCC) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- SDValue N000 = N00.getOperand(0);
- SDValue N001 = N00.getOperand(1);
- if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
- SDValue N01 = N0.getOperand(1);
- SDValue N02 = N0.getOperand(2);
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
- }
- }
- }
-
- return SelectCode(N);
-}
-
-
-SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
- SDLoc dl(N);
- SDValue Shift = N->getOperand(0);
-
- //
- // %conv.i = sext i32 %tmp1 to i64
- // %conv2.i = sext i32 %add to i64
- // %mul.i = mul nsw i64 %conv2.i, %conv.i
- // %shr5.i = lshr i64 %mul.i, 32
- // %conv3.i = trunc i64 %shr5.i to i32
- //
- // --- match with the following ---
- //
- // %conv3.i = mpy (%tmp1, %add)
- //
- // Trunc to i32.
- if (N->getValueType(0) == MVT::i32) {
- // Trunc from i64.
- if (Shift.getNode()->getValueType(0) == MVT::i64) {
- // Trunc child is logical shift right.
- if (Shift.getOpcode() != ISD::SRL) {
- return SelectCode(N);
- }
-
- SDValue ShiftOp0 = Shift.getOperand(0);
- SDValue ShiftOp1 = Shift.getOperand(1);
-
- // Shift by const 32
- if (ShiftOp1.getOpcode() != ISD::Constant) {
- return SelectCode(N);
- }
-
- int32_t ShiftConst =
- cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
- if (ShiftConst != 32) {
- return SelectCode(N);
- }
-
- // Shifting a i64 signed multiply
- SDValue Mul = ShiftOp0;
- if (Mul.getOpcode() != ISD::MUL) {
- return SelectCode(N);
- }
-
- SDValue MulOp0 = Mul.getOperand(0);
- SDValue MulOp1 = Mul.getOperand(1);
-
- SDValue OP0;
- SDValue OP1;
-
- // Handle sign_extend and sextload
- if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext0 = MulOp0.getOperand(0);
- if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- return SelectCode(N);
- }
-
- OP0 = Sext0;
- } else if (MulOp0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Same goes for the second operand.
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext1 = MulOp1.getOperand(0);
- if (Sext1.getNode()->getValueType(0) != MVT::i32)
- return SelectCode(N);
-
- OP1 = Sext1;
- } else if (MulOp1.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
- OP0, OP1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
-
- return SelectCode(N);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i32) {
@@ -1107,7 +661,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t MulConst =
cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
int32_t ValConst = MulConst << ShlConst;
- SDValue Val = CurDAG->getTargetConstant(ValConst,
+ SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
MVT::i32);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1135,7 +689,8 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
int32_t Shl2Const =
cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
int32_t ValConst = 1 << (ShlConst+Shl2Const);
- SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+ MVT::i32);
if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(Val.getNode()))
if (isInt<9>(CN->getSExtValue())) {
@@ -1168,6 +723,37 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
//
SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
+ SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
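+  // Worked example (illustrative): zero-extending v8i1 to v8i8 gives
+  // NE = 8 and ES = 8, so MV = 0x0101010101010101. C2_mask expands each
+  // predicate lane to 0xff/0x00 across a 64-bit register, and ANDing with
+  // MV keeps only bit 0 of each lane, yielding 0 or 1 per element. For a
+  // 32-bit result, the low subregister of the 64-bit AND is extracted.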
+
SDNode *IsIntrinsic = N->getOperand(0).getNode();
if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
unsigned ID =
@@ -1175,8 +761,8 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
SDValue(IsIntrinsic, 0));
@@ -1204,56 +790,227 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
return SelectCode(N);
}
+//
+// Checking for circular load/store and bit-reverse load/store intrinsics
+// in order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
+
+    // We need to add the return type for the load. This intrinsic has
+    // two return types: one for the load and one for the post-increment.
+    // Only the *_ld instructions push the extra return type and bump the
+    // result node operand number accordingly.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+    // We need to add the return type for the load. This intrinsic has
+    // two return types: one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
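+// Result wiring sketch (illustrative): each circ/brev load pseudo produces
+// (new address : i32, loaded value, chain). The loaded value is written
+// back through the 'Load' pointer operand with a regular (possibly
+// truncating) store, and uses of the intrinsic's two results are rerouted
+// to the pseudo's address result and the store's chain.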
//
// Checking for intrinsics which have predicate registers as operand(s)
// and lowering to the actual intrinsic.
//
SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
- SDLoc dl(N);
- unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
-
- // We are concerned with only those intrinsics that have predicate registers
- // as at least one of the operands.
- if (IntrinsicWithPred) {
- SmallVector<SDValue, 8> Ops;
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-
- // Iterate over all the operands of the intrinsics.
- // For PredRegs, do the transfer.
- // For Double/Int Regs, just preserve the value
- // For immediates, lower it.
- for (unsigned i = 1; i < N->getNumOperands(); ++i) {
- SDNode *Arg = N->getOperand(i).getNode();
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF);
-
- if (RC == &Hexagon::IntRegsRegClass ||
- RC == &Hexagon::DoubleRegsRegClass) {
- Ops.push_back(SDValue(Arg, 0));
- } else if (RC == &Hexagon::PredRegsRegClass) {
- // Do the transfer.
- SDNode *PdRs = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(Arg, 0));
- Ops.push_back(SDValue(PdRs,0));
- } else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) {
- // This is immediate operand. Lower it here making sure that we DO have
- // const SDNode for immediate value.
- int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
- SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
- Ops.push_back(SDVal);
- } else {
- llvm_unreachable("Unimplemented");
- }
- }
- EVT ReturnValueVT = N->getValueType(0);
- SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
- ReturnValueVT, Ops);
- ReplaceUses(N, Result);
- return Result;
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Bits;
+ switch (IID) {
+ case Intrinsic::hexagon_S2_vsplatrb:
+ Bits = 8;
+ break;
+ case Intrinsic::hexagon_S2_vsplatrh:
+ Bits = 16;
+ break;
+ default:
+ return SelectCode(N);
+ }
+
+ SDValue const &V = N->getOperand(1);
+ SDValue U;
+ if (isValueExtension(V, Bits, U)) {
+ SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), U);
+ return SelectCode(R.getNode());
}
return SelectCode(N);
}
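+// Example (illustrative): S2_vsplatrb reads only the low 8 bits of its
+// operand, so when the operand is merely an extension of an 8-bit payload
+// (e.g. (and x, 0xff) or (sext_inreg x, i8)), isValueExtension peels the
+// extension off and the intrinsic is re-selected on the bare source.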
@@ -1267,47 +1024,30 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
APFloat APF = CN->getValueAPF();
if (N->getValueType(0) == MVT::f32) {
return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
- CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32));
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
}
else if (N->getValueType(0) == MVT::f64) {
return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
- CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64));
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
}
return SelectCode(N);
}
-
//
// Map predicate true (encoded as -1 in LLVM) to a XOR.
//
SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result;
+ SDNode* Result = 0;
int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (Val == -1) {
- // Create the IntReg = 1 node.
- SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32));
-
- // Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(IntRegTFR, 0));
-
- // not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
- SDValue(Pd, 0));
-
- // xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
- SDValue(Pd, 0), SDValue(NotPd, 0));
-
- // We have just built:
- // Rs = Pd
- // Pd = xor(not(Pd), Pd)
-
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
ReplaceUses(N, Result);
return Result;
}
@@ -1343,6 +1083,175 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
return Result;
}
+//
+// Map the following, where possible:
+//   AND/FABS -> clrbit
+//   OR       -> setbit
+//   XOR/FNEG -> togglebit
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+
+  // We handle only 32- and 64-bit bit ops.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
+
+  // We handle fabs and fneg only on V5.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
+
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
+ }
+
+ if (Opc == ISD::AND) {
+ if (((ValueVT == MVT::i32) &&
+ (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
+ ((ValueVT == MVT::i64) &&
+ (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
+ // If it's simple AND, do the normal op.
+ return SelectCode(N);
+ else
+ Val = ~Val;
+ }
+
+  // If OR or AND is being fed by shl, srl, or sra, don't do this change,
+  // because Hexagon provides |= and &= forms of shl, srl, and sra.
+  // Traverse the DAG to see if the operand is a shl, srl, or sra.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default: break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
+ }
+
+ // Make sure it's power of 2.
+ unsigned bitpos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
+ ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ return SelectCode(N);
+
+ // Get the bit position.
+ bitpos = countTrailingZeros(uint64_t(Val));
+ } else {
+ // For fabs and fneg, it's always the 31st bit.
+ bitpos = 31;
+ }
+
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch(Opc) {
+ default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
+
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+    // 64-bit gymnastics to use REG_SEQUENCE, but it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ dl, MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && bitpos < 32) {
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregLO, SDVal);
+ const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
+ SDValue(Result0, 0), SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ } else {
+ if (Opc != ISD::FABS && Opc != ISD::FNEG)
+ SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregHI, SDVal);
+ const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
+ SubregLO, SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ }
+ }
+
+ ReplaceUses(N, Result);
+ return Result;
+}
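+// Worked examples (illustrative) of the mapping above, for 32-bit values:
+//   (or  x, 0x00000400) -> S2_setbit_i(x, 10)
+//   (and x, 0x7fffffff) -> S2_clrbit_i(x, 31)    // Val = ~0x7fffffff, bit 31
+//   (xor x, 0x80000000) -> S2_togglebit_i(x, 31)
+// fabs/fneg on f32 always touch the sign bit (bit 31); for 64-bit values
+// the bit is flipped in the proper subregister and the pair is reassembled
+// with REG_SEQUENCE.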
+
+
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ const HexagonFrameLowering *HFI = HST->getFrameLowering();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ unsigned StkA = HFI->getStackAlignment();
+ unsigned MaxA = MFI->getMaxAlignment();
+ SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ SDLoc DL(N);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SDNode *R = 0;
+
+ // Use TFR_FI when:
+ // - the object is fixed, or
+ // - there are no objects with higher-than-default alignment, or
+ // - there are no dynamically allocated objects.
+ // Otherwise, use TFR_FIA.
+ if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) {
+ R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
+ } else {
+ auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
+ unsigned AR = HMFI.getStackAlignBaseVReg();
+ SDValue CH = CurDAG->getEntryNode();
+ SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
+ R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops);
+ }
+
+ if (N->getHasDebugValue())
+ CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0));
+ return R;
+}
+
SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
@@ -1350,7 +1259,6 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
-
switch (N->getOpcode()) {
case ISD::Constant:
return SelectConstant(N);
@@ -1358,6 +1266,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::ConstantFP:
return SelectConstantFP(N);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(N);
+
case ISD::ADD:
return SelectAdd(N);
@@ -1370,18 +1281,22 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
return SelectStore(N);
- case ISD::SELECT:
- return SelectSelect(N);
-
- case ISD::TRUNCATE:
- return SelectTruncate(N);
-
case ISD::MUL:
return SelectMul(N);
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FABS:
+ case ISD::FNEG:
+ return SelectBitOp(N);
+
case ISD::ZERO_EXTEND:
return SelectZeroExtend(N);
+ case ISD::INTRINSIC_W_CHAIN:
+ return SelectIntrinsicWChain(N);
+
case ISD::INTRINSIC_WO_CHAIN:
return SelectIntrinsicWOChain(N);
}
@@ -1389,297 +1304,217 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Inp = Op, Res;
-//
-// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
-// to define these instructions.
-//
-bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ switch (ConstraintID) {
+ default:
return true;
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_o: // Offsetable.
+ case InlineAsm::Constraint_v: // Not offsetable.
+ case InlineAsm::Constraint_m: // Memory.
+ if (SelectAddrFI(Inp, Res))
+ OutOps.push_back(Res);
+ else
+ OutOps.push_back(Inp);
+ break;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
-}
-
-bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
}
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG &DAG = *CurDAG;
+ std::vector<SDNode*> Nodes;
+ for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
+ Nodes.push_back(I);
+
+ // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ // This may not be the right thing for all targets, so do it here.
+ for (auto I: Nodes) {
+ if (I->getOpcode() != ISD::OR)
+ continue;
+
+ auto IsZero = [] (const SDValue &V) -> bool {
+ if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
+ return SC->isNullValue();
+ return false;
+ };
+ auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ if (Op.getOpcode() != ISD::SELECT)
+ return false;
+ return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ };
-bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+ SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
+ EVT VT = I->getValueType(0);
+ bool SelN0 = IsSelect0(N0);
+ SDValue SOp = SelN0 ? N0 : N1;
+ SDValue VOp = SelN0 ? N1 : N0;
+
+ if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
+ SDValue SC = SOp.getOperand(0);
+ SDValue SX = SOp.getOperand(1);
+ SDValue SY = SOp.getOperand(2);
+ SDLoc DLS = SOp;
+ if (IsZero(SY)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ } else if (IsZero(SX)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ }
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
}
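+// Correctness sketch (illustrative) for the rewrite above: in
+//   (or (select c, x, 0), z)
+// c true gives or(x, z) and c false gives or(0, z) = z, which is exactly
+// (select c, (or x, z), z); the mirrored form with the zero in the true
+// arm becomes (select c, z, (or y, z)). The rewrite only fires when the
+// select has a single use.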
-
-bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
+void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ auto &HFI = *HST.getFrameLowering();
+ if (!HFI.needsAligna(*MF))
+ return;
+
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineBasicBlock *EntryBB = MF->begin();
+ unsigned AR = FuncInfo->CreateReg(MVT::i32);
+ unsigned MaxA = MFI->getMaxAlignment();
+ auto &HII = *HST.getInstrInfo();
+ BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+ .addImm(MaxA);
+ MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
-
-bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
-
- if (Addr.getOpcode() != ISD::ADD) {
- return(SelectADDRriS11_2(Addr, Base, Offset));
- }
-
- return SelectADDRriS11_2(Addr, Base, Offset);
+// Match a frame index that can be used in an addressing mode.
+bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
+ if (N.getOpcode() != ISD::FrameIndex)
+ return false;
+ auto &HFI = *HST->getFrameLowering();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
+ return false;
+ R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ return true;
}
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
+inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, false);
}
-bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
- SDValue &R2) {
- if (Addr.getOpcode() == ISD::FrameIndex) return false;
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (Addr.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- if (isInt<13>(CN->getSExtValue()))
- return false; // Let the reg+imm pattern catch this!
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- return true;
- }
-
- R1 = Addr;
-
- return true;
+inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, true);
}
-
-// Handle generic address case. It is accessed from inlined asm =m constraints,
-// which could have any kind of pointer.
-bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
+ bool UseGP) {
+ switch (N.getOpcode()) {
+ case ISD::ADD: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned GAOpc = N0.getOpcode();
+ if (UseGP && GAOpc != HexagonISD::CONST32_GP)
+ return false;
+ if (!UseGP && GAOpc != HexagonISD::CONST32)
+ return false;
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue Addr = N0.getOperand(0);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
+ if (GA->getOpcode() == ISD::TargetGlobalAddress) {
+ uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
+ N.getValueType(), NewOff);
+ return true;
+ }
+ }
+ }
+ break;
}
-
- if (Addr.getOpcode() == ISD::ADD) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ case HexagonISD::CONST32:
+ // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
+ // want in the instruction.
+ if (!UseGP)
+ R = N.getOperand(0);
+ return !UseGP;
+ case HexagonISD::CONST32_GP:
+ if (UseGP)
+ R = N.getOperand(0);
+ return UseGP;
+ default:
+ return false;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ return false;
}
-
-bool HexagonDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
-
- switch (ConstraintCode) {
- case 'o': // Offsetable.
- case 'v': // Not offsetable.
- default: return true;
- case 'm': // Memory.
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
+ unsigned FromBits, SDValue &Src) {
+ unsigned Opc = Val.getOpcode();
+ switch (Opc) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ SDValue const &Op0 = Val.getOperand(0);
+ EVT T = Op0.getValueType();
+ if (T.isInteger() && T.getSizeInBits() == FromBits) {
+ Src = Op0;
return true;
+ }
break;
}
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
-}
-
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- UseCount++;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ if (Val.getOperand(0).getValueType().isInteger()) {
+ VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
+ if (T->getVT().getSizeInBits() == FromBits) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check if this is an AND with "FromBits" of lower bits set to 1.
+ uint64_t FromMask = (1 << FromBits) - 1;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
}
-
- return (UseCount <= 1);
-
-}
-
-//===--------------------------------------------------------------------===//
-// Return 'true' if use count of the global address is below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
- assert(N->getOpcode() == ISD::TargetGlobalAddress &&
- "Expecting a target global address");
-
- // Always try to fold the address.
- if (TM.getOptLevel() == CodeGenOpt::Aggressive)
- return true;
-
- GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
- DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
- GlobalAddressUseCountMap.find(GA->getGlobal());
-
- if (GI == GlobalAddressUseCountMap.end())
- return false;
-
- return GI->second <= MaxNumOfUsesForConstExtenders;
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the non-GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, false);
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, true);
-}
-
-//===--------------------------------------------------------------------===//
-// Fold offset of the global address if number of uses are below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
- bool ShouldLookForGP) {
- if (N.getOpcode() == ISD::ADD) {
- SDValue N0 = N.getOperand(0);
- SDValue N1 = N.getOperand(1);
- if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
- (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
- GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
-
- if (Const && GA &&
- (GA->getOpcode() == ISD::TargetGlobalAddress)) {
- if ((N0.getOpcode() == HexagonISD::CONST32) &&
- !hasNumUsesBelowThresGA(GA))
- return false;
- R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
- SDLoc(Const),
- N.getValueType(),
- GA->getOffset() +
- (uint64_t)Const->getSExtValue());
+ case ISD::OR:
+ case ISD::XOR: {
+ // OR/XOR with the lower "FromBits" bits set to 0.
+ uint64_t FromMask = (1 << FromBits) - 1;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(0);
return true;
}
}
}
+ default:
+ break;
+ }
return false;
}
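+// Worked example (illustrative) for isValueExtension with FromBits = 8,
+// i.e. FromMask = 0xff:
+//   (and x, 0xff)       -> Src = x   (low 8 bits are the payload)
+//   (or  x, 0xffffff00) -> Src = x   (low 8 bits left untouched)
+//   (sext_inreg x, i8)  -> Src = x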
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index ef5d6b9..ed5676c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -43,11 +43,48 @@ using namespace llvm;
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
- cl::desc("Control jump table emission on Hexagon target"));
+ cl::desc("Control jump table emission on Hexagon target"));
+
+static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon SDNode scheduling"));
+
+static cl::opt<bool> EnableFastMath("ffast-math",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Fast Math processing"));
+
+static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
+ cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Set minimum jump tables"));
+
+static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
+ cl::Hidden, cl::ZeroOrMore, cl::init(8),
+ cl::desc("Max #stores to inline memset"));
+
+static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memset"));
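+// Usage sketch (illustrative; assumes the stock llc driver): these are
+// ordinary cl::opts, so the thresholds can be tuned from the command line,
+// e.g. llc -march=hexagon -max-store-memcpy=4 -enable-hexagon-sdnode-sched.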
+
namespace {
class HexagonCCState : public CCState {
- int NumNamedVarArgParams;
+ unsigned NumNamedVarArgParams;
public:
HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -56,7 +93,7 @@ public:
: CCState(CC, isVarArg, MF, locs, C),
NumNamedVarArgParams(NumNamedVarArgParams) {}
- int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+ unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
}
@@ -97,11 +134,7 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
HexagonCCState &HState = static_cast<HexagonCCState &>(State);
- // NumNamedVarArgParams can not be zero for a VarArg function.
- assert((HState.getNumNamedVarArgParams() > 0) &&
- "NumNamedVarArgParams is not bigger than zero.");
-
- if ((int)ValNo < HState.getNumNamedVarArgParams()) {
+ if (ValNo < HState.getNumNamedVarArgParams()) {
// Deal with named arguments.
return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -111,9 +144,8 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
if (ArgFlags.isByVal()) {
// If pass-by-value, the size allocated on stack is decided
// by ArgFlags.getByValSize(), not by the size of LocVT.
- assert ((ArgFlags.getByValSize() > 8) &&
- "ByValSize must be bigger than 8 bytes");
- ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ ofst = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
@@ -148,9 +180,8 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
if (ArgFlags.isByVal()) {
// Passed on stack.
- assert ((ArgFlags.getByValSize() > 8) &&
- "ByValSize must be bigger than 8 bytes");
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
@@ -164,6 +195,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
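+  // Example (illustrative): a v4i8 or v2i16 argument is bitcast to i32 and
+  // then follows the ordinary 32-bit path below (one of R0..R5 or a 4-byte
+  // stack slot); v8i8, v4i16 and v2i32 likewise travel as a single i64.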
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -188,7 +225,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
};
- if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -213,7 +250,7 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
static const MCPhysReg RegList2[] = {
Hexagon::R1, Hexagon::R3
};
- if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -239,6 +276,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -301,9 +344,10 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -351,8 +395,13 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
+bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  // If the call is not marked as a tail call, or tail calls are disabled
+  // altogether, don't emit one.
+ if (!CI->isTailCall() || HTM.Options.DisableTailCalls)
+ return false;
-
+ return true;
+}
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
@@ -404,8 +453,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ bool doesNotReturn = CLI.DoesNotReturn;
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ MachineFunction &MF = DAG.getMachineFunction();
// Check for varargs.
int NumNamedVarArgParams = -1;
@@ -430,42 +481,39 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), NumNamedVarArgParams);
- if (NumNamedVarArgParams > 0)
+ if (isVarArg)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+ if (DAG.getTarget().Options.DisableTailCalls)
+ isTailCall = false;
- if(isTailCall) {
- bool StructAttrFlag =
- DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ if (isTailCall) {
+ bool StructAttrFlag = MF.getFunction()->hasStructRetAttr();
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet,
StructAttrFlag,
Outs, OutVals, Ins, DAG);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isMemLoc()) {
isTailCall = false;
break;
}
}
- if (isTailCall) {
- DEBUG(dbgs () << "Eligible for Tail Call\n");
- } else {
- DEBUG(dbgs () <<
- "Argument must be passed on stack. Not eligible for Tail Call\n");
- }
+ DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n"
+ : "Argument must be passed on stack. "
+ "Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- SDValue StackPtr =
- DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
+ auto &HRI = *Subtarget.getRegisterInfo();
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(),
+ getPointerTy());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -478,6 +526,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
default:
// Loc info must be one of Full, SExt, ZExt, or AExt.
llvm_unreachable("Unknown loc info!");
+ case CCValAssign::BCvt:
case CCValAssign::Full:
break;
case CCValAssign::SExt:
@@ -493,41 +542,38 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.isMemLoc()) {
unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
-
+ SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
+ StackPtr.getValueType());
+ MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
if (Flags.isByVal()) {
      // The argument is a struct passed by value. According to LLVM, "Arg"
      // is a pointer.
- MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
- // The argument is not passed by value. "Arg" is a buildin type. It is
- // not a pointer.
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false,
- 0));
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+ SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
+ false, 0);
+ MemOpChains.push_back(S);
}
continue;
}
// Arguments that can be passed on register must be kept at RegsToPass
// vector.
- if (VA.isRegLoc()) {
+ if (VA.isRegLoc())
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- }
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
- if (!MemOpChains.empty()) {
+ if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
- }
- if (!isTailCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true),
- dl);
+ if (!isTailCall) {
+ SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true);
+ Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ }
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
@@ -540,10 +586,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- }
-
- // For tail calls lower the arguments to the 'real' stack slot.
- if (isTailCall) {
+ } else {
+ // For tail calls lower the arguments to the 'real' stack slot.
+ //
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
@@ -558,7 +603,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- InFlag =SDValue();
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -567,8 +612,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (flag_aligned_memcpy) {
const char *MemcpyName =
"__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
- Callee =
- DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
flag_aligned_memcpy = false;
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
@@ -590,19 +634,21 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
}
- if (InFlag.getNode()) {
+ if (InFlag.getNode())
Ops.push_back(InFlag);
- }
- if (isTailCall)
+ if (isTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
+ }
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
+ int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3;
+ Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
@@ -616,7 +662,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD)
- return false;
+ return false;
if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
isInc = (Ptr->getOpcode() == ISD::ADD);
@@ -688,8 +734,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
switch (Node->getOpcode()) {
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
@@ -697,7 +742,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
--NumOps; // Ignore the flag operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- if (FuncInfo->hasClobberLR())
+ if (FuncInfo.hasClobberLR())
break;
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
@@ -720,11 +765,9 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// Check it to be lr
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
if (Reg == QRI->getRARegister()) {
- FuncInfo->setHasClobberLR(true);
+ FuncInfo.setHasClobberLR(true);
break;
}
}
@@ -765,10 +808,10 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
}
- SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl,
getPointerTy(), TargetJT);
SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, dl, MVT::i32));
SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
ShiftIndex);
SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
@@ -783,44 +826,27 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
SDLoc dl(Op);
- unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
+ assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
- // Get a reference to the stack pointer.
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ unsigned A = AlignConst->getSExtValue();
+ auto &HFI = *Subtarget.getFrameLowering();
+ // "Zero" means natural stack alignment.
+ if (A == 0)
+ A = HFI.getStackAlignment();
- // Subtract the dynamic size from the actual stack size to
- // obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+ DEBUG({
+    dbgs() << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ Size.getNode()->dump(&DAG);
+ dbgs() << "\n";
+ });
- //
- // For Hexagon, the outgoing memory arguments area should be on top of the
- // alloca area on the stack i.e., the outgoing memory arguments should be
- // at a lower address than the alloca area. Move the alloca area down the
- // stack by adding back the space reserved for outgoing arguments to SP
- // here.
- //
- // We do not know what the size of the outgoing args is at this point.
- // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
- // stack pointer. We patch this instruction with the correct, known
- // offset in emitPrologue().
- //
- // Use a placeholder immediate (zero) for now. This will be patched up
- // by emitPrologue().
- SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
- MVT::i32,
- Sub,
- DAG.getConstant(0, MVT::i32));
-
- // The Sub result contains the new stack start address, so it
- // must be placed in the stack pointer register.
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
-
- SDValue Ops[2] = { ArgAdjust, CopyChain };
- return DAG.getMergeValues(Ops, dl);
+ SDValue AC = DAG.getConstant(A, dl, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
}
SDValue
@@ -836,9 +862,7 @@ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
-
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -875,7 +899,7 @@ const {
RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (RegVT == MVT::i64) {
+ } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
@@ -927,7 +951,7 @@ const {
HEXAGON_LRFP_SIZE +
CCInfo.getNextStackOffset(),
true);
- FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ FuncInfo.setVarArgsFrameIndex(FrameIndex);
}
return Chain;
@@ -946,6 +970,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
false, 0);
}
+// Creates a SPLAT instruction for a constant value VAL.
+static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) {
+ if (VT.getSimpleVT() == MVT::v4i8)
+ return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
+
+ if (VT.getSimpleVT() == MVT::v4i16)
+ return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+
+ return SDValue();
+}
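
A conceptual sketch (not part of this patch) of the splat-node lane
semantics; the multiply constants are only an illustration:

    #include <cstdint>
    // VSPLATB-like: replicate the low byte across all four v4i8 lanes.
    uint32_t splat_b(uint8_t V)  { return V * 0x01010101u; }
    // VSPLATH-like: replicate the low halfword across all four v4i16 lanes.
    uint64_t splat_h(uint16_t V) { return V * 0x0001000100010001ULL; }
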
+
+static bool isSExtFree(SDValue N) {
+ // A sign-extend of a truncate of a sign-extend is free.
+ if (N.getOpcode() == ISD::TRUNCATE &&
+ N.getOperand(0).getOpcode() == ISD::AssertSext)
+ return true;
+ // We have sign-extended loads.
+ if (N.getOpcode() == ISD::LOAD)
+ return true;
+ return false;
+}
+
+SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue InpVal = Op.getOperand(0);
+ if (isa<ConstantSDNode>(InpVal)) {
+ uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
+ return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
+ }
+ SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
+}
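
A scalar C++ equivalent of this CTPOP lowering (a sketch; the compiler
builtin stands in for HexagonISD::POPCOUNT):

    #include <cstdint>
    uint64_t ctpop_i64(uint64_t V) {
      // HexagonISD::POPCOUNT counts the bits of an i64 but produces an i32.
      uint32_t PopOut = (uint32_t)__builtin_popcountll(V);
      // ISD::ZERO_EXTEND widens the count back to i64.
      return (uint64_t)PopOut;
    }
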
+
+SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Cmp = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
+
+ EVT VT = Op.getValueType();
+ EVT LHSVT = LHS.getValueType();
+ EVT RHSVT = RHS.getValueType();
+
+ if (LHSVT == MVT::v2i16) {
+ assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
+ unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
+ SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
+ SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
+ return SC;
+ }
+
+ // Treat all other vector types as legal.
+ if (VT.isVector())
+ return Op;
+
+ // Equals and not equals should use sign-extend, not zero-extend, since
+ // we can represent small negative values in the compare instructions.
+ // The LLVM default is to use zero-extend arbitrarily in these cases.
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
+ (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (C && C->getAPIntValue().isNegative()) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ if (isSExtFree(LHS) || isSExtFree(RHS)) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ }
+ return SDValue();
+}
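
Why sign extension is safe for EQ/NE on small types (a sketch, not part of
the patch): equality of i8/i16 values is preserved under sign extension, and
a small negative right-hand side then still fits a signed compare immediate:

    #include <cstdint>
    bool eq_i8(int8_t A, int8_t B) {
      // Same result as A == B; sign-extending keeps B == -1 as -1, while
      // zero-extending would turn it into 255.
      return (int32_t)A == (int32_t)B;
    }
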
+
+SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
+ const {
+ SDValue PredOp = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
+ EVT OpVT = Op1.getValueType();
+ SDLoc DL(Op);
+
+ if (OpVT == MVT::v2i16) {
+ SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
+ SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
+ SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
+ SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
+ return TR;
+ }
+
+ return SDValue();
+}
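
A per-lane view of the v2i16 VSELECT lowering above (sketch): widen both
operands to 32 bits, select, then truncate back:

    #include <cstdint>
    void vselect_v2i16(const bool Pred[2], const uint16_t Op1[2],
                       const uint16_t Op2[2], uint16_t Out[2]) {
      for (int i = 0; i < 2; ++i) {
        uint32_t X1 = Op1[i], X2 = Op2[i];   // ISD::ZERO_EXTEND to v2i32
        uint32_t Sel = Pred[i] ? X1 : X2;    // ISD::VSELECT on v2i32
        Out[i] = (uint16_t)Sel;              // ISD::TRUNCATE back to v2i16
      }
    }
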
+
+// Handle only specific vector loads.
+SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+ SDValue Result;
+ SDValue Base = LoadNode->getBasePtr();
+ ISD::LoadExtType Ext = LoadNode->getExtensionType();
+ unsigned Alignment = LoadNode->getAlignment();
+ SDValue LoadChain;
+
+  if (Ext == ISD::NON_EXTLOAD)
+ Ext = ISD::ZEXTLOAD;
+
+ if (VT == MVT::v4i16) {
+ if (Alignment == 2) {
+ SDValue Loads[4];
+ // Base load.
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base+2 load.
+ SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base and base+2.
+ SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
+ SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
+ // Base + 4.
+ Increment = DAG.getConstant(4, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base + 6.
+ Increment = DAG.getConstant(6, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base+4 and base+6.
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
+ // Combine to i64. This could be optimised out later if we can
+ // affect reg allocation of this code.
+ Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
+ LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Loads[0].getValue(1), Loads[1].getValue(1),
+ Loads[2].getValue(1), Loads[3].getValue(1));
+ } else {
+ // Perform default type expansion.
+ Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->isVolatile(), LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(), LoadNode->getAlignment());
+ LoadChain = Result.getValue(1);
+ }
+ } else
+ llvm_unreachable("Custom lowering unsupported load");
+
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ // Since we pretend to lower a load, we need the original chain
+ // info attached to the result.
+ SDValue Ops[] = { Result, LoadChain };
+
+ return DAG.getMergeValues(Ops, DL);
+}
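
For the Alignment == 2 path above, a plain-C++ sketch of the reassembly
(assuming a little-endian halfword layout): four aligned zero-extending i16
loads are paired with SHL/OR and combined into one i64:

    #include <cstdint>
    uint64_t load_v4i16_align2(const uint16_t *Base) {
      uint32_t Lo = ((uint32_t)Base[1] << 16) | Base[0]; // Loads[1]:Loads[0]
      uint32_t Hi = ((uint32_t)Base[3] << 16) | Base[2]; // Loads[3]:Loads[2]
      return ((uint64_t)Hi << 32) | Lo;                  // HexagonISD::COMBINE
    }
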
+
+
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -958,15 +1168,15 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else
Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
CP->getAlignment());
- return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res);
+ return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
}
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -976,29 +1186,28 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(4, MVT::i32);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- TRI->getFrameRegister(), VT);
+ HRI.getFrameRegister(), VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
@@ -1021,15 +1230,29 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
SDLoc dl(Op);
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- const HexagonTargetObjectFile &TLOF =
- static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ const HexagonTargetObjectFile *TLOF =
+ static_cast<const HexagonTargetObjectFile *>(
+ getTargetMachine().getObjFileLowering());
+ if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
}
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
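
The effect of this promotion, as a sketch: a v4i8 load is carried out as a
plain i32 load whose payload is then reinterpreted lane-wise:

    #include <cstdint>
    #include <cstring>
    void load_v4i8(const void *P, uint8_t Out[4]) {
      uint32_t W;
      std::memcpy(&W, P, sizeof(W));   // the promoted MVT::i32 load
      std::memcpy(Out, &W, sizeof(W)); // lanes are a reinterpretation of W
    }
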
+
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1042,465 +1265,971 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
-HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
- : TargetLowering(targetmachine),
- TM(targetmachine) {
+HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &STI)
+ : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
+ Subtarget(STI) {
+ bool IsV4 = !Subtarget.hasV5TOps();
+ auto &HRI = *Subtarget.getRegisterInfo();
+
+ setPrefLoopAlignment(4);
+ setPrefFunctionAlignment(4);
+ setMinFunctionAlignment(2);
+ setInsertFencesForAtomic(false);
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
+
+ if (EnableHexSDNodeSched)
+ setSchedulingPreference(Sched::VLIW);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
+ MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
+ MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
+ MaxStoresPerMemset = MaxStoresPerMemsetCL;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
- const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
+ //
+ // Set up register classes.
+ //
- // Set up the register classes.
- addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
+ addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
+ addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
if (Subtarget.hasV5TOps()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
- addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ //
+ // Handling of scalar operations.
+ //
+ // All operations default to "legal", except:
+ // - indexed loads and stores (pre-/post-incremented),
+ // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
+ // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
+ // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
+ // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
+ // which default to "expand" for at least one type.
+
+ // Misc operations.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
+
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- computeRegisterProperties();
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
- // Align loop entry
- setPrefLoopAlignment(4);
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
- // Limits for inline expansion of memcpy/memmove
- MaxStoresPerMemcpy = 6;
- MaxStoresPerMemmove = 6;
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- //
- // Library calls for unsupported operations
- //
+ if (EmitJumpTables)
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  // Increase the jump table cutover to 5 entries (previously 4).
+ setMinimumJumpTableEntries(MinimumJumpTables);
+
+ // Hexagon has instructions for add/sub with carry. The problem with
+ // modeling these instructions is that they produce 2 results: Rdd and Px.
+ // To model the update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't have
+ // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
- setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
- setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ // Only add and sub that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
- setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
- setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+ setOperationAction(ISD::CTLZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+
+  // On V5, popcount counts the 1 bits of an i64 but returns an i32.
+  // On V4 it will be expanded (set later).
+ setOperationAction(ISD::CTPOP, MVT::i8, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+  // Mark i64 mul as Expand so that it is not considered a legal operation.
+  // There is a pattern that will match i64 mul and transform it into a
+  // series of instructions.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ for (unsigned IntExpOp :
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
+ ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
+ ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ setOperationAction(IntExpOp, MVT::i32, Expand);
+ setOperationAction(IntExpOp, MVT::i64, Expand);
+ }
- setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
- setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+ for (unsigned FPExpOp :
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+ ISD::FPOW, ISD::FCOPYSIGN}) {
+ setOperationAction(FPExpOp, MVT::f32, Expand);
+ setOperationAction(FPExpOp, MVT::f64, Expand);
+ }
- setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
+ // No extending loads from i32.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ // Turn FP extload into load/fextend.
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
- setOperationAction(ISD::SREM, MVT::i64, Expand);
+ // Expand BR_CC and SELECT_CC for all integer and fp types.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ //
+ // Handling of vector operations.
+ //
- setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
+  // Custom lower v4i16 loads only; let v4i16 stores be promoted for now.
+ promoteLdStType(MVT::v4i8, MVT::i32);
+ promoteLdStType(MVT::v2i16, MVT::i32);
+ promoteLdStType(MVT::v8i8, MVT::i64);
+ promoteLdStType(MVT::v2i32, MVT::i64);
+
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
+
+ // Set the action for vector operations to "expand", then override it with
+ // either "custom" or "legal" for specific cases.
+ static unsigned VectExpOps[] = {
+ // Integer arithmetic:
+ ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
+ ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ // Logical/bit:
+ ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ_ZERO_UNDEF,
+ // Floating point arithmetic/math functions:
+ ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
+ ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
+ ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
+ ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
+ // Misc:
+ ISD::SELECT, ISD::ConstantPool,
+ // Vector:
+ ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
+ ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
+ ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE
+ };
- setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::UREM, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (unsigned VectExpOp : VectExpOps)
+ setOperationAction(VectExpOp, VT, Expand);
- setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+ // Expand all extended loads and truncating stores:
+ for (MVT TargetVT : MVT::vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
- setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
- setOperationAction(ISD::FDIV, MVT::f32, Expand);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
- setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
- setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ // Types natively supported:
+ for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
+ MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
+ MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
+
+ setOperationAction(ISD::ADD, NativeVT, Legal);
+ setOperationAction(ISD::SUB, NativeVT, Legal);
+ setOperationAction(ISD::MUL, NativeVT, Legal);
+ setOperationAction(ISD::AND, NativeVT, Legal);
+ setOperationAction(ISD::OR, NativeVT, Legal);
+ setOperationAction(ISD::XOR, NativeVT, Legal);
+ }
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ // Subtarget-specific operation actions.
+ //
if (Subtarget.hasV5TOps()) {
- // Hexagon V5 Support.
- setOperationAction(ISD::FADD, MVT::f32, Legal);
- setOperationAction(ISD::FADD, MVT::f64, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ } else { // V4
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
+ // Expand these operations for both f32 and f64:
+ for (unsigned FPExpOpV4 :
+ {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
+ setOperationAction(FPExpOpV4, MVT::f32, Expand);
+ setOperationAction(FPExpOpV4, MVT::f64, Expand);
+ }
- setOperationAction(ISD::FNEG, MVT::f32, Legal);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- } else {
+ for (ISD::CondCode FPExpCCV4 :
+ {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
+ ISD::SETUO, ISD::SETO}) {
+ setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
+ setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
+ }
+ }
- // Expand fp<->uint.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ // Handling of indexed loads/stores: default is "expand".
+ //
+ for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal);
+ setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
+ }
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ computeRegisterProperties(&HRI);
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ //
+ // Library calls for unsupported operations
+ //
+ bool FastMath = EnableFastMath;
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ if (IsV4) {
+ // Handle single-precision floating point operations on V4.
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
+ } else {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ }
+ }
+  // This is the only fast library function for double-precision sqrt.
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
+
+  // Routine name prefix: none for "slow math", "fast2_" for V4 fast math
+  // and for V5+ fast-math double precision. (In practice, keep the "fast"
+  // and "fast2" prefixes separate for now.)
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
+ // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
+ } else {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ }
+
+ if (Subtarget.hasV5TOps()) {
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
+ } else {
+ // V4
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
-
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
-
setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
-
- setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
- setOperationAction(ISD::FADD, MVT::f64, Expand);
-
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
-
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
-
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
-
setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
-
setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
-
setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
-
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
-
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ }
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ // These cause problems when the shift amount is non-constant.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+}
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((HexagonISD::NodeType)Opcode) {
+ case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
+ case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND";
+ case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
+ case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
+ case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::CALLR: return "HexagonISD::CALLR";
+ case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
+ case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
+ case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CP: return "HexagonISD::CP";
+ case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
+ case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
+ case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
+ case HexagonISD::FCONST32: return "HexagonISD::FCONST32";
+ case HexagonISD::INSERT: return "HexagonISD::INSERT";
+ case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
+ case HexagonISD::JT: return "HexagonISD::JT";
+ case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
+ case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD";
+ case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
+ case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
+ case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
+ case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
+ case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
+ case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
+ case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
+ case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
+ case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
+ case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
+ case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
+ case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
+ case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
+  case HexagonISD::VSPLATB:      return "HexagonISD::VSPLATB";
+ case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
+ case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
+ case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
+ case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
+ case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
+ case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
+ case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::OP_END: break;
+ }
+ return nullptr;
+}
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple())
+ return false;
+ return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
+}
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple())
+ return false;
+ return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
+}
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+// shouldExpandBuildVectorWithShuffles - Should a BUILD_VECTOR of this type
+// be expanded using vector shuffles?
+bool
+HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
- setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
+  // Hexagon vector shuffles operate on byte- or halfword-sized elements.
+ EVT EltVT = VT.getVectorElementType();
+ int EltBits = EltVT.getSizeInBits();
+ if ((EltBits != 8) && (EltBits != 16))
+ return false;
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setOperationAction(ISD::MUL, MVT::f32, Expand);
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
+// V2 are the two vectors to select data from, V3 is the permutation.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ if (SVN->isSplat()) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+ }
+ return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
+ }
- setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
- setOperationAction(ISD::SUB, MVT::f64, Expand);
+ // FIXME: We need to support more general vector shuffles. See
+ // below the comment from the ARM backend that deals in the general
+ // case with the vector shuffles. For now, let expand handle these.
+ return SDValue();
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setOperationAction(ISD::SUB, MVT::f32, Expand);
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+}
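
A per-lane view of the splat-shuffle case handled above (sketch): a shuffle
whose mask selects one source lane everywhere is a splat of that lane:

    #include <cstdint>
    void splat_shuffle_v4i16(uint16_t V[4], int Lane) {
      uint16_t X = V[Lane < 0 ? 0 : Lane]; // Lane == -1 is treated as lane 0
      for (int i = 0; i < 4; ++i)
        V[i] = X;
    }
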
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+// Report true if every operand of this BUILD_VECTOR repeats the same
+// base element.
+static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
+ unsigned NElts = BVN->getNumOperands();
+ SDValue V0 = BVN->getOperand(0);
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ for (unsigned i = 1, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i) != V0)
+ return false;
+ }
+ return true;
+}
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
+// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
+  BuildVectorSDNode *BVN = nullptr;
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDValue V3;
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V2;
+ else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V1;
+ else
+ return SDValue();
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ SDValue CommonSplat = BVN->getOperand(0);
+ SDValue Result;
- setOperationAction(ISD::FABS, MVT::f32, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FNEG, MVT::f32, Expand);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ if (VT.getSimpleVT() == MVT::v4i16) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else if (VT.getSimpleVT() == MVT::v2i32) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else {
+ return SDValue();
}
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+}
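
A scalarized sketch of what this conversion targets: when the shift amount
is a common splat, every lane is shifted by one scalar count, which is what
the VSRAH/VSHLH/VSRLH (and the corresponding W) nodes implement:

    #include <cstdint>
    void vsrah(int16_t V[4], unsigned Amt) { // HexagonISD::VSRAH-like, v4i16
      for (int i = 0; i < 4; ++i)
        V[i] = (int16_t)(V[i] >> Amt);
    }
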
- setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+SDValue
+HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
- setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+ unsigned Size = VT.getSizeInBits();
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // A vector larger than 64 bits cannot be represented in Hexagon.
+ // Expand will split the vector.
+ if (Size > 64)
+ return SDValue();
- // Turn FP extload into load/fextend.
- for (MVT VT : MVT::fp_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- // Hexagon has a i1 sign extending load.
- for (MVT VT : MVT::integer_valuetypes())
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned NElts = BVN->getNumOperands();
+
+ // Try to generate a SPLAT instruction.
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
+ (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
+ (32 - SplatBitSize));
+ return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32));
+ }
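+  // Worked example of the sign extension above: a v4i8 splat of 0xFF has
+  // SplatBitSize == 8 and SplatBits == 0xFF, so
+  //   (int32_t)(0xFF << 24) >> 24 == -1,
+  // and createSplat receives the value as a sign-extended i32 constant.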
- // Custom legalize GlobalAddress nodes into CONST32.
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- // Truncate action?
- setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+ // Try to generate COMBINE to build v2i32 vectors.
+ if (VT.getSimpleVT() == MVT::v2i32) {
+ SDValue V0 = BVN->getOperand(0);
+ SDValue V1 = BVN->getOperand(1);
+
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = DAG.getConstant(0, dl, MVT::i32);
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = DAG.getConstant(0, dl, MVT::i32);
+
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
+    // If either element isn't a constant, it is in a register:
+ // generate a COMBINE Register Register instruction.
+ if (!C0 || !C1)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+
+    // If one of the operands is an 8-bit integer constant, generate
+ // a COMBINE Immediate Immediate instruction.
+ if (isInt<8>(C0->getSExtValue()) ||
+ isInt<8>(C1->getSExtValue()))
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+ }
- // Hexagon doesn't have sext_inreg, replace them with shl/sra.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ // Try to generate a S2_packhl to build v2i16 vectors.
+ if (VT.getSimpleVT() == MVT::v2i16) {
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
+ // If the element isn't a constant, it is in a register:
+      // generate an S2_packhl instruction.
+ if (!Cst) {
+ SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
+ BVN->getOperand(1), BVN->getOperand(0));
+
+ return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ pack);
+ }
+ }
+ }
- // Hexagon has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ // In the general case, generate a CONST32 or a CONST64 for constant vectors,
+ // and insert_vector_elt for all the other cases.
+ uint64_t Res = 0;
+ unsigned EltSize = Size / NElts;
+ SDValue ConstVal;
+ uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
+ bool HasNonConstantElements = false;
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+    // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+    // Hexagon's combine, const64, etc. are big-endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (Operand.getOpcode() == ISD::UNDEF)
+ continue;
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ int64_t Val = 0;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
+ Val = Cst->getSExtValue();
+ else
+ HasNonConstantElements = true;
- // Lower SELECT_CC to SETCC and SELECT.
- setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ Val &= Mask;
+ Res = (Res << EltSize) | Val;
+ }
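+  // E.g. (BUILD_VECTOR 1, 2, 3, 4):v4i16 packs to Res == 0x0004000300020001:
+  // element 0 lands in the least significant halfword, matching the
+  // big-endian packing order of Hexagon's combine/const64 noted above.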
- if (Subtarget.hasV5TOps()) {
+ if (Size == 64)
+ ConstVal = DAG.getConstant(Res, dl, MVT::i64);
+ else
+ ConstVal = DAG.getConstant(Res, dl, MVT::i32);
+
+  // When there are non-constant operands, add them with INSERT_VECTOR_ELT to
+  // ConstVal, the constant part of the vector.
+ if (HasNonConstantElements) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+      // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+      // Hexagon is big-endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (isa<ConstantSDNode>(Operand))
+ // This operand is already in ConstVal.
+ continue;
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
- // We need to make the operation type of SELECT node to be Custom,
- // such that we don't go into the infinite loop of
- // select -> setcc -> select_cc -> select loop.
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Custom);
+ SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ }
+ }
- } else {
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
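+// Note on the INSERTRP operands above: the last operand packs the insert
+// descriptor as a single i64, (Width << 32) | BitOffset, so the high word
+// selects how many bits to insert and the low word selects where. The
+// same encoding is reused by LowerCONCAT_VECTORS and LowerINSERT_VECTOR
+// below.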
- // Hexagon has no select or setcc: expand to SELECT_CC.
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+SDValue
+HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NElts = Op.getNumOperands();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+
+ if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+      // We are trying to concat two v2i16 into a single v4i16.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
}
- if (EmitJumpTables) {
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
- } else {
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+      // We are trying to concat two v4i8 into a single v8i8.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
}
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(5);
-
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = Op.getOperand(OpIdx);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-
- // In V4, we have double word add/sub with carry. The problem with
- // modelling this instruction is that it produces 2 results - Rdd and Px.
- // To model update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we dont't
- // have these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTR, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FPOW, MVT::f64, Expand);
- setOperationAction(ISD::FPOW, MVT::f32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
- if (Subtarget.isSubtargetV2()) {
- setExceptionPointerRegister(Hexagon::R20);
- setExceptionSelectorRegister(Hexagon::R21);
- } else {
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
}
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
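+// Concat sketch: (concat_vectors a:v2i16, b:v2i16) takes the fast path
+// above and lowers to (bitcast (COMBINE b, a)):v4i16; other concats fall
+// through to the loop, inserting one input subvector at a time via
+// INSERTRP with the (Width << 32) | BitOffset descriptor.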
- // Use the default implementation.
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+SDValue
+HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Idx = Op.getOperand(1);
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ // Constant element number.
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t X = CI->getZExtValue();
+ SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Width, Offset};
+
+ ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width);
+ assert(CW && "Non constant width in LowerEXTRACT_VECTOR");
+
+ SDValue N;
+ MVT SVT = VecVT.getSimpleVT();
+ uint64_t W = CW->getZExtValue();
+
+ if (W == 32) {
+ // Translate this node into EXTRACT_SUBREG.
+      unsigned Subreg = 0;
+
+ if (X == 0)
+ Subreg = Hexagon::subreg_loreg;
+ else if (SVT == MVT::v2i32 && X == 1)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v4i16 && X == 2)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v8i8 && X == 4)
+ Subreg = Hexagon::subreg_hireg;
+ else
+ llvm_unreachable("Bad offset");
+ N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec);
+
+ } else if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
- setMinFunctionAlignment(2);
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
- // Needed for DYNAMIC_STACKALLOC expansion.
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
- setSchedulingPreference(Sched::VLIW);
-}
+ const SDValue Ops[] = {Vec, Combined};
-const char*
-HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
- case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
- case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
- case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
- case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
- case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
- case HexagonISD::BRICC: return "HexagonISD::BRICC";
- case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
- case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
- case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
- case HexagonISD::Hi: return "HexagonISD::Hi";
- case HexagonISD::Lo: return "HexagonISD::Lo";
- case HexagonISD::FTOI: return "HexagonISD::FTOI";
- case HexagonISD::ITOF: return "HexagonISD::ITOF";
- case HexagonISD::CALL: return "HexagonISD::CALL";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
}
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
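+// Extract sketch: (extract_vector_elt t0:v4i16, 1) has EltSize == 16, so
+// the constant path emits (EXTRACTU t0, Width=16, Offset=16), i.e. take
+// 16 bits starting at bit 16; with a variable index the same (width,
+// offset) pair is packed into one i64 operand for EXTRACTURP.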
-bool
-HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
- EVT MTy1 = EVT::getEVT(Ty1);
- EVT MTy2 = EVT::getEVT(Ty2);
- if (!MTy1.isSimple() || !MTy2.isSimple()) {
- return false;
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
- return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
-}
-bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isSimple() || !VT2.isSimple()) {
- return false;
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
}
- return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
bool
@@ -1532,7 +2261,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getRegister(Hexagon::R30, getPointerTy()),
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
@@ -1545,43 +2274,54 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Should not custom lower this!");
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- // Frame & Return address. Currently unimplemented.
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Hexagon.");
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
-
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::SELECT: return Op;
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
-
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default:
+#ifndef NDEBUG
+ Op.getNode()->dumpr(&DAG);
+ if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
+ errs() << "Check for a non-legal type in this operation\n";
+#endif
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
}
}
-
-
-//===----------------------------------------------------------------------===//
-// Hexagon Scheduler Hooks
-//===----------------------------------------------------------------------===//
MachineBasicBlock *
HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB)
-const {
+ const {
switch (MI->getOpcode()) {
- case Hexagon::ADJDYNALLOC: {
+ case Hexagon::ALLOCA: {
MachineFunction *MF = BB->getParent();
- HexagonMachineFunctionInfo *FuncInfo =
- MF->getInfo<HexagonMachineFunctionInfo>();
+ auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
FuncInfo->addAllocaAdjustInst(MI);
return BB;
}
@@ -1593,10 +2333,10 @@ const {
// Inline Assembly Support
//===----------------------------------------------------------------------===//
-std::pair<unsigned, const TargetRegisterClass*>
-HexagonTargetLowering::getRegForInlineAsmConstraint(const
- std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+HexagonTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
@@ -1617,14 +2357,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return TM.getSubtarget<HexagonSubtarget>().hasV5TOps();
+ return Subtarget.hasV5TOps();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -1632,14 +2372,12 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
  // Allows a sign-extended 11-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
return false;
- }
// No global is ever allowed as a base.
- if (AM.BaseGV) {
+ if (AM.BaseGV)
return false;
- }
int Scale = AM.Scale;
if (Scale < 0) Scale = -Scale;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index d03b1b8..584c2c5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -26,127 +26,145 @@ namespace llvm {
bool isPositiveHalfWord(SDNode *N);
namespace HexagonISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ enum NodeType : unsigned {
+ OP_BEGIN = ISD::BUILTIN_OP_END,
- CONST32,
+ CONST32 = OP_BEGIN,
CONST32_GP, // For marking data present in GP.
- CONST32_Int_Real,
FCONST32,
- SETCC,
- ADJDYNALLOC,
+ ALLOCA,
ARGEXTEND,
- CMPICC, // Compare two GPR operands, set icc.
- CMPFCC, // Compare two FP operands, set fcc.
- BRICC, // Branch to dest on icc condition
- BRFCC, // Branch to dest on fcc condition
- SELECT_ICC, // Select between two values using the current ICC flags.
- SELECT_FCC, // Select between two values using the current FCC flags.
+ PIC_ADD,
+ AT_GOT,
+ AT_PCREL,
- Hi, Lo, // Hi/Lo operations, typically on a global address.
+ CALLv3, // A V3+ call instruction.
+ CALLv3nr, // A V3+ call instruction that doesn't return.
+ CALLR,
- FTOI, // FP to Int within a FP register.
- ITOF, // Int to FP within a FP register.
-
- CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
- BR_JT, // Jump table.
- BARRIER, // Memory barrier
+ BR_JT, // Branch through jump table.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
+
POPCOUNT,
COMBINE,
- WrapperJT,
- WrapperCP,
- WrapperCombineII,
- WrapperCombineRR,
- WrapperCombineRI_V4,
- WrapperCombineIR_V4,
- WrapperPackhl,
- WrapperSplatB,
- WrapperSplatH,
- WrapperShuffEB,
- WrapperShuffEH,
- WrapperShuffOB,
- WrapperShuffOH,
+ PACKHL,
+ VSPLATB,
+ VSPLATH,
+ SHUFFEB,
+ SHUFFEH,
+ SHUFFOB,
+ SHUFFOH,
+ VSXTBH,
+ VSXTBW,
+ VSRAW,
+ VSRAH,
+ VSRLW,
+ VSRLH,
+ VSHLW,
+ VSHLH,
+ VCMPBEQ,
+ VCMPBGT,
+ VCMPBGTU,
+ VCMPHEQ,
+ VCMPHGT,
+ VCMPHGTU,
+ VCMPWEQ,
+ VCMPWGT,
+ VCMPWGTU,
+
+ INSERT,
+ INSERTRP,
+ EXTRACTU,
+ EXTRACTURP,
TC_RETURN,
EH_RETURN,
- DCFETCH
+ DCFETCH,
+
+ OP_END
};
}
+ class HexagonSubtarget;
+
class HexagonTargetLowering : public TargetLowering {
int VarArgsFrameOffset; // Frame offset to start of varargs area.
- bool CanReturnSmallStruct(const Function* CalleeFn,
- unsigned& RetSize) const;
+ bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
+ const;
+ void promoteLdStType(EVT VT, EVT PromotedLdStVT);
+ const HexagonTargetMachine &HTM;
+ const HexagonSubtarget &Subtarget;
public:
- const TargetMachine &TM;
- explicit HexagonTargetLowering(const TargetMachine &targetmachine);
+ explicit HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &ST);
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
- bool
- IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- const
- SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet,
+ bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
-
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SmallVectorImpl<SDValue> &OutVals,
- SDValue Callee) const;
-
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const;
+
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const override;
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
EVT getSetCCResultType(LLVMContext &C, EVT VT) const override {
if (!VT.isVector())
return MVT::i1;
@@ -159,10 +177,20 @@ bool isPositiveHalfWord(SDNode *N);
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
// Intrinsics
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 8373652..36a7e9f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -28,20 +28,12 @@ def TypeXTYPE : IType<8>;
def TypeENDLOOP: IType<31>;
// Maintain list of valid subtargets for each instruction.
-class SubTarget<bits<4> value> {
- bits<4> Value = value;
+class SubTarget<bits<6> value> {
+ bits<6> Value = value;
}
-def HasV2SubT : SubTarget<0xf>;
-def HasV2SubTOnly : SubTarget<0x1>;
-def NoV2SubT : SubTarget<0x0>;
-def HasV3SubT : SubTarget<0xe>;
-def HasV3SubTOnly : SubTarget<0x2>;
-def NoV3SubT : SubTarget<0x1>;
-def HasV4SubT : SubTarget<0xc>;
-def NoV4SubT : SubTarget<0x3>;
-def HasV5SubT : SubTarget<0x8>;
-def NoV5SubT : SubTarget<0x7>;
+def HasAnySubT : SubTarget<0x3f>; // 111111
+def HasV5SubT : SubTarget<0x3e>; // 111110
// Addressing modes for load/store instructions
class AddrModeType<bits<3> value> {
@@ -56,8 +48,8 @@ def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
def PostInc : AddrModeType<6>; // Post increment addressing mode
-class MemAccessSize<bits<3> value> {
- bits<3> Value = value;
+class MemAccessSize<bits<4> value> {
+ bits<4> Value = value;
}
 def NoMemAccess      : MemAccessSize<0>; // Not a memory access instruction.
@@ -84,7 +76,7 @@ class OpcodeHexagon {
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr, InstrItinClass itin, IType type>
- : Instruction, OpcodeHexagon {
+ : Instruction {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -92,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -157,11 +149,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<2> opExtentAlign = 0;
let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
- // If an instruction is valid on a subtarget (v2-v5), set the corresponding
- // bit from validSubTargets. v2 is the least significant bit.
+ // If an instruction is valid on a subtarget, set the corresponding
+ // bit from validSubTargets.
// By default, instruction is valid on all subtargets.
- SubTarget validSubTargets = HasV2SubT;
- let TSFlags{37-34} = validSubTargets.Value;
+ SubTarget validSubTargets = HasAnySubT;
+ let TSFlags{39-34} = validSubTargets.Value;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
@@ -169,7 +161,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{45-43} = accessSize.Value;
+ let TSFlags{46-43} = accessSize.Value;
bits<1> isTaken = 0;
let TSFlags {47} = isTaken; // Branch prediction.
@@ -192,7 +184,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
- let isCodeGenOnly = 1;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
@@ -206,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
let mayLoad = 1 in
class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -226,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1 in
class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -234,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -243,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -256,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
+ OpcodeHexagon;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -270,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
@@ -283,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -299,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -313,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Definition of the instruction class NOT CHANGED.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr="">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>,
+ OpcodeHexagon;
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
@@ -366,7 +364,6 @@ class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
: ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
//
// ALU64 patterns.
//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 5fec80b..7f7b2c9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,10 +17,88 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : IType<9>;
-def TypeNV : IType<10>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
+def TypeAG_VX : IType<28>;
+def TypeAG_VM : IType<29>;
+def TypePREFIX : IType<30>;
+
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+  bits<13>   ISubHi = 0;   // High sub-insn (Inst{28-16})
+  bits<13>   ISubLo = 0;   // Low sub-insn  (Inst{12-0})
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
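+// Encoding sketch: a duplex packs two 13-bit sub-instructions into one
+// 32-bit word. Bits 15-14 are forced to zero, the 4-bit ICLASS is split
+// across Inst{31-29} and Inst{13}, and ISubHi/ISubLo occupy Inst{28-16}
+// and Inst{12-0} respectively.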
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // uses slot 0,1
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+  let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
//----------------------------------------------------------------------------//
// Instruction Classes Definitions
@@ -31,7 +109,7 @@ def TypePREFIX : IType<30>;
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
@@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>;
+ TypePREFIX>, OpcodeHexagon;
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 5d96259..49b4517 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
- : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
-
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(), Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -117,68 +115,172 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
+// Find the hardware loop instruction used to set up the specified loop.
+// On Hexagon, we have two instructions used to set up the hardware loop
+// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+// to indicate the end of a loop.
+static MachineInstr *
+findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+ int LOOPi;
+ int LOOPr;
+ if (EndLoopOp == Hexagon::ENDLOOP0) {
+ LOOPi = Hexagon::J2_loop0i;
+ LOOPr = Hexagon::J2_loop0r;
+  } else { // EndLoopOp == Hexagon::ENDLOOP1
+ LOOPi = Hexagon::J2_loop1i;
+ LOOPr = Hexagon::J2_loop1r;
+ }
-unsigned
-HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const{
-
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
-
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- int regPos = 0;
- // Check if ReverseBranchCondition has asked to reverse this branch
- // If we want to reverse the branch an odd number of times, we want
- // JMP_f.
- if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- BccOpc = Hexagon::J2_jumpf;
- regPos = 1;
+ // The loop set-up instruction will be in a predecessor block
+ for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
+ PE = BB->pred_end(); PB != PE; ++PB) {
+    // If this block has already been visited, skip it.
+ if (!Visited.insert(*PB).second)
+ continue;
+ if (*PB == BB)
+ continue;
+ for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
+ E = (*PB)->instr_rend(); I != E; ++I) {
+ int Opc = I->getOpcode();
+ if (Opc == LOOPi || Opc == LOOPr)
+ return &*I;
+ // We've reached a different loop, which means the loop0 has been removed.
+ if (Opc == EndLoopOp)
+      return nullptr;
}
+ // Check the predecessors for the LOOP instruction.
+ MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
+ if (loop)
+ return loop;
+ }
+  return nullptr;
+}
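+// Usage sketch: InsertBranch below calls findLoopInstr(TBB, EndLoopOp, ...)
+// whenever it is asked to insert an ENDLOOPn terminator, then retargets
+// operand 0 of the matching LOOPn instruction to the new loop-top block.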
- if (!FBB) {
- if (Cond.empty()) {
- // Due to a bug in TailMerging/CFG Optimization, we need to add a
- // special case handling of a predicated jump followed by an
- // unconditional jump. If not, Tail Merging and CFG Optimization go
- // into an infinite loop.
- MachineBasicBlock *NewTBB, *NewFBB;
- SmallVector<MachineOperand, 4> Cond;
- MachineInstr *Term = MBB.getFirstTerminator();
- if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
- false)) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(&MBB));
- if (NewTBB == NextBB) {
- ReverseBranchCondition(Cond);
- RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, nullptr, Cond, DL);
- }
+unsigned HexagonInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+
+ Opcode_t BOpc = Hexagon::J2_jump;
+ Opcode_t BccOpc = Hexagon::J2_jumpt;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // J2_jumpf.
+ if (!Cond.empty() && Cond[0].isImm())
+ BccOpc = Cond[0].getImm();
+
+ if (!FBB) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && isPredicated(Term) &&
+ !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+ MachineBasicBlock *NextBB =
+ std::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
}
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else {
- BuildMI(&MBB, DL,
- get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
}
- return 1;
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+      // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else if (isNewValueJump(Cond[0].getImm())) {
+ assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+ // New value jump
+ // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+ // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+ unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+    DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber());
+ if (Cond[2].isReg()) {
+ unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+ } else if(Cond[2].isImm()) {
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addImm(Cond[2].getImm()).addMBB(TBB);
+ } else
+ llvm_unreachable("Invalid condition for branching");
+ } else {
+ assert((Cond.size() == 2) && "Malformed cond vector");
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
}
+ return 1;
+ }
+ assert((!Cond.empty()) &&
+ "Cond. cannot be empty when multiple branchings are required");
+ assert((!isNewValueJump(Cond[0].getImm())) &&
+ "NV-jump cannot be inserted with another branch");
+ // Special case for hardware loops. The condition is a basic block.
+ if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else {
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
-
- return 2;
+ return 2;
}
+/// This function can analyze one- and two-way branching only and should
+/// (mostly) be called from the target-independent side.
+/// The first entry in Cond is always the opcode of the branching instruction,
+/// except when the Cond vector is supposed to be empty, e.g., when
+/// AnalyzeBranch fails or the BB contains only an unconditional jump.
+/// Subsequent entries depend upon the opcode, e.g. Jump_c p will have
+/// Cond[0] = Jump_c
+/// Cond[1] = p
+/// HW-loop ENDLOOP:
+/// Cond[0] = ENDLOOP
+/// Cond[1] = MBB
+/// New value jump:
+/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
+/// Cond[1] = R
+/// Cond[2] = Imm
+/// @note The related function \fn findInstrPredicate fills in the Cond
+/// vector when a predicated instruction is passed to it.
+/// We follow the same protocol in that case too.
+///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
TBB = nullptr;
FBB = nullptr;
+ Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -200,6 +302,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
do {
--I;
if (I->isEHLabel())
+ // Don't analyze EH branches.
return true;
} while (I != MBB.instr_begin());
@@ -211,9 +314,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
- // Delete the JMP if it's equivalent to a fall-through.
- if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
+
+ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+ I->getOperand(0).isMBB();
+ // Delete the J2_jump if it's equivalent to a fall-through.
+ if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
I->eraseFromParent();
@@ -243,9 +348,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
} while(I);
int LastOpcode = LastInst->getOpcode();
+ int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+ // If the branch target is not a basic block, it could be a tail call.
+ // (It is a tail call if the target is a function.)
+ if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+ return true;
+ if (SecLastOpcode == Hexagon::J2_jump &&
+ !SecondLastInst->getOperand(0).isMBB())
+ return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
- bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
+ bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
// If there is only one terminator instruction, process it.
if (LastInst && !SecondLastInst) {
@@ -253,32 +366,50 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst->getOperand(0).getMBB();
return false;
}
- if (LastOpcode == Hexagon::ENDLOOP0) {
+ if (isEndLoopN(LastOpcode)) {
TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
if (LastOpcodeHasJMP_c) {
TBB = LastInst->getOperand(1).getMBB();
- if (LastOpcodeHasNot) {
- Cond.push_back(MachineOperand::CreateImm(0));
- }
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
return false;
}
+ // Only supporting rr/ri versions of new-value jumps.
+ if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber()
+ << " with one jump\n";);
// Otherwise, don't know what this is.
return true;
}
- int SecLastOpcode = SecondLastInst->getOpcode();
-
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
- bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+ bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
TBB = SecondLastInst->getOperand(1).getMBB();
- if (SecLastOpcodeHasNot)
- Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Only supporting rr/ri versions of new-value jumps.
+ if (SecLastOpcodeHasNVJump &&
+ (SecondLastInst->getNumExplicitOperands() == 3) &&
+ (LastOpcode == Hexagon::J2_jump)) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -293,48 +424,40 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
- // If the block ends with an ENDLOOP, and JMP, handle it.
- if (SecLastOpcode == Hexagon::ENDLOOP0 &&
- LastOpcode == Hexagon::J2_jump) {
+ // If the block ends with an ENDLOOP followed by a J2_jump, handle it.
+ if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+ DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber()
+ << " with two jumps";);
// Otherwise, can't handle this.
return true;
}
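
For readers of this hunk, here is a minimal, hypothetical sketch (not part of the patch) of how a client could decode the Cond protocol documented above AnalyzeBranch. It assumes only the HexagonInstrInfo queries declared in the header changes below; the helper name and include path are illustrative.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineOperand.h"
    using namespace llvm;

    // Hypothetical: classify a Cond vector filled in by AnalyzeBranch.
    static const char *classifyCond(const HexagonInstrInfo &TII,
                                    const SmallVectorImpl<MachineOperand> &Cond) {
      if (Cond.empty())
        return "unconditional branch, or analysis failed";
      unsigned Opc = Cond[0].getImm();       // entry 0: the branch opcode
      if (TII.isEndLoopN(Opc))
        return "HW loop: Cond[1] is the loop-setup MBB";
      if (TII.isNewValueJump(Opc))
        return "new-value jump: Cond[1]/Cond[2] are the compared operands";
      return "predicated jump: Cond[1] is the predicate register";
    }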
-
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
- int BccOpcNot = Hexagon::J2_jumpf;
-
+ DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
- I->getOpcode() != BccOpcNot)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Only remove branches from the end of the MBB.
+ if (!I->isBranch())
+ return Count;
+ if (Count && (I->getOpcode() == Hexagon::J2_jump))
+ llvm_unreachable("Malformed basic block: unconditional branch not last");
+ MBB.erase(&MBB.back());
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
}
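
A hedged usage sketch (hypothetical, not from this commit): the RemoveBranch/InsertBranch pairing that branch folding relies on, using only the hooks defined in this file.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Redirect MBB so that it ends in a single unconditional jump to NewDest.
    static void retargetBlock(const HexagonInstrInfo &TII,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock *NewDest, DebugLoc DL) {
      TII.RemoveBranch(MBB);                 // erase all trailing branches
      SmallVector<MachineOperand, 4> NoCond; // empty Cond => plain J2_jump
      TII.InsertBranch(MBB, NewDest, nullptr, NoCond, DL);
    }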
-
/// \brief For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -346,33 +469,39 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set mask and the first source register.
switch (Opc) {
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtup:
- case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
SrcReg = MI->getOperand(1).getReg();
Mask = ~0;
break;
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPbEQrr_sbsb_V4:
- case Hexagon::CMPbEQrr_ubub_V4:
- case Hexagon::CMPbGTUri_V4:
- case Hexagon::CMPbGTUrr_V4:
- case Hexagon::CMPbGTrr_V4:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFF;
break;
- case Hexagon::CMPhEQri_V4:
- case Hexagon::CMPhEQrr_shl_V4:
- case Hexagon::CMPhEQrr_xor_V4:
- case Hexagon::CMPhGTUri_V4:
- case Hexagon::CMPhGTUrr_V4:
- case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
SrcReg = MI->getOperand(1).getReg();
Mask = 0xFFFF;
break;
@@ -380,30 +509,36 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
// Set the value/second source register.
switch (Opc) {
- case Hexagon::C2_cmpeqp:
case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtup:
case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgt:
- case Hexagon::CMPbEQrr_sbsb_V4:
- case Hexagon::CMPbEQrr_ubub_V4:
- case Hexagon::CMPbGTUrr_V4:
- case Hexagon::CMPbGTrr_V4:
- case Hexagon::CMPhEQrr_shl_V4:
- case Hexagon::CMPhEQrr_xor_V4:
- case Hexagon::CMPhGTUrr_V4:
- case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
SrcReg2 = MI->getOperand(2).getReg();
return true;
case Hexagon::C2_cmpeqi:
case Hexagon::C2_cmpgtui:
case Hexagon::C2_cmpgti:
- case Hexagon::CMPbEQri_V4:
- case Hexagon::CMPbGTUri_V4:
- case Hexagon::CMPhEQri_V4:
- case Hexagon::CMPhGTUri_V4:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
SrcReg2 = 0;
Value = MI->getOperand(2).getImm();
return true;
@@ -553,12 +688,101 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
llvm_unreachable("Unimplemented");
}
+bool
+HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ const HexagonRegisterInfo &TRI = getRegisterInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ case Hexagon::ALIGNA:
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
+ .addReg(TRI.getFrameRegister())
+ .addImm(-MI->getOperand(1).getImm());
+ MBB.erase(MI);
+ return true;
+ case Hexagon::TFR_PdTrue: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdFalse: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::VMULW: {
+ // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ return true;
+ }
+ case Hexagon::VMULW_ACC: {
+ // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src3Reg = MI->getOperand(3).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi).addReg(Src3SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo).addReg(Src3SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ MRI.clearKillFlags(Src3SubHi);
+ MRI.clearKillFlags(Src3SubLo);
+ return true;
+ }
+ case Hexagon::TCRETURNi:
+ MI->setDesc(get(Hexagon::J2_jump));
+ return true;
+ case Hexagon::TCRETURNr:
+ MI->setDesc(get(Hexagon::J2_jumpr));
+ return true;
+ }
+ return false;
+}
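
To make the VMULW expansion above concrete, this is the scalar arithmetic it implements, written as a free-standing sketch in plain C++ (no LLVM types; the function name is illustrative).

    #include <cstdint>

    // A 64-bit "vector" multiply is two independent 32-bit multiplies on the
    // high and low register halves, exactly like the two M2_mpyi above.
    static uint64_t vmulw(uint64_t Rss, uint64_t Rtt) {
      uint32_t Hi = uint32_t(Rss >> 32) * uint32_t(Rtt >> 32); // hi * hi
      uint32_t Lo = uint32_t(Rss) * uint32_t(Rtt);             // lo * lo
      return (uint64_t(Hi) << 32) | Lo;
    }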
MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
// Hexagon_TODO: Implement.
return nullptr;
}
@@ -582,10 +806,6 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
const MCInstrDesc &MID = MI->getDesc();
const uint64_t F = MID.TSFlags;
if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
@@ -635,6 +855,16 @@ bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
return false;
}
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -649,7 +879,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
switch(Opc) {
case Hexagon::A2_tfrsi:
- return isInt<12>(MI->getOperand(1).getImm());
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+ isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -700,7 +930,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return (isUInt<6>(MI->getOperand(1).getImm()) &&
isInt<6>(MI->getOperand(2).getImm()));
- case Hexagon::ADD_ri:
+ case Hexagon::A2_addi:
return isInt<8>(MI->getOperand(2).getImm());
case Hexagon::A2_aslh:
@@ -709,7 +939,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::A2_sxth:
case Hexagon::A2_zxtb:
case Hexagon::A2_zxth:
- return Subtarget.hasV4TOps();
+ return true;
}
return true;
@@ -755,8 +985,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
}
-int HexagonInstrInfo::
-getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
Hexagon::PredSense_true;
@@ -774,14 +1003,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
return !invertPredicate ? Hexagon::C2_ccombinewt :
Hexagon::C2_ccombinewf;
- // Word.
- case Hexagon::STriw_f:
- return !invertPredicate ? Hexagon::S2_pstorerit_io:
- Hexagon::S2_pstorerif_io;
- case Hexagon::STriw_indexed_f:
- return !invertPredicate ? Hexagon::S2_pstorerit_io:
- Hexagon::S2_pstorerif_io;
-
// DEALLOC_RETURN.
case Hexagon::L4_return:
return !invertPredicate ? Hexagon::L4_return_t:
@@ -794,148 +1015,51 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
bool HexagonInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
+ DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+ return false;
+ }
int Opc = MI->getOpcode();
assert (isPredicable(MI) && "Expected predicable instruction");
- bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
- (Cond[0].getImm() == 0));
-
- // This will change MI's opcode to its predicate version.
- // However, its operand list is still the old one, i.e. the
- // non-predicate one.
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
-
- int oper = -1;
- unsigned int GAIdx = 0;
-
- // Indicates whether the current MI has a GlobalAddress operand
- bool hasGAOpnd = false;
- std::vector<MachineOperand> tmpOpnds;
-
- // Indicates whether we need to shift operands to right.
- bool needShift = true;
-
- // The predicate is ALWAYS the FIRST input operand !!!
- if (MI->getNumOperands() == 0) {
- // The non-predicate version of MI does not take any operands,
- // i.e. no outs and no ins. In this condition, the predicate
- // operand will be directly placed at Operands[0]. No operand
- // shift is needed.
- // Example: BARRIER
- needShift = false;
- oper = -1;
- }
- else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
- && MI->getOperand(MI->getNumOperands()-1).isDef()
- && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
- // The non-predicate version of MI does not have any input operands.
- // In this condition, we extend the length of Operands[] by one and
- // copy the original last operand to the newly allocated slot.
- // At this moment, it is just a place holder. Later, we will put
- // predicate operand directly into it. No operand shift is needed.
- // Example: r0=BARRIER (this is a faked insn used here for illustration)
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- needShift = false;
- oper = MI->getNumOperands() - 2;
- }
- else {
- // We need to right shift all input operands by one. Duplicate the
- // last operand into the newly allocated slot.
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- }
-
- if (needShift)
- {
- // Operands[ MI->getNumOperands() - 2 ] has been copied into
- // Operands[ MI->getNumOperands() - 1 ], so we start from
- // Operands[ MI->getNumOperands() - 3 ].
- // oper is a signed int.
- // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
- for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
- {
- MachineOperand &MO = MI->getOperand(oper);
-
- // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7]
- // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
- // /\~
- // /||\~
- // ||
- // Predicate Operand here
- if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
- break;
- }
- if (MO.isReg()) {
- MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
- MO.isImplicit(), MO.isKill(),
- MO.isDead(), MO.isUndef(),
- MO.isDebug());
- }
- else if (MO.isImm()) {
- MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
- }
- else if (MO.isGlobal()) {
- // MI can not have more than one GlobalAddress operand.
- assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
-
- // There is no member function called "ChangeToGlobalAddress" in the
- // MachineOperand class (not like "ChangeToRegister" and
- // "ChangeToImmediate"). So we have to remove them from Operands[] list
- // first, and then add them back after we have inserted the predicate
- // operand. tmpOpnds[] is to remember these operands before we remove
- // them.
- tmpOpnds.push_back(MO);
-
- // Operands[oper] is a GlobalAddress operand;
- // Operands[oper+1] has been copied into Operands[oper+2];
- hasGAOpnd = true;
- GAIdx = oper;
- continue;
- }
- else {
- llvm_unreachable("Unexpected operand type");
- }
- }
+ bool invertJump = predOpcodeHasNot(Cond);
+
+ // We have to predicate MI "in place", i.e. after this function returns,
+ // MI will need to be transformed into a predicated form. To avoid
+ // complicated manipulations with the operands (handling tied operands,
+ // etc.), build a new temporary instruction, then overwrite MI with it.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned PredOpc = getCondOpcode(Opc, invertJump);
+ MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+ unsigned NOp = 0, NumOps = MI->getNumOperands();
+ while (NOp < NumOps) {
+ MachineOperand &Op = MI->getOperand(NOp);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ break;
+ T.addOperand(Op);
+ NOp++;
}
- int regPos = invertJump ? 1 : 0;
- MachineOperand PredMO = Cond[regPos];
+ unsigned PredReg, PredRegPos, PredRegFlags;
+ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+ (void)GotPredReg;
+ assert(GotPredReg);
+ T.addReg(PredReg, PredRegFlags);
+ while (NOp < NumOps)
+ T.addOperand(MI->getOperand(NOp++));
- // [oper] now points to the last explicit Def. Predicate operand must be
- // located at [oper+1]. See diagram above.
- // This assumes that the predicate is always the first operand,
- // i.e. Operands[0+numResults], in the set of inputs
- // It is better to have an assert here to check this. But I don't know how
- // to write this assert because findFirstPredOperandIdx() would return -1
- if (oper < -1) oper = -1;
+ MI->setDesc(get(PredOpc));
+ while (unsigned n = MI->getNumOperands())
+ MI->RemoveOperand(n-1);
+ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+ MI->addOperand(T->getOperand(i));
- MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
- PredMO.isImplicit(), false,
- PredMO.isDead(), PredMO.isUndef(),
- PredMO.isDebug());
+ MachineBasicBlock::instr_iterator TI = &*T;
+ B.erase(TI);
- MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
- RegInfo.clearKillFlags(PredMO.getReg());
-
- if (hasGAOpnd)
- {
- unsigned int i;
-
- // Operands[GAIdx] is the original GlobalAddress operand, which is
- // already copied into tmpOpnds[0].
- // Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
- // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
- // so we start from [GAIdx+2]
- for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
- tmpOpnds.push_back(MI->getOperand(i));
-
- // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
- // It is very important that we always remove from the end of Operands[]
- // MI->getNumOperands() is at least 2 if program goes to here.
- for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
- MI->RemoveOperand(i);
-
- for (i = 0; i < tmpOpnds.size(); ++i)
- MI->addOperand(tmpOpnds[i]);
- }
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+ MRI.clearKillFlags(PredReg);
return true;
}
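
A hypothetical caller of the rewritten PredicateInstruction, in the style of if-conversion; the helper name is invented, the TII hooks are the ones defined above.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    using namespace llvm;

    // Predicate MI with the condition of the branch guarding it, if possible.
    static bool tryPredicate(const HexagonInstrInfo &TII, MachineInstr *MI,
                             const SmallVectorImpl<MachineOperand> &Cond) {
      if (!TII.isPredicable(MI))
        return false;
      // Returns false (without touching MI) for empty Cond and for ENDLOOPs.
      return TII.PredicateInstruction(MI, Cond);
    }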
@@ -1014,12 +1138,10 @@ bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
// Returns true, if a ST insn can be promoted to a new-value store.
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::mayNVStorePos) &
- HexagonII::mayNVStoreMask &
- QRI.Subtarget.hasV4TOps());
+ HexagonII::mayNVStoreMask);
}
bool
@@ -1050,15 +1172,20 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
//
// We indicate that we want to reverse the branch by
-// inserting a 0 at the beginning of the Cond vector.
+// inserting the reversed branching opcode.
//
-bool HexagonInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- Cond.erase(Cond.begin());
- } else {
- Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
- }
+bool HexagonInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return true;
+ assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+ Opcode_t opcode = Cond[0].getImm();
+ assert(get(opcode).isBranch() && "Should be a branching condition.");
+ if (isEndLoopN(opcode))
+ return true;
+ Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
+ Cond[0].setImm(NewOpcode);
return false;
}
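
A small, hypothetical property check implied by the new encoding: since Cond[0] now carries the branch opcode itself, reversing a condition twice must restore the original opcode, and ENDLOOPs are the only irreversible case.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    using namespace llvm;

    static bool reversalRoundTrips(const HexagonInstrInfo &TII,
                                   SmallVectorImpl<MachineOperand> &Cond) {
      if (Cond.empty())
        return false;
      unsigned Orig = Cond[0].getImm();
      if (TII.ReverseBranchCondition(Cond))  // true => cannot reverse (ENDLOOP)
        return false;
      TII.ReverseBranchCondition(Cond);      // reverse back
      return unsigned(Cond[0].getImm()) == Orig;
    }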
@@ -1084,10 +1211,10 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::
-isValidOffset(const int Opcode, const int Offset) const {
+bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
+ bool Extend) const {
// This function checks whether the "Offset" is in the correct range of
- // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
+ // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is
// inserted to calculate the final address. For this reason, the function
// assumes that the "Offset" has correct alignment.
// We used to assert if the offset was not properly aligned, however,
@@ -1095,19 +1222,23 @@ isValidOffset(const int Opcode, const int Offset) const {
// problem, and we need to allow for it. The front end warns of such
// misalignments with respect to load size.
- switch(Opcode) {
+ switch (Opcode) {
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop1i:
+ return isUInt<10>(Offset);
+ }
+
+ if (Extend)
+ return true;
+ switch (Opcode) {
case Hexagon::L2_loadri_io:
- case Hexagon::LDriw_f:
case Hexagon::S2_storeri_io:
- case Hexagon::STriw_f:
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::L2_loadrd_io:
- case Hexagon::LDrid_f:
case Hexagon::S2_storerd_io:
- case Hexagon::STrid_f:
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
@@ -1123,8 +1254,7 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
- case Hexagon::ADD_ri:
- case Hexagon::TFR_FI:
+ case Hexagon::A2_addi:
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
@@ -1158,10 +1288,8 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
- case Hexagon::J2_loop0i:
- return isUInt<10>(Offset);
-
- // INLINEASM is very special.
+ case Hexagon::TFR_FI:
+ case Hexagon::TFR_FIA:
case Hexagon::INLINEASM:
return true;
}
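
As a usage sketch (hypothetical helper name), a lowering pass would consult isValidOffset before folding an offset into a memory opcode, and emit the A2_addi mentioned above when the check fails:

    #include "HexagonInstrInfo.h"            // assumed target-private path
    using namespace llvm;

    // True when Offset cannot be folded into Opcode's addressing mode and an
    // A2_addi must materialize the address first.
    static bool needsAddi(const HexagonInstrInfo &TII, unsigned Opcode,
                          int Offset, bool CanExtend) {
      return !TII.isValidOffset(Opcode, Offset, CanExtend);
    }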
@@ -1324,8 +1452,8 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
case Hexagon::A4_pzxthfnew:
case Hexagon::A4_pzxtht:
case Hexagon::A4_pzxthtnew:
- case Hexagon::ADD_ri_cPt:
- case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::A2_paddit:
+ case Hexagon::A2_paddif:
case Hexagon::C2_ccombinewt:
case Hexagon::C2_ccombinewf:
return true;
@@ -1334,7 +1462,6 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
bool HexagonInstrInfo::
isConditionalLoad (const MachineInstr* MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
default: return false;
@@ -1350,7 +1477,6 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L2_ploadruhf_io:
case Hexagon::L2_ploadrubt_io:
case Hexagon::L2_ploadrubf_io:
- return true;
case Hexagon::L2_ploadrdt_pi:
case Hexagon::L2_ploadrdf_pi:
case Hexagon::L2_ploadrit_pi:
@@ -1363,7 +1489,6 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L2_ploadruhf_pi:
case Hexagon::L2_ploadrubt_pi:
case Hexagon::L2_ploadrubf_pi:
- return QRI.Subtarget.hasV4TOps();
case Hexagon::L4_ploadrdt_rr:
case Hexagon::L4_ploadrdf_rr:
case Hexagon::L4_ploadrbt_rr:
@@ -1376,7 +1501,7 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::L4_ploadruhf_rr:
case Hexagon::L4_ploadrit_rr:
case Hexagon::L4_ploadrif_rr:
- return QRI.Subtarget.hasV4TOps();
+ return true;
}
}
@@ -1416,7 +1541,6 @@ isConditionalLoad (const MachineInstr* MI) const {
// is not valid for new-value stores.
bool HexagonInstrInfo::
isConditionalStore (const MachineInstr* MI) const {
- const HexagonRegisterInfo& QRI = getRegisterInfo();
switch (MI->getOpcode())
{
default: return false;
@@ -1450,7 +1574,6 @@ isConditionalStore (const MachineInstr* MI) const {
case Hexagon::S4_pstorerif_rr:
case Hexagon::S2_pstorerit_pi:
case Hexagon::S2_pstorerif_pi:
- return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
case Hexagon::S4_pstorerdt_abs:
@@ -1461,7 +1584,7 @@ isConditionalStore (const MachineInstr* MI) const {
case Hexagon::S4_pstorerhf_abs:
case Hexagon::S4_pstorerit_abs:
case Hexagon::S4_pstorerif_abs:
- return QRI.Subtarget.hasV4TOps();
+ return true;
// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
// from the "Conditional Store" list. Because a predicated new value store
@@ -1500,13 +1623,12 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
return false;
}
-bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
- return (getAddrMode(MI) == HexagonII::PostInc);
+bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
+ return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
}
-bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ return (getAddrMode(MI) == HexagonII::PostInc);
}
// Returns true if any one of the operands is a dot new
@@ -1548,22 +1670,29 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
- // store new value byte
- case Hexagon::STrib_shl_V4:
- return Hexagon::STrib_shl_nv_V4;
+ case Hexagon::S4_storerb_ur:
+ return Hexagon::S4_storerbnew_ur;
- case Hexagon::STrih_shl_V4:
- return Hexagon::STrih_shl_nv_V4;
+ case Hexagon::S4_storerh_ur:
+ return Hexagon::S4_storerhnew_ur;
- case Hexagon::STriw_f:
- return Hexagon::S2_storerinew_io;
+ case Hexagon::S4_storeri_ur:
+ return Hexagon::S4_storerinew_ur;
- case Hexagon::STriw_indexed_f:
- return Hexagon::S4_storerinew_rr;
+ case Hexagon::S2_storerb_pci:
+ return Hexagon::S2_storerb_pci;
- case Hexagon::STriw_shl_V4:
- return Hexagon::STriw_shl_nv_V4;
+ case Hexagon::S2_storeri_pci:
+ return Hexagon::S2_storeri_pci;
+ case Hexagon::S2_storerh_pci:
+ return Hexagon::S2_storerh_pci;
+
+ case Hexagon::S2_storerd_pci:
+ return Hexagon::S2_storerd_pci;
+
+ case Hexagon::S2_storerf_pci:
+ return Hexagon::S2_storerf_pci;
}
return 0;
}
@@ -1652,19 +1781,14 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
-bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
-
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
+bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
if (isExtended) // Instruction must be extended.
return true;
- unsigned isExtendable = (F >> HexagonII::ExtendablePos)
- & HexagonII::ExtendableMask;
+ unsigned isExtendable =
+ (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
if (!isExtendable)
return false;
@@ -1685,7 +1809,8 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol())
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
+ MO.isJTI() || MO.isCPI())
return true;
// If the extendable operand is not 'Immediate' type, the instruction should
@@ -1699,6 +1824,27 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
return (ImmValue < MinValue || ImmValue > MaxValue);
}
+// Return the number of bytes required to encode the instruction.
+// Hexagon instructions are fixed length, 4 bytes, unless they
+// use a constant extender, which requires another 4 bytes.
+// For debug instructions and prolog labels, return 0.
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
+
+ if (MI->isDebugValue() || MI->isPosition())
+ return 0;
+
+ unsigned Size = MI->getDesc().getSize();
+ if (!Size)
+ // Assume the default insn size in case it cannot be determined
+ // for whatever reason.
+ Size = HEXAGON_INSTR_SIZE;
+
+ if (isConstExtended(MI) || isExtended(MI))
+ Size += HEXAGON_INSTR_SIZE;
+
+ return Size;
+}
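
A hedged example of the size rule stated above (4 bytes per instruction, plus 4 for a constant extender, 0 for debug values and labels); the helper is hypothetical.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // Encoded size of a basic block in bytes, as a simple sum of getSize().
    static unsigned blockSize(const HexagonInstrInfo &TII,
                              const MachineBasicBlock &MBB) {
      unsigned Bytes = 0;
      for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I)
        Bytes += TII.getSize(&*I);           // 0 for DBG_VALUEs and labels
      return Bytes;
    }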
+
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.
@@ -1730,10 +1876,6 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
// Returns true if a particular operand is extendable for an instruction.
bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
unsigned short OperandNum) const {
- // Constant extenders are allowed only for V4 and above.
- if (!Subtarget.hasV4TOps())
- return false;
-
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
@@ -1841,8 +1983,36 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
(Opcode == Hexagon::J2_jumpf);
}
-bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
- return (Opcode == Hexagon::J2_jumpf) ||
- (Opcode == Hexagon::J2_jumpfnewpt) ||
- (Opcode == Hexagon::J2_jumpfnew);
+bool HexagonInstrInfo::predOpcodeHasNot(
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty() || !isPredicated(Cond[0].getImm()))
+ return false;
+ return !isPredicatedTrue(Cond[0].getImm());
+}
+
+bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
+ return (Opcode == Hexagon::ENDLOOP0 ||
+ Opcode == Hexagon::ENDLOOP1);
}
+
+bool HexagonInstrInfo::getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &PredReg, unsigned &PredRegPos,
+ unsigned &PredRegFlags) const {
+ if (Cond.empty())
+ return false;
+ assert(Cond.size() == 2);
+ if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
+ DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ return false;
+ }
+ PredReg = Cond[1].getReg();
+ PredRegPos = 1;
+ // See IfConversion.cpp for why we add RegState::Implicit | RegState::Undef.
+ PredRegFlags = 0;
+ if (Cond[1].isImplicit())
+ PredRegFlags = RegState::Implicit;
+ if (Cond[1].isUndef())
+ PredRegFlags |= RegState::Undef;
+ return true;
+}
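
For completeness, a hypothetical caller of getPredReg, mirroring how PredicateInstruction consumes the predicate register and its flags. Note that getPredReg asserts Cond.size() == 2, so new-value jumps (three entries) must be filtered out by the caller.

    #include "HexagonInstrInfo.h"            // assumed target-private path
    using namespace llvm;

    static unsigned branchPredReg(const HexagonInstrInfo &TII,
                                  const SmallVectorImpl<MachineOperand> &Cond) {
      unsigned PredReg = 0, PredRegPos = 0, PredRegFlags = 0;
      if (Cond.size() != 2 ||
          !TII.getPredReg(Cond, PredReg, PredRegPos, PredRegFlags))
        return 0;                            // ENDLOOP: no predicate register
      return PredReg;
    }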
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 6acfbec..0239cab 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,3 +1,4 @@
+
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
@@ -26,14 +27,15 @@
namespace llvm {
struct EVT;
-
+class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget;
- typedef unsigned Opcode_t;
public:
+ typedef unsigned Opcode_t;
+
explicit HexagonInstrInfo(HexagonSubtarget &ST);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
@@ -102,15 +104,21 @@ public:
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// that remain after register allocation. Many pseudo instructions are
+ /// created to help register allocation. This is the place to convert them
+ /// into real instructions. The target can edit MI in place, or it can insert
+ /// new instructions and erase MI. The function should return true if
+ /// anything was changed.
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override {
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override {
return nullptr;
}
@@ -154,7 +162,7 @@ public:
bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
- bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
bool isValidAutoIncImm(const EVT VT, const int Offset) const;
bool isMemOp(const MachineInstr *MI) const;
bool isSpillPredRegOp(const MachineInstr *MI) const;
@@ -178,6 +186,7 @@ public:
bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const;
bool isNewValue(const MachineInstr* MI) const;
+ bool isNewValue(Opcode_t Opcode) const;
bool isDotNewInst(const MachineInstr* MI) const;
int GetDotOldOp(const int opc) const;
int GetDotNewOp(const MachineInstr* MI) const;
@@ -193,11 +202,13 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const;
bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJump(Opcode_t Opcode) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
void immediateExtend(MachineInstr *MI) const;
- bool isConstExtended(MachineInstr *MI) const;
+ bool isConstExtended(const MachineInstr *MI) const;
+ unsigned getSize(const MachineInstr *MI) const;
int getDotNewPredJumpOp(MachineInstr *MI,
const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;
@@ -209,10 +220,12 @@ public:
bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
- bool PredOpcodeHasNot(Opcode_t Opcode) const;
-
-private:
- int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+ bool predOpcodeHasNot(const SmallVectorImpl<MachineOperand> &Cond) const;
+ bool isEndLoopN(Opcode_t Opcode) const;
+ bool getPredReg(const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &PredReg, unsigned &PredRegPos,
+ unsigned &PredRegFlags) const;
+ int getCondOpcode(int Opc, bool sense) const;
};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 7ce65f3..3b32c10 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -29,8 +29,36 @@ def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
// 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs),
(EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
+def HiReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
-//===----------------------------------------------------------------------===//
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM1Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-2.
+def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-2 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM2Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-3.
+def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-3 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM3Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ uint32_t imm = N->getZExtValue();
+ return XformUToUM1Imm(imm, SDLoc(N));
+}]>;
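
In plain arithmetic, the four SDNodeXForms above compute the following (a sketch; the real XformSToSM*Imm/XformUToUM1Imm helpers additionally wrap the result in a target-constant node):

    #include <cstdint>

    // Scalar equivalents of the DEC*_CONST immediate transforms.
    static int32_t  decConstSigned(int32_t C)    { return C - 1; }
    static int32_t  dec2ConstSigned(int32_t C)   { return C - 2; }
    static int32_t  dec3ConstSigned(int32_t C)   { return C - 3; }
    static uint32_t decConstUnsigned(uint32_t C) { return C - 1; }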
//===----------------------------------------------------------------------===//
// Compare
@@ -76,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
+// Add.
+
+def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
@@ -140,12 +174,10 @@ class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
}
-let isCodeGenOnly = 0 in {
def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
-}
class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
bits<3> MinOp, bit OpsRev, bit IsComm>
@@ -153,12 +185,24 @@ class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
}
-let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123,
- isCodeGenOnly = 0 in {
+def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>;
+def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>;
+
+let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in {
+ def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>;
def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
+ def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>;
+ def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>;
def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
+ def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>;
}
+let Itinerary = ALU32_3op_tc_2_SLOT0123 in
+def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>;
+
+def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>;
+def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>;
+
multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit OpsRev> {
def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
@@ -174,13 +218,11 @@ multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
}
-let isCodeGenOnly = 0 in {
defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-}
// Pats for instruction selection.
class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
@@ -194,8 +236,7 @@ def: BinOp32_pat<sub, A2_sub, i32>;
def: BinOp32_pat<xor, A2_xor, i32>;
// A few special cases producing register pairs:
-let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
- isCodeGenOnly = 0 in {
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
let isPredicable = 1 in
@@ -208,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0,
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -229,7 +273,7 @@ class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
let Inst{1-0} = Pd;
}
-let Itinerary = ALU32_3op_tc_2early_SLOT0123, isCodeGenOnly = 0 in {
+let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
@@ -252,8 +296,7 @@ def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
-let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1,
- isCodeGenOnly = 0 in
+let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
@@ -283,11 +326,11 @@ def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
- AddedComplexity = 75, isCodeGenOnly = 0 in
+ AddedComplexity = 75 in
def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
[(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -303,7 +346,7 @@ def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
//===----------------------------------------------------------------------===//
// Template class for predicated ADD of a reg and an Immediate value.
//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
+let hasNewValue = 1, hasSideEffects = 0 in
class T_Addri_Pred <bit PredNot, bit PredNew>
: ALU32_ri <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
@@ -329,13 +372,11 @@ class T_Addri_Pred <bit PredNot, bit PredNew>
//===----------------------------------------------------------------------===//
// A2_addi: Add a signed immediate to a register.
//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
-class T_Addri <Operand immOp, list<dag> pattern = [] >
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_Addri <Operand immOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, immOp:$s16),
- "$Rd = add($Rs, #$s16)", pattern,
- //[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), (s16ExtPred:$s16)))],
- "", ALU32_ADDI_tc_1_SLOT0123> {
+ "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> {
bits<5> Rd;
bits<5> Rs;
bits<16> s16;
@@ -353,31 +394,29 @@ class T_Addri <Operand immOp, list<dag> pattern = [] >
//===----------------------------------------------------------------------===//
multiclass Addri_Pred<string mnemonic, bit PredNot> {
let isPredicatedFalse = PredNot in {
- def _c#NAME : T_Addri_Pred<PredNot, 0>;
+ def NAME : T_Addri_Pred<PredNot, 0>;
// Predicate new
- def _cdn#NAME : T_Addri_Pred<PredNot, 1>;
+ def NAME#new : T_Addri_Pred<PredNot, 1>;
}
}
-let isExtendable = 1, InputType = "imm" in
+let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
multiclass Addri_base<string mnemonic, SDNode OpNode> {
let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
- let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
- isPredicable = 1 in
- def NAME : T_Addri< s16Ext, // Rd=add(Rs,#s16)
- [(set (i32 IntRegs:$Rd),
- (add IntRegs:$Rs, s16ExtPred:$s16))]>;
-
- let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
- hasSideEffects = 0, isPredicated = 1 in {
- defm Pt : Addri_Pred<mnemonic, 0>;
- defm NotPt : Addri_Pred<mnemonic, 1>;
+ let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in
+ def A2_#NAME : T_Addri<s16Ext>;
+
+ let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
+ defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
+ defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>;
}
}
}
-let isCodeGenOnly = 0 in
-defm ADD_ri : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+
+def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
+ (i32 (A2_addi I32:$Rs, imm:$s16))>;
//===----------------------------------------------------------------------===//
// Template class used for the following ALU32 instructions.
@@ -390,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s10Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> {
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -406,19 +445,15 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
-def OR_ri : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
-def AND_ri : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
-}
+def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
+def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
// Subtract register from immediate
// Rd32=sub(#s10,Rs32)
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
-CextOpcode = "sub", InputType = "imm", hasNewValue = 1, isCodeGenOnly = 0 in
-def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
- "$Rd = sub(#$s10, $Rs)" ,
- [(set IntRegs:$Rd, (sub s10ExtPred:$s10, IntRegs:$Rs))] > ,
- ImmRegRel {
+let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
+def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
+ "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
bits<5> Rd;
bits<10> s10;
bits<5> Rs;
@@ -433,14 +468,18 @@ def SUB_ri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
}
// Nop.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
let IClass = 0b0111;
let Inst{27-24} = 0b1111;
}
+
+def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+
// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
-def : Pat<(not (i32 IntRegs:$src1)),
- (SUB_ri -1, (i32 IntRegs:$src1))>;
+def: Pat<(not (i32 IntRegs:$src1)),
+ (A2_subri -1, IntRegs:$src1)>;
let hasSideEffects = 0, hasNewValue = 1 in
class T_tfr16<bit isHi>
@@ -459,10 +498,8 @@ class T_tfr16<bit isHi>
let Inst{13-0} = u16{13-0};
}
-let isCodeGenOnly = 0 in {
def A2_tfril: T_tfr16<0>;
def A2_tfrih: T_tfr16<1>;
-}
// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
@@ -575,20 +612,17 @@ class T_TFRI_Pred<bit PredNot, bit PredNew>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def C2_cmoveit : T_TFRI_Pred<0, 0>;
def C2_cmoveif : T_TFRI_Pred<1, 0>;
def C2_cmovenewit : T_TFRI_Pred<0, 1>;
def C2_cmovenewif : T_TFRI_Pred<1, 1>;
-}
let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
- isPredicated = 0, isPredicable = 1, isReMaterializable = 1,
- isCodeGenOnly = 0 in
+ isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -599,20 +633,26 @@ def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in
defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
+let isAsmParserOnly = 1 in
defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
// Assembler mapped
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isAsmParserOnly = 1 in
def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
"$dst = #$src1",
[(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
// TODO: see if this instruction can be deleted.
-let isExtendable = 1, opExtendable = 1, opExtentBits = 6 in
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
"$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, s8Imm:$src2),
+ "$dst = combine(##$src1, #$src2)">;
+}
//===----------------------------------------------------------------------===//
// ALU32/ALU -
@@ -642,28 +682,28 @@ class T_MUX1 <bit MajOp, dag ins, string AsmStr>
let Inst{4-0} = Rd;
}
-let opExtendable = 2, isCodeGenOnly = 0 in
+let opExtendable = 2 in
def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
"$Rd = mux($Pu, #$s8, $Rs)">;
-let opExtendable = 3, isCodeGenOnly = 0 in
+let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+ opExtentBits = 8, opExtendable = 2 in
def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
[(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+ (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -679,14 +719,20 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+
//===----------------------------------------------------------------------===//
// template class for non-predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxth
//===----------------------------------------------------------------------===//
let hasNewValue = 1, opNewValue = 0 in
class T_ALU32_2op <string mnemonic, bits<3> minOp> :
- ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)", [] > {
+ ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)", [] > {
bits<5> Rd;
bits<5> Rs;
@@ -703,13 +749,12 @@ class T_ALU32_2op <string mnemonic, bits<3> minOp> :
// template class for predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxtb, zxth
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, validSubTargets = HasV4SubT,
- hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
- bit isPredNew > :
- ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
- !if(isPredNot, "if (!$Pu", "if ($Pu")
- #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
+ bit isPredNew > :
+ ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
+ !if(isPredNot, "if (!$Pu", "if ($Pu")
+ #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
bits<5> Rd;
bits<2> Pu;
bits<5> Rs;
@@ -741,20 +786,18 @@ multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
let isPredicable = 1, hasSideEffects = 0 in
def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
- let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ let isPredicated = 1, hasSideEffects = 0 in {
defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
}
}
}
-let isCodeGenOnly = 0 in {
defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
-}
// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
// The compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has
@@ -784,14 +827,13 @@ multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
let isPredicable = 1, hasSideEffects = 0 in
def A2_#NAME : T_ZXTB;
- let validSubTargets = HasV4SubT, isPredicated = 1, hasSideEffects = 0 in {
+ let isPredicated = 1, hasSideEffects = 0 in {
defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
}
}
}
-let isCodeGenOnly=0 in
defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
@@ -799,44 +841,182 @@ def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
-// Mux.
-def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "$dst = vmux($src1, $src2, $src3)",
- []>;
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+ bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+ : ALU64_rr < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+ #!if(isCrnd,":crnd","")
+ #!if(isSat, ":sat", ""),
+ [], "", ALU64_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b0011;
+ let Inst{23-21} = majOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{7-5} = minOp;
+ let Inst{4-0} = Rdd;
+ }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+ def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>;
+ def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+ def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>;
+ def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+ def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>;
+}
+
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
+ def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>;
+ def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
+ def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>;
+ def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
+}
+
+// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
+def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
+def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>;
+def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>;
+def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
+
+def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>;
+def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>;
+def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
+
+// Rdd=vnavg[hw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
+ def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
+}
+
+// Rdd=vnavg[hw](Rss,Rtt)[:rnd|:crnd]:sat
+let Defs = [USR_OVF] in {
+ def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>;
+ def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>;
+ def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>;
+ def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>;
+}
+
+// Rdd=vsub[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>;
+ def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>;
+ def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>;
+}
+
+// Rdd=vsub[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>;
+ def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>;
+ def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>;
+ def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>;
+}
+
+// Rdd=vmax[u][bhw](Rss,Rtt)
+def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>;
+def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>;
+def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>;
+def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>;
+def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>;
+def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>;
+
+// Rdd=vmin[u][bhw](Rss,Rtt)
+def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>;
+def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>;
+def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>;
+def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>;
+def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>;
+def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>;
//===----------------------------------------------------------------------===//
-// ALU32/PERM -
+// Template class for vector compare
//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_vcmp <string Str, bits<4> minOp>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = "#Str#"($Rss, $Rtt)", [],
+ "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = minOp{3};
+ let Inst{7-5} = minOp{2-0};
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+ : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+ (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+// Vector compare bytes
+def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
+def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
+
+// Vector compare halfwords
+def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>;
+def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>;
+def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>;
+
+// Vector compare words
+def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
+def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
+def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
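+// Each T_vcmp_pat above maps an IR vector compare (e.g. seteq on v8i8)
+// directly onto the matching A2_vcmp* instruction, producing an i1 result
+// in a predicate register.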
//===----------------------------------------------------------------------===//
-// ALU32/PRED +
+// ALU32/PERM -
//===----------------------------------------------------------------------===//
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM1Imm(imm);
-}]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- uint32_t imm = N->getZExtValue();
- return XformUToUM1Imm(imm);
-}]>;
-def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = cl0($src1)",
- [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+// No encoding bits needed. If cmp.ge is found, the assembler parser
+// transforms it to cmp.gt, subtracting 1 from the immediate.
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ "$Pd = cmp.geu($Rs, #$s8)">;
+}
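+// E.g. "p0 = cmp.ge(r0, #5)" is accepted and encoded as "p0 = cmp.gt(r0, #4)",
+// since x >= n is equivalent to x > n-1 for integer operands.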
-def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = ct0($src1)",
- [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
//===----------------------------------------------------------------------===//
// ALU32/PRED -
@@ -845,7 +1025,8 @@ def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
//===----------------------------------------------------------------------===//
// ALU64/ALU +
-//===----------------------------------------------------------------------===//// Add.
+//===----------------------------------------------------------------------===//
+// Add.
//===----------------------------------------------------------------------===//
// Template Class
// Add/Subtract halfword
@@ -879,18 +1060,14 @@ class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
}
//Rd=sub(Rt.L,Rs.[LH])
-let isCodeGenOnly = 0 in {
def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
-}
-let isCodeGenOnly = 0 in {
//Rd=add(Rt.L,Rs.[LH])
def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
-}
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.L,Rs.[LH]):sat
def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
@@ -901,22 +1078,18 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
}
//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
-}
//Rd=add(Rt.[LH],Rs.[LH]):<<16
-let isCodeGenOnly = 0 in {
def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
-}
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
@@ -947,7 +1120,7 @@ def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
(A2_subh_h16_ll I32:$src1, I32:$src2)>;
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
(ins DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
@@ -981,12 +1154,10 @@ class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
let Inst{20-16} = !if(isMax, Rt, Rs);
}
-let isCodeGenOnly = 0 in {
def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-}
// Here, depending on the operand being selected, we'll either generate a
// min or max instruction.
@@ -1053,11 +1224,9 @@ class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-}
class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
: Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
@@ -1069,6 +1238,24 @@ def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+ let hasSideEffects = 0;
+
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0001;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
string Op2Pfx>
@@ -1096,10 +1283,8 @@ class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
: T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
IsComm, "">;
-let isCodeGenOnly = 0 in {
def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-}
def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
@@ -1109,11 +1294,9 @@ class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
: T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
!if(IsNeg,"~","")>;
-let isCodeGenOnly = 0 in {
def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-}
def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
@@ -1165,11 +1348,9 @@ class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-}
def: Pat<(i1 (not (i1 PredRegs:$Ps))),
(C2_not PredRegs:$Ps)>;
@@ -1193,13 +1374,11 @@ class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-}
def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
@@ -1207,7 +1386,7 @@ def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
"$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -1222,7 +1401,7 @@ def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
"$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -1234,18 +1413,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
let Inst{4-0} = Rd;
}
-def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2,
- PredRegs:$src3),
- "$dst = valignb($src1, $src2, $src3)",
- []>;
-
-def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2,
- PredRegs:$src3),
- "$dst = vspliceb($src1, $src2, $src3)",
- []>;
-
// User control register transfer.
//===----------------------------------------------------------------------===//
// CR -
@@ -1256,7 +1423,7 @@ def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
//===----------------------------------------------------------------------===//
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
@@ -1266,7 +1433,7 @@ class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
- string S = Mnemonic # !if(New, !if(Taken,":t",":nt"), "");
+ string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", ""));
}
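+// E.g. CondStr<"$src", 0, 1>.S is "if (!$src.new) ", and
+// JumpOpcStr<"jump", 1, 1>.S is "jump:t".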
let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
@@ -1304,7 +1471,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
let Inst{27-24} = 0b1100;
let Inst{21} = PredNot;
- let Inst{12} = !if(isPredNew, isTak, zero);
+ let Inst{12} = isTak;
let Inst{11} = isPredNew;
let Inst{9-8} = src;
let Inst{23-22} = dst{16-15};
@@ -1314,7 +1481,7 @@ class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
}
multiclass JMP_Pred<bit PredNot, string ExtStr> {
- def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>;
+ def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken
// Predicate new
def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
@@ -1361,13 +1528,13 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
let Inst{27-22} = 0b001101;
let Inst{21} = PredNot;
let Inst{20-16} = dst;
- let Inst{12} = !if(isPredNew, isTak, zero);
+ let Inst{12} = isTak;
let Inst{11} = isPredNew;
let Inst{9-8} = src;
}
multiclass JMPR_Pred<bit PredNot> {
- def NAME: T_JMPr_c<PredNot, 0, 0>;
+ def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
// Predicate new
def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
@@ -1404,12 +1571,12 @@ class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
}
-let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in {
+let Defs = VolatileV3.Regs in {
def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
}
-let isTerminator = 1, hasSideEffects = 0, isCodeGenOnly = 0 in {
+let isTerminator = 1, hasSideEffects = 0 in {
defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
// Deal with explicit assembly
@@ -1451,6 +1618,8 @@ def: Pat<(brind (i32 IntRegs:$dst)),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
+
+// Load - Base with Immediate offset addressing mode
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
Operand ImmOp>
@@ -1471,7 +1640,7 @@ class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
!if (!eq(ImmOpStr, "s11_2Ext"), 13,
!if (!eq(ImmOpStr, "s11_1Ext"), 12,
/* s11_0Ext */ 11)));
- let hasNewValue = !if (!eq(ImmOpStr, "s11_3Ext"), 0, 1);
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
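+  // Double-register loads cannot produce a new value, so hasNewValue is
+  // cleared when the destination register class is DoubleRegs.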
let IClass = 0b1001;
@@ -1542,60 +1711,100 @@ multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
}
-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
}
-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
-def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
- (L2_loadrb_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
- (L2_loadrub_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
- (L2_loadrh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
- (L2_loadruh_io AddrFI:$addr, 0) >;
-
-def : Pat < (i32 (load ADDRriS11_2:$addr)),
- (L2_loadri_io AddrFI:$addr, 0) >;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>;
+ def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
+}
-def : Pat < (i64 (load ADDRriS11_3:$addr)),
- (L2_loadrd_io AddrFI:$addr, 0) >;
+let accessSize = WordAccess, opExtentAlign = 2 in {
+ def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
+ def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>;
+}
-let AddedComplexity = 20 in {
-def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (L2_loadrb_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
+ opExtendable = 3, isExtentSigned = 1 in
+class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
+ : LDInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#str#"($src2 + #$offset)", [],
+ "$src1 = $dst">, AddrModeRel {
+ bits<4> name;
+ bits<5> dst;
+ bits<5> src2;
+ bits<12> offset;
+ bits<11> offsetBits;
-def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
- (L2_loadrub_io IntRegs:$src1, s11_0ExtPred:$offset) >;
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
+ /* s11_0Ext */ offset{10-0});
+ let IClass = 0b1001;
-def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (L2_loadrh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
-def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
- (L2_loadruh_io IntRegs:$src1, s11_1ExtPred:$offset) >;
+let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
+def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
-def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
- (L2_loadri_io IntRegs:$src1, s11_2ExtPred:$offset) >;
+let accessSize = ByteAccess, opExtentBits = 11 in
+def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
- (L2_loadrd_io IntRegs:$src1, s11_3ExtPred:$offset) >;
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
+ (VT (MI IntRegs:$Rs, imm:$Off))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
}
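+// The four patterns cover, in order: a frame-index base, frame index plus
+// immediate offset, register base plus immediate offset, and a plain
+// register base.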
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ // No sextloadi1.
+}
+
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
+ (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
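+// For the two possible loaded values this computes 0 - 0 = 0x00000000 and
+// 0 - 1 = 0xffffffff, i.e. the low bit replicated across the register.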
+
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
@@ -1696,51 +1905,70 @@ multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
}
// post increment byte loads with immediate offset
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
}
// post increment halfword loads with immediate offset
-let accessSize = HalfWordAccess, opExtentAlign = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
}
// post increment word loads with immediate offset
-let accessSize = WordAccess, opExtentAlign = 2, isCodeGenOnly = 0 in
+let accessSize = WordAccess, opExtentAlign = 2 in
defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
// post increment doubleword loads with immediate offset
-let accessSize = DoubleWordAccess, opExtentAlign = 3, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
-def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
- (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
-
-// Load byte any-extend.
-def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
- (i32 (L2_loadrb_io AddrFI:$addr, 0)) >;
+// Rd=mem[u]bh(Rx++#s4:1)
+// Rdd=mem[u]bh(Rx++#s4:2)
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>;
+ def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
+}
+let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
+ def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+ def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+}
-// Indexed load byte any-extend.
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (L2_loadrb_io IntRegs:$src1, s11_0ImmPred:$offset)) >;
+//===----------------------------------------------------------------------===//
+// Template class for post increment fifo loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src2++#$offset)" ,
+ [], "$src2 = $dst2, $src1 = $dst" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> offset;
+ bits<4> offsetBits;
-def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
- (i32 (L2_loadrh_io AddrFI:$addr, 0))>;
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0});
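+  // s4_1Imm offsets are halfword-aligned: bit 0 is implicit, so only
+  // offset{4-1} is encoded; s4_0Imm encodes all four bits.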
+ let IClass = 0b1001;
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
- (i32 (L2_loadrh_io IntRegs:$src1, s11_1ImmPred:$offset)) >;
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-12} = 0b00;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
-let AddedComplexity = 10 in
-def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (L2_loadrub_io AddrFI:$addr, 0))>;
+// Ryy=memh_fifo(Rx++#s4:1)
+// Ryy=memb_fifo(Rx++#s4:0)
+let accessSize = ByteAccess in
+def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;
-let AddedComplexity = 20 in
-def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (L2_loadrub_io IntRegs:$src1, s11_0ImmPred:$offset))>;
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;
//===----------------------------------------------------------------------===//
// Template class for post increment loads with register offset.
@@ -1768,26 +1996,27 @@ class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
let Inst{4-0} = dst;
}
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
+
+ def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
}
-let isCodeGenOnly = 0 in
def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;
// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-isPseudo = 1, Defs = [R10,R11,D5], hasSideEffects = 0 in
-def LDriw_pred : LDInst2<(outs PredRegs:$dst),
- (ins MEMri:$addr),
- "Error; should not emit",
- []>;
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins IntRegs:$addr, s11_2Ext:$off),
+ ".error \"should not emit\"", []>;
-let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0 in
+let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
def L2_deallocframe : LDInst<(outs), (ins),
"deallocframe",
[]> {
@@ -1799,7 +2028,7 @@ let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0, isCodeGenOnly = 0
}
// Load / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+let Uses = [CS], hasSideEffects = 0 in
class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
: LDInst <(outs RC:$dst, IntRegs:$_dst_),
(ins IntRegs:$Rz, ModRegs:$Mu),
@@ -1809,6 +2038,7 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
bits<5> Rz;
bit Mu;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let IClass = 0b1001;
let Inst{27-25} = 0b100;
@@ -1821,27 +2051,60 @@ class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
let Inst{4-0} = dst;
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
}
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
+ def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>;
+ def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
}
-let accessSize = WordAccess, isCodeGenOnly = 0 in {
+let accessSize = WordAccess in {
def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
+ let hasNewValue = 0 in {
+ def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
+ def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>;
+ }
}
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
+// Load-align (fifo) / Post increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0 in
+class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
+ : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+ "$Rz = $_dst_, $dst = $_src_" > {
+ bits<5> dst;
+ bits<5> Rz;
+ bit Mu;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b1;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
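+// The "$dst = $_src_" tie makes these read-modify-write: each fifo load
+// shifts the newly loaded byte/halfword into the existing 64-bit value.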
+
+def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
+
//===----------------------------------------------------------------------===//
// Circular loads with immediate offset.
//===----------------------------------------------------------------------===//
-let Uses = [CS], mayLoad = 1, hasSideEffects = 0, hasNewValue = 1 in
+let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in
class T_load_pci <string mnemonic, RegisterClass RC,
Operand ImmOp, bits<4> MajOp>
: LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
@@ -1855,6 +2118,7 @@ class T_load_pci <string mnemonic, RegisterClass RC,
bits<4> offsetBits;
string ImmOpStr = !cast<string>(ImmOp);
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
!if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
@@ -1871,24 +2135,62 @@ class T_load_pci <string mnemonic, RegisterClass RC,
}
// Byte variants of circ load
-let accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess in {
def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
}
// Half word variants of circ load
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess in {
def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
+ def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
+ def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>;
}
// Word variants of circ load
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
-let accessSize = DoubleWordAccess, hasNewValue = 0, isCodeGenOnly = 0 in
+let accessSize = WordAccess, hasNewValue = 0 in {
+ def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+ def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+}
+
+let accessSize = DoubleWordAccess, hasNewValue = 0 in
def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
+//===----------------------------------------------------------------------===//
+// Circular loads - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions: the pseudo instructions'
+// operand order mimics the ordering in the intrinsics. Also, 'src2' doesn't
+// appear in the AsmString because it is the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pci_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>;
+def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>;
+def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>;
+def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>;
+def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>;
+def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>;
+
+
+// TODO: memb_fifo and memh_fifo must take the destination register as input.
+// One-off circular loads - not enough in common to merit a shared class.
+let accessSize = ByteAccess in
+def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
+
// L[24]_load[wd]_locked: Load word/double with lock.
let isSoloAX = 1 in
class T_load_locked <string mnemonic, RegisterClass RC>
@@ -1901,12 +2203,38 @@ class T_load_locked <string mnemonic, RegisterClass RC>
let Inst{27-21} = 0b0010000;
let Inst{20-16} = src;
let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
+ let Inst{5} = 0;
let Inst{4-0} = dst;
}
-let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
+
+// S[24]_store[wd]_locked: Store word/double conditionally.
+let isSoloAX = 1, isPredicateLate = 1 in
+class T_store_locked <string mnemonic, RegisterClass RC>
+ : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
+ mnemonic#"($Rs, $Pd) = $Rt"> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1010;
+ let Inst{27-23} = 0b00001;
+ let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
+
+let accessSize = WordAccess in
+def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
+
+let accessSize = DoubleWordAccess in
+def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
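+// Together with L2_loadw_locked/L4_loadd_locked these form the usual
+// load-locked/store-conditional pair; Pd is set if the conditional store
+// succeeded.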
+
//===----------------------------------------------------------------------===//
// Bit-reversed loads with auto-increment register
//===----------------------------------------------------------------------===//
@@ -1936,17 +2264,44 @@ class T_load_pbr<string mnemonic, RegisterClass RC,
let Inst{4-0} = dst;
}
-let hasNewValue =1, opNewValue = 0, isCodeGenOnly = 0 in {
+let hasNewValue = 1, opNewValue = 0 in {
def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
+ def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>;
+ def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
}
-let isCodeGenOnly = 0 in
+def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
+def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>;
def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
+def L2_loadalignb_pbr : T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
+def L2_loadalignh_pbr : T_load_pbr <"memh_fifo", DoubleRegs,
+                                    HalfWordAccess, 0b0010>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads - Pseudo
+//
+// Please note that 'src2' doesn't appear in the AsmString because
+// it is the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pbr_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ ".error \"$dst = "#opc#"($src1++$src3:brev)\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>;
+def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>;
+def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>;
+def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>;
+def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>;
+def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>;
+
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -2003,7 +2358,6 @@ class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
@@ -2012,10 +2366,8 @@ def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
-}
//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
@@ -2024,10 +2376,8 @@ def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
-}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
-let isCodeGenOnly = 0 in {
def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
@@ -2036,11 +2386,10 @@ def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
-}
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
@@ -2094,7 +2443,6 @@ class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
}
//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
@@ -2103,10 +2451,8 @@ def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
-}
//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
@@ -2115,10 +2461,8 @@ def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
-}
//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
@@ -2127,10 +2471,8 @@ def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
-}
//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-let isCodeGenOnly = 0 in {
def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
@@ -2139,10 +2481,8 @@ def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
-}
//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
@@ -2151,10 +2491,8 @@ def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
-}
//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-let isCodeGenOnly = 0 in {
def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
@@ -2163,7 +2501,6 @@ def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
-}
//===----------------------------------------------------------------------===//
// Template Class
@@ -2197,7 +2534,6 @@ class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
let Inst{12-8} = Rt;
}
-let isCodeGenOnly = 0 in {
def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
@@ -2237,6 +2573,72 @@ def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply
+// Used for complex multiply (real or imaginary), dual multiply, and even
+// halfword multiply.
+//===----------------------------------------------------------------------===//
+class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
+ bit isRnd, bit isSat >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")
+ #!if(isSat,":sat",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
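+// E.g. with opc = "vcmpyi", hasShift = 1, isSat = 1 the AsmString becomes
+// "$Rdd = vcmpyi($Rss, $Rtt):<<1:sat".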
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+let Defs = [USR_OVF] in {
+def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
+def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
+def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
+def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
+def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
+def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
+def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
+def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
+def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
+def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
+def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
+def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
+def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
+def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
+def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
}
let hasNewValue = 1, opNewValue = 0 in
@@ -2265,6 +2667,9 @@ class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
let Inst{4-0} = dst;
}
+class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
+ : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
+
class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0 >
: T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
@@ -2277,30 +2682,37 @@ class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0, string op2str = "" >
: T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
-let CextOpcode = "mpyi", InputType = "reg", isCodeGenOnly = 0 in
+def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
+def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
+def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
+
+let CextOpcode = "mpyi", InputType = "reg" in
def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
-let isCodeGenOnly = 0 in {
def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
-}
-let isCodeGenOnly = 0 in
def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
-let isCodeGenOnly = 0 in {
+def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
+def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
+
def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
-}
+
+def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>;
+def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>;
+def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">;
+def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">;
// V4 Instructions
-let isCodeGenOnly = 0 in {
+def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>;
def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
+def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>;
def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-}
def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
@@ -2325,11 +2737,10 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
let Inst{12-5} = u8;
}
-let isExtendable = 1, opExtentBits = 8, opExtendable = 2, isCodeGenOnly = 0 in
+let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>;
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
-let isCodeGenOnly = 0 in
def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
[(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
u8ImmPred:$u8)))]>;
@@ -2345,11 +2756,12 @@ def M2_mpyui : MInst<(outs IntRegs:$dst),
// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
// depending on the value of m9. See Arch Spec.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
- CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1 in
+ CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
+ isAsmParserOnly = 1 in
def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ExtPred:$src2))]>, ImmRegRel;
+ s32ImmPred:$src2))]>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2397,10 +2809,10 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
let Inst{4-0} = dst;
}
-let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
[(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u8ExtPred:$src3),
+ (add (mul IntRegs:$src2, u32ImmPred:$src3),
IntRegs:$src1))]>, ImmRegRel;
def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
@@ -2409,11 +2821,11 @@ let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
IntRegs:$src1))]>, ImmRegRel;
}
-let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in {
+let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3),
+ (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2422,20 +2834,18 @@ let CextOpcode = "ADD_acc", isCodeGenOnly = 0 in {
(i32 IntRegs:$src1)))]>, ImmRegRel;
}
-let CextOpcode = "SUB_acc", isCodeGenOnly = 0 in {
+let CextOpcode = "SUB_acc" in {
let isExtentSigned = 1 in
def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel;
def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
}
-let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in
+let Itinerary = M_tc_3x_SLOT23 in
def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>;
-let isCodeGenOnly = 0 in {
def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-}
class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
PatLeaf ImmPred>
@@ -2447,10 +2857,187 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
(MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- XType Vector Instructions
+//===----------------------------------------------------------------------===//
+class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_diff < bits<3> MajOp, string opc >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss),
+ "$Rdd = "#opc#"($Rtt, $Rss)",
+ [], "",M_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32)
+def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>;
+def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>;
+def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>;
+def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>;
+
+def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
+def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
+
+// Vector reduce halfwords:
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
+def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply real or imaginary, dual multiply and even halfwords
+//===----------------------------------------------------------------------===//
+let Defs = [USR_OVF] in
+class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")#":sat",
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd",""),
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
+def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
+def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
+def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
+
+def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
+def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
+def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
+def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
+def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
+def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
+def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
+
+def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
+def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
+def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
+def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
+def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
+def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
+def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>;
+def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>;
+
//===----------------------------------------------------------------------===//
// Template Class -- Multiply signed/unsigned halfwords with and without
// saturation and rounding
@@ -2478,7 +3065,6 @@ class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
let Inst{12-8} = Rt;
}
-let isCodeGenOnly = 0 in {
def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
@@ -2509,7 +3095,7 @@ def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
-}
+
//===----------------------------------------------------------------------===//
// Template Class for xtype mpy:
// Vector multiply
@@ -2570,7 +3156,6 @@ class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
// MPY - Multiply and use full result
// Rdd = mpy[u](Rs,Rt)
-let isCodeGenOnly = 0 in {
def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
@@ -2579,7 +3164,48 @@ def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
-}
+
+// Complex multiply real or imaginary
+// Rxx=cmpy[ir](Rs,Rt)
+def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>;
+def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>;
+def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>;
+
+// Complex multiply
+// Rdd=cmpy(Rs,Rt)[:<<]:sat
+def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>;
+def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>;
+
+// Rdd=cmpy(Rs,Rt*)[:<<]:sat
+def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>;
+def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>;
+def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>;
+def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>;
+def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>;
+def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
+def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
+def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<]:sat
+//let Defs = [USR_OVF] in {
+ def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
+ def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
+//}
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
+def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
+def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
(i64 (anyext (i32 IntRegs:$src2))))),
@@ -2750,35 +3376,30 @@ multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
-let accessSize = HalfWordAccess, isNVStorable = 0, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess, isNVStorable = 0 in
defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
-def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerb_pi IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+ InstHexagon MI>
+ : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+ (MI I32:$src2, imm:$offset, Value:$src1)>;
-def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerh_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-
-def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
- (S2_storeri_pi IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-
-def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
- s4_3ImmPred:$offset),
- (S2_storerd_pi IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
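For illustration, each Storepi_pat line above amounts to one of the hand-written
patterns it replaces. A minimal sketch of the first instantiation after
substitution (names as defined in this file):

  // Sketch only: what "def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred,
  // S2_storerb_pi>" resolves to:
  def : Pat<(post_truncsti8 I32:$src1, I32:$src2, s4_0ImmPred:$offset),
            (S2_storerb_pi I32:$src2, imm:$offset, I32:$src1)>;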
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
@@ -2805,14 +3426,13 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
let Inst{7} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
-}
+
let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3>MajOp, bit isH = 0>
@@ -2906,7 +3526,7 @@ multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
+let addrMode = BaseImmOffset, InputType = "imm" in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
@@ -2925,42 +3545,102 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
u6_1Ext, 0b011, 1>;
}
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
- (S2_storerd_io AddrFI:$addr, 0, (i64 DoubleRegs:$src1))>;
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are the same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi),
+ (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Storex_fi_pat <Store, Value, MI>;
+ def: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
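A minimal sketch of how one Storex_pat instantiation unfolds, using the
store/I32 line further below as the example; this is just the multiclass body
written out (modulo the AddedComplexity = 20 wrapper at the use site):

  def: Storex_fi_pat     <store, I32, S2_storeri_io>;
  def: Storex_fi_add_pat <store, I32, s30_2ImmPred, S2_storeri_io>;
  def: Storex_add_pat    <store, I32, s30_2ImmPred, S2_storeri_io>;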
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that the F.Fragment uses names
+// "ptr" and "val".
+class SwapSt<PatFrag F>
+ : PatFrag<(ops node:$val, node:$ptr), F.Fragment>;
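A sketch of the effect of SwapSt, assuming the generic fragment lists the
operands as (atomic_store_8 node:$ptr, node:$val); the standalone name below
is hypothetical:

  // Hypothetical expansion of SwapSt<atomic_store_8>: same fragment, operands
  // re-listed value-first so it lines up with the regular store patterns.
  def swapped_atomic_store_8
    : PatFrag<(ops node:$val, node:$ptr),
              (atomic_store_8 node:$ptr, node:$val)>;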
+let AddedComplexity = 20 in {
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
-let AddedComplexity = 10 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_0ExtPred:$offset)),
- (S2_storerb_io IntRegs:$src2, s11_0ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
+}
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_1ExtPred:$offset)),
- (S2_storerh_io IntRegs:$src2, s11_1ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+// Simple patterns should be tried with the lowest priority.
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
+def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
+def: Storex_simple_pat<store, I32, S2_storeri_io>;
+def: Storex_simple_pat<store, I64, S2_storerd_io>;
-def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
- s11_2ExtPred:$offset)),
- (S2_storeri_io IntRegs:$src2, s11_2ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
-def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
- s11_3ExtPred:$offset)),
- (S2_storerd_io IntRegs:$src2, s11_3ImmPred:$offset,
- (i64 DoubleRegs:$src1))>;
+let AddedComplexity = 20 in {
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
}
-// memh(Rx++#s4:1)=Rt.H
+def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
+def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
+def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
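Sketch of what the last line above resolves to once LoReg is substituted as the
value modifier (derived directly from the Storexm_simple_pat class):

  def : Pat<(truncstorei32 I64:$Rt, (i32 IntRegs:$Rs)),
            (S2_storeri_io IntRegs:$Rs, 0, (LoReg I64:$Rt))>;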
// Store predicate.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
@@ -2971,7 +3651,7 @@ def STriw_pred : STInst<(outs),
// S2_allocframe: Allocate stack frame.
let Defs = [R29, R30], Uses = [R29, R31, R30],
- hasSideEffects = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+ hasSideEffects = 0, accessSize = DoubleWordAccess in
def S2_allocframe: ST0Inst <
(outs), (ins u11_3Imm:$u11_3),
"allocframe(#$u11_3)" > {
@@ -3015,7 +3695,6 @@ class T_store_pci <string mnemonic, RegisterClass RC,
let Inst{1} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
ByteAccess>;
def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
@@ -3026,12 +3705,64 @@ def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
WordAccess>;
def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
DoubleWordAccess>;
-}
+
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in
+class T_storenew_pci <string mnemonic, Operand Imm,
+ bits<2>MajOp, MemAccessSize AlignSize>
+ : NVInst < (outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new",
+ [],
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<6> offset;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{6-3} =
+ !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
+ !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
+ /* ByteAccess */ offset{3-0}));
+ let Inst{1} = 0b0;
+ }
+
+def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>;
+def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>;
+def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
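A comment-style note on the !if chain in the template above (illustration,
read off the Inst{6-3} assignment):

  // For S2_storerinew_pci (WordAccess), Inst{6-3} holds offset{5-2}, so only
  // word-aligned offsets are encodable; HalfWordAccess keeps offset{4-1},
  // ByteAccess offset{3-0}.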
+
+//===----------------------------------------------------------------------===//
+// Circular stores - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions. The pseudo instructions'
+// operand order should mimic the ordering in the intrinsics.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pci_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"",
+ [], "$_dst_ = $src1">;
+
+def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>;
+def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>;
+def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
// Circular stores with auto-increment register
//===----------------------------------------------------------------------===//
-let Uses = [CS], isNVStorable = 1, isCodeGenOnly = 0 in
+let Uses = [CS], isNVStorable = 1 in
class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
: STInst <(outs IntRegs:$_dst_),
@@ -3055,14 +3786,43 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
let Inst{1} = 0b1;
}
-let isCodeGenOnly = 0 in {
def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
HalfWordAccess, "Rt.h">;
-}
+
+//===----------------------------------------------------------------------===//
+// Circular .new stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_storenew_pcr <string mnemonic, bits<2>MajOp,
+ MemAccessSize AlignSize>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" ,
+ [] ,
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{1} = 0b1;
+ }
+
+def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;
//===----------------------------------------------------------------------===//
// Bit-reversed stores with auto-increment register
@@ -3093,7 +3853,7 @@ class T_store_pbr<string mnemonic, RegisterClass RC,
let Inst{12-8} = src;
}
-let isNVStorable = 1, isCodeGenOnly = 0 in {
+let isNVStorable = 1 in {
let BaseOpcode = "S2_storerb_pbr" in
def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
0b000>, NewValueRel;
@@ -3104,28 +3864,71 @@ let isNVStorable = 1, isCodeGenOnly = 0 in {
def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
0b100>, NewValueRel;
}
-let isCodeGenOnly = 0 in {
+
def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
-}
//===----------------------------------------------------------------------===//
-// ST -
+// Bit-reversed .new stores with auto-increment register
//===----------------------------------------------------------------------===//
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
+ hasSideEffects = 0 in
+class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
+ "$Rz = $_dst_">, NewValueRel {
+ let accessSize = addrSize;
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1111101;
+ let Inst{12-11} = majOp;
+ let Inst{7} = 0b0;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{10-8} = Nt;
+ }
+
+let BaseOpcode = "S2_storerb_pbr" in
+def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
+
+let BaseOpcode = "S2_storerh_pbr" in
+def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
+
+let BaseOpcode = "S2_storeri_pbr" in
+def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
//===----------------------------------------------------------------------===//
-// STYPE/ALU +
+// Bit-reversed stores - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions. The pseudo instructions'
+// operand order should mimic the ordering in the intrinsics.
//===----------------------------------------------------------------------===//
-// Logical NOT.
-def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = not($src1)",
- [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pbr_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3),
+ ".error \""#opc#"($src1++$src3:brev) = $src2\"",
+ [], "$_dst_ = $src1">;
+def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>;
+def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>;
+def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
-// STYPE/ALU -
+// ST -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for S_2op instructions.
+//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
@@ -3156,26 +3959,62 @@ let hasNewValue = 1 in
class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
: T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
+// Vector sign/zero extend
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>;
+ def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>;
+ def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>;
+ def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>;
+}
+
+// Vector splat bytes/halfwords
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>;
+ def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>;
+}
+
// Sign extend word to doubleword
-let isCodeGenOnly = 0 in
def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+// Vector saturate and pack
+let Defs = [USR_OVF] in {
+ def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
+ def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>;
+ def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>;
+ def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>;
+ def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>;
+ def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>;
+}
+
+// Vector truncate
+def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>;
+def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>;
+
// Swizzle the bytes of a word
-let isCodeGenOnly = 0 in
def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
// Saturate
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
+ def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>;
}
-let Itinerary = S_2op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+let Itinerary = S_2op_tc_2_SLOT23 in {
+ // Vector round and pack
+ def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>;
+
+ let Defs = [USR_OVF] in
+ def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>;
+
+ // Bit reverse
+ def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>;
+
// Absolute value word
def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
@@ -3227,7 +4066,7 @@ class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
let hasNewValue = 1 in
class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
-
+
let hasNewValue = 1 in
class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
@@ -3239,21 +4078,33 @@ class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
[(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
(u5ImmPred:$u5)))]>;
+// Vector arithmetic shift right by immediate with truncate and pack
+def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
+
// Arithmetic/logical shift right/left by immediate
-let Itinerary = S_2op_tc_1_SLOT23, isCodeGenOnly = 0 in {
+let Itinerary = S_2op_tc_1_SLOT23 in {
def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
}
// Shift left by immediate with saturation
-let Defs = [USR_OVF], isCodeGenOnly = 0 in
+let Defs = [USR_OVF] in
def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
// Shift right with round
-let isCodeGenOnly = 0 in
def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
+let isAsmParserOnly = 1 in
+def S2_asr_i_r_rnd_goodsyntax
+ : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5),
+ "$dst = asrrnd($src, #$u5)",
+ [], "", S_2op_tc_1_SLOT23>;
+
+let isAsmParserOnly = 1 in
+def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
+ "$dst = not($src)">;
+
def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
(i32 1))),
(i32 1))),
@@ -3272,17 +4123,34 @@ class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
let Inst{4-0} = Rdd;
}
-let isCodeGenOnly = 0 in {
def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
-}
// Interleave/deinterleave
-let isCodeGenOnly = 0 in {
def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
-}
+
+// Vector Complex conjugate
+def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
+
+// Vector saturate without pack
+def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
+def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
+def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
+def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
+
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
+def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
+
+// Vector absolute value words with and without saturation
+def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
+def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
+
+def : Pat<(not (i64 DoubleRegs:$src1)),
+ (A2_notp DoubleRegs:$src1)>;
//===----------------------------------------------------------------------===//
// STYPE/BIT +
@@ -3313,7 +4181,6 @@ class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
: T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
(outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
-let isCodeGenOnly = 0 in {
def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
@@ -3323,14 +4190,28 @@ def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-}
-def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
-def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
-def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+// Count leading zeros.
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+
+// Count trailing ones: 32-bit.
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+
+// The 64-bit count-leading/trailing patterns are defined in HexagonInstrInfoV4.td.
// Bit set/clear/toggle
@@ -3365,14 +4246,12 @@ class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-}
def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))),
(S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
@@ -3422,10 +4301,8 @@ class T_TEST_BIT_REG<string MnOp, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
-}
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
@@ -3437,6 +4314,7 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
(S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}
+
let hasSideEffects = 0 in
class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6),
@@ -3471,11 +4349,9 @@ class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-}
let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)),
@@ -3503,6 +4379,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
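A descriptive sketch of the pattern above (not upstream text):

  // Matched computation, with bN = the byte loaded from $b + N:
  //   (b3 << 24) | (b2 << 16) | (b1 << 8) | b0
  // selected as a single word load from $b followed by A2_swiz.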
+
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -3512,7 +4396,7 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
//===----------------------------------------------------------------------===//
// Predicate transfer.
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
"$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
@@ -3526,7 +4410,7 @@ def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
}
// Transfer general register to predicate.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
"$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
bits<2> Pd;
@@ -3538,6 +4422,27 @@ def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
let Inst{1-0} = Pd;
}
+let hasSideEffects = 0, isCodeGenOnly = 1 in
+def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
+ "$dst = $src">;
+
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+ (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load (i32 IntRegs:$Rs))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def I1toI32: OutPatFrag<(ops node:$Rs),
+ (C2_muxii (i1 $Rs), 1, 0)>;
+
+def I32toI1: OutPatFrag<(ops node:$Rs),
+ (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
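Sketch of the simple i1 store once both I1toI32 and Storexm_simple_pat are
substituted (from the definitions above):

  def : Pat<(store I1:$Rt, (i32 IntRegs:$Rs)),
            (S2_storerb_io IntRegs:$Rs, 0, (C2_muxii I1:$Rt, 1, 0))>;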
//===----------------------------------------------------------------------===//
// STYPE/PRED -
@@ -3570,15 +4475,12 @@ class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
}
// Shift by immediate.
-let isCodeGenOnly = 0 in {
def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
-}
// Shift left by small amount and add.
-let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0,
- isCodeGenOnly = 0 in
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
(ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3),
"$Rd = addasl($Rt, $Rs, #$u3)" ,
@@ -3627,8 +4529,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
//===----------------------------------------------------------------------===//
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
-let hasSideEffects = 1, isSoloAX = 1, isCodeGenOnly = 0 in
-def BARRIER : SYSInst<(outs), (ins),
+let hasSideEffects = 1, isSoloAX = 1 in
+def Y2_barrier : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
let Inst{31-28} = 0b1010;
@@ -3638,6 +4540,20 @@ def BARRIER : SYSInst<(outs), (ins),
//===----------------------------------------------------------------------===//
// SYSTEM/SUPER -
//===----------------------------------------------------------------------===//
+
+// Generate frameindex addresses. The main reason for the offset operand is
+// that every instruction that is allowed to have a frame index as an operand
+// will then have that operand followed by an immediate operand (the offset).
+// This simplifies the frame-index elimination code.
+//
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def TFR_FI : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32Imm:$off), "">;
+ def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">;
+}
+
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -3683,14 +4599,19 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
multiclass LOOP_ri<string mnemonic> {
def i : LOOP_iBase<mnemonic, brtarget>;
def r : LOOP_rBase<mnemonic, brtarget>;
+
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
+ def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
+ }
}
-let Defs = [SA0, LC0, USR], isCodeGenOnly = 0 in
+let Defs = [SA0, LC0, USR] in
defm J2_loop0 : LOOP_ri<"loop0">;
// Interestingly, only loop0 appears to set usr.lpcfg.
-let Defs = [SA1, LC1], isCodeGenOnly = 0 in
+let Defs = [SA1, LC1] in
defm J2_loop1 : LOOP_ri<"loop1">;
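A comment sketch of the records each defm now creates (from the LOOP_ri
multiclass above, with defm-name concatenation):

  // defm J2_loop0 : LOOP_ri<"loop0"> produces:
  //   J2_loop0i, J2_loop0r          (normal forms)
  //   J2_loop0iext, J2_loop0rext    (isCodeGenOnly, forced-extended forms)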
let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
@@ -3751,12 +4672,40 @@ multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
def r : SPLOOP_rBase<mnemonic, op>;
}
-let isCodeGenOnly = 0 in {
defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
+
+// if (Rs[!>=<]=#0) jump:[t/nt]
+let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0 in
+class J2_jump_0_Base<string compare, bit isTak, bits<2> op>
+ : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2),
+ "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > {
+ bits<5> Rs;
+ bits<15> r13_2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-24} = 0b0001;
+ let Inst{23-22} = op;
+ let Inst{12} = isTak;
+ let Inst{21} = r13_2{14};
+ let Inst{20-16} = Rs;
+ let Inst{11-1} = r13_2{12-2};
+ let Inst{13} = r13_2{13};
+ }
+
+multiclass J2_jump_compare_0<string compare, bits<2> op> {
+ def NAME : J2_jump_0_Base<compare, 0, op>;
+ def NAME#pt : J2_jump_0_Base<compare, 1, op>;
}
+defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>;
+defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>;
+defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>;
+defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>;
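Sketch of one expansion of the multiclass above (def NAME and def NAME#pt):

  // defm J2_jumprz : J2_jump_compare_0<"!=", 0b00> yields two records:
  //   J2_jumprz:   "if ($Rs!=#0) jump:nt $r13_2"   (isTak = 0)
  //   J2_jumprzpt: "if ($Rs!=#0) jump:t $r13_2"    (isTak = 1)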
+
// Transfer to/from Control/GPR Guest/GPR
let hasSideEffects = 0 in
class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
@@ -3773,8 +4722,9 @@ class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
let Inst{20-16} = src;
let Inst{4-0} = dst;
}
-let isCodeGenOnly = 0 in
+
def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
+def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>;
def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
@@ -3794,13 +4744,14 @@ class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
let Inst{4-0} = dst;
}
-let hasNewValue = 1, opNewValue = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, opNewValue = 0 in
def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
+def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>;
def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
// Y4_trace: Send value to etm trace.
-let isSoloAX = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isSoloAX = 1, hasSideEffects = 0 in
def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
"trace($Rs)"> {
bits<5> Rs;
@@ -3810,350 +4761,201 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
let Inst{20-16} = Rs;
}
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- s12ImmPred:$src3)))]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- s12ImmPred:$src3)))]>;
-
-// Generate frameindex addresses.
-let isReMaterializable = 1 in
-def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
- "$dst = add($src1)",
- [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
-
// Support for generating global address.
// Taken from X86InstrInfo.td.
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [
- SDTCisVT<0, i32>,
- SDTCisVT<1, i32>,
- SDTCisPtrTy<0>]>;
-def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
-def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
// HI/LO Instructions
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.l = #LO($global)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.h = #HI($global)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
+ hasNewValue = 1, opNewValue = 0 in
+class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp>
+ : ALU32_ri<(outs IntRegs:$dst),
+ (ins i32imm:$imm_value),
+ "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> {
+ bits<5> dst;
+ bits<32> imm_value;
+ let IClass = 0b0111;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.l = #LO($imm_value)",
- []>;
+ let Inst{27} = Rs;
+ let Inst{26-24} = MajOp;
+ let Inst{21} = MinOp;
+ let Inst{20-16} = dst;
+ let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30});
+ let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16});
+}
+let isAsmParserOnly = 1 in {
+ def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>;
+ def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>;
+ def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>;
+ def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>;
+}
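A comment-style illustration of the REG_IMMED field split, read off the Inst
assignments above:

  // "LO" encodes the low halfword:  Inst{23-22} = imm_value{15-14},
  //                                 Inst{13-0}  = imm_value{13-0}.
  // "HI" encodes the high halfword: Inst{23-22} = imm_value{31-30},
  //                                 Inst{13-0}  = imm_value{29-16}.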
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.h = #HI($imm_value)",
- []>;
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.l = #LO($label@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.l = #LO($jt)",
- []>;
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.h = #HI($label@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.h = #HI($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOT)",
+ []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0 in
-def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.l = #LO($label)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1 , hasSideEffects = 0 in
-def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.h = #HI($label)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOTREL)",
+ []>;
// This pattern is incorrect. When we add small data, we should change
// this pattern to use memw(#foo).
// This is for sdata.
-let isMoveImm = 1 in
-def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+let isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst),
(load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
-// This is for non-sdata.
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst = CONST32(#$jt)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tjumptable:$jt))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32_GP tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
-// Map BlockAddress lowering to CONST32_Int_Real
-def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
- (CONST32_Int_Real tblockaddress:$addr)>;
+// Map TLS addresses to a CONST32 instruction.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst = CONST32($label)",
- [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>;
-
-let isReMaterializable = 1, isMoveImm = 1 in
-def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global),
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
"$dst = CONST64(#$global)",
- [(set (i64 DoubleRegs:$dst), imm:$global) ]>;
+ [(set (i64 DoubleRegs:$dst), imm:$global)]>;
-def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
- "$dst = xor($dst, $dst)",
- [(set (i1 PredRegs:$dst), 0)]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "",
+ [(set (i1 PredRegs:$dst), 1)]>;
-def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpy($src1, $src2)",
- [(set (i32 IntRegs:$dst),
- (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))),
- (i64 (sext (i32 IntRegs:$src2))))),
- (i32 32)))))]>;
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)",
+ [(set (i1 PredRegs:$dst), 0)]>;
// Pseudo instructions.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-
-def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-
-def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
// For tail calls, a HexagonTCRet SDNode has 3 SDNode properties: a chain,
// an optional glue, and variadic arguments.
// Its single operand has pointer type.
-def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-let Defs = [R29, R30], Uses = [R31, R30, R29] in {
- def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- "Should never be emitted",
- [(callseq_start timm:$amt)]>;
-}
+let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ ".error \"should not emit\" ",
+ [(callseq_start timm:$amt)]>;
-let Defs = [R29, R30, R31], Uses = [R29] in {
- def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "Should never be emitted",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
-}
-// Call subroutine.
-let isCall = 1, hasSideEffects = 0,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
- R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALL : JInst<(outs), (ins calltarget:$dst),
- "call $dst", []>;
-}
+let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ ".error \"should not emit\" ",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
// Call subroutine indirectly.
-let Defs = VolatileV3.Regs, isCodeGenOnly = 0 in
+let Defs = VolatileV3.Regs in
def J2_callr : JUMPR_MISC_CALLR<0, 1>;
// Indirect tail-call.
-let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in
-def TCRETURNR : T_JMPr;
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNr : T_JMPr;
// Direct tail-calls.
-let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
-isTerminator = 1, isCodeGenOnly = 1 in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
-}
-
-// Map call instruction.
-def : Pat<(call (i32 IntRegs:$dst)),
- (J2_callr (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
-def : Pat<(call tglobaladdr:$dst),
- (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
-def : Pat<(call texternalsym:$dst),
- (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
-//Tail calls.
-def : Pat<(HexagonTCRet tglobaladdr:$dst),
- (TCRETURNtg tglobaladdr:$dst)>;
-def : Pat<(HexagonTCRet texternalsym:$dst),
- (TCRETURNtext texternalsym:$dst)>;
-def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
- (TCRETURNR (i32 IntRegs:$dst))>;
-
-// Atomic load and store support
-// 8 bit atomic load
-def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
- (i32 (L2_loadrub_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
- (i32 (L2_loadrub_io (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-// 16 bit atomic load
-def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
- (i32 (L2_loadruh_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
- (i32 (L2_loadruh_io (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
- (i32 (L2_loadri_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
- (i32 (L2_loadri_io (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
-// 64 bit atomic load
-def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
- (i64 (L2_loadrd_io AddrFI:$src1, 0))>;
-
-def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
- (i64 (L2_loadrd_io (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-
-
-def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
- (S2_storerb_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (S2_storerb_io (i32 IntRegs:$src2), s11_0ImmPred:$offset,
- (i32 IntRegs:$src1))>;
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
- (S2_storerh_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
- (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)),
- (S2_storerh_io (i32 IntRegs:$src2), s11_1ImmPred:$offset,
- (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
- (S2_storeri_io AddrFI:$src2, 0, (i32 IntRegs:$src1))>;
-
-def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (S2_storeri_io (i32 IntRegs:$src2), s11_2ImmPred:$offset,
- (i32 IntRegs:$src1))>;
-
-
-
-
-def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
- (S2_storerd_io AddrFI:$src2, 0, (i64 DoubleRegs:$src1))>;
-
-def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (S2_storerd_io (i32 IntRegs:$src2), s11_3ImmPred:$offset,
- (i64 DoubleRegs:$src1))>;
+// Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNi tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNi texternalsym:$dst)>;
+def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+ (TCRETURNr IntRegs:$dst)>;
// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
-def : Pat <(and (i32 IntRegs:$src1), 65535),
- (A2_zxth (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 65535),
+ (A2_zxth IntRegs:$src1)>;
// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def : Pat <(and (i32 IntRegs:$src1), 255),
- (A2_zxtb (i32 IntRegs:$src1))>;
+def: Pat<(and (i32 IntRegs:$src1), 255),
+ (A2_zxtb IntRegs:$src1)>;
// Map Add(p1, true) to p1 = not(p1); in i1 arithmetic -1 is "true" and
// addition is XOR, so adding true complements the predicate.
// Add(p1, false) should never be produced; if it is, it must be mapped to
// a NOOP.
-def : Pat <(add (i1 PredRegs:$src1), -1),
- (C2_not (i1 PredRegs:$src1))>;
+def: Pat<(add (i1 PredRegs:$src1), -1),
+ (C2_not PredRegs:$src1)>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
- (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
- s8ImmPred:$src2))>;
+def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = TFR_condset_ri(p0, r1, #i)
-def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
- (i32 IntRegs:$src3)),
- (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
- s12ImmPred:$src2))>;
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
- (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
- (i32 IntRegs:$src2)))>;
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
- (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
-
-// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
-def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
- (i1 (C2_andn (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
-
-
-let AddedComplexity = 100 in
-def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
- (i64 (A2_combinew (A2_tfrsi 0),
- (L2_loadrub_io (CONST32_set tglobaladdr:$global), 0)))>,
- Requires<[NoV4T]>;
-
-// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
-let AddedComplexity = 10 in
-def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (A2_and (i32 (L2_loadrb_io AddrFI:$addr, 0)), (A2_tfrsi 0x1)))>;
+def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
- (i64 (A2_sxtw (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>;
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (A2_sxtw (LoReg DoubleRegs:$src1))>;
-// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(SXTH(Rss.lo)).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
- (i64 (A2_sxtw (i32 (A2_sxth (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg))))))>;
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
-// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(SXTB(Rss.lo)).
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
- (i64 (A2_sxtw (i32 (A2_sxtb (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg))))))>;
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
// We want to prevent emitting pnot's as much as possible.
// Map brcond with an unsupported setcc to a J2_jumpf.
@@ -4166,144 +4968,68 @@ def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
bb:$offset),
(J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
-def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (J2_jumpf (i1 PredRegs:$src1), bb:$offset)>;
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
-def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (J2_jumpt (i1 PredRegs:$src1), bb:$offset)>;
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpgti (i32 IntRegs:$src1),
- (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>;
-
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
-
-def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- bb:$offset)>;
+def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
+ bb:$offset)>;
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes, so emulate them with combines.
-def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src3)),
- (i64 (A2_combinew (i32 (C2_mux (i1 PredRegs:$src1),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
- subreg_hireg)))),
- (i32 (C2_mux (i1 PredRegs:$src1),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3),
- subreg_loreg))))))>;
+def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+ (HiReg DoubleRegs:$src3)),
+ (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+ (LoReg DoubleRegs:$src3)))>;
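+// Conceptually, a sketch in C of the emulated 64-bit mux (hi()/lo() denote
+// the 32-bit halves of a double register):
+//   uint64_t mux64(bool p, uint64_t a, uint64_t b) {
+//     uint32_t h = p ? hi(a) : hi(b);   // C2_mux on the high words
+//     uint32_t l = p ? lo(a) : lo(b);   // C2_mux on the low words
+//     return combine(h, l);             // A2_combinew
+//   }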
// Map from a 1-bit select to logical ops.
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
-def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2),
- (i1 PredRegs:$src3)),
- (C2_or (C2_and (i1 PredRegs:$src1), (i1 PredRegs:$src2)),
- (C2_and (C2_not (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>;
-
-// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
-def : Pat<(i1 (load ADDRriS11_2:$addr)),
- (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>;
+def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
+ (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
// Map truncation from i64 to i32 (take the low subregister).
-def : Pat<(i32 (trunc (i64 DoubleRegs:$src))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>;
+def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (LoReg DoubleRegs:$src)>;
// Map truncation from i64 to i1.
-def : Pat<(i1 (trunc (i64 DoubleRegs:$src))),
- (i1 (C2_tfrrp (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg))))>;
-
-// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
-def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
-def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-
-// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
-def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>;
-
-// Map Rdd = anyext(Rs) -> Rdd = A2_sxtw(Rs).
-// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
-// Better way to do this?
-def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
- (i64 (A2_sxtw (i32 IntRegs:$src1)))>;
-
-// Map cmple -> cmpgt.
+def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
// Rss <= Rtt -> !(Rss > Rtt).
-def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtp (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
// Map cmpne -> cmpeq.
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (i1 (C2_not(i1 (C2_cmpeqi (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
-
-// Map cmpne(Rs) -> !cmpeqe(Rs).
-// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
// Convert setne back to xor for Hexagon, since we compute with predicate
// registers.
-def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
- (i1 (C2_xor (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>;
// Map cmpne(Rss) -> !cmpeq(Rss).
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeqp (i64 DoubleRegs:$src1),
- (i64 DoubleRegs:$src2)))))>;
+def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
// rs >= rt -> !(rt > rs).
@@ -4311,366 +5037,120 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
-def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
-def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpgtp (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src1)))))>;
+def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (i1 (C2_not (C2_cmpgti (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
-
-// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
-// rss < rtt -> (rtt > rss).
-def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_cmpgtp (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
-
-// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs)
-// rs < rt -> rt > rs.
-// We can let assembler map it, or we can do in the compiler itself.
-def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>;
-
-// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
-// rs < rt -> rt > rs.
-def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs); unsigned x >= 0 is always true.
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
- (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
- (i1 (C2_cmpgtui (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
// Generate cmpgtu(Rs, #u9)
-def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
- (i1 (C2_cmpgtui (i32 IntRegs:$src1), u9ExtPred:$src2))>;
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>;
-
-// Map from Rs >= Rt -> !(Rt > Rs).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
-
-// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
-// Map from (Rs <= Rt) -> !(Rs > Rt).
-def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
// Map from (Rs <= Rt) -> !(Rs > Rt).
-def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (i1 (C2_not (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>;
+def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
// Sign extends.
// i1 -> i32
-def : Pat <(i32 (sext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), -1, 0))>;
+def: Pat<(i32 (sext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, -1, 0)>;
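+// (sext of an i1 is all-ones or zero, hence the mux between #-1 and #0.)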
// i1 -> i64
-def : Pat <(i64 (sext (i1 PredRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi -1), (C2_muxii (i1 PredRegs:$src1), -1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i8 -> i64
-def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
-
-// Convert any-extended load back to load and sign extend.
-// i8 -> i64
-def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_sxtw (L2_loadrb_io AddrFI:$src1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i16 -> i64
-def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
- (i64 (A2_sxtw (L2_loadrh_io AddrFI:$src1, 0)))>;
-
-// Convert sign-extended load back to load and sign extend.
-// i32 -> i64
-def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>;
-
+def: Pat<(i64 (sext (i1 PredRegs:$src1))),
+ (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
// Zero extends.
// i1 -> i32
-def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// i1 -> i64
-def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
- Requires<[NoV4T]>;
-
-// i32 -> i64
-def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
- Requires<[NoV4T]>;
-
-// i8 -> i64
-def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i1 -> i64
-def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i16 -> i64
-def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadruh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// i32 -> i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 10 in
-def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
- (i32 (L2_loadri_io AddrFI:$src1, 0))>;
+def: Pat<(i32 (zext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
// Map from Rd = Pd (anyext) to Rd = mux(Pd, #1, #0).
-def : Pat <(i32 (zext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def : Pat <(i32 (anyext (i1 PredRegs:$src1))),
- (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))>;
-
-// Map from Rss = Pd to Rdd = A2_sxtw (mux(Pd, #1, #0))
-def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
- (i64 (A2_sxtw (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0))))>;
-
-
-let AddedComplexity = 100 in
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 (i32 (add IntRegs:$src2,
- s11_2ExtPred:$offset2)))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io IntRegs:$src2,
- s11_2ExtPred:$offset2)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- IntRegs:$srcLow))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 (i32 (add IntRegs:$src2,
- s11_2ExtPred:$offset2)))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io IntRegs:$src2,
- s11_2ExtPred:$offset2)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zext (i32 IntRegs:$srcLow))))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- IntRegs:$srcLow))>;
-
-// Any extended 64-bit load.
-// anyext i32 -> i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
-
-// When there is an offset we should prefer the pattern below over the pattern above.
-// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc)
-// So this complexity below is comfortably higher to allow for choosing the below.
-// If this is not done then we generate addresses such as
-// ********************************************
-// r1 = add (r0, #4)
-// r1 = memw(r1 + #0)
-// instead of
-// r1 = memw(r0 + #4)
-// ********************************************
-let AddedComplexity = 100 in
-def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// anyext i16 -> i64.
-def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io AddrFI:$src1, 0)))>,
- Requires<[NoV4T]>;
+def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
-let AddedComplexity = 20 in
-def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A2_combinew (A2_tfrsi 0), (L2_loadrh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[NoV4T]>;
-
-// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
-def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A2_combinew (A2_tfrsi 0), (i32 IntRegs:$src1)))>,
- Requires<[NoV4T]>;
+// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
+ (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (A2_combinew
- (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (M2_dpmpyuu_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_s0
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))), 32)),
- subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
- 32)), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
-
-// Multiply 64-bit signed and use upper result.
-def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (M2_dpmpyss_acc_s0
- (i64
- (A2_combinew (A2_tfrsi 0),
- (i32
- (EXTRACT_SUBREG
- (i64
- (S2_lsr_i_p
- (i64
- (M2_dpmpyuu_s0
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))), 32)),
- subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))),
- 32)), subreg_loreg)))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>;
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32),
+ (HiReg $src1),
+ (LoReg $src2)),
+ (A2_combinew (A2_tfrsi 0),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))),
+ 32),
+ (HiReg $src1),
+ (HiReg $src2)),
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32)
+)>;
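+// The expansion above is the schoolbook computation of the upper word of the
+// 128-bit product from 32x32->64 partial products; conceptually, in C, with
+// hi()/lo() denoting the 32-bit halves and every product below 64-bit:
+//   uint64_t mulhu64(uint64_t a, uint64_t b) {
+//     uint64_t t = (uint64_t)lo(a) * lo(b);
+//     uint64_t u = (t >> 32) + (uint64_t)hi(a) * lo(b)
+//                            + lo((uint64_t)lo(a) * hi(b));
+//     return (u >> 32) + (uint64_t)hi(a) * hi(b)
+//                      + (((uint64_t)lo(a) * hi(b)) >> 32);
+//   }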
// Hexagon specific ISD nodes.
-//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
-def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
- SDTHexagonADJDYNALLOC>;
-// Needed to tag these instructions for stack layout.
-let usesCustomInserter = 1 in
-def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
- s16Imm:$src2),
- "$dst = add($src1, #$src2)",
- [(set (i32 IntRegs:$dst),
- (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1),
- s16ImmPred:$src2))]>;
+def SDTHexagonALLOCA : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
+ [SDNPHasChain]>;
+
+// The reason for the custom inserter is to record all ALLOCA instructions
+// in MachineFunctionInfo.
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1,
+ usesCustomInserter = 1 in
+def ALLOCA: ALU32Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32Imm:$A), "",
+ [(set (i32 IntRegs:$Rd),
+ (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>;
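+// On the C++ side this is done in EmitInstrWithCustomInserter; a minimal
+// sketch of the idea, assuming a recording helper such as
+// addAllocaAdjustInst on HexagonMachineFunctionInfo:
+//   MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter(
+//       MachineInstr *MI, MachineBasicBlock *BB) const {
+//     if (MI->getOpcode() == Hexagon::ALLOCA) {
+//       auto *FuncInfo = BB->getParent()->getInfo<HexagonMachineFunctionInfo>();
+//       FuncInfo->addAllocaAdjustInst(MI);  // remembered for frame lowering
+//     }
+//     return BB;
+//   }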
+
+let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
+def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>;
def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+let isCodeGenOnly = 1 in
def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = $src1",
[(set (i32 IntRegs:$dst),
(Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
let AddedComplexity = 100 in
-def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
- (COPY (i32 IntRegs:$src1))>;
+def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
+ (i32 IntRegs:$src1)>;
-def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-def : Pat<(HexagonWrapperJT tjumptable:$dst),
- (i32 (CONST32_set_jt tjumptable:$dst))>;
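+// Jump-table and constant-pool addresses are materialized with a single
+// transfer-immediate; s16Ext is an extendable operand, so addresses that do
+// not fit in 16 signed bits use a constant extender.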
+def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>;
+def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>;
// XTYPE/SHIFT
//
@@ -4820,7 +5300,6 @@ let AddedComplexity = 100 in
defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
}
-let isCodeGenOnly = 0 in {
defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
@@ -4828,7 +5307,6 @@ defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
xtype_xor_imm_acc<"asl", shl, 0b10>;
-}
multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
let AddedComplexity = 100 in
@@ -4854,12 +5332,10 @@ multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
}
-let isCodeGenOnly = 0 in {
defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
-}
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
@@ -4890,9 +5366,42 @@ class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
: T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
isSat, isRnd, hasShift>;
-let isCodeGenOnly = 0 in
+let Itinerary = S_3op_tc_1_SLOT23 in {
+ def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>;
+ def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>;
+ def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>;
+ def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>;
+
+ def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>;
+ def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>;
+}
+
def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
+let hasSideEffects = 0 in
+class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps>
+ : SInst < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)",
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<2> Pu;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>;
+def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>;
+
//===----------------------------------------------------------------------===//
// Template class used by vector shift, vector rotate, vector neg,
// 32-bit shift, 64-bit shifts, etc.
@@ -4943,31 +5452,78 @@ class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
// Shift by register
// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
-let isCodeGenOnly = 0 in {
def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
-}
// Rd=[asr|lsr|asl|lsl](Rs,Rt)
-let isCodeGenOnly = 0 in {
def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
-}
// Shift by register with saturation
// Rd=asr(Rs,Rt):sat
// Rd=asl(Rs,Rt):sat
-let Defs = [USR_OVF], isCodeGenOnly = 0 in {
+let Defs = [USR_OVF] in {
def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
}
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd,
+                bit hasShift, bit hasSplat = 0>
+ : SInst < (outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")"
+ #!if(hasShift, ":<<1", "")
+ #!if(isRnd, ":rnd", "")
+ #!if(isSat, ":sat", ""),
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>;
+
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
+
+let hasSideEffects = 0 in
+class T_S3op_7 <string mnemonic, bit MajOp >
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<3> u3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23} = MajOp;
+ let Inst{20-16} = !if(MajOp, Rss, Rtt);
+ let Inst{12-8} = !if(MajOp, Rtt, Rss);
+ let Inst{7-5} = u3;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignib : T_S3op_7 < "valignb", 0>;
+def S2_vspliceib : T_S3op_7 < "vspliceb", 1>;
+
//===----------------------------------------------------------------------===//
// Template class for 'insert bitfield' instructions
//===----------------------------------------------------------------------===//
@@ -5021,17 +5577,45 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
// Rx=insert(Rs,Rtt)
// Rx=insert(Rs,#u5,#U5)
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>;
}
// Rxx=insert(Rss,Rtt)
// Rxx=insert(Rss,#u6,#U6)
-let isCodeGenOnly = 0 in {
def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
-}
+
+
+def SDTHexagonINSERT:
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTHexagonINSERTRP:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i64>]>;
+
+def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
+def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
+
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
+def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+
+let AddedComplexity = 100 in
+def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
+ (i32 (extloadi8 (add I32:$b, 3))),
+ 24, 8),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io I32:$b, 0))>;
+
//===----------------------------------------------------------------------===//
// Template class for 'extract bitfield' instructions
@@ -5089,18 +5673,39 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
// Rdd=extractu(Rss,Rtt)
// Rdd=extractu(Rss,#u6,#U6)
-let isCodeGenOnly = 0 in {
def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>;
-}
// Rd=extractu(Rs,Rtt)
// Rd=extractu(Rs,#u5,#U5)
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
}
+def SDTHexagonEXTRACTU:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTURP:
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i64>]>;
+
+def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
+def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
+
+def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
+ (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
+ (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
+ (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
+
//===----------------------------------------------------------------------===//
// :raw form of tableidx[bdhw] insns
//===----------------------------------------------------------------------===//
@@ -5127,16 +5732,26 @@ class tableidxRaw<string OpStr, bits<2>MinOp>
let Inst{4-0} = Rx;
}
-let isCodeGenOnly = 0 in {
def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
-}
-// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
-def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
- (i32 (M2_mpysin (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+//===----------------------------------------------------------------------===//
+// Template class for 'table index' instructions which are assembler mapped
+// to their :raw format.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in
+class tableidx_goodsyntax <string mnemonic>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
+ [], "$Rx = $_dst_" >;
+
+def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
+def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
+def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
+def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
//===----------------------------------------------------------------------===//
// V3 Instructions +
@@ -5167,3 +5782,9 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
// V5 Instructions -
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/64/Vector +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoVector.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 8e91476..84d035d 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -21,8 +21,7 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
// J +
//===----------------------------------------------------------------------===//
// Call subroutine.
-let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
- Defs = VolatileV3.Regs, isPredicable = 1,
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1,
isExtended = 0, isExtendable = 1, opExtendable = 0,
isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
class T_Call<string ExtStr>
@@ -37,8 +36,7 @@ class T_Call<string ExtStr>
let Inst{0} = 0b0;
}
-let isCall = 1, hasSideEffects = 1, validSubTargets = HasV3SubT,
- Defs = VolatileV3.Regs, isPredicated = 1,
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1,
isExtended = 0, isExtendable = 1, opExtendable = 1,
isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
class T_CallPred<bit IfTrue, string ExtStr>
@@ -64,9 +62,11 @@ multiclass T_Calls<string ExtStr> {
def f : T_CallPred<0, ExtStr>;
}
-let isCodeGenOnly = 0 in
defm J2_call: T_Calls<"">, PredRel;
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in
+def CALLv3nr : T_Call<"">, PredRel;
+
//===----------------------------------------------------------------------===//
// J -
//===----------------------------------------------------------------------===//
@@ -76,13 +76,10 @@ defm J2_call: T_Calls<"">, PredRel;
// JR +
//===----------------------------------------------------------------------===//
// Call subroutine from register.
-let isCall = 1, hasSideEffects = 0,
- Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
- P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
- def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst),
- "callr $dst",
- []>, Requires<[HasV3TOnly]>;
- }
+
+let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
+ def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+}
//===----------------------------------------------------------------------===//
// JR -
@@ -92,20 +89,16 @@ let isCall = 1, hasSideEffects = 0,
// ALU64/ALU +
//===----------------------------------------------------------------------===//
-
-let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23,
- validSubTargets = HasV3SubT, isCodeGenOnly = 0 in
+let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in
def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
: T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
-let isCodeGenOnly = 0 in {
def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
-}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0, isAsmParserOnly = 1 in
def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
(ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
[(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
@@ -134,12 +127,10 @@ class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-}
multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
@@ -164,25 +155,112 @@ let AddedComplexity = 200 in {
//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
-
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
// Map call instruction
-def : Pat<(call (i32 IntRegs:$dst)),
- (J2_call (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
-def : Pat<(call tglobaladdr:$dst),
- (J2_call tglobaladdr:$dst)>, Requires<[HasV3T]>;
-def : Pat<(call texternalsym:$dst),
- (J2_call texternalsym:$dst)>, Requires<[HasV3T]>;
+def : Pat<(callv3 (i32 IntRegs:$dst)),
+ (J2_callr (i32 IntRegs:$dst))>;
+def : Pat<(callv3 tglobaladdr:$dst),
+ (J2_call tglobaladdr:$dst)>;
+def : Pat<(callv3 texternalsym:$dst),
+ (J2_call texternalsym:$dst)>;
+def : Pat<(callv3 tglobaltlsaddr:$dst),
+ (J2_call tglobaltlsaddr:$dst)>;
+
+def : Pat<(callv3nr (i32 IntRegs:$dst)),
+ (CALLRv3nr (i32 IntRegs:$dst))>;
+def : Pat<(callv3nr tglobaladdr:$dst),
+ (CALLv3nr tglobaladdr:$dst)>;
+def : Pat<(callv3nr texternalsym:$dst),
+ (CALLv3nr texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// :raw form of vrcmpys:hi/lo insns
+//===----------------------------------------------------------------------===//
+// Vector reduce complex multiply by scalar.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpRaw<string HiLo, bits<3>MajOp>:
+ MInst<(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rdd;
+}
+
+def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>;
+def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l
+let hasSideEffects = 0, isAsmParserOnly = 1 in
+def M2_vrcmpys_s1
+ : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">;
+
+// Vector reduce complex multiply by scalar with accumulation.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpys_acc<string HiLo, bits<3>MajOp>:
+ MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [],
+ "$Rxx = $_src_"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rxx;
+ }
+
+def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>;
+def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l
+
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_acc_s1
+ : MInst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+ "$dst += vrcmpys($src1, $src2):<<1:sat", [],
+ "$dst2 = $dst">;
+
+def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>;
+def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>;
+
+// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_s1rp
+ : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">;
+
+
+// S2_cabacdecbin: CABAC decode bin.
+let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in
+def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 08bfd67..8b667c6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,6 +11,28 @@
//
//===----------------------------------------------------------------------===//
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
+
+def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
+def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
+
let hasSideEffects = 0 in
class T_Immext<Operand ImmType>
: EXTENDERInst<(outs), (ins ImmType:$imm),
@@ -35,19 +57,9 @@ def BITPOS32 : SDNodeXForm<imm, [{
// Return the bit position we will set [0-31].
// As an SDNode.
int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
-// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
-def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
-
-// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
-def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
-
-def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
- (HexagonCONST32 node:$addr), [{
- return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
-}]>;
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
@@ -119,21 +131,19 @@ class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
}
-let BaseOpcode = "andn_rr", CextOpcode = "andn", isCodeGenOnly = 0 in
+let BaseOpcode = "andn_rr", CextOpcode = "andn" in
def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
-let BaseOpcode = "orn_rr", CextOpcode = "orn", isCodeGenOnly = 0 in
+let BaseOpcode = "orn_rr", CextOpcode = "orn" in
def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
-let CextOpcode = "rcmp.eq", isCodeGenOnly = 0 in
+let CextOpcode = "rcmp.eq" in
def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
-let CextOpcode = "!rcmp.eq", isCodeGenOnly = 0 in
+let CextOpcode = "!rcmp.eq" in
def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
-let isCodeGenOnly = 0 in {
def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-}
// Pats for instruction selection.
@@ -146,11 +156,15 @@ class CmpInReg<PatFrag Op>
def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
+def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
+
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "reg";
let CextOpcode = mnemonic;
let isCompare = 1;
@@ -169,13 +183,26 @@ class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
+
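+// (Rs ^ Rt) & 0xff is zero exactly when the low bytes of Rs and Rt agree
+// (likewise 0xffff for the low halfwords), so these idioms map directly onto
+// the byte/halfword compares: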
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}
class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
@@ -183,7 +210,6 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
: ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
"$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "imm";
let CextOpcode = mnemonic;
let isCompare = 1;
@@ -208,19 +234,17 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>;
def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>;
def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>;
def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>;
def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>;
def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>;
-}
+
class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8),
"$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel {
- let validSubTargets = HasV4SubT;
let InputType = "imm";
let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
let isExtendable = 1;
@@ -243,22 +267,19 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-}
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>;
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
// Preserve the S2_tstbit_r generation
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
(i32 IntRegs:$src1))), 0)))),
(C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
-
//===----------------------------------------------------------------------===//
// ALU32 -
//===----------------------------------------------------------------------===//
@@ -286,26 +307,23 @@ class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
let Inst{4-0} = Rdd;
}
-let opExtendable = 2, isCodeGenOnly = 0 in
+let opExtendable = 2 in
def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8),
"$Rdd = combine($Rs, #$s8)">;
-let opExtendable = 1, isCodeGenOnly = 0 in
+let opExtendable = 1 in
def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-def HexagonWrapperCombineRI_V4 :
- SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineIR_V4 :
- SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
-def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
- (A4_combineri IntRegs:$r, s8ExtPred:$i)>,
- Requires<[HasV4T]>;
-
-def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
- (A4_combineir s8ExtPred:$i, IntRegs:$r)>,
- Requires<[HasV4T]>;
+def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
+}
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
@@ -323,6 +341,12 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
let Inst{4-0} = Rdd;
}
+// The complexity of the combine with two immediates should be greater than
+// the complexity of a combine involving a register.
+let AddedComplexity = 75 in
+def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
+ (A4_combineii imm:$s8, imm:$u6)>;
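+// With these priorities, ISel tries the two-immediate form (75) before the
+// register/immediate forms (50), and those before the plain register-pair
+// combine; e.g. combine(#2, #10) selects A4_combineii.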
+
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -330,24 +354,182 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
+
+def Zext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def Sext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
+
+// Patterns to generate indexed loads with different forms of the address:
+// - frameindex,
+// - base + offset,
+// - base (without offset).
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ PatLeaf ImmPred, InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)),
+ (VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
+ def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
+ (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))),
+ (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
+}
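+// Each defm below instantiates all four address forms. For instance, the
+// zextloadi8 case yields, among others (a sketch):
+//   (i64 (zextloadi8 (add $Rs, #off))) -> (Zext64 (L2_loadrub_io $Rs, #off))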
+
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
+
+// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
+def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
+
//===----------------------------------------------------------------------===//
// Template class for load instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, hasSideEffects = 0,
-validSubTargets = HasV4SubT, addrMode = AbsoluteSet in
-class T_LD_abs_set<string mnemonic, RegisterClass RC>:
- LDInst2<(outs RC:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = "#mnemonic#"($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
+ hasSideEffects = 0 in
+class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
+ LDInst<(outs RC:$dst1, IntRegs:$dst2),
+ (ins u6Ext:$addr),
+ "$dst1 = "#mnemonic#"($dst2 = #$addr)",
+ []> {
+ bits<7> name;
+ bits<5> dst1;
+ bits<5> dst2;
+ bits<6> addr;
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{13-12} = 0b01;
+ let Inst{4-0} = dst1;
+ let Inst{20-16} = dst2;
+ let Inst{11-8} = addr{5-2};
+ let Inst{6-5} = addr{1-0};
+}
+
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>;
+ def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>;
+ def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>;
+ def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1 in
+ def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>;
+
+let accessSize = WordAccess in {
+ def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>;
+ def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>;
+}
-def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>;
-def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>;
-def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>;
-def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>;
-def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>;
-def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>;
+
+let accessSize = ByteAccess in
+ def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>;
+
+let accessSize = HalfWordAccess in
+def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>;
+
+// Load - Indirect with long offset
+let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
+opExtentBits = 6, opExtendable = 3 in
+class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<4> MajOp>
+ : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3),
+ "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
+ [] >, ImmRegShl {
+ bits<5> dst;
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ let CextOpcode = CextOp;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12} = 0b1;
+ let Inst{11-8} = src3{5-2};
+ let Inst{7} = src2{0};
+ let Inst{6-5} = src3{1-0};
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = ByteAccess in {
+ def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>;
+ def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>;
+ def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo",
+ DoubleRegs, 0b0100>;
+}
+
+let accessSize = HalfWordAccess in {
+ def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>;
+ def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>;
+ def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>;
+ def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo",
+ DoubleRegs, 0b0010>;
+}
+
+let accessSize = WordAccess in {
+ def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>;
+ def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>;
+ def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>;
+}
+
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
+
+
+multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tglobaladdr:$src2)))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tconstpool:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tconstpool:$src2)))),
+ (MI IntRegs:$src1, 0, tconstpool:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tjumptable:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tjumptable:$src2)))),
+ (MI IntRegs:$src1, 0, tjumptable:$src2)>;
+}
+
+let AddedComplexity = 60 in {
+defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
+defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
+
+defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
+defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
+
+defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
+defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
+}
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated load instructions with
@@ -430,217 +612,234 @@ multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
}
}
-let hasNewValue = 1, accessSize = ByteAccess, isCodeGenOnly = 0 in {
+let hasNewValue = 1, accessSize = ByteAccess in {
defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
}
-let hasNewValue = 1, accessSize = HalfWordAccess, isCodeGenOnly = 0 in {
+let hasNewValue = 1, accessSize = HalfWordAccess in {
defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
}
-let hasNewValue = 1, accessSize = WordAccess, isCodeGenOnly = 0 in
+let hasNewValue = 1, accessSize = WordAccess in
defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
// 'def pats' for load instructions with base + register offset and non-zero
// immediate value. Immediate value is used to left-shift the second
// register operand.
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
let AddedComplexity = 40 in {
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrb_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrub_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrub_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadruh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadruh_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i32 (load (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadri_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
-
-def : Pat <(i64 (load (add IntRegs:$src1,
- (shl IntRegs:$src2, u2ImmPred:$offset)))),
- (L4_loadrd_rr IntRegs:$src1,
- IntRegs:$src2, u2ImmPred:$offset)>,
- Requires<[HasV4T]>;
+ def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_pat<load, i64, L4_loadrd_rr>;
}
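+// The shifted-index form corresponds to an ordinary array access; a sketch
+// with the base in $Rs and the index in $Rt:
+//   v = a[i];   // (load (add $Rs, (shl $Rt, #2)))  -> L4_loadri_rr $Rs, $Rt, #2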
-
// 'def pats' for load instruction base + register offset and
// zero immediate value.
-let AddedComplexity = 10 in {
-def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrd_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
+}
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrb_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+// zext i1->i64
+def: Pat<(i64 (zext (i1 PredRegs:$src1))),
+ (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (Zext64 IntRegs:$src1)>;
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
-def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrub_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+// Template class for store instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 1, opExtentBits = 6,
+ addrMode = AbsoluteSet, isNVStorable = 1 in
+class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs IntRegs:$dst),
+ (ins u6Ext:$addr, RC:$src),
+ mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<5> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
-def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadrh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+ let IClass = 0b1010;
-def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = dst;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
-def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadruh_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
- (L4_loadri_rr IntRegs:$src1, IntRegs:$src2, 0)>,
- Requires<[HasV4T]>;
+let isNVStorable = 0 in {
+ def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs,
+ 0b011, HalfWordAccess, 1>;
+ def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs,
+ 0b110, DoubleWordAccess>;
}
-// zext i1->i64
-def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (A4_combineir 0, (C2_muxii (i1 PredRegs:$src1), 1, 0)))>,
- Requires<[HasV4T]>;
+let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2,
+isExtended = 1, opExtentBits = 6 in
+class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
+ MemAccessSize AccessSz >
+ : NVInst <(outs IntRegs:$dst),
+ (ins u6Ext:$addr, IntRegs:$src),
+ mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<3> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
-// zext i32->i64
-def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (A4_combineir 0, (i32 IntRegs:$src1)))>,
- Requires<[HasV4T]>;
-// zext i8->i64
-def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let IClass = 0b1010;
-// zext i1->i64
-def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
- (i64 (A4_combineir 0, (L2_loadrub_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
- s11_0ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrub_io IntRegs:$src1,
- s11_0ExtPred:$offset)))>,
- Requires<[HasV4T]>;
-
-// zext i16->i64
-def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (A4_combineir 0, (L2_loadruh_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadruh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[HasV4T]>;
-
-// anyext i16->i64
-def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadrh_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 20 in
-def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
- s11_1ExtPred:$offset))),
- (i64 (A4_combineir 0, (L2_loadrh_io IntRegs:$src1,
- s11_1ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = dst;
+ let Inst{13-11} = 0b000;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
-// zext i32->i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
+let mayStore = 1, addrMode = AbsoluteSet in {
+ def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>;
+ def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>;
+ def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>;
+}
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
+addrMode = BaseLongOffset, AddedComplexity = 40 in
+class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
+ []>, ImmRegShl, NewValueRel {
+
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<5> src4;
-// anyext i32->i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>,
- Requires<[HasV4T]>;
+ let accessSize = AccessSz;
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+ let IClass = 0b1010;
-let AddedComplexity = 100 in
-def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>,
- Requires<[HasV4T]>;
+ let Inst{27-24} = 0b1101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-8} = src4;
+ let Inst{7} = 0b1;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+}
+def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011,
+ HalfWordAccess, 1>;
+def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
+def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
+ DoubleWordAccess>;
+let AddedComplexity = 40 in
+multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u32ImmPred:$src3)),
+ (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-///
-//===----------------------------------------------------------------------===//
-// Template class for store instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT,
-addrMode = AbsoluteSet in
-class T_ST_abs_set<string mnemonic, RegisterClass RC>:
- STInst2<(outs IntRegs:$dst1),
- (ins RC:$src1, u0AlwaysExt:$src2),
- mnemonic#"($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
-def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>;
-def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>;
-def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>;
-def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>;
+defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
+defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
+defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
+defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
+
+let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
+ opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
+class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
+ MemAccessSize AccessSz>
+ : NVInst <(outs ),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<3> src4;
+
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1101101;
+ let Inst{12-11} = 0b00;
+ let Inst{7} = 0b1;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src4;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+ }
+
+def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>;
+def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>;
+def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated store instructions with
@@ -804,8 +1003,7 @@ multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0,
- isCodeGenOnly = 0 in {
+let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
@@ -825,83 +1023,18 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0,
defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
}
-let Predicates = [HasV4T], AddedComplexity = 10 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2,
- u2ImmPred:$src3))),
- (S4_storerb_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2,
- u2ImmPred:$src3))),
- (S4_storerh_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(store (i32 IntRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (S4_storeri_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, IntRegs:$src4)>;
-
-def : Pat<(store (i64 DoubleRegs:$src4),
- (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
- (S4_storerd_rr IntRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3, DoubleRegs:$src4)>;
-}
-
-let isExtended = 1, opExtendable = 2 in
-class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> :
- STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4),
- mnemonic#"($src1<<#$src2+##$src3) = $src4",
- [(stOp (VT RC:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in
-class T_ST_LongOff_nv <string mnemonic> :
- NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- mnemonic#"($src1<<#$src2+##$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> {
- let BaseOpcode = BaseOp#"_shl" in {
- let isNVStorable = 1 in
- def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
-
- def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
- }
-}
-
-let AddedComplexity = 10, validSubTargets = HasV4SubT in {
- def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
- defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
- defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel;
- defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel;
-}
-
-let AddedComplexity = 40 in
-multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
- PatFrag stOp> {
- def : Pat<(stOp (VT RC:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
- (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))),
+ (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
- def : Pat<(stOp (VT RC:$src4),
- (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
- (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+let AddedComplexity = 40 in {
+ def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexs_pat<store, I32, S4_storeri_rr>;
+ def: Storexs_pat<store, I64, S4_storerd_rr>;
}
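+// The store analogue of the indexed-load patterns above; a sketch of a
+// scaled-index store such as a[i] = v:
+//   (store $Ru, (add $Rs, (shl $Rt, #2)))  -> S4_storeri_rr $Rs, $Rt, #2, $Ru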
-defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
-defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
-defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
-defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
-
// memd(Rx++#s4:3)=Rtt
// memd(Rx++#s4:3:circ(Mu))=Rtt
// memd(Rx++I:circ(Mu))=Rtt
@@ -1004,8 +1137,8 @@ multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
}
}
-let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset,
- InputType = "imm", isCodeGenOnly = 0 in {
+let hasSideEffects = 0, addrMode = BaseImmOffset,
+ InputType = "imm" in {
let accessSize = ByteAccess in
defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
@@ -1016,22 +1149,49 @@ let hasSideEffects = 0, validSubTargets = HasV4SubT, addrMode = BaseImmOffset,
defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
}
-let Predicates = [HasV4T], AddedComplexity = 10 in {
-def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
- (S4_storeirb_io IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+def IMM_BYTE : SDNodeXForm<imm, [{
+  // -1 etc. is represented as 255 etc.; assigning it to a byte restores
+  // the desired signed value.
+  int8_t imm = N->getSExtValue();
+  return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_HALF : SDNodeXForm<imm, [{
+  // -1 etc. is represented as 65535 etc.; assigning it to a short restores
+  // the desired signed value.
+  int16_t imm = N->getSExtValue();
+  return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+  // -1 etc. can be represented as 4294967295 etc. The DAG does not currently
+  // do this, but some optimization might convert -1 into a large positive
+  // number; assigning it to a word restores the desired signed value.
+  int32_t imm = N->getSExtValue();
+  return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
-def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
- u6_1ImmPred:$src2)),
- (S4_storeirh_io IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
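+// The ToImm* fragments just apply the transforms above from output patterns.
+// A minimal C sketch of what IMM_BYTE computes:
+//   int8_t b = (int8_t)255;   // b == -1: the truncation recovers the signed
+//   int32_t imm = b;          // value that the store pattern should emit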
-def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
- (S4_storeiri_io IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+let AddedComplexity = 40 in {
+  // Not using frameindex patterns for these stores, because the offset
+  // is not extendable. That could cause problems during frame index
+  // elimination, since an offset relative to R29/R30 may not fit in the
+  // u6 field.
+ def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
+ S4_storeirb_io>;
+ def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf,
+ S4_storeirh_io>;
+ def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord,
+ S4_storeiri_io>;
}
-let AddedComplexity = 6 in
-def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeirb_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
+def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
@@ -1039,16 +1199,10 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// memb(Rx++Mu:brev)=Rt
// memb(gp+#u16:0)=Rt
-
// Store halfword.
// TODO: needs to be implemented
// memh(Re=#U6)=Rt.H
// memh(Rs+#s11:1)=Rt.H
-let AddedComplexity = 6 in
-def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeirh_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
-
// memh(Rs+Ru<<#u2)=Rt.H
// TODO: needs to be implemented.
@@ -1065,7 +1219,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// if ([!]Pv[.new]) memh(#u6)=Rt.H
// if ([!]Pv[.new]) memh(#u6)=Rt
-
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
// TODO: needs to be implemented.
@@ -1075,20 +1228,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// Store word.
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
-
-// Store predicate:
-let hasSideEffects = 0 in
-def STriw_pred_V4 : STInst2<(outs),
- (ins MEMri:$addr, PredRegs:$src1),
- "Error; should not emit",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 6 in
-def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
- (S4_storeiri_io IntRegs:$src1, 0, s8ExtPred:$src2)>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2)=Rt
// memw(Rx++#s4:2:circ(Mu))=Rt
// memw(Rx++I:circ(Mu))=Rt
@@ -1203,7 +1342,7 @@ multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
+let addrMode = BaseImmOffset, InputType = "imm" in {
let accessSize = ByteAccess in
defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
u6_0Ext, 0b00>, AddrModeRel;
@@ -1218,11 +1357,45 @@ let addrMode = BaseImmOffset, InputType = "imm", isCodeGenOnly = 0 in {
}
//===----------------------------------------------------------------------===//
+// Post increment loads with register offset.
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1 in
+def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>;
+
+def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>;
+
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz>
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3),
+ "$dst = "#mnemonic#"($src2++$src3)", [],
+ "$src1 = $dst, $src2 = $_dst_"> {
+ bits<5> dst;
+ bits<5> src2;
+ bits<1> src3;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = src3;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>;
+
+//===----------------------------------------------------------------------===//
// Template class for non-predicated post increment .new stores
// mem[bhwd](Rx++#s4:[0123])=Nt.new
//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
- addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 3 in
+let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 3 in
class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
: NVInstPI_V4 <(outs IntRegs:$_dst_),
(ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
@@ -1254,8 +1427,8 @@ class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
// Template class for predicated post increment .new stores
// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new
//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, validSubTargets = HasV4SubT,
- addrMode = PostInc, isNVStore = 1, isNewValue = 1, opNewValue = 4 in
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 4 in
class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
bits<2> MajOp, bit isPredNot, bit isPredNew >
: NVInstPI_V4 <(outs IntRegs:$_dst_),
@@ -1310,13 +1483,13 @@ multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
}
}
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
//===----------------------------------------------------------------------===//
@@ -1343,15 +1516,12 @@ class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
let Inst{7} = 0b0;
}
-let isCodeGenOnly = 0 in {
def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
-}
// memb(Rx++#s4:0:circ(Mu))=Nt.new
// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
@@ -1401,7 +1571,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
let IClass = 0b0010;
- let Inst{26} = 0b0;
+ let Inst{27-26} = 0b00;
let Inst{25-23} = majOp;
let Inst{22} = isNegCond;
let Inst{18-16} = Ns;
@@ -1415,9 +1585,9 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
+ def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
+ def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
}
// NvOpNum = 0 -> First Operand is a new-value Register
@@ -1426,8 +1596,8 @@ multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
bit NvOpNum> {
let BaseOpcode = BaseOp#_NVJ in {
- defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
- defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
+ defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
+ defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
}
}
@@ -1438,13 +1608,12 @@ multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in {
- defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
- defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
- defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
- defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
- defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
+ defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
+ defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
+ defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
+ defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -1482,15 +1651,15 @@ class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+ def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>;
+ def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
}
multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
let BaseOpcode = BaseOp#_NVJri in {
- defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
- defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+ defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+ defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
}
}
@@ -1499,11 +1668,10 @@ multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in {
- defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
- defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
- defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
+ defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
+ defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -1540,16 +1708,16 @@ class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
bit isNegCond> {
// Branch not taken:
- def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+ def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
// Branch taken:
- def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
+ def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
}
multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
string ImmVal> {
let BaseOpcode = BaseOp#_NVJ_ConstImm in {
- defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
- defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
+ defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+ defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
}
}
@@ -1558,14 +1726,14 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
- Defs = [PC], hasSideEffects = 0, isCodeGenOnly = 0 in {
- defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
- defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
- defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+ defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+ defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
}
// J4_hintjumpr: Hint indirect conditional jump.
-let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
def J4_hintjumpr: JRInst <
(outs),
(ins IntRegs:$Rs),
@@ -1586,8 +1754,7 @@ def J4_hintjumpr: JRInst <
// PC-relative add
let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0,
- Uses = [PC], validSubTargets = HasV4SubT in
+ isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6),
"$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
bits<5> Rd;
@@ -1625,7 +1792,6 @@ class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
let Inst{1-0} = Pd;
}
-let isCodeGenOnly = 0 in {
def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
@@ -1634,7 +1800,69 @@ def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
-}
+
+// op(Ps, op(Pt, Pu))
+class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+// op(Ps, op(Pt, ~Pu))
+class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+def: LogLog_pat<and, and, C4_and_and>;
+def: LogLog_pat<and, or, C4_and_or>;
+def: LogLog_pat<or, and, C4_or_and>;
+def: LogLog_pat<or, or, C4_or_or>;
+
+def: LogLogNot_pat<and, and, C4_and_andn>;
+def: LogLogNot_pat<and, or, C4_and_orn>;
+def: LogLogNot_pat<or, and, C4_or_andn>;
+def: LogLogNot_pat<or, or, C4_or_orn>;
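+// An illustrative match for one of these (predicate registers hypothetical):
+//   (or P0, (and P1, (not P2)))  -> C4_or_andn P0, P1, P2
+// i.e., the C expression p0 | (p1 & ~p2) in a single compound instruction.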
+
+//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed for code generation in PIC mode.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonPICAdd
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_HexagonGOTAdd
+ : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+
+def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
+def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
+def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternal>;
+def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalJT>;
+def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalBA>;
+
+// PIC: Map from a block address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from the computation to generate a GOT pointer to a PC-relative add
+def: Pat<(Hexagonpic_add texternalsym:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a jump table address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a GOT-relative symbol reference to a load
+def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
+ (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
+
+// PIC: Map from a static symbol reference to a PC-relative add
+def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
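+// Under these mappings all PIC address formation funnels through C4_addipc
+// ("$Rd = add(pc, #$u6)"): block addresses, jump tables, static symbols and
+// the GOT pointer become PC-relative adds, and a GOT-based global access is
+// a word load through the GOT pointer. A sketch (registers and symbol
+// spelling illustrative only):
+//   r14 = add(pc, ##GOT)    // Hexagonpic_add  -> C4_addipc
+//   r0 = memw(r14 + ##g)    // Hexagonat_got   -> L2_loadri_io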
//===----------------------------------------------------------------------===//
// CR -
@@ -1645,12 +1873,15 @@ def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
//===----------------------------------------------------------------------===//
// Logical with-not instructions.
-let validSubTargets = HasV4SubT, isCodeGenOnly = 0 in {
- def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
- def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-}
+def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
+def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
+
+def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
bits<5> Rd;
@@ -1663,15 +1894,16 @@ def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
+
// Add and accumulate.
// Rd=add(Rs,add(Ru,#s6))
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
- opExtendable = 3, isCodeGenOnly = 0 in
+ opExtendable = 3 in
def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1690,7 +1922,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
}
let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 6, opExtendable = 2, isCodeGenOnly = 0 in
+ opExtentBits = 6, opExtendable = 2 in
def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru),
"$Rd = add($Rs, sub(#$s6, $Ru))",
@@ -1710,31 +1942,64 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
let Inst{7-5} = s6{2-0};
let Inst{4-0} = Ru;
}
-
+
+// Rd=add(Rs,sub(#s6,Ru))
+def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
+ (i32 IntRegs:$src3))),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=sub(add(Rs,#s6),Ru)
+def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
+ (i32 IntRegs:$src3)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=add(sub(Rs,Ru),#s6)
+def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
+ (s32ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
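+// All three forms above are the same computation written differently, which
+// is why they map to a single instruction:
+//   rs + (#s6 - ru)  ==  (rs + #s6) - ru  ==  (rs - ru) + #s6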
+
+
+// Add or subtract doublewords with carry.
+// TODO: Rdd=add(Rss,Rtt,Px):carry
+// TODO: Rdd=sub(Rss,Rtt,Px):carry
+
// Extract bitfield
// Rdd=extract(Rss,#u6,#U6)
// Rdd=extract(Rss,Rtt)
// Rd=extract(Rs,Rtt)
// Rd=extract(Rs,#u5,#U5)
-let isCodeGenOnly = 0 in {
def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>;
-}
-let hasNewValue = 1, isCodeGenOnly = 0 in {
+let hasNewValue = 1 in {
def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>;
}
-let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF], isCodeGenOnly = 0 in {
+// Complex add/sub halfwords/words
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>;
+ def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>;
+ def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>;
+ def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>;
+}
+
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>;
+ def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>;
+}
+
+let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in {
def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
}
// Logical xor with xor accumulation.
// Rxx^=xor(Rss,Rtt)
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def M4_xor_xacc
: SInst <(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -1749,36 +2014,102 @@ def M4_xor_xacc
let IClass = 0b1100;
- let Inst{27-23} = 0b10101;
+ let Inst{27-22} = 0b101010;
let Inst{20-16} = Rss;
let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b000;
let Inst{4-0} = Rxx;
}
-
+
+// Rotate and reduce bytes
+// Rdd=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rdd = vrcrotate($Rss, $Rt, #$u2)",
+ [], "", S_3op_tc_3x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b001111;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = 0b11;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rdd;
+ }
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate_acc
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rxx;
+ }
+
+// Vector reduce conditional negate halfwords
+let hasSideEffects = 0 in
+def S2_vrcnegh
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rxx += vrcnegh($Rss, $Rt)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b111;
+ let Inst{4-0} = Rxx;
+ }
+
// Split bitfield
-let isCodeGenOnly = 0 in
def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
// Arithmetic/Convergent round
-let isCodeGenOnly = 0 in
def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
-let isCodeGenOnly = 0 in
def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
-let Defs = [USR_OVF], isCodeGenOnly = 0 in
+let Defs = [USR_OVF] in
def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
// Logical-logical words.
// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
- opExtendable = 3, isCodeGenOnly = 0 in
+ opExtendable = 3 in
def S4_or_andix:
ALU64Inst<(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
"$Rx = or($Ru, and($_src_, #$s10))" ,
[(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] ,
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -1795,7 +2126,7 @@ def S4_or_andix:
// Miscellaneous ALU64 instructions.
//
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
bits<5> Rd;
@@ -1810,7 +2141,7 @@ def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
@@ -1826,7 +2157,54 @@ def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
let Inst{4-0} = Rd;
}
-let isCodeGenOnly = 0 in {
+let hasSideEffects = 0 in
+def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0100;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rt;
+ let Inst{12-8} = Rs;
+ let Inst{7} = 0b1;
+ let Inst{4-0} = Rd;
+}
+
// Rx[&|]=xor(Rs,Rt)
def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
@@ -1849,7 +2227,24 @@ def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-}
+
+def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
+def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
+def: T_MType_acc_pat2 <M4_or_and, and, or>;
+def: T_MType_acc_pat2 <M4_and_and, and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
+ (not IntRegs:$src3)))),
+ (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
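+// For example (a sketch): (xor $src1, (and $src2, (not $src3))) selects
+// M4_xor_andn, i.e. the C expression r1 ^ (r2 & ~r3) in one instruction.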
// Compound or-or and or-and
let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
@@ -1859,7 +2254,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
(ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
"$Rx |= "#mnemonic#"($Rs, #$s10)",
[(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))],
+ (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -1875,10 +2270,10 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
let Inst{4-0} = Rx;
}
-let CextOpcode = "ORr_ANDr", isCodeGenOnly = 0 in
+let CextOpcode = "ORr_ANDr" in
def S4_or_andi : T_CompOR <"and", 0b00, and>;
-let CextOpcode = "ORr_ORr", isCodeGenOnly = 0 in
+let CextOpcode = "ORr_ORr" in
def S4_or_ori : T_CompOR <"or", 0b10, or>;
// Modulo wrap
@@ -1923,22 +2318,33 @@ def S4_or_ori : T_CompOR <"or", 0b10, or>;
//===----------------------------------------------------------------------===//
// Bit reverse
-let isCodeGenOnly = 0 in
def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
// Bit count
-let isCodeGenOnly = 0 in {
def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-}
-def: Pat<(i32 (trunc (cttz (i64 DoubleRegs:$Rss)))),
- (S2_ct0p (i64 DoubleRegs:$Rss))>;
-def: Pat<(i32 (trunc (cttz (not (i64 DoubleRegs:$Rss))))),
- (S2_ct1p (i64 DoubleRegs:$Rss))>;
+// Count trailing zeros: 64-bit.
+def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>;
+
+// Count trailing ones: 64-bit.
+def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+
+// Define leading/trailing-count patterns that require zero-extension to 64 bits.
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
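+// cttz of a complemented value counts trailing ones, which is how S2_ct1p is
+// reached from generic nodes; the i64 results wrap the 32-bit count in Zext64,
+// since a bit count of a 64-bit value always fits in 32 bits. A C sketch:
+//   trailing_ones(x) == __builtin_ctzll(~x)   // -> S2_ct1p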
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+
+let hasSideEffects = 0, hasNewValue = 1 in
def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
@@ -1953,7 +2359,7 @@ def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
let Inst{4-0} = Rd;
}
-let hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+let hasSideEffects = 0, hasNewValue = 1 in
def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
@@ -1970,10 +2376,8 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
// Bit test/set/clear
-let isCodeGenOnly = 0 in {
def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-}
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
@@ -1993,11 +2397,9 @@ let AddedComplexity = 100 in
def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
(S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
-let isCodeGenOnly = 0 in {
def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-}
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
@@ -2022,14 +2424,13 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed.
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1,
- isCodeGenOnly = 0 in
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6),
"$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
[(set (i32 IntRegs:$Rd),
(add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
- u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2049,12 +2450,12 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
// Rd=add(#u6,mpyi(Rs,Rt))
let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
- isExtendable = 1, opExtentBits = 6, opExtendable = 1, isCodeGenOnly = 0 in
+ isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
[(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))],
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
"", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
@@ -2099,18 +2500,16 @@ class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
let Inst{4-0} = src1;
}
-let isCodeGenOnly = 0 in
def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
(ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
- CextOpcode = "ADD_MPY", InputType = "imm", isCodeGenOnly = 0 in
-def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred,
+ CextOpcode = "ADD_MPY", InputType = "imm" in
+def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
(ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
-let validSubTargets = HasV4SubT, CextOpcode = "ADD_MPY", InputType = "reg",
- hasNewValue = 1, isCodeGenOnly = 0 in
+let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
"$Rx = add($Ru, mpyi($_src_, $Rs))",
@@ -2129,51 +2528,101 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
let Inst{20-16} = Rs;
}
-// Rd=add(##,mpyi(Rs,#U6))
-def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2,
- u6ImmPred:$src3))>;
-// Rd=add(##,mpyi(Rs,Rt))
-def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2,
- IntRegs:$src3))>;
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
+def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-// Rxx^=pmpyw(Rs,Rt)
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
+def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
-// Vector reduce multiply word by signed half (32x16)
-// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
-// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
-
-// Multiply and use upper result
-// Rd=mpy(Rs,Rt.H):<<1:sat
-// Rd=mpy(Rs,Rt.L):<<1:sat
-// Rd=mpy(Rs,Rt):<<1
-// Rd=mpy(Rs,Rt):<<1:sat
-// Rd=mpysu(Rs,Rt)
-// Rx+=mpy(Rs,Rt):<<1:sat
-// Rx-=mpy(Rs,Rt):<<1:sat
-
-// Vector multiply bytes
-// Rdd=vmpybsu(Rs,Rt)
-// Rdd=vmpybu(Rs,Rt)
-// Rxx+=vmpybsu(Rs,Rt)
-// Rxx+=vmpybu(Rs,Rt)
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
+def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
+def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
+def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
+def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
// Vector polynomial multiply halfwords
// Rdd=vpmpyh(Rs,Rt)
+def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
+
// Rxx^=vpmpyh(Rs,Rt)
+def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>;
+
+// Rxx^=pmpyw(Rs,Rt)
+def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>;
//===----------------------------------------------------------------------===//
// XTYPE/MPY -
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// ALU64/Vector compare
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for vector compare
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in
+class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, ImmOprnd:$Imm),
+ "$Pd = "#Str#"($Rss, #$Imm)",
+ [], "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<32> Imm;
+ bits<8> ImmBits;
+ let ImmBits{6-0} = Imm{6-0};
+ let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{22-21} = cmpOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-5} = ImmBits;
+ let Inst{4-3} = minOp;
+ let Inst{1-0} = Pd;
+ }
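+
+// For illustration: an instance such as A4_vcmpbgtui below assembles as
+//   Pd = vcmpb.gtu(Rss, #Imm)
+// and, per the ImmBits logic above, forces bit 7 of the immediate field
+// to zero for the unsigned (cmpOp == 0b10) comparisons.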
+
+// Vector compare bytes
+def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
+def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
+
+let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
+def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>;
+
+def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>;
+def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>;
+def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>;
+
+// Vector compare halfwords
+def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>;
+def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>;
+def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>;
+
+// Vector compare words
+def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>;
+def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>;
+def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>;
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT +
@@ -2184,13 +2633,13 @@ def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
- hasNewValue = 1, opNewValue = 0, validSubTargets = HasV4SubT in
+ hasNewValue = 1, opNewValue = 0 in
class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
bit asl_lsr, bits<2> MajOp, InstrItinClass Itin>
: MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
"$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
[(set (i32 IntRegs:$Rd),
- (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))],
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
"$Rd = $Rx", Itin> {
bits<5> Rd;
@@ -2216,29 +2665,48 @@ multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp,
def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>;
}
-let AddedComplexity = 200, isCodeGenOnly = 0 in {
+let AddedComplexity = 200 in {
defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>;
defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>;
}
-let AddedComplexity = 30, isCodeGenOnly = 0 in
+let AddedComplexity = 30 in
defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>;
-let isCodeGenOnly = 0 in
defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>;
+let AddedComplexity = 200 in {
+ def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+}
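+
+// For illustration (addrga is assumed to match a global-address constant):
+// these patterns fold a global plus a shifted index, e.g.
+//   (add GA, (shl Rs, #2))  ==>  S4_addi_asl_ri(GA, Rs, #2)
+// so no separate shift instruction is emitted.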
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>;
// Rd=[cround|round](Rs,Rt)
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23, isCodeGenOnly = 0 in {
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in {
def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
}
// Rd=round(Rs,Rt):sat
-let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23,
- isCodeGenOnly = 0 in
+let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
+// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>;
+ def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>;
+}
+
// Rdd=[add|sub](Rss,Rtt,Px):carry
let isPredicateLate = 1, hasSideEffects = 0 in
class T_S3op_carry <string mnemonic, bits<3> MajOp>
@@ -2261,13 +2729,51 @@ class T_S3op_carry <string mnemonic, bits<3> MajOp>
let Inst{4-0} = Rdd;
}
-let isCodeGenOnly = 0 in {
def A4_addp_c : T_S3op_carry < "add", 0b110 >;
def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
-}
+
+let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in
+class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned>
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru),
+ "$Rxx = "#mnemonic#"($Rss, $Ru)" ,
+ [] , "$dst2 = $Rxx"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Ru;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = isUnsigned;
+ let Inst{12-8} = Rxx;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Ru;
+ }
+
+// Vector reduce maximum halfwords
+// Rxx=vrmax[u]h(Rss,Ru)
+def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>;
+def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>;
+
+// Vector reduce maximum words
+// Rxx=vrmax[u]w(Rss,Ru)
+def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>;
+def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>;
+
+// Vector reduce minimum halfwords
+// Rxx=vrmin[u]h(Rss,Ru)
+def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>;
+def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>;
+
+// Vector reduce minimum words
+// Rxx=vrmin[u]w(Rss,Ru)
+def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>;
+def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
// Shift an immediate left by register amount.
-let hasNewValue = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasNewValue = 1, hasSideEffects = 0 in
def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
"$Rd = lsl(#$s6, $Rt)" ,
[(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6,
@@ -2299,7 +2805,7 @@ def MEMOPIMM : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int32_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
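// Worked example: a negative memop immediate is rewritten through its
// positive counterpart, so "memw(Rs+#0) += #-3" is emitted as
// "memw(Rs+#0) -= #3"; here XformM5ToU5Imm(-3) produces the u5 value 3.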
def MEMOPIMM_HALF : SDNodeXForm<imm, [{
@@ -2308,7 +2814,7 @@ def MEMOPIMM_HALF : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int16_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
@@ -2317,14 +2823,14 @@ def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
// Call the transformation function XformM5ToU5Imm to get the negative
// immediate's positive counterpart.
int8_t imm = N->getSExtValue();
- return XformM5ToU5Imm(imm);
+ return XformM5ToU5Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM : SDNodeXForm<imm, [{
// Return the bit position we will set [0-31].
// As an SDNode.
int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM : SDNodeXForm<imm, [{
@@ -2332,14 +2838,14 @@ def CLRMEMIMM : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int32_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU5Imm(imm);
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
// Return the bit position we will set [0-15].
// As an SDNode.
int16_t imm = N->getSExtValue();
- return XformMskToBitPosU4Imm(imm);
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
@@ -2347,14 +2853,14 @@ def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int16_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU4Imm(imm);
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
}]>;
def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
// Return the bit position we will set [0-7].
// As an SDNode.
int8_t imm = N->getSExtValue();
- return XformMskToBitPosU3Imm(imm);
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
}]>;
def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
@@ -2362,7 +2868,7 @@ def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
// As an SDNode.
// we bit negate the value first
int8_t imm = ~(N->getSExtValue());
- return XformMskToBitPosU3Imm(imm);
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
}]>;
//===----------------------------------------------------------------------===//
@@ -2450,15 +2956,14 @@ multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
}
// Define MemOp instructions.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
- validSubTargets =HasV4SubT in {
- let opExtentBits = 6, accessSize = ByteAccess, isCodeGenOnly = 0 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
+ let opExtentBits = 6, accessSize = ByteAccess in
defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
- let opExtentBits = 7, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+ let opExtentBits = 7, accessSize = HalfWordAccess in
defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
- let opExtentBits = 8, accessSize = WordAccess, isCodeGenOnly = 0 in
+ let opExtentBits = 8, accessSize = WordAccess in
defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
}
@@ -2469,43 +2974,43 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 180 in
- def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
- IntRegs:$addr),
- (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, u5ImmPred:$addend)>;
let AddedComplexity = 190 in
- def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
- u5ImmPred:$addend),
- (add IntRegs:$base, ExtPred:$offset)),
- (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>;
}
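// Example of the intent (word form shown; operands as in the patterns
// above): a read-modify-write such as
//   store (add (load (add Rs, #off)), #U5), (add Rs, #off)
// collapses into the single memop "memw(Rs+#off) += #U5".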
-multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon addMI, InstHexagon subMI> {
- defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
- defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>;
}
multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
- L4_iadd_memoph_io, L4_isub_memoph_io>;
+ defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_iadd_memoph_io, L4_isub_memoph_io>;
// Byte
- defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
- L4_iadd_memopb_io, L4_isub_memopb_io>;
+ defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_iadd_memopb_io, L4_isub_memopb_io>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
- defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io,
- L4_isub_memopw_io>;
+ defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io,
+ L4_isub_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -2515,38 +3020,37 @@ let Predicates = [HasV4T, UseMEMOP] in {
// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
- PatLeaf immPred, ComplexPattern addrPred,
- SDNodeXForm xformFunc, InstHexagon MI> {
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
+ PatLeaf immPred, SDNodeXForm xformFunc,
+ InstHexagon MI> {
let AddedComplexity = 190 in
- def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
- IntRegs:$addr),
- (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
+ def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>;
let AddedComplexity = 195 in
- def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
- immPred:$subend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+ def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>;
}
multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
- ADDRriU6_1, MEMOPIMM_HALF, L4_isub_memoph_io>;
+ defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred,
+ MEMOPIMM_HALF, L4_isub_memoph_io>;
// Byte
- defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
- ADDRriU6_0, MEMOPIMM_BYTE, L4_isub_memopb_io>;
+ defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred,
+ MEMOPIMM_BYTE, L4_isub_memopb_io>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
- defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
- ADDRriU6_2, MEMOPIMM, L4_isub_memopw_io>;
+ defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred,
+ MEMOPIMM, L4_isub_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -2556,52 +3060,50 @@ let Predicates = [HasV4T, UseMEMOP] in {
//===----------------------------------------------------------------------===//
multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
- PatLeaf extPred, ComplexPattern addrPred,
- SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+ PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI,
+ SDNode OpNode> {
// mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
let AddedComplexity = 250 in
- def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
- immPred:$bitend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
let AddedComplexity = 225 in
- def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
- immPred:$bitend),
- (addrPred (i32 IntRegs:$addr), extPred:$offset)),
- (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>;
}
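// Example of the intent: the Clr/Set immediate predicates are assumed to
// match masks with exactly one zero/one bit, so C code like
//   *p &= ~(1 << 5);
// can select "memw(Rs+#0) = clrbit(#5)", with CLRMEMIMM recovering the
// bit position from the inverted mask.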
-multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
// Byte - clrbit
- defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
- ADDRriU6_0, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred,
+ CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
// Byte - setbit
- defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
- ADDRriU6_0, SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred,
+ SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
// Half Word - clrbit
- defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
- ADDRriU6_1, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred,
+ CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
// Half Word - setbit
- defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
- ADDRriU6_1, SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred,
+ SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
}
-let Predicates = [HasV4T, UseMEMOP] in {
+let Predicates = [UseMEMOP] in {
// mem[bh](Rs+#0) = [clrbit|setbit](#U5)
// mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
- defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+ defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
// memw(Rs+#0) = [clrbit|setbit](#U5)
// memw(Rs+#u6:2) = [clrbit|setbit](#U5)
- defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
- CLRMEMIMM, L4_iand_memopw_io, and>;
- defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
- SETMEMIMM, L4_ior_memopw_io, or>;
+ defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM,
+ L4_iand_memopw_io, and>;
+ defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM,
+ L4_ior_memopw_io, or>;
}
//===----------------------------------------------------------------------===//
@@ -2611,54 +3113,51 @@ let Predicates = [HasV4T, UseMEMOP] in {
// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
//===----------------------------------------------------------------------===//
-multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
- PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 141 in
// mem[bhw](Rs+#0) [+-&|]= Rt
- def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
- (i32 IntRegs:$addend)),
- (addrPred (i32 IntRegs:$addr), extPred:$offset)),
- (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>;
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>;
// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
let AddedComplexity = 150 in
- def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
- (i32 IntRegs:$orend)),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>;
}
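// Example of the intent: with a register addend,
//   store (or (load Rs), Rt), Rs
// selects the register-form memop "memw(Rs+#0) |= Rt".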
-multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
- ComplexPattern addrPred, PatLeaf extPred,
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
InstHexagon addMI, InstHexagon subMI,
- InstHexagon andMI, InstHexagon orMI > {
-
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
- defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+ InstHexagon andMI, InstHexagon orMI> {
+ defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>;
}
multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
- L4_add_memoph_io, L4_sub_memoph_io,
- L4_and_memoph_io, L4_or_memoph_io>;
+ defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_add_memoph_io, L4_sub_memoph_io,
+ L4_and_memoph_io, L4_or_memoph_io>;
// Byte
- defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
- L4_add_memopb_io, L4_sub_memopb_io,
- L4_and_memopb_io, L4_or_memopb_io>;
+ defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_add_memopb_io, L4_sub_memopb_io,
+ L4_and_memopb_io, L4_or_memopb_io>;
}
// Define 'def Pats' for MemOps with register addend.
-let Predicates = [HasV4T, UseMEMOP] in {
+let Predicates = [UseMEMOP] in {
// Byte, Half Word
- defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
- defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
- defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, L4_add_memopw_io,
- L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io >;
+ defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io,
+ L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>;
}
//===----------------------------------------------------------------------===//
@@ -2676,311 +3175,41 @@ let Predicates = [HasV4T, UseMEMOP] in {
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
-let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0,
- validSubTargets = HasV4SubT in
-class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp,
- list<dag> Pattern>
- : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2),
- "$dst = !cmp."#OpName#"($src1, #$src2)",
- Pattern,
- "", ALU32_2op_tc_2early_SLOT0123> {
- bits<2> dst;
- bits<5> src1;
- bits<10> src2;
+// p=!cmp.eq(r1,#s10)
+def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
+def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
+def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
- let IClass = 0b0111;
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = op;
- let Inst{20-16} = src1;
- let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9});
- let Inst{13-5} = src2{8-0};
- let Inst{4-2} = 0b100;
- let Inst{1-0} = dst;
-}
-
-let opExtentBits = 10, isExtentSigned = 1 in {
-def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>;
-
-def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>;
-
-}
-let opExtentBits = 9 in
-def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>;
-
-
-
-// p=!cmp.eq(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
- Requires<[HasV4T]>;
-
-// p=!cmp.gt(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
- Requires<[HasV4T]>;
-
-
-// p=!cmp.gtu(r1,r2)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !cmp.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
- Requires<[HasV4T]>;
-
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u8Imm:$src2),
- "$dst = cmpb.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
- bb:$offset)>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (xor (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)), 255), 0))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (shl (i32 IntRegs:$src1), (i32 24)),
- (shl (i32 IntRegs:$src2), (i32 24))))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gt(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 24)),
- (shl (i32 IntRegs:$src2), (i32 24))))]>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gtu(Rs,#u7)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
-isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
-def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Ext:$src2),
- "$dst = cmpb.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- u7ExtPred:$src2))]>,
- Requires<[HasV4T]>, ImmRegRel;
+def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
+def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
+
+// rs <= rt -> !(rs > rt).
+/*
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
+*/
+// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
+
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
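+
+// Worked example: "Rs < 5" is rewritten as "!(Rs > 4)", i.e.
+//   (setlt Rs, #5)  ==>  (C4_cmpltei Rs, #4)
+// where DEC_CONST_SIGNED (defined elsewhere) is assumed to perform the
+// immediate decrement.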
// SDNode for converting immediate C to C-1.
def DEC_CONST_BYTE : SDNodeXForm<imm, [{
// Return the byte immediate const-1 as an SDNode.
int32_t imm = N->getSExtValue();
- return XformU7ToU7M1Imm(imm);
+ return XformU7ToU7M1Imm(imm, SDLoc(N));
}]>;
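// Worked example: for unsigned x, "x < C" is equivalent to "!(x > C-1)",
// so DEC_CONST_BYTE turns the isdigit check "x < 10" into
// "Pd = cmpb.gtu(Rs, #9); Rd = mux(Pd, #0, #1)" in the pattern below.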
// For the sequence
-// zext( seteq ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setne ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( seteq (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setne (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 254), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setult ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setlt ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setugt ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>,
- Requires<[HasV4T]>;
-
-// This pattern interferes with coremark performance, so it is not
-// implemented at this time.
-// For the sequence
-// zext( setgt ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-
-// For the sequence
-// zext( setuge ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setge ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setule ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
-// zext( setle ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// For the sequence
// zext( setult ( and(Rs, 255), u8))
// Use the isdigit transformation below
-// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)'
+// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;
// The isdigit transformation relies on two 'clever' aspects:
// 1) The data type is unsigned which allows us to eliminate a zero test after
@@ -2993,130 +3222,11 @@ def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
// The code is transformed upstream of llvm into
// retval = (c-48) < 10 ? 1 : 0;
let AddedComplexity = 139 in
-def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
- u7StrictPosImmPred:$src2)))),
- (i32 (C2_muxii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
- (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
- 0, 1))>,
- Requires<[HasV4T]>;
-
-// Pd=cmpb.gtu(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
-InputType = "reg" in
-def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmpb.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- (and (i32 IntRegs:$src2), 255)))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-// The following instruction is not being extended, as it results in incorrect
-// code for negative numbers.
-
-// Signed half compare(.eq) ri.
-// Pd=cmph.eq(Rs,#s8)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
- "$dst = cmph.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
- s8ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-// Signed half compare(.eq) rr.
-// Case 1: xor + and, then compare:
-// r0=xor(r0,r1)
-// r0=and(r0,#0xffff)
-// p0=cmp.eq(r0,#0)
-// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)),
- 65535), 0))]>,
- Requires<[HasV4T]>;
-
-// Signed half compare(.eq) rr.
-// Case 2: shift left 16 bits then compare:
-// r0=asl(r0,16)
-// r1=asl(r1,16)
-// p0=cmp.eq(r0,r1)
-// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.eq($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (shl (i32 IntRegs:$src1), (i32 16)),
- (shl (i32 IntRegs:$src2), (i32 16))))]>,
- Requires<[HasV4T]>;
-
-/* Incorrect Pattern -- immediate should be right shifted before being
-used in the cmph.gt instruction.
-// Signed half compare(.gt) ri.
-// Pd=cmph.gt(Rs,#s8)
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
-isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Ext:$src2),
- "$dst = cmph.gt($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 16)),
- s8ExtPred:$src2))]>,
- Requires<[HasV4T]>;
-*/
-
-// Signed half compare(.gt) rr.
-// Pd=cmph.gt(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.gt($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setgt (shl (i32 IntRegs:$src1), (i32 16)),
- (shl (i32 IntRegs:$src2), (i32 16))))]>,
- Requires<[HasV4T]>;
-
-// Unsigned half compare rr (.gtu).
-// Pd=cmph.gtu(Rs,Rt)
-let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
-InputType = "reg" in
-def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = cmph.gtu($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (setugt (and (i32 IntRegs:$src1), 65535),
- (and (i32 IntRegs:$src2), 65535)))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-// Unsigned half compare ri (.gtu).
-// Pd=cmph.gtu(Rs,#u7)
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
-isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
-InputType = "imm" in
-def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Ext:$src2),
- "$dst = cmph.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
- u7ExtPred:$src2))]>,
- Requires<[HasV4T]>, ImmRegRel;
-
-let validSubTargets = HasV4SubT in
-def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = !tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
- Requires<[HasV4T]>;
-
-let validSubTargets = HasV4SubT in
-def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- "$dst = !tstbit($src1, $src2)",
- [(set (i1 PredRegs:$dst),
- (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
- Requires<[HasV4T]>;
+def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (C2_muxii (A4_cmpbgtui IntRegs:$src1,
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2)),
+ 0, 1)>;
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -3173,40 +3283,23 @@ multiclass LD_MISC_L4_RETURN<string mnemonic> {
}
let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0,
- validSubTargets = HasV4SubT, isCodeGenOnly = 0 in
+ Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in
defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
// Restore registers and dealloc return function call.
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC] in {
-let validSubTargets = HasV4SubT in
- def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "jump $dst",
- []>,
- Requires<[HasV4T]>;
+ Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
-let isCall = 1, isBarrier = 1,
- Defs = [R29, R30, R31, PC] in {
-let validSubTargets = HasV4SubT in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst",
- []>,
- Requires<[HasV4T]>;
+let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
}
// Save registers function call.
-let isCall = 1, isBarrier = 1,
- Uses = [R29, R31] in {
- def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst // Save_calle_saved_registers",
- []>,
- Requires<[HasV4T]>;
+let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -3278,7 +3371,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3295,7 +3388,7 @@ class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
//===----------------------------------------------------------------------===//
// Multiclass for store instructions with absolute addressing.
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+let addrMode = Absolute, isExtended = 1 in
multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
@@ -3319,7 +3412,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
isNewValue = 1, opNewValue = 1 in
class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
- : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src),
+ : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src),
mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
[], "", V2LDST_tc_st_SLOT0> {
bits<19> addr;
@@ -3397,7 +3490,7 @@ class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
//===----------------------------------------------------------------------===//
// Multiclass for new-value store instructions with absolute addressing.
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT, addrMode = Absolute, isExtended = 1 in
+let addrMode = Absolute, isExtended = 1 in
multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
@@ -3417,22 +3510,22 @@ multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
//===----------------------------------------------------------------------===//
// Stores with absolute addressing
//===----------------------------------------------------------------------===//
-let accessSize = ByteAccess, isCodeGenOnly = 0 in
+let accessSize = ByteAccess in
defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
-let accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let accessSize = HalfWordAccess in
defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
-let accessSize = WordAccess, isCodeGenOnly = 0 in
+let accessSize = WordAccess in
defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
-let isNVStorable = 0, accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let isNVStorable = 0, accessSize = DoubleWordAccess in
defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
-let isNVStorable = 0, accessSize = HalfWordAccess, isCodeGenOnly = 0 in
+let isNVStorable = 0, accessSize = HalfWordAccess in
defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
//===----------------------------------------------------------------------===//
@@ -3442,7 +3535,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
//===----------------------------------------------------------------------===//
-let validSubTargets = HasV4SubT in
+let isAsmParserOnly = 1 in
class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
: T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
@@ -3452,7 +3545,7 @@ class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
let BaseOpcode = BaseOp#_abs;
}
-let validSubTargets = HasV4SubT in
+let isAsmParserOnly = 1 in
multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
bits<2> MajOp, bit isHalf = 0> {
// Set BaseOpcode same as absolute addressing instructions so that
@@ -3483,77 +3576,44 @@ let isNVStorable = 0, accessSize = HalfWordAccess in
def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
u16_1Imm, 0b01, 1>, PredNewRel;
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerbabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerhabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storeriabs tglobaladdr: $absaddr, IntRegs: $src1)>;
-
-def : Pat<(store (i64 DoubleRegs:$src1),
- (HexagonCONST32 tglobaladdr:$absaddr)),
- (S2_storerdabs tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
+
+class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
+
+class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
+
+class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr),
+ (MI Addr:$addr, (ValueMod Value:$val))>;
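+
+// For illustration: Loada_pat maps a plain load to one instruction, while
+// Loadam_pat additionally applies a value modifier to the loaded result
+// (the Store variants mirror this for stores). For instance,
+//   def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+// matches (i32 (atomic_load_32 addrgp:$addr)) and emits L2_loadrigp.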
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+
+ // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+ // to "r0 = 1; memw(#foo) = r0"
+ let AddedComplexity = 100 in
+ def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
}
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerdgp tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-// to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerhgp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerigp tglobaladdr:$global, (i32 IntRegs:$src1))>;
-
//===----------------------------------------------------------------------===//
// Template class for non predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, validSubTargets = HasV4SubT in
+let isPredicable = 1, hasSideEffects = 0 in
class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp, Operand AddrOp, bit isAbs>
: LDInst <(outs RC:$dst), (ins AddrOp:$addr),
@@ -3582,7 +3642,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3595,11 +3655,12 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u16_1Imm"), 1,
/* u16_0Imm */ 0)));
}
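// Note on the extent bits above: the absolute offset is scaled by the
// access size, so u16_3Imm carries 16 significant bits plus a 3-bit scale
// (opExtentBits = 19); the elided middle cases are assumed to follow the
// same scheme, giving 18, 17, and 16 for the u16_2/u16_1/u16_0 forms.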
+
//===----------------------------------------------------------------------===//
// Template class for predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasNewValue = 1, opExtentBits = 6, opExtendable = 2 in
+let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in
class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isPredNot, bit isPredNew>
: LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr),
@@ -3611,6 +3672,7 @@ class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
let isPredicatedNew = isPredNew;
let isPredicatedFalse = isPredNot;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
let IClass = 0b1001;
@@ -3649,20 +3711,20 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
}
}
-let accessSize = ByteAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+let accessSize = ByteAccess, hasNewValue = 1 in {
defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
}
-let accessSize = HalfWordAccess, hasNewValue = 1, isCodeGenOnly = 0 in {
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
}
-let accessSize = WordAccess, hasNewValue = 1, isCodeGenOnly = 0 in
+let accessSize = WordAccess, hasNewValue = 1 in
defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-let accessSize = DoubleWordAccess, isCodeGenOnly = 0 in
+let accessSize = DoubleWordAccess in
defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
//===----------------------------------------------------------------------===//
@@ -3672,6 +3734,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
//===----------------------------------------------------------------------===//
+let isAsmParserOnly = 1 in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
: T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel {
@@ -3694,439 +3757,175 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
let accessSize = DoubleWordAccess in
def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadri_abs tglobaladdr: $absaddr)>;
-
-def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrb_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrub_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadrh_abs tglobaladdr:$absaddr)>;
-
-def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
- (L4_loadruh_abs tglobaladdr:$absaddr)>;
-}
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (L2_loadrdgp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadrigp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadruhgp tglobaladdr:$global))>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (L2_loadrdgp tglobaladdr:$global))>;
+def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
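+
+// E.g. the atomic_load_32 def above corresponds to the removed explicit
+// pattern (atomic_load_32 (HexagonCONST32_GP tglobaladdr:$g)) ->
+// (L2_loadrigp tglobaladdr:$g), assuming addrgp matches the GP-relative
+// global-address form.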
// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
+def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>;
+def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
+
+def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>;
+def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
+
+// Map from load(globaladdress) -> mem[u][bhwd](#foo)
+class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+ : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
+ (VT (MI tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <extloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <load, L2_loadrigp>;
+ def: LoadGP_pats <load, L2_loadrdgp, i64>;
+}
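+
+// For reference, the zextloadi8 instantiation above expands to the same
+// pattern the removed explicit def spelled out:
+//   def: Pat<(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+//            (i32 (L2_loadrubgp tglobaladdr:$global))>;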
// When the Interprocedural Global Variable optimizer realizes that a certain
// global variable takes only two constant values, it shrinks the global to
// a boolean. Catch those loads here with the following patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi1, L2_loadrubgp>;
+ def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
+}
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+// Transfer global address into a register
+def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
+ def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
+}
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+let AddedComplexity = 30 in {
+ def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
+ def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
+}
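+
+// Loada_pat is the generic load-pattern helper defined elsewhere in the
+// Hexagon .td files; assuming its usual <ldOp, VT, AddrPred, MI> shape,
+// the first def above expands roughly to
+//   def: Pat<(i32 (load u32ImmPred:$addr)), (L4_loadri_abs u32ImmPred:$addr)>;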
-// Map from load(globaladdress) -> memb(#foo)
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=#S8
let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrbgp tglobaladdr:$global))>;
+def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
+// Load from a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>;
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrubgp tglobaladdr:$global))>;
+ def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrhgp tglobaladdr:$global))>;
+ def: Loada_pat<load, i32, addrga, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrga, L4_loadrd_abs>;
+}
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrhgp tglobaladdr:$global))>;
+// Store to a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrga, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrga, S2_storerdabs>;
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadruhgp tglobaladdr:$global))>;
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
+}
-// Map from load(globaladdress) -> memw(#foo)
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (L2_loadrigp tglobaladdr:$global))>;
-
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
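+// Sketch of the i1 load above: the boolean is loaded as a byte into an
+// IntReg, then C2_tfrrp transfers that register into a predicate register.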
// Transfer global address into a register
let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
-isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in
+isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
"$dst = #$src1",
- [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
- Requires<[HasV4T]>;
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>;
// Transfer a block address into a register
def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
- (TFRI_V4 tblockaddress:$src1)>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if($src1) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if(!$src1) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if($src1.new) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1,
-hasSideEffects = 0, isPredicated = 1, validSubTargets = HasV4SubT in
-def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s16Ext:$src2),
- "if(!$src1.new) $dst = #$src2",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 50, Predicates = [HasV4T] in
-def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
- (TFRI_V4 tglobaladdr:$src1)>,
- Requires<[HasV4T]>;
-
-
-// Load - Indirect with long offset: These instructions take global address
-// as an operand
-let isExtended = 1, opExtendable = 3, AddedComplexity = 40,
-validSubTargets = HasV4SubT in
-def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
- "$dst=memd($src1<<#$src2+##$offset)",
- [(set (i64 DoubleRegs:$dst),
- (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$offset))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40 in
-multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
-let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in
- def _lo_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
- !strconcat("$dst = ",
- !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
- [(set IntRegs:$dst,
- (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$offset)))))]>,
- Requires<[HasV4T]>;
-}
-
-defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
-defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
-defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>;
-defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
-defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
-defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>;
-defm LDriw_ind : LD_indirect_lo<"memw", load>;
-
-let AddedComplexity = 40 in
-def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
- (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40 in
-def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
- (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
- (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
- Requires<[HasV4T]>;
-
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storerbabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storerhabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+ (TFRI_V4 tblockaddress:$src1)>;
-def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (S2_storeriabs u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-}
-
-let Predicates = [HasV4T], AddedComplexity = 30 in {
-def : Pat<(i32 (load u0AlwaysExtPred:$src)),
- (L4_loadri_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
- (L4_loadrb_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
- (L4_loadrub_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
- (L4_loadrh_abs u0AlwaysExtPred:$src)>;
-
-def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
- (L4_loadruh_abs u0AlwaysExtPred:$src)>;
-}
-
-// Indexed store word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10 in
-def STriw_offset_ext_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
- "memw($src1+#$src2) = ##$src3",
- [(store (HexagonCONST32 tglobaladdr:$src3),
- (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))),
- (i64 (A4_combineir (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>,
- Requires<[HasV4T]>;
-
-def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))),
- (i64 (A4_combineir (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>,
- Requires<[HasV4T]>;
-
-
-// i8 -> i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextloadi8.
-let Predicates = [HasV4T], AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrb_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrub_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadrb_abs tglobaladdr:$addr)))>;
-
-def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
-
-def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrub_abs FoldGlobalAddr:$addr)))>;
+let AddedComplexity = 50 in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
-def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadrb_abs FoldGlobalAddr:$addr)))>;
-}
-// i16 -> i64 loads
+// i8/i16/i32 -> i64 loads
// We need a complexity of 120 here to override preceding handling of
-// zextloadi16.
+// zextload.
let AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadrh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadruh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
+ def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>;
+ def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
-def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadrh_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
+ def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>;
+ def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
-def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadruh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadrh_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>;
+ def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>;
+ def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>;
}
-// i32->i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextloadi32.
-let AddedComplexity = 120 in {
-def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
-def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A4_combineir 0, (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>;
-def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))),
- (i64 (A2_sxtw (L4_loadri_abs tglobaladdr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>;
-def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)),
- (i64 (A4_combineir 0, (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
-
-def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)),
- (i64 (A2_sxtw (L4_loadri_abs FoldGlobalAddr:$addr)))>,
- Requires<[HasV4T]>;
+ def: Loada_pat<load, i32, addrgp, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>;
}
-// Indexed store double word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10 in
-def STrih_offset_ext_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
- "memh($src1+#$src2) = ##$src3",
- [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
- (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-// Map from store(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i64 DoubleRegs:$src1),
- FoldGlobalAddrGP:$addr),
- (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
- (i64 DoubleRegs:$src1)),
- (S2_storerdabs FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storerbabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storerhabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
- (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
- (S2_storeriabs FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
- (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
- (i64 (L4_loadrd_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrb_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-//let AddedComplexity = 100 in
-let AddedComplexity = 100 in
-def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadruh_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadrub_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
- (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrgp, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdabs>;
+}
-def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
- (i32 (L4_loadri_abs FoldGlobalAddrGP:$addr))>,
- Requires<[HasV4T]>;
+def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
+
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+ IntRegs:$c, IntRegs:$d),
+ ".error \"Should never try to emit Insert4\"",
+ [(set (i64 DoubleRegs:$dst),
+ (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
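+// Sketch of the pattern above: the low halfwords of $a..$d are packed into
+// one i64 as d:c:b:a (bits 15:0 = $a, 31:16 = $b, 47:32 = $c, 63:48 = $d);
+// e.g. a=0x1111, b=0x2222, c=0x3333, d=0x4444 gives 0x4444333322221111
+// (illustrative values). The .error string ensures it is never emitted.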
//===----------------------------------------------------------------------===//
// :raw forms of boundscheck:hi:lo insns
//===----------------------------------------------------------------------===//
// A4_boundscheck_lo: Detect if a register is within bounds.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_boundscheck_lo: ALU64Inst <
(outs PredRegs:$Pd),
(ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -4146,7 +3945,7 @@ def A4_boundscheck_lo: ALU64Inst <
}
// A4_boundscheck_hi: Detect if a register is within bounds.
-let hasSideEffects = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 0 in
def A4_boundscheck_hi: ALU64Inst <
(outs PredRegs:$Pd),
(ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
@@ -4165,13 +3964,13 @@ def A4_boundscheck_hi: ALU64Inst <
let Inst{12-8} = Rtt;
}
-let hasSideEffects = 0 in
+let hasSideEffects = 0, isAsmParserOnly = 1 in
def A4_boundscheck : MInst <
(outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
"$Pd=boundscheck($Rs,$Rtt)">;
// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
-let isPredicateLate = 1, hasSideEffects = 0, isCodeGenOnly = 0 in
+let isPredicateLate = 1, hasSideEffects = 0 in
def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
(ins DoubleRegs:$Rs, IntRegs:$Rt),
"$Pd = tlbmatch($Rs, $Rt)",
@@ -4198,7 +3997,7 @@ def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
// really do a load.
-let hasSideEffects = 1, mayLoad = 0, isCodeGenOnly = 0 in
+let hasSideEffects = 1, mayLoad = 0 in
def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
"dcfetch($Rs + #$u11_3)",
[(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
@@ -4220,12 +4019,12 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1, validSubTargets = HasV4SubT in
+ isTerminator = 1 in
class CJInst_tstbit_R0<string px, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = tstbit($Rs, #0); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4248,14 +4047,14 @@ class CJInst_tstbit_R0<string px, bit np, string tnt>
let Inst{7-1} = r9_2{8-2};
}
-let Defs = [PC, P0], Uses = [P0], isCodeGenOnly = 0 in {
+let Defs = [PC, P0], Uses = [P0] in {
def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
}
-let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in {
+let Defs = [PC, P1], Uses = [P1] in {
def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
@@ -4266,12 +4065,12 @@ let Defs = [PC, P1], Uses = [P1], isCodeGenOnly = 0 in {
let isBranch = 1, hasSideEffects = 0,
isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
- opExtendable = 2, isTerminator = 1, validSubTargets = HasV4SubT in
+ opExtendable = 2, isTerminator = 1 in
class CJInst_RR<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, $Rt); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<4> Rt;
bits<11> r9_2;
@@ -4314,21 +4113,18 @@ multiclass T_pnp_CJInst_RR<string op>{
defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
}
// TypeCJ Instructions compare RR and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_RR<"eq">;
defm gt : T_pnp_CJInst_RR<"gt">;
defm gtu : T_pnp_CJInst_RR<"gtu">;
-}
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, isTerminator = 1,
- validSubTargets = HasV4SubT in
+ opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
class CJInst_RU5<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, #$U5); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<5> U5;
bits<11> r9_2;
@@ -4371,21 +4167,19 @@ multiclass T_pnp_CJInst_RU5<string op>{
defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
}
// TypeCJ Instructions compare RI and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_RU5<"eq">;
defm gt : T_pnp_CJInst_RU5<"gt">;
defm gtu : T_pnp_CJInst_RU5<"gtu">;
-}
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1, validSubTargets = HasV4SubT in
+ isTerminator = 1 in
class CJInst_Rn1<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs,#-1); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4427,16 +4221,13 @@ multiclass T_pnp_CJInst_Rn1<string op>{
defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
}
// TypeCJ Instructions compare -1 and jump
-let isCodeGenOnly = 0 in {
defm eq : T_pnp_CJInst_Rn1<"eq">;
defm gt : T_pnp_CJInst_Rn1<"gt">;
-}
// J4_jumpseti: Direct unconditional jump and set register to immediate.
let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in
+ opExtentAlign = 2, opExtendable = 2 in
def J4_jumpseti: CJInst <
(outs IntRegs:$Rd),
(ins u6Imm:$U6, brtarget:$r9_2),
@@ -4456,8 +4247,7 @@ def J4_jumpseti: CJInst <
// J4_jumpsetr: Direct unconditional jump and transfer register.
let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, validSubTargets = HasV4SubT,
- isCodeGenOnly = 0 in
+ opExtentAlign = 2, opExtendable = 2 in
def J4_jumpsetr: CJInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, brtarget:$r9_2),
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 5674aa3..337f4ea 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -15,7 +15,34 @@
// XTYPE/MPY
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 0 in
+// Rdd[+]=vrmpybsu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+ def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
+ def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
+
+  // Rdd[+]=vrmpybu(Rss,Rtt)
+ def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
+ def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
+
+ def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
+ def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
+}
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+let Predicates = [HasV5T] in {
+ def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
+ def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
+
+ // Rxx+=vmpyb[s]u(Rs,Rt)
+ def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
+ def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
+
+ // Rd=vaddhub(Rss,Rtt):sat
+ let hasNewValue = 1, opNewValue = 0 in
+ def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
+}
+
def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
[(set I64:$dst,
(sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)),
@@ -25,41 +52,43 @@ def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
let Inst{13-8} = src2;
}
-let isCodeGenOnly = 0 in
+let isAsmParserOnly = 1 in
+def S2_asr_i_p_rnd_goodsyntax
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asrrnd($src1, #$src2)">;
+
def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
Requires<[HasV5T]> {
let Inst{13,7,4} = 0b111;
}
-let isCodeGenOnly = 0 in
def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
Requires<[HasV5T]> {
let Inst{20,13,7,4} = 0b1111;
}
-def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
- SDTCisVT<0, f32>,
- SDTCisPtrTy<1>]>;
-def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (f32 IntRegs:$dst),
- (HexagonFCONST32 tglobaladdr:$global))]>,
- Requires<[HasV5T]>;
+ "$dst = CONST32(#$global)",
+ [(set F32:$dst,
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
- "$dst = CONST64(#$src1)",
- [(set DoubleRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = CONST64(#$src1)",
+ [(set F64:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
- "$dst = CONST32(#$src1)",
- [(set IntRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = CONST32(#$src1)",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
// Transfer immediate float.
// Only works with single-precision fp values.
@@ -68,35 +97,33 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
// Make sure that the complexity is higher than that of the CONST32 pattern
// in HexagonInstrInfo.td.
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
-isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
-isCodeGenOnly = 1 in
+ isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
+ isCodeGenOnly = 1 in
def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
- "$dst = #$src1",
- [(set IntRegs:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
+ "$dst = #$src1",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
let isExtended = 1, opExtendable = 2, isPredicated = 1,
-hasSideEffects = 0, validSubTargets = HasV5SubT in
+ hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in
def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
- "if ($src1) $dst = #$src2",
- []>,
- Requires<[HasV5T]>;
+ "if ($src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
-let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
-hasSideEffects = 0, validSubTargets = HasV5SubT in
+let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1,
+ isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in
def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
- "if (!$src1) $dst =#$src2",
- []>,
- Requires<[HasV5T]>;
+ "if (!$src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
SDTCisVT<1, i64>]>;
def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-let hasNewValue = 1, validSubTargets = HasV5SubT, isCodeGenOnly = 0 in
+let hasNewValue = 1, validSubTargets = HasV5SubT in
def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
"$Rd = popcount($Rss)",
[(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
@@ -112,6 +139,14 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
let Inst{20-16} = Rss;
}
+defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
+
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
let isFP = 1, hasNewValue = 1, opNewValue = 0 in
class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: MInst<(outs IntRegs:$Rd),
@@ -134,27 +169,51 @@ class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
let Inst{4-0} = Rd;
}
-let isCommutable = 1, isCodeGenOnly = 0 in {
+let isCommutable = 1 in {
def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
}
-let isCodeGenOnly = 0 in
def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-let Itinerary = M_tc_3x_SLOT23, isCodeGenOnly = 0 in {
+def: Pat<(f32 (fadd F32:$src1, F32:$src2)),
+ (F2_sfadd F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fsub F32:$src1, F32:$src2)),
+ (F2_sfsub F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fmul F32:$src1, F32:$src2)),
+ (F2_sfmpy F32:$src1, F32:$src2)>;
+
+let Itinerary = M_tc_3x_SLOT23 in {
def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
}
-let isCodeGenOnly = 0 in {
+let AddedComplexity = 100, Predicates = [HasV5T] in {
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+}
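+
+// E.g. a libm-style fminf(x, y), i.e. select(setolt(x, y), x, y), now
+// matches F2_sfmin directly instead of a compare-plus-mux sequence.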
+
def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
-}
// F2_sfrecipa: Reciprocal approximation for division.
let isPredicateLate = 1, isFP = 1,
-hasSideEffects = 0, hasNewValue = 1, isCodeGenOnly = 0 in
+hasSideEffects = 0, hasNewValue = 1 in
def F2_sfrecipa: MInst <
(outs IntRegs:$Rd, PredRegs:$Pe),
(ins IntRegs:$Rs, IntRegs:$Rt),
@@ -210,7 +269,6 @@ class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
let Inst{27-21} = 0b0111111;
}
-let isCodeGenOnly = 0 in {
def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
@@ -220,6 +278,250 @@ def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasV5T] in
+multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (IntMI F32:$src1, F32:$src2)>;
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+}
+
+defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
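+
+// E.g. the seteq instantiation above yields (expanded form, for reference):
+//   def: Pat<(i1 (seteq F32:$src1, F32:$src2)),
+//            (F2_sfcmpeq F32:$src1, F32:$src2)>;
+//   def: Pat<(i1 (seteq F64:$src1, F64:$src2)),
+//            (F2_dfcmpeq F64:$src1, F64:$src2)>;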
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
+defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
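+
+// E.g. setugt on f32 expands to "unordered OR greater than" (sketch):
+//   def: Pat<(i1 (setugt F32:$src1, F32:$src2)),
+//            (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+//                   (F2_sfcmpgt F32:$src1, F32:$src2))>;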
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
+// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
+// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
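+
+// E.g. for f32 operands, seteq(setolt(a, b), 1) becomes (F2_sfcmpgt b, a):
+// lt is implemented by swapping the operands of the gt comparison.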
+
+
+// seto is the inverse of setuo; see http://llvm.org/docs/LangRef.html#i_fcmp
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seto F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
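+
+// Sketch: for f32 operands this lowers seto(a, b) to
+//   (C2_not (F2_sfcmpuo a, b)),
+// i.e. true iff neither operand is a NaN (sfcmp.uo is symmetric, so the
+// swapped operand order in the pattern does not change the result).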
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpgt F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setult F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpgt F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setole F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setole F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+// rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpge F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setule F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpge F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setone F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setone F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
+
+// Unordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+ def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+ def: Pat<(i1 (setune F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>;
+ def: Pat<(i1 (setune F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1,
+ (CONST64_Float_Real fpimm:$src2))))>;
+}
+
+// Besides set[o|u]<comparison>, we also need the plain set<comparison> forms.
+let Predicates = [HasV5T] in {
+ // lt.
+ def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // le.
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setle F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setle F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // ne.
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setne F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setne F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
}
// F2 convert template classes:
@@ -302,7 +604,6 @@ class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
}
// Convert single precision to double precision and vice-versa.
-let isCodeGenOnly = 0 in {
def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
fextend, F64, F32>;
@@ -364,10 +665,9 @@ let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
fp_to_sint, I32, F32>;
}
-}
// Fix up radicand.
-let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+let isFP = 1, hasNewValue = 1 in
def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
"$Rd = sffixupr($Rs)",
[], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
@@ -382,6 +682,14 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
let Inst{4-0} = Rd;
}
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
+let Predicates = [HasV5T] in {
+ def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+ def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+ def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+ def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
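+// These bitconvert patterns are register-class no-ops: f32/i32 share
+// IntRegs and f64/i64 share DoubleRegs, so the source register is reused
+// and no instruction is emitted.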
+
// F2_sffma: Floating-point fused multiply add.
let isFP = 1, hasNewValue = 1 in
class T_sfmpy_acc <bit isSub, bit isLib>
@@ -406,15 +714,16 @@ class T_sfmpy_acc <bit isSub, bit isLib>
let Inst{4-0} = Rx;
}
-let isCodeGenOnly = 0 in {
def F2_sffma: T_sfmpy_acc <0, 0>;
def F2_sffms: T_sfmpy_acc <1, 0>;
def F2_sffma_lib: T_sfmpy_acc <0, 1>;
def F2_sffms_lib: T_sfmpy_acc <1, 1>;
-}
+
+def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)),
+ (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
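+
+// Note the operand rotation above: ISD fma(a, b, c) = a*b + c maps to
+// F2_sffma(c, a, b), matching the "Rx += sfmpy(Rs, Rt)" accumulator form.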
// Floating-point fused multiply add w/ additional scaling (2**pu).
-let isFP = 1, hasNewValue = 1, isCodeGenOnly = 0 in
+let isFP = 1, hasNewValue = 1 in
def F2_sffma_sc: MInst <
(outs IntRegs:$Rx),
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
@@ -437,11 +746,147 @@ def F2_sffma_sc: MInst <
let Inst{4-0} = Rx;
}
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+ (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = MUX_ir_f(p0, r1, #i)
+def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3),
+ (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = MUX_ri_f(p0, #i, r1)
+def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3),
+ (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(i32 (fp_to_sint F64:$src1)),
+ (LoReg (F2_conv_df2d_chop F64:$src1))>,
+ Requires<[HasV5T]>;
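+
+// Sketch: i32 fp_to_sint from f64 converts to i64 with :chop (truncate
+// toward zero), then LoReg selects the low 32-bit half of the register pair.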
+
+//===----------------------------------------------------------------------===//
+// :natural forms of vasrh and vasrhub insns
+//===----------------------------------------------------------------------===//
+// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round,
+// saturate, and pack.
+let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ASRHUB<bit isSat>
+ : SInst <(outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
+ [], "", S_2op_tc_2_SLOT23>,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1000011;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-6} = 0b10;
+ let Inst{5} = isSat;
+ let Inst{4-0} = Rd;
+ }
+
+def S5_asrhub_rnd_sat : T_ASRHUB <0>;
+def S5_asrhub_sat : T_ASRHUB <1>;
+
+let isAsmParserOnly = 1 in
+def S5_asrhub_rnd_sat_goodsyntax
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
+
+// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
+let hasSideEffects = 0 in
+def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss, #$u4):raw">,
+ Requires<[HasV5T]> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0000001;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ }
+
+let isAsmParserOnly = 1 in
+def S5_vasrhrnd_goodsyntax
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
+
+// Floating point reciprocal square root approximation
+let Uses = [USR], isPredicateLate = 1, isFP = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
+ validSubTargets = HasV5SubT in
+def F2_sfinvsqrta: SInst <
+ (outs IntRegs:$Rd, PredRegs:$Pe),
+ (ins IntRegs:$Rs),
+ "$Rd, $Pe = sfinvsqrta($Rs)" > ,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<2> Pe;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1011111;
+ let Inst{20-16} = Rs;
+ let Inst{7} = 0b0;
+ let Inst{6-5} = Pe;
+ let Inst{4-0} = Rd;
+ }
+
+// Complex multiply 32x16
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;
+ def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>;
+}
+
// Classify floating-point value
-let isFP = 1, isCodeGenOnly = 0 in
+let isFP = 1 in
def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
-let isFP = 1, isCodeGenOnly = 0 in
+let isFP = 1 in
def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
"$Pd = dfclass($Rss, #$u5)",
[], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
@@ -459,7 +904,6 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
}
// Instructions to create floating point constant
-let hasNewValue = 1, opNewValue = 0 in
class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
: ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
"$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
@@ -476,546 +920,13 @@ class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
let Inst{4-0} = dst;
}
-let isCodeGenOnly = 0 in {
+let hasNewValue = 1, opNewValue = 0 in {
def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
-def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
-def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
}
-// Convert single precision to double precision and vice-versa.
-def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2df($src)",
- [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2sf($src)",
- [(set IntRegs:$dst, (fround DoubleRegs:$src))]>,
- Requires<[HasV5T]>;
-
-
-// Load.
-def LDrid_f : LDInst<(outs DoubleRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memd($addr)",
- [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>,
- Requires<[HasV5T]>;
-
-
-let AddedComplexity = 20 in
-def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s11_3Imm:$offset),
- "$dst = memd($src1+#$offset)",
- [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1,
- s11_3ImmPred:$offset))))]>,
- Requires<[HasV5T]>;
-
-def LDriw_f : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr), "$dst = memw($addr)",
- [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>,
- Requires<[HasV5T]>;
-
-
-let AddedComplexity = 20 in
-def LDriw_indexed_f : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_2Imm:$offset),
- "$dst = memw($src1+#$offset)",
- [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1,
- s11_2ImmPred:$offset))))]>,
- Requires<[HasV5T]>;
-
-// Store.
-def STriw_f : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1",
- [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 10 in
-def STriw_indexed_f : STInst<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3",
- [(store (f32 IntRegs:$src3),
- (add IntRegs:$src1, s11_2ImmPred:$src2))]>,
- Requires<[HasV5T]>;
-
-def STrid_f : STInst<(outs),
- (ins MEMri:$addr, DoubleRegs:$src1),
- "memd($addr) = $src1",
- [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>,
- Requires<[HasV5T]>;
-
-// Indexed store double word.
-let AddedComplexity = 10 in
-def STrid_indexed_f : STInst<(outs),
- (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
- "memd($src1+#$src2) = $src3",
- [(store (f64 DoubleRegs:$src3),
- (add IntRegs:$src1, s11_3ImmPred:$src2))]>,
- Requires<[HasV5T]>;
-
-
-// Add
-let isCommutable = 1 in
-def fADD_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfadd($src1, $src2)",
- [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfadd($src1, $src2)",
- [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-def fSUB_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfsub($src1, $src2)",
- [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfsub($src1, $src2)",
- [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fMUL_rr : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmpy($src1, $src2)",
- [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-let isCommutable = 1 in
-def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
- DoubleRegs:$src2),
- "$dst = dfmpy($src1, $src2)",
- [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1,
- DoubleRegs:$src2))]>,
- Requires<[HasV5T]>;
-
-// Compare.
-let isCompare = 1 in {
-multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> {
- def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst,
- (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>,
- Requires<[HasV5T]>;
-}
-
-multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> {
- def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set PredRegs:$dst,
- (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>,
- Requires<[HasV5T]>;
-}
-}
-
-defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>;
-defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>;
-defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>;
-defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>;
-defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>;
-defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>;
-
-defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>;
-defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>;
-defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>;
-defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>;
-defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>;
-defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>;
-
-// olt.
-def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- (f64 DoubleRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-// gt.
-def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1),
- (f64 (CONST64_Float_Real fpimm:$src2))))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>,
- Requires<[HasV5T]>;
-
-// ult.
-def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- (f64 DoubleRegs:$src1)))>,
- Requires<[HasV5T]>;
-
-// le.
-// rs <= rt -> rt >= rs.
-def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-
-// Rss <= Rtt -> Rtt >= Rss.
-def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// rs <= rt -> rt >= rs.
-def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// Rss <= Rtt -> Rtt >= Rss.
-def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)),
- DoubleRegs:$src1))>,
- Requires<[HasV5T]>;
-
-// ne.
-def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPOEQ64_rr DoubleRegs:$src1,
- (f64 (CONST64_Float_Real fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
- (i1 (C2_not (FCMPUEQ64_rr DoubleRegs:$src1,
- (f64 (CONST64_Float_Real fpimm:$src2)))))>,
- Requires<[HasV5T]>;
-
-// Convert Integer to Floating Point.
-def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_d2sf($src)",
- [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_ud2sf($src)",
- [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_uw2sf($src)",
- [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_w2sf($src)",
- [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_d2df($src)",
- [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_ud2df($src)",
- [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_uw2df($src)",
- [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_w2df($src)",
- [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-// Convert Floating Point to Integer - default.
-def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2uw($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2w($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2uw($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2w($src):chop",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2d($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2ud($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2d($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2ud($src):chop",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T]>;
-
-// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20 in
-def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2uw($src)",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2w($src)",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2uw($src)",
- [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2w($src)",
- [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2d($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- "$dst = convert_df2ud($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2d($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-let AddedComplexity = 20 in
-def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- "$dst = convert_sf2ud($src)",
- [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
- Requires<[HasV5T, IEEERndNearV5T]>;
-
-
-
-// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
- (i32 (A2_tfr IntRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
- (f32 (A2_tfr IntRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
- (i64 (A2_tfrp DoubleRegs:$src))>,
- Requires<[HasV5T]>;
-
-def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
- (f64 (A2_tfrp DoubleRegs:$src))>,
- Requires<[HasV5T]>;
-
-def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += sfmpy($src2, $src3)",
- [(set (f32 IntRegs:$dst),
- (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
- "$src1 = $dst">,
- Requires<[HasV5T]>;
-
-
-// Floating point max/min.
-
-let AddedComplexity = 100 in
-def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmax($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100 in
-def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmin($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Pseudo instruction to encode a set of conditional transfers.
-// This instruction is used instead of a mux and trades-off codesize
-// for performance. We conduct this transformation optimistically in
-// the hope that these instructions get promoted to dot-new transfers.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "Error; should not emit",
- [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- fpimm:$src2,
- fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (f32 IntRegs:$src3),
- (f32 IntRegs:$src4)),
- (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
- IntRegs:$src3)>, Requires<[HasV5T]>;
-
-def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (f64 DoubleRegs:$src3),
- (f64 DoubleRegs:$src4)),
- (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1),
- DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
- (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = TFR_condset_ri(p0, r1, #i)
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
- (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
- (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
-
-def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
- (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
- Requires<[HasV5T]>;
+def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
+def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
def : Pat <(fabs (f32 IntRegs:$src1)),
(S2_clrbit_i (f32 IntRegs:$src1), 31)>,
@@ -1024,13 +935,3 @@ def : Pat <(fabs (f32 IntRegs:$src1)),
def : Pat <(fneg (f32 IntRegs:$src1)),
(S2_togglebit_i (f32 IntRegs:$src1), 31)>,
Requires<[HasV5T]>;
-
-/*
-def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
- Requires<[HasV5T]>;
-
-def : Pat <(fabs (f64 DoubleRegs:$src1)),
- (S2_clrbit_i (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
- Requires<[HasV5T]>;
- */
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
new file mode 100644
index 0000000..f4fb946
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -0,0 +1,483 @@
+//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon Vector instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
+def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
+def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
+def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
+def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
+def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
+def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
+def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
+
+// Bit convert vector types.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_32<v2i16, v4i8>;
+
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+defm : bitconvert_64<v8i8, v4i16>;
+defm : bitconvert_64<v8i8, v2i32>;
+defm : bitconvert_64<v4i16, v2i32>;
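These multiclasses need no instruction at all: a bitcast between equal-width types leaves the value in the same register, so each Pat maps to the bare register operand. The same idea in C, as a sketch using memcpy for well-defined type punning (the function name is illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Reinterpret four packed bytes (v4i8) as an i32 without changing
       any bits; the software analogue of a register-identity bitcast. */
    static uint32_t bitcast_v4i8_to_i32(const uint8_t v[4])
    {
        uint32_t r;
        memcpy(&r, v, sizeof r);
        return r;
    }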
+
+
+// Vector shift support. Vector shifting in Hexagon is rather different
+// from LLVM's internal representation.
+// LLVM assumes all vector shifts have the form
+//   <VT> = SHL/SRA/SRL <VT> by <VT>
+// while Hexagon uses the form
+//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
+// As a result, special care is needed to guarantee correctness and
+// performance.
+class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
+ [(set (v4i16 DoubleRegs:$dst),
+ (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
+ bits<4> src2;
+ let Inst{11-8} = src2;
+}
+
+class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
+ [(set (v2i32 DoubleRegs:$dst),
+ (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
+ bits<5> src2;
+ let Inst{12-8} = src2;
+}
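Restating the mismatch in C terms, as a sketch (the lane type and count model v4i16; an arithmetic >> on signed operands is assumed, as on Hexagon):

    #include <stdint.h>

    /* Hexagon-style vector shift: every lane is shifted by the same
       scalar amount, i.e. <v4i16> = SRA <v4i16> by <i32>. */
    static void vasrh(int16_t a[4], int32_t amt)
    {
        for (int i = 0; i < 4; ++i)
            a[i] = (int16_t)(a[i] >> amt);
    }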
+
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
+def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
+def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
+
+def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
+def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
+def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
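The byte-splat semantics in C, as a sketch (the multiply-by-0x01010101 idiom is just a compact way to copy the low byte into all four byte lanes):

    #include <stdint.h>

    /* S2_vsplatrb: replicate the low 8 bits of Rs into each byte of
       the 32-bit result. */
    static uint32_t vsplatrb(uint32_t rs)
    {
        return (rs & 0xffu) * 0x01010101u;
    }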
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift words by register
+def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
+def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
+def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
+def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
+
+// Vector shift halfwords by register
+def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
+def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
+def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
+def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that gets expanded into two
+// scalar MPYI instructions.
+// The expansion is performed by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW\"",
+ [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW_ACC\"",
+ [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ "$Rd = $Rx">;
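What the expansion computes, in C terms: two independent 32-bit multiplies, one per word lane, each with C's modulo-2^32 wrap-around (a sketch; the function name is illustrative):

    #include <stdint.h>

    /* VMULW after expansion: elementwise i32 multiply, one scalar
       MPYI per lane; unsigned arithmetic wraps modulo 2^32. */
    static void vmulw(uint32_t rd[2], const uint32_t rs[2],
                      const uint32_t rt[2])
    {
        rd[0] = rs[0] * rt[0];
        rd[1] = rs[1] * rt[1];
    }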
+
+// Adds two v4i8 values: Hexagon has no instruction for this, so we widen
+// to the v8i8 double-register add and keep only the low half of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtracts two v4i8 values: Hexagon has no instruction for this, so we
+// widen to the v8i8 double-register subtract and keep only the low half.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
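The widening trick works because byte-lane addition wraps modulo 256 regardless of the register width, so the zero-filled upper lanes can simply be dropped. In C terms (a sketch):

    #include <stdint.h>

    /* v4i8 add via the v8i8 adder: the upper four (zero) lanes are
       discarded; each byte lane wraps modulo 256 either way. */
    static void vaddub_lo(uint8_t rd[4], const uint8_t rs[4],
                          const uint8_t rt[4])
    {
        for (int i = 0; i < 4; ++i)
            rd[i] = (uint8_t)(rs[i] + rt[i]);
    }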
+
+//
+// No 32 bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32 bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x < y --> cmp.gt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+ (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
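Both truncations keep the low part of each lane; viewed as bytes of the 64-bit source, those are exactly the even bytes, matching the A[i] = B[2*i] equations above. A C sketch of the v4i16 case:

    #include <stdint.h>

    /* trunc v4i16 -> v4i8 (S2_vtrunehb): keep the low byte of each
       halfword lane, i.e. the even bytes of the 64-bit source. */
    static void trunc_v4i16_to_v4i8(uint8_t a[4], const uint16_t b[4])
    {
        for (int i = 0; i < 4; ++i)
            a[i] = (uint8_t)b[i];
    }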
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
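What sext_inreg computes per lane, in C terms: reinterpret the low 8 (or 16) bits as signed and widen back to i32, which is what A2_sxtb/A2_sxth do on each half of the register pair (a sketch; the int8_t conversion is assumed to sign-extend, as on common targets):

    #include <stdint.h>

    /* sext_inreg v2i32 from v2i8: per lane, propagate bit 7 upward. */
    static void sext_inreg_v2i8(int32_t a[2])
    {
        for (int i = 0; i < 2; ++i)
            a[i] = (int8_t)a[i];
    }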
+
+
+// Multiplies two v2i16 values and returns a v2i32. This uses the
+// saturating multiply, as Hexagon does not provide a non-saturating
+// vector multiply, and saturation cannot affect a result held in
+// double the precision of the operands.
+
+// Multiplies two v2i16 vectors: since Hexagon has no multiply with C
+// semantics for this type, the pattern uses the halfword multiply
+// vmpyh, which takes two v2i16 values and returns a v2i32. The result
+// is then truncated back into a v2i16 to simulate C's unsigned
+// wrap-around semantics.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
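The reason saturation is harmless: |a*b| <= 2^30 for 16-bit operands, so the 32-bit intermediate of vmpyh can never saturate, and truncating it back to 16 bits yields exactly C's wrap-around product. A C sketch of the per-lane computation:

    #include <stdint.h>

    /* v2i16 multiply with C semantics: form the full 32-bit product
       per lane (what vmpyh returns) and truncate back to 16 bits. */
    static void vmul_v2i16(int16_t rd[2], const int16_t rs[2],
                           const int16_t rt[2])
    {
        for (int i = 0; i < 2; ++i)
            rd[i] = (int16_t)((int32_t)rs[i] * (int32_t)rt[i]);
    }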
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+
+class shuffler<SDNode Op, string Str>
+ : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
+ "$a = " # Str # "($b, $c)",
+ [(set (i64 DoubleRegs:$a),
+ (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
+ "", S_3op_tc_1_SLOT23>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
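A C model of the byte variants, matching the element equations above (a sketch; the halfword variants are analogous with i = 0,1):

    #include <stdint.h>

    /* S2_shuffeb: a[2i] = c[2i], a[2i+1] = b[2i], for i = 0..3. */
    static void shuffeb(uint8_t a[8], const uint8_t b[8], const uint8_t c[8])
    {
        for (int i = 0; i < 4; ++i) {
            a[2*i]     = c[2*i];
            a[2*i + 1] = b[2*i];
        }
    }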
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
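The zero-extending case in C terms: one 16-bit scalar load fetches both packed bytes, then each byte lane is widened, mirroring L2_loadruh_io followed by S2_vzxtbh (a sketch; little-endian lane order assumed, as on Hexagon):

    #include <stdint.h>
    #include <string.h>

    /* zextload v2i8 -> v2i16: one halfword load supplies both bytes,
       then each byte lane is zero-extended. */
    static void zextload_v2i8(uint16_t dst[2], const uint8_t *p)
    {
        uint16_t packed;
        memcpy(&packed, p, sizeof packed);
        dst[0] = packed & 0xffu;
        dst[1] = packed >> 8;
    }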
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 7d3f9d9..4275230 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -13,3495 +13,1273 @@
// March 4, 2008
//===----------------------------------------------------------------------===//
-//
-// ALU 32 types.
-//
+class T_I_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is),
+ (MI imm:$Is)>;
-class qi_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_siu8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_ALU32_siu9<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_qisisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_ALU32_qis8si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
- IntRegs:$src3))]>;
-
-class si_ALU32_qisis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_ALU32_qis8s8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
-
-class si_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU32_sis16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_s10si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
- [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
-
-class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
- !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
- !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_s16<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class di_ALU32_s8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class di_ALU64_di<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_ALU32_si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_ALU32_si_tfr<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+class T_R_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs),
+ (MI I32:$Rs)>;
-//
-// ALU 64 types.
-//
+class T_P_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs),
+ (MI DoubleRegs:$Rs)>;
+
+class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat<(IntID Imm1:$Is, Imm2:$It),
+ (MI Imm1:$Is, Imm2:$It)>;
+
+class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID I32:$Rs, ImmPred:$It),
+ (MI I32:$Rs, ImmPred:$It)>;
+
+class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID ImmPred:$Is, I32:$Rt),
+ (MI ImmPred:$Is, I32:$Rt)>;
+
+class T_PI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I64:$Rs, imm:$It),
+ (MI DoubleRegs:$Rs, imm:$It)>;
+
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I32:$Rs, I64:$Rt),
+ (MI I32:$Rs, DoubleRegs:$Rt)>;
+
+class T_RR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt),
+ (MI I32:$Rs, I32:$Rt)>;
+
+class T_PP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-class si_ALU64_si_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_ALU64_sidi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_qididi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$src3))]>;
-
-class di_ALU64_sisi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_ALU64_didi_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class qi_ALU64_didi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_ALU64_sisi<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):sat:<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
- : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It),
+ (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>;
-//
-// SInst classes.
-//
+class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is),
+ (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>;
-class qi_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class qi_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class di_SInst_di_sat<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class si_SInst_di_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
-
-class di_SInst_disi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_didi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class si_SInst_sisiu3<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_SInst_diu5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_disi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_sidi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_SInst_disisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class di_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_siu5<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_SInst_siu6<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class qi_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_si_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class di_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "($src)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_qi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
- !strconcat("$dst = ", !strconcat(opc , "$src")),
- [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
-
-class si_SInst_sisi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_SInst_diu6<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_SInst_siu5u5<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
-
-class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1, IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class si_SInst_sisidi<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6u6<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2, u6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class di_SInst_dididi<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_diu6u6<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
- u6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
- imm:$src3))]>;
-
-class di_SInst_didiqi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3))]>;
-
-class di_SInst_didiu3<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- u3Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- imm:$src3))]>;
-
-class di_SInst_didisi_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didisi_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u6Imm:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisisi_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-
-class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1, imm:$src2))],
- "$dst2 = $dst">;
-
-class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
- : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- u5Imm:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- imm:$src2))],
- "$dst2 = $dst">;
+class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs),
+ (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>;
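+// The mirror-image form with the immediate first (predicate name
+// illustrative):
+//   def : T_QIR_pat <C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;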
+class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
+ (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
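+// Hypothetical example: an add-of-add with a literal, whose intrinsic takes
+// (Rs, Ru, #s6):
+//   def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;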
-//
-// MInst classes.
-//
+class T_RII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
+ (MI I32:$Rs, imm:$It, imm:$Iu)>;
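+// Hypothetical example: an unsigned bitfield extract, whose intrinsic takes
+// (Rs, #width, #offset):
+//   def : T_RII_pat <S2_extractu, int_hexagon_S2_extractu>;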
-class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):rnd")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_disisi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_s8s8<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
-
-class si_MInst_sis9<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_MInst_sisi<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_lh<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hl<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_ll<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-
-class si_MInst_sisi_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_up<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class si_SInst_sisi_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_SInst_didi_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2*):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- s8Imm:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- u4Imm:$src2, u5Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, #$src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- imm:$src2, imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- u8Imm:$src3),
- !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
- u8Imm:$src3),
- !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
- imm:$src3))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2))]>;
-
-class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.H):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.L, $src2.L):<<1:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc ,
- "($src1.H, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-
-class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.H, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.H, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.L, $src2.H):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst -= ",
- !strconcat(opc , "($src1.L, $src2.L):<<1")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- IntRegs:$src2),
- !strconcat("$dst += ",
- !strconcat(opc , "($src1, $src2):<<1:sat")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
- DoubleRegs:$src1,
- IntRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ",
- !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
-
-class si_MInst_didi<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-
-class T_RI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID (i32 IntRegs:$Rs), imm:$It),
- (MI IntRegs:$Rs, imm:$It)>;
+class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
+ (MI imm:$It, I32:$Rs, imm:$Iu)>;
-//
-// LDInst classes.
-//
-let mayLoad = 1, hasSideEffects = 0 in
-class di_LDInstPI_diu4<string opc, Intrinsic IntID>
- : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
- (ins IntRegs:$src1, IntRegs:$src2, CtrRegs:$src3, s4Imm:$offset),
- "$dst2 = memd($src1++#$offset:circ($src3))",
- [],
- "$src1 = $dst">;
+class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
+ (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-/********************************************************************
-* ALU32/ALU *
-*********************************************************************/
+class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
+ (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-// ALU32 / ALU / Add.
-def HEXAGON_A2_add:
- si_ALU32_sisi <"add", int_hexagon_A2_add>;
-def HEXAGON_A2_addi:
- si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
-
-// ALU32 / ALU / Logical operations.
-def HEXAGON_A2_and:
- si_ALU32_sisi <"and", int_hexagon_A2_and>;
-def HEXAGON_A2_andir:
- si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
-def HEXAGON_A2_not:
- si_ALU32_si <"not", int_hexagon_A2_not>;
-def HEXAGON_A2_or:
- si_ALU32_sisi <"or", int_hexagon_A2_or>;
-def HEXAGON_A2_orir:
- si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
-def HEXAGON_A2_xor:
- si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
-
-// ALU32 / ALU / Negate.
-def HEXAGON_A2_neg:
- si_ALU32_si <"neg", int_hexagon_A2_neg>;
-
-// ALU32 / ALU / Subtract.
-def HEXAGON_A2_sub:
- si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
-def HEXAGON_A2_subri:
- si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
-
-// ALU32 / ALU / Transfer Immediate.
-def HEXAGON_A2_tfril:
- si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
-def HEXAGON_A2_tfrih:
- si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
-def HEXAGON_A2_tfrsi:
- si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
-def HEXAGON_A2_tfrpi:
- di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
-
-// ALU32 / ALU / Transfer Register.
-def HEXAGON_A2_tfr:
- si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
+ (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
-/********************************************************************
-* ALU32/PERM *
-*********************************************************************/
+class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>;
-// ALU32 / PERM / Combine.
-def HEXAGON_A2_combinew:
- di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
-def HEXAGON_A2_combine_hh:
- si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
-def HEXAGON_A2_combine_lh:
- si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
-def HEXAGON_A2_combine_hl:
- si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
-def HEXAGON_A2_combine_ll:
- si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
-def HEXAGON_A2_combineii:
- di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
-
-// ALU32 / PERM / Mux.
-def HEXAGON_C2_mux:
- si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
-def HEXAGON_C2_muxri:
- si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
-def HEXAGON_C2_muxir:
- si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
-def HEXAGON_C2_muxii:
- si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
-
-// ALU32 / PERM / Shift halfword.
-def HEXAGON_A2_aslh:
- si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
-def HEXAGON_A2_asrh:
- si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
-def SI_to_SXTHI_asrh:
- si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
-
-// ALU32 / PERM / Sign/zero extend.
-def HEXAGON_A2_sxth:
- si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
-def HEXAGON_A2_sxtb:
- si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
-def HEXAGON_A2_zxth:
- si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
-def HEXAGON_A2_zxtb:
- si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+class T_PII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
+ (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>;
-/********************************************************************
-* ALU32/PRED *
-*********************************************************************/
+class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>;
-// ALU32 / PRED / Compare.
-def HEXAGON_C2_cmpeq:
- qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
-def HEXAGON_C2_cmpeqi:
- qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
-def HEXAGON_C2_cmpgei:
- qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
-def HEXAGON_C2_cmpgeui:
- qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
-def HEXAGON_C2_cmpgt:
- qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
-def HEXAGON_C2_cmpgti:
- qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
-def HEXAGON_C2_cmpgtu:
- qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
-def HEXAGON_C2_cmpgtui:
- qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
-def HEXAGON_C2_cmplt:
- qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
-def HEXAGON_C2_cmpltu:
- qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>;
-/********************************************************************
-* ALU32/VH *
-*********************************************************************/
+class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>;
-// ALU32 / VH / Vector add halfwords.
-// Rd32=vadd[u]h(Rs32,Rt32:sat]
-def HEXAGON_A2_svaddh:
- si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
-def HEXAGON_A2_svaddhs:
- si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
-def HEXAGON_A2_svadduhs:
- si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
-
-// ALU32 / VH / Vector average halfwords.
-def HEXAGON_A2_svavgh:
- si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
-def HEXAGON_A2_svavghs:
- si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
-def HEXAGON_A2_svnavgh:
- si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
-
-// ALU32 / VH / Vector subtract halfwords.
-def HEXAGON_A2_svsubh:
- si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
-def HEXAGON_A2_svsubhs:
- si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
-def HEXAGON_A2_svsubuhs:
- si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>;
-/********************************************************************
-* ALU64/ALU *
-*********************************************************************/
+class T_PR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt),
+ (MI DoubleRegs:$Rs, I32:$Rt)>;
-// ALU64 / ALU / Add.
-def HEXAGON_A2_addp:
- di_ALU64_didi <"add", int_hexagon_A2_addp>;
-def HEXAGON_A2_addsat:
- si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
-
-// ALU64 / ALU / Add halfword.
-// Even though the definition says hl, it should be lh -
-//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
-def HEXAGON_A2_addh_l16_hl:
- si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
-def HEXAGON_A2_addh_l16_ll:
- si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
-
-def HEXAGON_A2_addh_l16_sat_hl:
- si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
-def HEXAGON_A2_addh_l16_sat_ll:
- si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
-
-def HEXAGON_A2_addh_h16_hh:
- si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
-def HEXAGON_A2_addh_h16_hl:
- si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
-def HEXAGON_A2_addh_h16_lh:
- si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
-def HEXAGON_A2_addh_h16_ll:
- si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
-
-def HEXAGON_A2_addh_h16_sat_hh:
- si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
-def HEXAGON_A2_addh_h16_sat_hl:
- si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
-def HEXAGON_A2_addh_h16_sat_lh:
- si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
-def HEXAGON_A2_addh_h16_sat_ll:
- si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
-
-// ALU64 / ALU / Compare.
-def HEXAGON_C2_cmpeqp:
- qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
-def HEXAGON_C2_cmpgtp:
- qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
-def HEXAGON_C2_cmpgtup:
- qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
-
-// ALU64 / ALU / Logical operations.
-def HEXAGON_A2_andp:
- di_ALU64_didi <"and", int_hexagon_A2_andp>;
-def HEXAGON_A2_orp:
- di_ALU64_didi <"or", int_hexagon_A2_orp>;
-def HEXAGON_A2_xorp:
- di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
-
-// ALU64 / ALU / Maximum.
-def HEXAGON_A2_max:
- si_ALU64_sisi <"max", int_hexagon_A2_max>;
-def HEXAGON_A2_maxu:
- si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
-
-// ALU64 / ALU / Minimum.
-def HEXAGON_A2_min:
- si_ALU64_sisi <"min", int_hexagon_A2_min>;
-def HEXAGON_A2_minu:
- si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
-
-// ALU64 / ALU / Subtract.
-def HEXAGON_A2_subp:
- di_ALU64_didi <"sub", int_hexagon_A2_subp>;
-def HEXAGON_A2_subsat:
- si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
-
-// ALU64 / ALU / Subtract halfword.
-// Even though the definition says hl, it should be lh -
-//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits.
-def HEXAGON_A2_subh_l16_hl:
- si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
-def HEXAGON_A2_subh_l16_ll:
- si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
-
-def HEXAGON_A2_subh_l16_sat_hl:
- si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
-def HEXAGON_A2_subh_l16_sat_ll:
- si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
-
-def HEXAGON_A2_subh_h16_hh:
- si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
-def HEXAGON_A2_subh_h16_hl:
- si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
-def HEXAGON_A2_subh_h16_lh:
- si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
-def HEXAGON_A2_subh_h16_ll:
- si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
-
-def HEXAGON_A2_subh_h16_sat_hh:
- si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
-def HEXAGON_A2_subh_h16_sat_hl:
- si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
-def HEXAGON_A2_subh_h16_sat_lh:
- si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
-def HEXAGON_A2_subh_h16_sat_ll:
- si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
-
-// ALU64 / ALU / Transfer register.
-def HEXAGON_A2_tfrp:
- di_ALU64_di <"", int_hexagon_A2_tfrp>;
+class T_D_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID (F64:$Rs)),
+ (MI (F64:$Rs))>;
-/********************************************************************
-* ALU64/BIT *
-*********************************************************************/
+class T_DI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F64:$Rs, ImmPred:$It),
+ (MI F64:$Rs, ImmPred:$It)>;
-// ALU64 / BIT / Masked parity.
-def HEXAGON_S2_parityp:
- si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+class T_F_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs),
+ (MI F32:$Rs)>;
-/********************************************************************
-* ALU64/PERM *
-*********************************************************************/
+class T_FI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F32:$Rs, ImmPred:$It),
+ (MI F32:$Rs, ImmPred:$It)>;
-// ALU64 / PERM / Vector pack high and low halfwords.
-def HEXAGON_S2_packhl:
- di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+class T_FF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt),
+ (MI F32:$Rs, F32:$Rt)>;
-/********************************************************************
-* ALU64/VB *
-*********************************************************************/
+class T_DD_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F64:$Rs, F64:$Rt),
+ (MI F64:$Rs, F64:$Rt)>;
-// ALU64 / VB / Vector add unsigned bytes.
-def HEXAGON_A2_vaddub:
- di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
-def HEXAGON_A2_vaddubs:
- di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
-
-// ALU64 / VB / Vector average unsigned bytes.
-def HEXAGON_A2_vavgub:
- di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
-def HEXAGON_A2_vavgubr:
- di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
-
-// ALU64 / VB / Vector compare unsigned bytes.
-def HEXAGON_A2_vcmpbeq:
- qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
-def HEXAGON_A2_vcmpbgtu:
- qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
-
-// ALU64 / VB / Vector maximum/minimum unsigned bytes.
-def HEXAGON_A2_vmaxub:
- di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
-def HEXAGON_A2_vminub:
- di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
-
-// ALU64 / VB / Vector subtract unsigned bytes.
-def HEXAGON_A2_vsubub:
- di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
-def HEXAGON_A2_vsububs:
- di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
-// ALU64 / VB / Vector mux.
-def HEXAGON_C2_vmux:
- di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>;
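+
+// The T_*_pat helpers above are thin Pat<> wrappers that select the machine
+// instruction MI for a call to the intrinsic IntID, forwarding the operands
+// unchanged. The name encodes the operand kinds: R = 32-bit register,
+// P = 64-bit register pair, I = immediate, Q = predicate, F/D = 32-/64-bit
+// floating point. For instance,
+//
+//   def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
+//
+// is shorthand for
+//
+//   def : Pat <(int_hexagon_M2_mpyd_acc_ll_s0 I64:$Rs, I32:$Rt, I32:$Ru),
+//              (M2_mpyd_acc_ll_s0 DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>;
+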
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords
+//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* ALU64/VH *
-*********************************************************************/
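+// Naming note for the patterns below: the ll/lh/hl/hh suffix selects the
+// high (H) or low (L) halfword of each source register, and _s1/_s0 selects
+// whether the :<<1 shift is applied, so M2_mpy_hl_s1 corresponds to
+// Rd=mpy(Rs.H,Rt.L):<<1.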
+def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
+def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
+def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
+def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
+def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
+def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
+def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
+def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
+
+def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
+def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
+def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
+def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
+def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
+def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
+def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
+def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
+
+def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-// ALU64 / VH / Vector add halfwords.
-// Rdd64=vadd[u]h(Rss64,Rtt64:sat]
-def HEXAGON_A2_vaddh:
- di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
-def HEXAGON_A2_vaddhs:
- di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
-def HEXAGON_A2_vadduhs:
- di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
-
-// ALU64 / VH / Vector average halfwords.
-// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat]
-def HEXAGON_A2_vavgh:
- di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
-def HEXAGON_A2_vavghcr:
- di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
-def HEXAGON_A2_vavghr:
- di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
-def HEXAGON_A2_vavguh:
- di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
-def HEXAGON_A2_vavguhr:
- di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
-def HEXAGON_A2_vnavgh:
- di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
-def HEXAGON_A2_vnavghcr:
- di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
-def HEXAGON_A2_vnavghr:
- di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
-
-// ALU64 / VH / Vector compare halfwords.
-def HEXAGON_A2_vcmpheq:
- qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
-def HEXAGON_A2_vcmphgt:
- qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
-def HEXAGON_A2_vcmphgtu:
- qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
-
-// ALU64 / VH / Vector maximum halfwords.
-def HEXAGON_A2_vmaxh:
- di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
-def HEXAGON_A2_vmaxuh:
- di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
-
-// ALU64 / VH / Vector minimum halfwords.
-def HEXAGON_A2_vminh:
- di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
-def HEXAGON_A2_vminuh:
- di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
-
-// ALU64 / VH / Vector subtract halfwords.
-def HEXAGON_A2_vsubh:
- di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
-def HEXAGON_A2_vsubhs:
- di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
-def HEXAGON_A2_vsubuhs:
- di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// product to/from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* ALU64/VW *
-*********************************************************************/
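+// In these names, _acc means "+=" (the product is added to the accumulator)
+// and _nac means "-=" (it is subtracted), matching the Rx+=mpy(...) and
+// Rx-=mpy(...) assembler forms.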
+def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
+def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
+def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
-// ALU64 / VW / Vector add words.
-// Rdd32=vaddw(Rss32,Rtt32)[:sat]
-def HEXAGON_A2_vaddw:
- di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
-def HEXAGON_A2_vaddws:
- di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
-
-// ALU64 / VW / Vector average words.
-def HEXAGON_A2_vavguw:
- di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
-def HEXAGON_A2_vavguwr:
- di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
-def HEXAGON_A2_vavgw:
- di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
-def HEXAGON_A2_vavgwcr:
- di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
-def HEXAGON_A2_vavgwr:
- di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
-def HEXAGON_A2_vnavgw:
- di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
-def HEXAGON_A2_vnavgwcr:
- di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
-def HEXAGON_A2_vnavgwr:
- di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
-
-// ALU64 / VW / Vector compare words.
-def HEXAGON_A2_vcmpweq:
- qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
-def HEXAGON_A2_vcmpwgt:
- qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
-def HEXAGON_A2_vcmpwgtu:
- qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
-
-// ALU64 / VW / Vector maximum words.
-def HEXAGON_A2_vmaxw:
- di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
-def HEXAGON_A2_vmaxuw:
- di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
-
-// ALU64 / VW / Vector minimum words.
-def HEXAGON_A2_vminw:
- di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
-def HEXAGON_A2_vminuw:
- di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
-
-// ALU64 / VW / Vector subtract words.
-def HEXAGON_A2_vsubw:
- di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
-def HEXAGON_A2_vsubws:
- di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+//===----------------------------------------------------------------------===//
+// Multiply signed/unsigned halfwords with and without saturation and rounding
+// into a 64-bit destination register.
+//===----------------------------------------------------------------------===//
-/********************************************************************
-* CR *
-*********************************************************************/
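+// The mpyd/mpyud forms widen: the 16x16 product lands in a 64-bit register
+// pair, e.g. M2_mpyd_hh_s0 corresponds to Rdd=mpy(Rs.H,Rt.H).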
+def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
+def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
+def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
+def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
+def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
+def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
+def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
+def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
+
+def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
+
+def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
+def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
+def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
+def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
+def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
+def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
+def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
+def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// product to/from the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
-// CR / Logical reductions on predicates.
-def HEXAGON_C2_all8:
- qi_SInst_qi <"all8", int_hexagon_C2_all8>;
-def HEXAGON_C2_any8:
- qi_SInst_qi <"any8", int_hexagon_C2_any8>;
-
-// CR / Logical operations on predicates.
-def HEXAGON_C2_pxfer_map:
- qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
-def HEXAGON_C2_and:
- qi_SInst_qiqi <"and", int_hexagon_C2_and>;
-def HEXAGON_C2_andn:
- qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
-def HEXAGON_C2_not:
- qi_SInst_qi <"not", int_hexagon_C2_not>;
-def HEXAGON_C2_or:
- qi_SInst_qiqi <"or", int_hexagon_C2_or>;
-def HEXAGON_C2_orn:
- qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
-def HEXAGON_C2_xor:
- qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
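+// These accumulating forms use T_PRR_pat: the 64-bit accumulator Rxx is
+// passed as the leading I64 operand, followed by the two 32-bit
+// multiplicands.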
+def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
+def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
+def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
+def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
+def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
+def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
+def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
+def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
+def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
+def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
+def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
+def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
+def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
+def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
+def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
+def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
+def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
+
+// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
+def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
+def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
+def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
+def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
+def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
+def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
+
+def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
+def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
+def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
+def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
+
+// Vector reduce halfwords
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
+def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
+//===----------------------------------------------------------------------===//
+// Vector Multiply with accumulation
+//===----------------------------------------------------------------------===//
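+// The accumulating vector patterns below all use T_PPP_pat, so the 64-bit
+// destination/accumulator pair Rxx appears as the leading I64 operand.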
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
+def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
+def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
+def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
+def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
+def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
+def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
+def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
+def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
+def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
+def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
+def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
+def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
+def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
+def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
+def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
+def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
+def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
+def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
+
+//===----------------------------------------------------------------------===//
+// Add/Subtract halfword
+// Rd=add(Rt.L,Rs.[HL])[:sat]
+// Rd=sub(Rt.L,Rs.[HL])[:sat]
+// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
+// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
+//===----------------------------------------------------------------------===//
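+// Suffix sketch: the _h16 variants shift the halfword result into the upper
+// half of Rd (the :<<16 forms), the _l16 variants leave it in the lower
+// half, and _sat variants saturate; the trailing letter pair picks the
+// source halfwords as spelled out in the Rd=... comments below.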
+
+//Rd=add(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
+def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
+
+//Rd=add(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
+def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
+def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
+def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
+def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
+def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
+def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
+def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
+def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
+def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
+def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
+def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
+def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
+def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
+def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
+def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
+
+// ALU64 / ALU / min max
+def : T_RR_pat<A2_max, int_hexagon_A2_max>;
+def : T_RR_pat<A2_min, int_hexagon_A2_min>;
+def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
+def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
+
+// Shift and accumulate
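+// Name decoder: asr/lsr = arithmetic/logical shift right, asl/lsl =
+// arithmetic/logical shift left; _i_/_r_ = shift amount in an immediate or
+// a register; the final r/p = 32-bit register or 64-bit pair; and
+// acc/nac/and/or/xacc fold the shifted value into the destination with
+// +=, -=, &=, |= and ^= respectively.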
+def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
+def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
+def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
+def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
+def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
+
+def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
+def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
+def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
+def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
+def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
+def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
+
+def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
+def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
+def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
+def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
+def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
+def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
+
+def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
+def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
+def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
+def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
+def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
+def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
+
+def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
+def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
+def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
+def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
+def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
+def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
+def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
+def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
+
+def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
+def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
+def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
+def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
+def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
+def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
+def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
+def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
+
+def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
+def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
+def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
+def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
+def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
+def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
+def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
+def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
+
+def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
+def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
+def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
+def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
+def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
+def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
+def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
+def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
+
/********************************************************************
-* MTYPE/ALU *
+* ALU32/ALU *
*********************************************************************/
+def : T_RR_pat<A2_add, int_hexagon_A2_add>;
+def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
+def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
+def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
+def : T_RR_pat<A2_and, int_hexagon_A2_and>;
+def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
+def : T_RR_pat<A2_or, int_hexagon_A2_or>;
+def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
+def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
+def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+
+// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
+def : Pat <(int_hexagon_A2_not (I32:$Rs)),
+ (A2_subri -1, IntRegs:$Rs)>;
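+// (In two's complement, -1 - Rs == ~Rs, so subtracting from #-1 yields the
+// bitwise not.)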
+
+// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32)
+def : Pat <(int_hexagon_A2_neg IntRegs:$Rs),
+ (A2_subri 0, IntRegs:$Rs)>;
+
+// Transfer immediate
+def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>;
+def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>;
+
+// Transfer Register/immediate.
+def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
+def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
+
+// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
+def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src),
+ (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>;
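+// HiReg/LoReg extract the high and low 32-bit subregisters of $src, so the
+// A2_combinew above reassembles the same 64-bit value, implementing the
+// assembler mapping described in the comment.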
-// MTYPE / ALU / Add and accumulate.
-def HEXAGON_M2_acci:
- si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
-def HEXAGON_M2_accii:
- si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
-def HEXAGON_M2_nacci:
- si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
-def HEXAGON_M2_naccii:
- si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+// Combine
+def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
+def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
+def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
+def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
+
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
-// MTYPE / ALU / Subtract and accumulate.
-def HEXAGON_M2_subacc:
- si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
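+// The intrinsic carries the predicate in a 32-bit register, so C2_tfrrp
+// first transfers it into a predicate register before it feeds C2_mux.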
-// MTYPE / ALU / Vector absolute difference.
-def HEXAGON_M2_vabsdiffh:
- di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
-def HEXAGON_M2_vabsdiffw:
- di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+// Mux
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
-// MTYPE / ALU / XOR and xor with destination.
-def HEXAGON_M2_xor_xacc:
- si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+// Shift halfword
+def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
+def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
+def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>;
+// Sign/zero extend
+def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
+def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
+def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
+def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
/********************************************************************
-* MTYPE/COMPLEX *
+* ALU32/PRED *
*********************************************************************/
+// Compare
+def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
+def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
+def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-// MTYPE / COMPLEX / Complex multiply.
-// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat
-def HEXAGON_M2_cmpys_s1:
- di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
-def HEXAGON_M2_cmpys_s0:
- di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
-def HEXAGON_M2_cmpysc_s1:
- di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
-def HEXAGON_M2_cmpysc_s0:
- di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
-
-def HEXAGON_M2_cmacs_s1:
- di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
-def HEXAGON_M2_cmacs_s0:
- di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
-def HEXAGON_M2_cmacsc_s1:
- di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
-def HEXAGON_M2_cmacsc_s0:
- di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
-
-def HEXAGON_M2_cnacs_s1:
- di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
-def HEXAGON_M2_cnacs_s0:
- di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
-def HEXAGON_M2_cnacsc_s1:
- di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
-def HEXAGON_M2_cnacsc_s0:
- di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
-
-// MTYPE / COMPLEX / Complex multiply real or imaginary.
-def HEXAGON_M2_cmpyr_s0:
- di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
-def HEXAGON_M2_cmacr_s0:
- di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
-
-def HEXAGON_M2_cmpyi_s0:
- di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
-def HEXAGON_M2_cmaci_s0:
- di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
-
-// MTYPE / COMPLEX / Complex multiply with round and pack.
-// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
-def HEXAGON_M2_cmpyrs_s0:
- si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
-def HEXAGON_M2_cmpyrs_s1:
- si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
-
-def HEXAGON_M2_cmpyrsc_s0:
- si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
-def HEXAGON_M2_cmpyrsc_s1:
- si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
-
-//MTYPE / COMPLEX / Vector complex multiply real or imaginary.
-def HEXAGON_M2_vcmpy_s0_sat_i:
- di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
-def HEXAGON_M2_vcmpy_s1_sat_i:
- di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
-
-def HEXAGON_M2_vcmpy_s0_sat_r:
- di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
-def HEXAGON_M2_vcmpy_s1_sat_r:
- di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
-
-def HEXAGON_M2_vcmac_s0_sat_i:
- di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
-def HEXAGON_M2_vcmac_s0_sat_r:
- di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
-
-//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def HEXAGON_M2_vrcmpyi_s0:
- di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
-def HEXAGON_M2_vrcmpyr_s0:
- di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
-
-def HEXAGON_M2_vrcmpyi_s0c:
- di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
-def HEXAGON_M2_vrcmpyr_s0c:
- di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
-
-def HEXAGON_M2_vrcmaci_s0:
- di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
-def HEXAGON_M2_vrcmacr_s0:
- di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
-
-def HEXAGON_M2_vrcmaci_s0c:
- di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
-def HEXAGON_M2_vrcmacr_s0c:
- di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
+ (i32 (C2_cmpgti (I32:$src1),
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-/********************************************************************
-* MTYPE/MPYH *
-*********************************************************************/
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
+ (i32 (C2_cmpgtui (I32:$src1),
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
+
+// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
+ (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>;
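+// Rationale: cmp.ge(Rs,#s) is equivalent to cmp.gt(Rs,#s-1), hence the
+// DEC_CONST_* decrements above. cmp.geu(Rs,#0) is always true and is
+// handled separately, since decrementing #0 would underflow the unsigned
+// immediate; cmp.eq(Rs,Rs) reproduces the always-true result.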
-// MTYPE / MPYH / Multiply and use lower result.
-//def HEXAGON_M2_mpysmi:
-//FIXME: Hexagon_M2_mpysmi should really by of the type si_MInst_sim9,
-// not si_MInst_sis9 - but for now, we will use s9.
-// def Hexagon_M2_mpysmi:
-// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
-def Hexagon_M2_mpysmi:
- si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>;
-def HEXAGON_M2_mpyi:
- si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
-def HEXAGON_M2_mpyui:
- si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
-def HEXAGON_M2_macsip:
- si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
-def HEXAGON_M2_maci:
- si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
-def HEXAGON_M2_macsin:
- si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
-
-// MTYPE / MPYH / Multiply word by half (32x16).
-//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
-//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
-def HEXAGON_M2_mmpyl_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
-def HEXAGON_M2_mmpyl_s1:
- di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
-def HEXAGON_M2_mmpyl_rs0:
- di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
-def HEXAGON_M2_mmpyl_s0:
- di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
-def HEXAGON_M2_mmpyh_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
-def HEXAGON_M2_mmpyh_s1:
- di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
-def HEXAGON_M2_mmpyh_rs0:
- di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
-def HEXAGON_M2_mmpyh_s0:
- di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
-def HEXAGON_M2_mmacls_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
-def HEXAGON_M2_mmacls_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
-def HEXAGON_M2_mmacls_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
-def HEXAGON_M2_mmacls_s0:
- di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
-def HEXAGON_M2_mmachs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
-def HEXAGON_M2_mmachs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
-def HEXAGON_M2_mmachs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
-def HEXAGON_M2_mmachs_s0:
- di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
-
-// MTYPE / MPYH / Multiply word by unsigned half (32x16).
-//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
-//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
-def HEXAGON_M2_mmpyul_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
-def HEXAGON_M2_mmpyul_s1:
- di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
-def HEXAGON_M2_mmpyul_rs0:
- di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
-def HEXAGON_M2_mmpyul_s0:
- di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
-def HEXAGON_M2_mmpyuh_rs1:
- di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
-def HEXAGON_M2_mmpyuh_s1:
- di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
-def HEXAGON_M2_mmpyuh_rs0:
- di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
-def HEXAGON_M2_mmpyuh_s0:
- di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
-def HEXAGON_M2_mmaculs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
-def HEXAGON_M2_mmaculs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
-def HEXAGON_M2_mmaculs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
-def HEXAGON_M2_mmaculs_s0:
- di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
-def HEXAGON_M2_mmacuhs_rs1:
- di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
-def HEXAGON_M2_mmacuhs_s1:
- di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
-def HEXAGON_M2_mmacuhs_rs0:
- di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
-def HEXAGON_M2_mmacuhs_s0:
- di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
-
-// MTYPE / MPYH / Multiply and use upper result.
-def HEXAGON_M2_hmmpyh_rs1:
- si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
-def HEXAGON_M2_hmmpyl_rs1:
- si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
-def HEXAGON_M2_mpy_up:
- si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
-def HEXAGON_M2_dpmpyss_rnd_s0:
- si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
-def HEXAGON_M2_mpyu_up:
- si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
-
-// MTYPE / MPYH / Multiply and use full result.
-def HEXAGON_M2_dpmpyuu_s0:
- di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
-def HEXAGON_M2_dpmpyuu_acc_s0:
- di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
-def HEXAGON_M2_dpmpyuu_nac_s0:
- di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
-def HEXAGON_M2_dpmpyss_s0:
- di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
-def HEXAGON_M2_dpmpyss_acc_s0:
- di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
-def HEXAGON_M2_dpmpyss_nac_s0:
- di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>;
+def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>;
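Likewise there is no cmp.lt instruction; the two patterns above synthesize it by swapping the operands of cmp.gt / cmp.gtu. Factored into a reusable class (a sketch only, the commit spells the two patterns out directly), the swap would read:

    // Hypothetical helper: select IntID(Rs, Rt) as MI(Rt, Rs).
    class T_RR_swap_pat <InstHexagon MI, Intrinsic IntID>
      : Pat <(i32 (IntID (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
             (i32 (MI IntRegs:$Rt, IntRegs:$Rs))>;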
/********************************************************************
-* MTYPE/MPYS *
+* ALU32/VH *
*********************************************************************/
+// Vector add, subtract, average halfwords
+def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
+def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
+def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
-// MTYPE / MPYS / Scalar 16x16 multiply signed.
-//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]|
-// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]]
-def HEXAGON_M2_mpy_hh_s0:
- si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
-def HEXAGON_M2_mpy_hh_s1:
- si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
-def HEXAGON_M2_mpy_rnd_hh_s1:
- si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
-def HEXAGON_M2_mpy_sat_rnd_hh_s1:
- si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def HEXAGON_M2_mpy_sat_hh_s1:
- si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
-def HEXAGON_M2_mpy_rnd_hh_s0:
- si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
-def HEXAGON_M2_mpy_sat_rnd_hh_s0:
- si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-def HEXAGON_M2_mpy_sat_hh_s0:
- si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_hl_s0:
- si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
-def HEXAGON_M2_mpy_hl_s1:
- si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
-def HEXAGON_M2_mpy_rnd_hl_s1:
- si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
-def HEXAGON_M2_mpy_sat_rnd_hl_s1:
- si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def HEXAGON_M2_mpy_sat_hl_s1:
- si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
-def HEXAGON_M2_mpy_rnd_hl_s0:
- si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
-def HEXAGON_M2_mpy_sat_rnd_hl_s0:
- si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def HEXAGON_M2_mpy_sat_hl_s0:
- si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_lh_s0:
- si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
-def HEXAGON_M2_mpy_lh_s1:
- si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
-def HEXAGON_M2_mpy_rnd_lh_s1:
- si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
-def HEXAGON_M2_mpy_sat_rnd_lh_s1:
- si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def HEXAGON_M2_mpy_sat_lh_s1:
- si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
-def HEXAGON_M2_mpy_rnd_lh_s0:
- si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
-def HEXAGON_M2_mpy_sat_rnd_lh_s0:
- si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def HEXAGON_M2_mpy_sat_lh_s0:
- si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_ll_s0:
- si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
-def HEXAGON_M2_mpy_ll_s1:
- si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
-def HEXAGON_M2_mpy_rnd_ll_s1:
- si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
-def HEXAGON_M2_mpy_sat_rnd_ll_s1:
- si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def HEXAGON_M2_mpy_sat_ll_s1:
- si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
-def HEXAGON_M2_mpy_rnd_ll_s0:
- si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
-def HEXAGON_M2_mpy_sat_rnd_ll_s0:
- si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def HEXAGON_M2_mpy_sat_ll_s0:
- si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
-
-//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
-def HEXAGON_M2_mpyd_hh_s0:
- di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
-def HEXAGON_M2_mpyd_hh_s1:
- di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
-def HEXAGON_M2_mpyd_rnd_hh_s1:
- di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
-def HEXAGON_M2_mpyd_rnd_hh_s0:
- di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
-
-def HEXAGON_M2_mpyd_hl_s0:
- di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
-def HEXAGON_M2_mpyd_hl_s1:
- di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
-def HEXAGON_M2_mpyd_rnd_hl_s1:
- di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
-def HEXAGON_M2_mpyd_rnd_hl_s0:
- di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
-
-def HEXAGON_M2_mpyd_lh_s0:
- di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
-def HEXAGON_M2_mpyd_lh_s1:
- di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
-def HEXAGON_M2_mpyd_rnd_lh_s1:
- di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
-def HEXAGON_M2_mpyd_rnd_lh_s0:
- di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
-
-def HEXAGON_M2_mpyd_ll_s0:
- di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
-def HEXAGON_M2_mpyd_ll_s1:
- di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
-def HEXAGON_M2_mpyd_rnd_ll_s1:
- di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
-def HEXAGON_M2_mpyd_rnd_ll_s0:
- di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
-
-//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def HEXAGON_M2_mpy_acc_hh_s0:
- si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
-def HEXAGON_M2_mpy_acc_hh_s1:
- si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
-def HEXAGON_M2_mpy_acc_sat_hh_s1:
- si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def HEXAGON_M2_mpy_acc_sat_hh_s0:
- si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_acc_hl_s0:
- si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
-def HEXAGON_M2_mpy_acc_hl_s1:
- si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
-def HEXAGON_M2_mpy_acc_sat_hl_s1:
- si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def HEXAGON_M2_mpy_acc_sat_hl_s0:
- si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_acc_lh_s0:
- si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
-def HEXAGON_M2_mpy_acc_lh_s1:
- si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
-def HEXAGON_M2_mpy_acc_sat_lh_s1:
- si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def HEXAGON_M2_mpy_acc_sat_lh_s0:
- si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_acc_ll_s0:
- si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
-def HEXAGON_M2_mpy_acc_ll_s1:
- si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
-def HEXAGON_M2_mpy_acc_sat_ll_s1:
- si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def HEXAGON_M2_mpy_acc_sat_ll_s0:
- si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
-
-//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
-def HEXAGON_M2_mpy_nac_hh_s0:
- si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
-def HEXAGON_M2_mpy_nac_hh_s1:
- si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
-def HEXAGON_M2_mpy_nac_sat_hh_s1:
- si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def HEXAGON_M2_mpy_nac_sat_hh_s0:
- si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
-
-def HEXAGON_M2_mpy_nac_hl_s0:
- si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
-def HEXAGON_M2_mpy_nac_hl_s1:
- si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
-def HEXAGON_M2_mpy_nac_sat_hl_s1:
- si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def HEXAGON_M2_mpy_nac_sat_hl_s0:
- si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
-
-def HEXAGON_M2_mpy_nac_lh_s0:
- si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
-def HEXAGON_M2_mpy_nac_lh_s1:
- si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
-def HEXAGON_M2_mpy_nac_sat_lh_s1:
- si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def HEXAGON_M2_mpy_nac_sat_lh_s0:
- si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
-
-def HEXAGON_M2_mpy_nac_ll_s0:
- si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
-def HEXAGON_M2_mpy_nac_ll_s1:
- si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
-def HEXAGON_M2_mpy_nac_sat_ll_s1:
- si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def HEXAGON_M2_mpy_nac_sat_ll_s0:
- si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
-
-//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
-def HEXAGON_M2_mpyd_acc_hh_s0:
- di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
-def HEXAGON_M2_mpyd_acc_hh_s1:
- di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
-
-def HEXAGON_M2_mpyd_acc_hl_s0:
- di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
-def HEXAGON_M2_mpyd_acc_hl_s1:
- di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
-
-def HEXAGON_M2_mpyd_acc_lh_s0:
- di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
-def HEXAGON_M2_mpyd_acc_lh_s1:
- di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
-
-def HEXAGON_M2_mpyd_acc_ll_s0:
- di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
-def HEXAGON_M2_mpyd_acc_ll_s1:
- di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
-
-//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]
-def HEXAGON_M2_mpyd_nac_hh_s0:
- di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
-def HEXAGON_M2_mpyd_nac_hh_s1:
- di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
-
-def HEXAGON_M2_mpyd_nac_hl_s0:
- di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
-def HEXAGON_M2_mpyd_nac_hl_s1:
- di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
-
-def HEXAGON_M2_mpyd_nac_lh_s0:
- di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
-def HEXAGON_M2_mpyd_nac_lh_s1:
- di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
-
-def HEXAGON_M2_mpyd_nac_ll_s0:
- di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
-def HEXAGON_M2_mpyd_nac_ll_s1:
- di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
-
-// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
-//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_hh_s0:
- si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
-def HEXAGON_M2_mpyu_hh_s1:
- si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
-def HEXAGON_M2_mpyu_hl_s0:
- si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
-def HEXAGON_M2_mpyu_hl_s1:
- si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
-def HEXAGON_M2_mpyu_lh_s0:
- si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
-def HEXAGON_M2_mpyu_lh_s1:
- si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
-def HEXAGON_M2_mpyu_ll_s0:
- si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
-def HEXAGON_M2_mpyu_ll_s1:
- si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
-
-//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_hh_s0:
- di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
-def HEXAGON_M2_mpyud_hh_s1:
- di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
-def HEXAGON_M2_mpyud_hl_s0:
- di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
-def HEXAGON_M2_mpyud_hl_s1:
- di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
-def HEXAGON_M2_mpyud_lh_s0:
- di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
-def HEXAGON_M2_mpyud_lh_s1:
- di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
-def HEXAGON_M2_mpyud_ll_s0:
- di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
-def HEXAGON_M2_mpyud_ll_s1:
- di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
-
-//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_acc_hh_s0:
- si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
-def HEXAGON_M2_mpyu_acc_hh_s1:
- si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
-def HEXAGON_M2_mpyu_acc_hl_s0:
- si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
-def HEXAGON_M2_mpyu_acc_hl_s1:
- si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
-def HEXAGON_M2_mpyu_acc_lh_s0:
- si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
-def HEXAGON_M2_mpyu_acc_lh_s1:
- si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
-def HEXAGON_M2_mpyu_acc_ll_s0:
- si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
-def HEXAGON_M2_mpyu_acc_ll_s1:
- si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
-
-//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyu_nac_hh_s0:
- si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
-def HEXAGON_M2_mpyu_nac_hh_s1:
- si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
-def HEXAGON_M2_mpyu_nac_hl_s0:
- si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
-def HEXAGON_M2_mpyu_nac_hl_s1:
- si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
-def HEXAGON_M2_mpyu_nac_lh_s0:
- si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
-def HEXAGON_M2_mpyu_nac_lh_s1:
- si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
-def HEXAGON_M2_mpyu_nac_ll_s0:
- si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
-def HEXAGON_M2_mpyu_nac_ll_s1:
- si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
-
-//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_acc_hh_s0:
- di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
-def HEXAGON_M2_mpyud_acc_hh_s1:
- di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
-def HEXAGON_M2_mpyud_acc_hl_s0:
- di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
-def HEXAGON_M2_mpyud_acc_hl_s1:
- di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
-def HEXAGON_M2_mpyud_acc_lh_s0:
- di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
-def HEXAGON_M2_mpyud_acc_lh_s1:
- di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
-def HEXAGON_M2_mpyud_acc_ll_s0:
- di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
-def HEXAGON_M2_mpyud_acc_ll_s1:
- di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
-
-//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
-def HEXAGON_M2_mpyud_nac_hh_s0:
- di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
-def HEXAGON_M2_mpyud_nac_hh_s1:
- di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
-def HEXAGON_M2_mpyud_nac_hl_s0:
- di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
-def HEXAGON_M2_mpyud_nac_hl_s1:
- di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
-def HEXAGON_M2_mpyud_nac_lh_s0:
- di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
-def HEXAGON_M2_mpyud_nac_lh_s1:
- di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
-def HEXAGON_M2_mpyud_nac_ll_s0:
- di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
-def HEXAGON_M2_mpyud_nac_ll_s1:
- di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
+def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
+def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
+def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
+def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
+def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
/********************************************************************
-* MTYPE/VB *
+* ALU64/ALU *
*********************************************************************/
+def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
+def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
+def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
+def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
+
+def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
+def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
+def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
-// MTYPE / VB / Vector reduce add unsigned bytes.
-def HEXAGON_A2_vraddub:
- di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
-def HEXAGON_A2_vraddub_acc:
- di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
+def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
+def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
-// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
-def HEXAGON_A2_vrsadub:
- di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
-def HEXAGON_A2_vrsadub_acc:
- di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
+def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
/********************************************************************
-* MTYPE/VH *
+* ALU64/VB *
*********************************************************************/
+// ALU64 - Vector add
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
+def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
+def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
+def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
+def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
+def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
+def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
+
+// ALU64 - Vector average
+def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
+def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
+def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
+def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
+def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
+def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
+def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
+
+def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
+def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
+def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
+def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
+def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
+
+// ALU64 - Vector negative average
+def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
+def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
+def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
+def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
+def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
+def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
+
+// ALU64 - Vector max
+def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
+def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
+def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
+def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
+def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
+
+// ALU64 - Vector min
+def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
+def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
+def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
+def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
+def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
+
+// ALU64 - Vector sub
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
+def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
+def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
+def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
+def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
+def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
+def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
+
+// ALU64 - Vector compare bytes
+def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
+def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
+def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 - Vector compare halfwords
+def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
+def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
+def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
+
+// ALU64 - Vector compare words
+def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
+def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
+def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
-// MTYPE / VH / Vector dual multiply.
-def HEXAGON_M2_vdmpys_s1:
- di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
-def HEXAGON_M2_vdmpys_s0:
- di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
-def HEXAGON_M2_vdmacs_s1:
- di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
-def HEXAGON_M2_vdmacs_s0:
- di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
-
-// MTYPE / VH / Vector dual multiply with round and pack.
-def HEXAGON_M2_vdmpyrs_s0:
- si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
-def HEXAGON_M2_vdmpyrs_s1:
- si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
-
-// MTYPE / VH / Vector multiply even halfwords.
-def HEXAGON_M2_vmpy2es_s1:
- di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
-def HEXAGON_M2_vmpy2es_s0:
- di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
-def HEXAGON_M2_vmac2es:
- di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
-def HEXAGON_M2_vmac2es_s1:
- di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
-def HEXAGON_M2_vmac2es_s0:
- di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
-
-// MTYPE / VH / Vector multiply halfwords.
-def HEXAGON_M2_vmpy2s_s0:
- di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
-def HEXAGON_M2_vmpy2s_s1:
- di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
-def HEXAGON_M2_vmac2:
- di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
-def HEXAGON_M2_vmac2s_s0:
- di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
-def HEXAGON_M2_vmac2s_s1:
- di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
-
-// MTYPE / VH / Vector multiply halfwords with round and pack.
-def HEXAGON_M2_vmpy2s_s0pack:
- si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
-def HEXAGON_M2_vmpy2s_s1pack:
- si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
-
-// MTYPE / VH / Vector reduce multiply halfwords.
-// Rxx32+=vrmpyh(Rss32,Rtt32)
-def HEXAGON_M2_vrmpy_s0:
- di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
-def HEXAGON_M2_vrmac_s0:
- di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
-
+// ALU64 / VB / Vector mux.
+def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs, Rt)
+def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
+def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
+
+// Complex multiply real or imaginary
+def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
+def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
+
+// Complex multiply
+def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
+def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
+def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
+def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
+def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
+
+// Rxx[+-]= mpy[u](Rs,Rt)
+def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
+def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
+def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
+def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
+def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
+def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
+def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
+def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
+def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
+def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
+def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
+def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
+def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
/********************************************************************
-* STYPE/ALU *
+* CR *
*********************************************************************/
+class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>;
+
+class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>;
+
+def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>;
+def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>;
+def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>;
+
+def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>;
+def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>;
+def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>;
+def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>;
+def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>;
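Predicate (qi) values cross the intrinsic boundary as plain i32, so these classes bracket the predicate instruction with C2_tfrrp (register to predicate) on the inputs and C2_tfrpr (predicate to register) on the output. Expanding one instantiation by hand, qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and> stands for:

    // Manual expansion of qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>:
    // transfer both i32 operands into predicate registers, AND them,
    // and transfer the resulting predicate back to an i32.
    def : Pat <(i32 (int_hexagon_C2_and IntRegs:$Rs, IntRegs:$Rt)),
               (i32 (C2_tfrpr (C2_and (C2_tfrrp IntRegs:$Rs),
                                      (C2_tfrrp IntRegs:$Rt))))>;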
+
+// Multiply 32x32 and use lower result
+def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
+def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
+def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
+
+// Subtract and accumulate
+def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
+
+// Add and accumulate
+def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
+def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
+def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
+def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
+
+// XOR and XOR with destination
+def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
+
+class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> :
+ Pat <(IntID IntRegs:$src1, IntRegs:$src2),
+ (OutputInst IntRegs:$src1, IntRegs:$src2)>;
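MType_R32_pat is the simplest shape used in this file: match the intrinsic on two 32-bit registers and emit the instruction with the operands unchanged. For instance, the M2_mpyi mapping below is shorthand for:

    // What MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi> expands to.
    def : Pat <(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
               (M2_mpyi IntRegs:$src1, IntRegs:$src2)>;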
-// STYPE / ALU / Absolute value.
-def HEXAGON_A2_abs:
- si_SInst_si <"abs", int_hexagon_A2_abs>;
-def HEXAGON_A2_absp:
- di_SInst_di <"abs", int_hexagon_A2_absp>;
-def HEXAGON_A2_abssat:
- si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+// Vector dual multiply with round and pack
-// STYPE / ALU / Negate.
-def HEXAGON_A2_negp:
- di_SInst_di <"neg", int_hexagon_A2_negp>;
-def HEXAGON_A2_negsat:
- si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>;
-// STYPE / ALU / Logical Not.
-def HEXAGON_A2_notp:
- di_SInst_di <"not", int_hexagon_A2_notp>;
+def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>;
-// STYPE / ALU / Sign extend word to doubleword.
-def HEXAGON_A2_sxtw:
- di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+// Vector multiply halfwords with round and pack
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>;
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>;
+
+// Multiply and use lower result
+def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>;
+def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>;
+
+// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
+def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>;
+def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>;
+def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>;
+
+// Complex multiply with round and pack
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>;
/********************************************************************
-* STYPE/BIT *
+* STYPE/ALU *
*********************************************************************/
+def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
+def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
+def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
-// STYPE / BIT / Count leading.
-def HEXAGON_S2_cl0:
- si_SInst_si <"cl0", int_hexagon_S2_cl0>;
-def HEXAGON_S2_cl0p:
- si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
-def HEXAGON_S2_cl1:
- si_SInst_si <"cl1", int_hexagon_S2_cl1>;
-def HEXAGON_S2_cl1p:
- si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
-def HEXAGON_S2_clb:
- si_SInst_si <"clb", int_hexagon_S2_clb>;
-def HEXAGON_S2_clbp:
- si_SInst_di <"clb", int_hexagon_S2_clbp>;
-def HEXAGON_S2_clbnorm:
- si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
-
-// STYPE / BIT / Count trailing.
-def HEXAGON_S2_ct0:
- si_SInst_si <"ct0", int_hexagon_S2_ct0>;
-def HEXAGON_S2_ct1:
- si_SInst_si <"ct1", int_hexagon_S2_ct1>;
-
-// STYPE / BIT / Compare bit mask.
-def Hexagon_C2_bitsclr:
- qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
-def Hexagon_C2_bitsclri:
- qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
-def Hexagon_C2_bitsset:
- qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
-
-// STYPE / BIT / Extract unsigned.
-// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
-def HEXAGON_S2_extractu:
- si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
-def HEXAGON_S2_extractu_rp:
- si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
-def HEXAGON_S2_extractup:
- di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
-def HEXAGON_S2_extractup_rp:
- di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
-
-// STYPE / BIT / Insert bitfield.
-def Hexagon_S2_insert:
- si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
-def Hexagon_S2_insert_rp:
- si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
-def Hexagon_S2_insertp:
- di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
-def Hexagon_S2_insertp_rp:
- di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
-
-// STYPE / BIT / Innterleave/deinterleave.
-def Hexagon_S2_interleave:
- di_SInst_di <"interleave", int_hexagon_S2_interleave>;
-def Hexagon_S2_deinterleave:
- di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
-
-// STYPE / BIT / Linear feedback-shift Iteration.
-def Hexagon_S2_lfsp:
- di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
-
-// STYPE / BIT / Bit reverse.
-def Hexagon_S2_brev:
- si_SInst_si <"brev", int_hexagon_S2_brev>;
-
-// STYPE / BIT / Set/Clear/Toggle Bit.
-def HEXAGON_S2_setbit_i:
- si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
-def HEXAGON_S2_togglebit_i:
- si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
-def HEXAGON_S2_clrbit_i:
- si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
-def HEXAGON_S2_setbit_r:
- si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
-def HEXAGON_S2_togglebit_r:
- si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
-def HEXAGON_S2_clrbit_r:
- si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
-
-// STYPE / BIT / Test Bit.
-def HEXAGON_S2_tstbit_i:
- qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
-def HEXAGON_S2_tstbit_r:
- qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+// Count leading/trailing
+def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
+def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
+def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
+def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
+def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
+def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
+def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
+def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
+def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
+
+// Compare bit mask
+def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
+def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
+def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
+
+// Vector shuffle
+def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
+def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
+def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
+def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
+
+// Vector truncate
+def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
+def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
+
+// Linear feedback-shift Iteration.
+def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
+
+// Vector splice
+def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
+def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
+
+// Shift by immediate and add
+def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
+
+// Extract bitfield
+def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
+def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
+def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
+def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
+
+// Insert bitfield
+def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2,
+ DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1),
+ (I64:$src2), (I64:$src3))),
+ (i64 (S2_insertp_rp (I64:$src1), (I64:$src2),
+ (I64:$src3)))>;
+
+def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1),
+ (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)),
+ (i64 (S2_insertp (I64:$src1), (I64:$src2),
+ u6ImmPred:$src3, u6ImmPred:$src4))>;
+
+
+// Interleave/deinterleave
+def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
+def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
+
+// Set/Clear/Toggle Bit
+def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
+def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
+def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
+
+def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
+def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
+def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
+
+// Test Bit
+def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
+def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
/********************************************************************
* STYPE/COMPLEX *
*********************************************************************/
+// Vector Complex conjugate
+def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-// STYPE / COMPLEX / Vector Complex conjugate.
-def HEXAGON_A2_vconj:
- di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
-
-// STYPE / COMPLEX / Vector Complex rotate.
-def HEXAGON_S2_vcrotate:
- di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
-
+// Vector Complex rotate
+def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
/********************************************************************
* STYPE/PERM *
*********************************************************************/
-// STYPE / PERM / Saturate.
-def HEXAGON_A2_sat:
- si_SInst_di <"sat", int_hexagon_A2_sat>;
-def HEXAGON_A2_satb:
- si_SInst_si <"satb", int_hexagon_A2_satb>;
-def HEXAGON_A2_sath:
- si_SInst_si <"sath", int_hexagon_A2_sath>;
-def HEXAGON_A2_satub:
- si_SInst_si <"satub", int_hexagon_A2_satub>;
-def HEXAGON_A2_satuh:
- si_SInst_si <"satuh", int_hexagon_A2_satuh>;
-
-// STYPE / PERM / Swizzle bytes.
-def HEXAGON_A2_swiz:
- si_SInst_si <"swiz", int_hexagon_A2_swiz>;
-
-// STYPE / PERM / Vector align.
-// Need custom lowering
-def HEXAGON_S2_valignib:
- di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
-def HEXAGON_S2_valignrb:
- di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
-
-// STYPE / PERM / Vector round and pack.
-def HEXAGON_S2_vrndpackwh:
- si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
-def HEXAGON_S2_vrndpackwhs:
- si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
-
-// STYPE / PERM / Vector saturate and pack.
-def HEXAGON_S2_svsathb:
- si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
-def HEXAGON_S2_vsathb:
- si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
-def HEXAGON_S2_svsathub:
- si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
-def HEXAGON_S2_vsathub:
- si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
-def HEXAGON_S2_vsatwh:
- si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
-def HEXAGON_S2_vsatwuh:
- si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
-
-// STYPE / PERM / Vector saturate without pack.
-def HEXAGON_S2_vsathb_nopack:
- di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
-def HEXAGON_S2_vsathub_nopack:
- di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
-def HEXAGON_S2_vsatwh_nopack:
- di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
-def HEXAGON_S2_vsatwuh_nopack:
- di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
-
-// STYPE / PERM / Vector shuffle.
-def HEXAGON_S2_shuffeb:
- di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
-def HEXAGON_S2_shuffeh:
- di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
-def HEXAGON_S2_shuffob:
- di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
-def HEXAGON_S2_shuffoh:
- di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
-
-// STYPE / PERM / Vector splat bytes.
-def HEXAGON_S2_vsplatrb:
- si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
-
-// STYPE / PERM / Vector splat halfwords.
-def HEXAGON_S2_vsplatrh:
- di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
-
-// STYPE / PERM / Vector splice.
-def Hexagon_S2_vsplicerb:
- di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
-def Hexagon_S2_vspliceib:
- di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
-
-// STYPE / PERM / Sign extend.
-def HEXAGON_S2_vsxtbh:
- di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
-def HEXAGON_S2_vsxthw:
- di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
-
-// STYPE / PERM / Truncate.
-def HEXAGON_S2_vtrunehb:
- si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
-def HEXAGON_S2_vtrunohb:
- si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
-def HEXAGON_S2_vtrunewh:
- di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
-def HEXAGON_S2_vtrunowh:
- di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
-
-// STYPE / PERM / Zero extend.
-def HEXAGON_S2_vzxtbh:
- di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
-def HEXAGON_S2_vzxthw:
- di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
-
+// Vector saturate without pack
+def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
+def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
+def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
+def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
/********************************************************************
* STYPE/PRED *
*********************************************************************/
-// STYPE / PRED / Mask generate from predicate.
-def HEXAGON_C2_mask:
- di_SInst_qi <"mask", int_hexagon_C2_mask>;
-
-// STYPE / PRED / Predicate transfer.
-def HEXAGON_C2_tfrpr:
- si_SInst_qi <"", int_hexagon_C2_tfrpr>;
-def HEXAGON_C2_tfrrp:
- qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+// Predicate transfer
+def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
+def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
-// STYPE / PRED / Viterbi pack even and odd predicate bits.
-def HEXAGON_C2_vitpack:
- si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+// Mask generate from predicate
+def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))),
+ (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>;
+// Viterbi pack even and odd predicate bits
+def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)),
+ (C2_tfrrp (I32:$Rt))))>;
/********************************************************************
* STYPE/SHIFT *
*********************************************************************/
-// STYPE / SHIFT / Shift by immediate.
-def HEXAGON_S2_asl_i_r:
- si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
-def HEXAGON_S2_asr_i_r:
- si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
-def HEXAGON_S2_lsr_i_r:
- si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
-def HEXAGON_S2_asl_i_p:
- di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
-def HEXAGON_S2_asr_i_p:
- di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
-def HEXAGON_S2_lsr_i_p:
- di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
-
-// STYPE / SHIFT / Shift by immediate and accumulate.
-def HEXAGON_S2_asl_i_r_acc:
- si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
-def HEXAGON_S2_asr_i_r_acc:
- si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
-def HEXAGON_S2_lsr_i_r_acc:
- si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
-def HEXAGON_S2_asl_i_r_nac:
- si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
-def HEXAGON_S2_asr_i_r_nac:
- si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
-def HEXAGON_S2_lsr_i_r_nac:
- si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
-def HEXAGON_S2_asl_i_p_acc:
- di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
-def HEXAGON_S2_asr_i_p_acc:
- di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
-def HEXAGON_S2_lsr_i_p_acc:
- di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
-def HEXAGON_S2_asl_i_p_nac:
- di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
-def HEXAGON_S2_asr_i_p_nac:
- di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
-def HEXAGON_S2_lsr_i_p_nac:
- di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
-
-// STYPE / SHIFT / Shift by immediate and add.
-def HEXAGON_S2_addasl_rrri:
- si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
-
-// STYPE / SHIFT / Shift by immediate and logical.
-def HEXAGON_S2_asl_i_r_and:
- si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
-def HEXAGON_S2_asr_i_r_and:
- si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
-def HEXAGON_S2_lsr_i_r_and:
- si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
-
-def HEXAGON_S2_asl_i_r_xacc:
- si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
-def HEXAGON_S2_lsr_i_r_xacc:
- si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
-
-def HEXAGON_S2_asl_i_r_or:
- si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
-def HEXAGON_S2_asr_i_r_or:
- si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
-def HEXAGON_S2_lsr_i_r_or:
- si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
-
-def HEXAGON_S2_asl_i_p_and:
- di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
-def HEXAGON_S2_asr_i_p_and:
- di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
-def HEXAGON_S2_lsr_i_p_and:
- di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
-
-def HEXAGON_S2_asl_i_p_xacc:
- di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
-def HEXAGON_S2_lsr_i_p_xacc:
- di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
-
-def HEXAGON_S2_asl_i_p_or:
- di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
-def HEXAGON_S2_asr_i_p_or:
- di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
-def HEXAGON_S2_lsr_i_p_or:
- di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
-
-// STYPE / SHIFT / Shift right by immediate with rounding.
-def HEXAGON_S2_asr_i_r_rnd:
- si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
-def HEXAGON_S2_asr_i_r_rnd_goodsyntax:
- si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
-
-// STYPE / SHIFT / Shift left by immediate with saturation.
-def HEXAGON_S2_asl_i_r_sat:
- si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
-
-// STYPE / SHIFT / Shift by register.
-def HEXAGON_S2_asl_r_r:
- si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
-def HEXAGON_S2_asr_r_r:
- si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
-def HEXAGON_S2_lsl_r_r:
- si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
-def HEXAGON_S2_lsr_r_r:
- si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
-def HEXAGON_S2_asl_r_p:
- di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
-def HEXAGON_S2_asr_r_p:
- di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
-def HEXAGON_S2_lsl_r_p:
- di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
-def HEXAGON_S2_lsr_r_p:
- di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
-
-// STYPE / SHIFT / Shift by register and accumulate.
-def HEXAGON_S2_asl_r_r_acc:
- si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
-def HEXAGON_S2_asr_r_r_acc:
- si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
-def HEXAGON_S2_lsl_r_r_acc:
- si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
-def HEXAGON_S2_lsr_r_r_acc:
- si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
-def HEXAGON_S2_asl_r_p_acc:
- di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
-def HEXAGON_S2_asr_r_p_acc:
- di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
-def HEXAGON_S2_lsl_r_p_acc:
- di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
-def HEXAGON_S2_lsr_r_p_acc:
- di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
-
-def HEXAGON_S2_asl_r_r_nac:
- si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
-def HEXAGON_S2_asr_r_r_nac:
- si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
-def HEXAGON_S2_lsl_r_r_nac:
- si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
-def HEXAGON_S2_lsr_r_r_nac:
- si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
-def HEXAGON_S2_asl_r_p_nac:
- di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
-def HEXAGON_S2_asr_r_p_nac:
- di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
-def HEXAGON_S2_lsl_r_p_nac:
- di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
-def HEXAGON_S2_lsr_r_p_nac:
- di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
-
-// STYPE / SHIFT / Shift by register and logical.
-def HEXAGON_S2_asl_r_r_and:
- si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
-def HEXAGON_S2_asr_r_r_and:
- si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
-def HEXAGON_S2_lsl_r_r_and:
- si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
-def HEXAGON_S2_lsr_r_r_and:
- si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
-
-def HEXAGON_S2_asl_r_r_or:
- si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
-def HEXAGON_S2_asr_r_r_or:
- si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
-def HEXAGON_S2_lsl_r_r_or:
- si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
-def HEXAGON_S2_lsr_r_r_or:
- si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
-
-def HEXAGON_S2_asl_r_p_and:
- di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
-def HEXAGON_S2_asr_r_p_and:
- di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
-def HEXAGON_S2_lsl_r_p_and:
- di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
-def HEXAGON_S2_lsr_r_p_and:
- di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
-
-def HEXAGON_S2_asl_r_p_or:
- di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
-def HEXAGON_S2_asr_r_p_or:
- di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
-def HEXAGON_S2_lsl_r_p_or:
- di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
-def HEXAGON_S2_lsr_r_p_or:
- di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
-
-// STYPE / SHIFT / Shift by register with saturation.
-def HEXAGON_S2_asl_r_r_sat:
- si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
-def HEXAGON_S2_asr_r_r_sat:
- si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
-
-// STYPE / SHIFT / Table Index.
-def Hexagon_S2_tableidxb_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
-def Hexagon_S2_tableidxd_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
-def Hexagon_S2_tableidxh_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
-def Hexagon_S2_tableidxw_goodsyntax:
- si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
+def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
+def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
+
+def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
+def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
+def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
+def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
+
+def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
+def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
+def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
+def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
+
+def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
+def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
+
+def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
+def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
+def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
+def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
+def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
+def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
+
+// Vector saturate and pack
+def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
+def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
+def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
+def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
+def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
+def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
+
+def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
+def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
+def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
+def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
+def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
+def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
+
+def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
+def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
+def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
+
+def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
+
+def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
+def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
+def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
+def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
+def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
+
+// Vector arithmetic shift right by immediate with truncate and pack.
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
+
+def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
+def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
+def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
+def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
+def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// Shift left by immediate with saturation.
+def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
+//===----------------------------------------------------------------------===//
+// Template pattern to map the tableidx[bhwd] intrinsics to the :raw instructions.
+//===----------------------------------------------------------------------===//
+class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
+ SDNodeXForm XformImm>
+ : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
+ (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
+ (XformImm u5ImmPred:$src4))>;
+
+
+// Table index: extract and insert bits.
+// Map to the real hardware instructions after subtracting appropriate
+// values from the 4th input operand. Please note that subtraction is not
+// needed for int_hexagon_S2_tableidxb_goodsyntax.
+
+def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4),
+ (S2_tableidxb IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
+ DEC_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
+ DEC2_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
+ DEC3_CONST_SIGNED>;
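Concretely, only the width operand changes: per the comment above, the halfword, word and doubleword forms subtract 1, 2 and 3 from the fourth operand before encoding, while the byte form needs no adjustment. Expanding the halfword mapping by hand gives:

    // Manual expansion of the tableidxh mapping: same operands, with the
    // width immediate decremented by DEC_CONST_SIGNED before encoding.
    def : Pat <(int_hexagon_S2_tableidxh_goodsyntax IntRegs:$src1,
                IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
               (S2_tableidxh IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
                (DEC_CONST_SIGNED u5ImmPred:$src4))>;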
/********************************************************************
* STYPE/VH *
*********************************************************************/
-// STYPE / VH / Vector absolute value halfwords.
-// Rdd64=vabsh(Rss64)
-def HEXAGON_A2_vabsh:
- di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
-def HEXAGON_A2_vabshsat:
- di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
-
-// STYPE / VH / Vector shift halfwords by immediate.
-// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vh:
- di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
-def HEXAGON_S2_asr_i_vh:
- di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
-def HEXAGON_S2_lsr_i_vh:
- di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
-
-// STYPE / VH / Vector shift halfwords by register.
-// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vh:
- di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
-def HEXAGON_S2_asr_r_vh:
- di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
-def HEXAGON_S2_lsl_r_vh:
- di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
-def HEXAGON_S2_lsr_r_vh:
- di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
+def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
+// Vector shift halfwords by immediate
+// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
+def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
+def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
+def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
+
+// Vector shift halfwords by register
+// Rdd64=[vaslh|vasrh|vlslh|vlsrh](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
+def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
+def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
+def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
/********************************************************************
* STYPE/VW *
*********************************************************************/
-// STYPE / VW / Vector absolute value words.
-def HEXAGON_A2_vabsw:
- di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
-def HEXAGON_A2_vabswsat:
- di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
-
-// STYPE / VW / Vector shift words by immediate.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vw:
- di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
-def HEXAGON_S2_asr_i_vw:
- di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
-def HEXAGON_S2_lsr_i_vw:
- di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
-
-// STYPE / VW / Vector shift words by register.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vw:
- di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
-def HEXAGON_S2_asr_r_vw:
- di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
-def HEXAGON_S2_lsl_r_vw:
- di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
-def HEXAGON_S2_lsr_r_vw:
- di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
-
-// STYPE / VW / Vector shift words with truncate and pack.
-def HEXAGON_S2_asr_r_svw_trun:
- si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
-def HEXAGON_S2_asr_i_svw_trun:
- si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
-
-// LD / Circular loads.
-def HEXAGON_circ_ldd:
- di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+// Vector absolute value words with and without saturation
+def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
+def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
+
+// Vector shift words by immediate.
+// Rdd64=[vasrw|vlsrw|vaslw](Rss64,u5)
+def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
+def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
+def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
+
+// Vector shift words by register.
+// Rdd64=[vasrw|vlsrw|vaslw|vlslw](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
+def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
+def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
+def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
+
+// Vector shift words with truncate and pack
+
+def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
+
+def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
+def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
+
+def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>;
+def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
+ (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
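+
+// The locked stores produce a predicate register, while the intrinsics
+// return an i32, so C2_tfrpr transfers the predicate into a GPR. The first
+// pattern selects roughly as (illustrative only):
+//   memw_locked(r0, p0) = r1   // S2_storew_locked sets p0
+//   r0 = p0                    // C2_tfrpr moves p0 into the result GPR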
+
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
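+
+// For reference, each T_stb_pat/T_stc_pat instantiation above unfolds into
+// a plain Pat; e.g. the circ_stw entry is equivalent to (sketch):
+//   def : Pat<(int_hexagon_circ_stw I32:$Rs, I32:$Rt, I32:$Ru,
+//              s4_2ImmPred:$s),
+//             (S2_storeri_pci_pseudo I32:$Rs, I32:$Rt, I32:$Ru,
+//              s4_2ImmPred:$s)>;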
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index df89378..4c28b28 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -14,8 +14,8 @@
def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(i64
(A2_combinew
- (HEXAGON_M2_maci
- (HEXAGON_M2_maci
+ (M2_maci
+ (M2_maci
(i32
(EXTRACT_SUBREG
(i64
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
index 2a54e62..6152cb0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -11,40 +11,17 @@
//
//===----------------------------------------------------------------------===//
-
-
-
-// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def Hexagon_M2_vrcmpys_s1:
- di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
-def Hexagon_M2_vrcmpys_acc_s1:
- di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
-def Hexagon_M2_vrcmpys_s1rp:
- si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
-
-
-
-
-/********************************************************************
-* MTYPE/VB *
-*********************************************************************/
-
-// MTYPE / VB / Vector reduce add unsigned bytes.
-def Hexagon_M2_vradduh:
- si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
-
-
-/********************************************************************
-* ALU64/ALU *
-*********************************************************************/
-
-// ALU64 / ALU / Add.
-def Hexagon_A2_addsp:
- di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
-def Hexagon_A2_addpsat:
- di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
-
-def Hexagon_A2_maxp:
- di_ALU64_didi <"max", int_hexagon_A2_maxp>;
-def Hexagon_A2_maxup:
- di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
+// Vector reduce complex multiply real or imaginary
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 77b148b..c80a188 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -12,359 +12,307 @@
// 80-V9418-12 Rev. A
// June 15, 2010
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
+
+// Vector reduce add halfwords
+def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>;
+
+def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>;
+def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+
+class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> :
+ Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2),
+ (MI (i64 DoubleRegs:$src1), immPred:$src2)>;
+
+def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>;
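+
+// Each vcmpImm_pat above is shorthand for a plain Pat pairing an intrinsic
+// with its instruction; e.g. the first entry expands to (sketch):
+//   def : Pat<(int_hexagon_A4_vcmpbeqi (i64 DoubleRegs:$src1),
+//              u8ImmPred:$src2),
+//             (A4_vcmpbeqi (i64 DoubleRegs:$src1), u8ImmPred:$src2)>;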
+
+def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>;
+
+def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+// Multiply 32x32 and use upper result
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-//
-// ALU 32 types.
-//
-
-class si_ALU32_sisi_not<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class di_ALU32_s8si<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-class di_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
- : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class si_ALU32_sis8<string opc, Intrinsic IntID>
- : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-
-//
-// SInst Classes.
-//
-class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, !$src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, $src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, or($src2, !$src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, or($src2, $src3)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_si_addsis6<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, add($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_SInst_si_subs6si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, sub(#$src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
- IntRegs:$src3))]>;
-
-class di_ALU64_didi_neg<string opc, Intrinsic IntID>
- : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2),
- !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2))],
- "$dst2 = $dst">;
-
-class si_MInst_sisisi_and<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
- !strconcat("$dst = ", !strconcat(opc ,
- "($src1, and($src2, #$src3))")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_sisis10_or<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- imm:$src3))]>;
-
-class si_MInst_sisisi_or<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
- IntRegs:$src3))]>;
-
-class si_SInst_siu5_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
/********************************************************************
* ALU32/ALU *
*********************************************************************/
// ALU32 / ALU / Logical Operations.
-def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
-def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
-
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
/********************************************************************
* ALU32/PERM *
*********************************************************************/
-// ALU32 / PERM / Combine Words Into Doublewords.
-def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
-def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
-
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
/********************************************************************
* ALU32/PRED *
*********************************************************************/
-// ALU32 / PRED / Conditional Shift Halfword.
-// ALU32 / PRED / Conditional Sign Extend.
-// ALU32 / PRED / Conditional Zero Extend.
-// ALU32 / PRED / Compare.
-def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
-def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
-def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+// Compare
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
-def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>;
-def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>;
-def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>;
-
-// ALU32 / PRED / cmpare To General Register.
-def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
-def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
-def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
-def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
/********************************************************************
* CR *
*********************************************************************/
-// CR / Corner Detection Acceleration.
-def Hexagon_C4_fastcorner9:
- qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
-def Hexagon_C4_fastcorner9_not:
- qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
-
// CR / Logical Operations On Predicates.
-def Hexagon_C4_and_andn:
- qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
-def Hexagon_C4_and_and:
- qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
-def Hexagon_C4_and_orn:
- qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
-def Hexagon_C4_and_or:
- qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
-def Hexagon_C4_or_andn:
- qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
-def Hexagon_C4_or_and:
- qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
-def Hexagon_C4_or_orn:
- qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
-def Hexagon_C4_or_or:
- qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),
+ (C2_tfrrp IntRegs:$Rt),
+ (C2_tfrrp IntRegs:$Ru))))>;
+
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>;
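+
+// The C4_* predicate ops read and write PredRegs, but the intrinsics
+// traffic in i32, so each source is moved through C2_tfrrp and the result
+// back through C2_tfrpr. For int_hexagon_C4_and_and this selects roughly as:
+//   p0 = r0; p1 = r1; p2 = r2    // C2_tfrrp, one per source
+//   p0 = and(p0, and(p1, p2))    // C4_and_and
+//   r0 = p0                      // C2_tfrpr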
/********************************************************************
* XTYPE/ALU *
*********************************************************************/
-// XTYPE / ALU / Add And Accumulate.
-def Hexagon_S4_addaddi:
- si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
-def Hexagon_S4_subaddi:
- si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+// Add And Accumulate.
-// XTYPE / ALU / Logical Doublewords.
-def Hexagon_S4_andnp:
- di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
-def Hexagon_S4_ornp:
- di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-// XTYPE / ALU / Logical-logical Doublewords.
-def Hexagon_M4_xor_xacc:
- di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
// XTYPE / ALU / Logical-logical Words.
-def HEXAGON_M4_and_and:
- si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
-def HEXAGON_M4_and_or:
- si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
-def HEXAGON_M4_and_xor:
- si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
-def HEXAGON_M4_and_andn:
- si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
-def HEXAGON_M4_xor_and:
- si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
-def HEXAGON_M4_xor_or:
- si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
-def HEXAGON_M4_xor_andn:
- si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
-def HEXAGON_M4_or_and:
- si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
-def HEXAGON_M4_or_or:
- si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
-def HEXAGON_M4_or_xor:
- si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
-def HEXAGON_M4_or_andn:
- si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
-def HEXAGON_S4_or_andix:
- si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
-def HEXAGON_S4_or_andi:
- si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
-def HEXAGON_S4_or_ori:
- si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
-
-// XTYPE / ALU / Modulo wrap.
-def HEXAGON_A4_modwrapu:
- si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
-
-// XTYPE / ALU / Round.
-def HEXAGON_A4_cround_ri:
- si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
-def HEXAGON_A4_cround_rr:
- si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
-def HEXAGON_A4_round_ri:
- si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
-def HEXAGON_A4_round_rr:
- si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
-def HEXAGON_A4_round_ri_sat:
- si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
-def HEXAGON_A4_round_rr_sat:
- si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
-
-// XTYPE / ALU / Vector reduce add unsigned halfwords.
-// XTYPE / ALU / Vector add bytes.
-// XTYPE / ALU / Vector conditional negate.
-// XTYPE / ALU / Vector maximum bytes.
-// XTYPE / ALU / Vector reduce maximum halfwords.
-// XTYPE / ALU / Vector reduce maximum words.
-// XTYPE / ALU / Vector minimum bytes.
-// XTYPE / ALU / Vector reduce minimum halfwords.
-// XTYPE / ALU / Vector reduce minimum words.
-// XTYPE / ALU / Vector subtract bytes.
-
-
-/********************************************************************
-* XTYPE/BIT *
-*********************************************************************/
-
-// XTYPE / BIT / Count leading.
-// XTYPE / BIT / Count trailing.
-// XTYPE / BIT / Extract bitfield.
-// XTYPE / BIT / Masked parity.
-// XTYPE / BIT / Bit reverse.
-// XTYPE / BIT / Split bitfield.
-
-
-/********************************************************************
-* XTYPE/COMPLEX *
-*********************************************************************/
-
-// XTYPE / COMPLEX / Complex add/sub halfwords.
-// XTYPE / COMPLEX / Complex add/sub words.
-// XTYPE / COMPLEX / Complex multiply 32x16.
-// XTYPE / COMPLEX / Vector reduce complex rotate.
-
-
-/********************************************************************
-* XTYPE/MPY *
-*********************************************************************/
-
-// XTYPE / COMPLEX / Complex add/sub halfwords.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
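+
+// Rounding semantics (a sketch; consult the Hexagon PRM for the exact
+// definitions): "round" rounds half away from zero, "cround" rounds ties
+// to even (convergent), e.g. for 5 >> 1 = 2.5: round(5,#1) -> 3 while
+// cround(5,#1) -> 2.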
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index 1d44b52..60e6b1e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -1,395 +1,111 @@
-class sf_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class si_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class sf_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class sf_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class sf_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class si_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class df_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class di_SInst_sf<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class df_SInst_si<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
-
-class df_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class di_SInst_df<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-
-class df_SInst_di<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- !strconcat("$dst = ", !strconcat(opc , "($src1)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>;
-
-class sf_MInst_sfsf<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class df_MInst_dfdf<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class qi_ALU64_dfdf<string opc, Intrinsic IntID>
- : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
-
-class qi_ALU64_dfu5<string opc, Intrinsic IntID>
- : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-
-class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-
-class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2, $src3):scale")),
- [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
- IntRegs:$src2, IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
- IntRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set IntRegs:$dst, (IntID IntRegs:$src1,
- IntRegs:$src2, IntRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-
-class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2, IntRegs:$src3),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2, $src3):scale")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
- DoubleRegs:$src2, IntRegs:$src3))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst += ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID>
- : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- DoubleRegs:$dst2),
- !strconcat("$dst -= ", !strconcat(opc ,
- "($src1, $src2):lib")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
- DoubleRegs:$src2, DoubleRegs:$dst2))],
- "$dst2 = $dst">;
-
-class qi_SInst_sfsf<string opc, Intrinsic IntID>
- : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
-
-class qi_SInst_sfu5<string opc, Intrinsic IntID>
- : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
-
-class sf_ALU64_u10_pos<string opc, Intrinsic IntID>
- : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class sf_ALU64_u10_neg<string opc, Intrinsic IntID>
- : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
- [(set IntRegs:$dst, (IntID imm:$src1))]>;
-
-class df_ALU64_u10_pos<string opc, Intrinsic IntID>
- : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class df_ALU64_u10_neg<string opc, Intrinsic IntID>
- : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1),
- !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")),
- [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
-
-class di_MInst_diu6<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class di_MInst_diu4_rnd<string opc, Intrinsic IntID>
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID>
- : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-class si_SInst_diu4_sat<string opc, Intrinsic IntID>
- : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2),
- !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
- [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
-
-
-def HEXAGON_C4_fastcorner9:
- qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>;
-def HEXAGON_C4_fastcorner9_not:
- qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>;
-def HEXAGON_M5_vrmpybuu:
- di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>;
-def HEXAGON_M5_vrmacbuu:
- di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>;
-def HEXAGON_M5_vrmpybsu:
- di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>;
-def HEXAGON_M5_vrmacbsu:
- di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>;
-def HEXAGON_M5_vmpybuu:
- di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>;
-def HEXAGON_M5_vmpybsu:
- di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>;
-def HEXAGON_M5_vmacbuu:
- di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>;
-def HEXAGON_M5_vmacbsu:
- di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>;
-def HEXAGON_M5_vdmpybsu:
- di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>;
-def HEXAGON_M5_vdmacbsu:
- di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>;
-def HEXAGON_A5_vaddhubs:
- si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>;
-def HEXAGON_S5_popcountp:
- si_SInst_di <"popcount", int_hexagon_S5_popcountp>;
-def HEXAGON_S5_asrhub_rnd_sat_goodsyntax:
- si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
-def HEXAGON_S5_asrhub_sat:
- si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>;
-def HEXAGON_S5_vasrhrnd_goodsyntax:
- di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>;
-def HEXAGON_S2_asr_i_p_rnd:
- di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>;
-def HEXAGON_S2_asr_i_p_rnd_goodsyntax:
- di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
-def HEXAGON_F2_sfadd:
- sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>;
-def HEXAGON_F2_sfsub:
- sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>;
-def HEXAGON_F2_sfmpy:
- sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>;
-def HEXAGON_F2_sffma:
- sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>;
-def HEXAGON_F2_sffma_sc:
- sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>;
-def HEXAGON_F2_sffms:
- sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>;
-def HEXAGON_F2_sffma_lib:
- sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>;
-def HEXAGON_F2_sffms_lib:
- sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>;
-def HEXAGON_F2_sfcmpeq:
- qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>;
-def HEXAGON_F2_sfcmpgt:
- qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>;
-def HEXAGON_F2_sfcmpge:
- qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>;
-def HEXAGON_F2_sfcmpuo:
- qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>;
-def HEXAGON_F2_sfmax:
- sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>;
-def HEXAGON_F2_sfmin:
- sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>;
-def HEXAGON_F2_sfclass:
- qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>;
-def HEXAGON_F2_sfimm_p:
- sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>;
-def HEXAGON_F2_sfimm_n:
- sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>;
-def HEXAGON_F2_sffixupn:
- sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>;
-def HEXAGON_F2_sffixupd:
- sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>;
-def HEXAGON_F2_sffixupr:
- sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>;
-def HEXAGON_F2_dfadd:
- df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>;
-def HEXAGON_F2_dfsub:
- df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>;
-def HEXAGON_F2_dfmpy:
- df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>;
-def HEXAGON_F2_dffma:
- df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>;
-def HEXAGON_F2_dffms:
- df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>;
-def HEXAGON_F2_dffma_lib:
- df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>;
-def HEXAGON_F2_dffms_lib:
- df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>;
-def HEXAGON_F2_dffma_sc:
- df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>;
-def HEXAGON_F2_dfmax:
- df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>;
-def HEXAGON_F2_dfmin:
- df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>;
-def HEXAGON_F2_dfcmpeq:
- qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>;
-def HEXAGON_F2_dfcmpgt:
- qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>;
-def HEXAGON_F2_dfcmpge:
- qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>;
-def HEXAGON_F2_dfcmpuo:
- qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>;
-def HEXAGON_F2_dfclass:
- qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>;
-def HEXAGON_F2_dfimm_p:
- df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>;
-def HEXAGON_F2_dfimm_n:
- df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>;
-def HEXAGON_F2_dffixupn:
- df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>;
-def HEXAGON_F2_dffixupd:
- df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>;
-def HEXAGON_F2_dffixupr:
- df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>;
-def HEXAGON_F2_conv_sf2df:
- df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>;
-def HEXAGON_F2_conv_df2sf:
- sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>;
-def HEXAGON_F2_conv_uw2sf:
- sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>;
-def HEXAGON_F2_conv_uw2df:
- df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>;
-def HEXAGON_F2_conv_w2sf:
- sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>;
-def HEXAGON_F2_conv_w2df:
- df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>;
-def HEXAGON_F2_conv_ud2sf:
- sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>;
-def HEXAGON_F2_conv_ud2df:
- df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>;
-def HEXAGON_F2_conv_d2sf:
- sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>;
-def HEXAGON_F2_conv_d2df:
- df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>;
-def HEXAGON_F2_conv_sf2uw:
- si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>;
-def HEXAGON_F2_conv_sf2w:
- si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>;
-def HEXAGON_F2_conv_sf2ud:
- di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>;
-def HEXAGON_F2_conv_sf2d:
- di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>;
-def HEXAGON_F2_conv_df2uw:
- si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>;
-def HEXAGON_F2_conv_df2w:
- si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>;
-def HEXAGON_F2_conv_df2ud:
- di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>;
-def HEXAGON_F2_conv_df2d:
- di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>;
-def HEXAGON_F2_conv_sf2uw_chop:
- si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>;
-def HEXAGON_F2_conv_sf2w_chop:
- si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>;
-def HEXAGON_F2_conv_sf2ud_chop:
- di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>;
-def HEXAGON_F2_conv_sf2d_chop:
- di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>;
-def HEXAGON_F2_conv_df2uw_chop:
- si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>;
-def HEXAGON_F2_conv_df2w_chop:
- si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>;
-def HEXAGON_F2_conv_df2ud_chop:
- di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>;
-def HEXAGON_F2_conv_df2d_chop:
- di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>;
+//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//Rdd[+]=vrmpybsu(Rss,Rtt)
+//Rdd[+]=vrmpybuu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
+def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
+
+def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>;
+
+def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>;
+def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>;
+//Rxx+=vdmpybsu(Rss,Rtt):sat
+def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>;
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>;
+def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>;
+
+// Rxx+=vmpyb[s]u(Rs,Rt)
+def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>;
+def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
+
+// Rd=vaddhub(Rss,Rtt):sat
+def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
+}
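+
+// The "let Predicates = [HasV5T] in { ... }" block above gates those
+// patterns on the V5 subtarget feature. The same effect can be had per
+// definition, e.g. (sketch):
+//   let Predicates = [HasV5T] in
+//   def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;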
+
+def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
+def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
+def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>;
+def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>;
+def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>;
+
+def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>;
+def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>;
+def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>;
+
+def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>;
+def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>;
+
+def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>;
+def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>;
+
+def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>;
+def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+
+def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax,
+ int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+
+def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>;
+
+def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>;
+def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>;
+def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>;
+def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>;
+def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>;
+
+// Compare floating-point value
+def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>;
+def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>;
+def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>;
+def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>;
+
+def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>;
+def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>;
+def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>;
+def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>;
+
+// Create floating-point value
+def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>;
+def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>;
+def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>;
+def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>;
+
+def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>;
+def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>;
+def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>;
+def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>;
+def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>;
+def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>;
+def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>;
+def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>;
+def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>;
+def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>;
+def : T_P_pat <F2_conv_d2sf, int_hexagon_F2_conv_d2sf>;
+def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>;
+def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>;
+def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>;
+def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>;
+def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>;
+def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>;
+def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>;
+def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>;
+def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>;
+def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>;
+def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>;
+def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>;
+def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>;
+def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>;
+def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>;
+def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>;
+def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>;
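+
+// Note on the conversions above: the *_chop variants truncate toward zero
+// (C-style cast semantics), while the plain variants round according to
+// the current rounding mode; e.g. convert_sf2w on 1.7f yields 1 with :chop
+// and 2 under round-to-nearest (a sketch; see the Hexagon PRM for details).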
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 3f5d6d8..535d1f9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,7 +15,6 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
@@ -35,11 +34,11 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
MC);
- return (MCOperand::CreateExpr(ME));
+ return (MCOperand::createExpr(ME));
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
+void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
@@ -54,20 +53,20 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
- MCO = MCOperand::CreateReg(MO.getReg());
+ MCO = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_FPImmediate: {
APFloat Val = MO.getFPImm()->getValueAPF();
// FP immediates are used only when setting GPRs, so they may be dealt
// with like regular immediates from this point on.
- MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData());
+ MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData());
break;
}
case MachineOperand::MO_Immediate:
- MCO = MCOperand::CreateImm(MO.getImm());
+ MCO = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCO = MCOperand::CreateExpr
+ MCO = MCOperand::createExpr
(MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
AP.OutContext));
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index cb18df6..7672358 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -27,6 +27,7 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
unsigned SRetReturnReg;
+ unsigned StackAlignBaseReg;
std::vector<MachineInstr*> AllocaAdjustInsts;
int VarArgsFrameIndex;
bool HasClobberLR;
@@ -35,10 +36,11 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
public:
- HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
- HasEHReturn(false) {}
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0),
+ HasClobberLR(0), HasEHReturn(false) {}
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ StackAlignBaseReg(0),
HasClobberLR(0),
HasEHReturn(false) {}
@@ -74,6 +76,9 @@ public:
bool hasEHReturn() const { return HasEHReturn; };
void setHasEHReturn(bool H = true) { HasEHReturn = H; };
+
+ void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
+ unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
};
} // End llvm namespace
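StackAlignBaseReg records the virtual register that acts as the aligned-stack base (the "AP" in the eliminateFrameIndex rewrite further down). Target code reaches it through MachineFunction::getInfo; a minimal usage sketch:

// Hedged sketch: stashing and reading back the alignment base vreg.
#include "HexagonMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static void noteAlignBase(MachineFunction &MF, unsigned BaseVReg) {
  auto *HMFI = MF.getInfo<HexagonMachineFunctionInfo>();
  HMFI->setStackAlignBaseVReg(BaseVReg);
  unsigned R = HMFI->getStackAlignBaseVReg();  // later consumers read this
  (void)R;
}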
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 97c626f..35f732c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -205,20 +205,17 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
delete Top.HazardRec;
delete Bot.HazardRec;
- Top.HazardRec =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
- Bot.HazardRec =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
- Itin, DAG);
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
delete Top.ResourceModel;
delete Bot.ResourceModel;
- Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
- Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel());
+ Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
index 1e023c3..6034344 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -54,11 +54,9 @@ class VLIWResourceModel {
unsigned TotalPackets;
public:
-VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
- SchedModel(SM), TotalPackets(0) {
- ResourcesModel =
- TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState(
- *TM.getSubtargetImpl());
+ VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM)
+ : SchedModel(SM), TotalPackets(0) {
+ ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI);
// This hard requirement could be relaxed,
// but for now do not let it proceed.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 7edba92..81af4db 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::TFR_condset_ii ||
- MII->getOpcode() == Hexagon::TFR_condset_ri ||
- MII->getOpcode() == Hexagon::TFR_condset_ir ||
- MII->getOpcode() == Hexagon::LDriw_pred ||
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
MII->getOpcode() == Hexagon::STriw_pred)
return false;
}
@@ -299,48 +297,48 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
switch (MI->getOpcode()) {
case Hexagon::C2_cmpeq:
- return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
- : Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeq_t_jumpnv_t
+ : Hexagon::J4_cmpeq_t_jumpnv_nt;
case Hexagon::C2_cmpeqi: {
if (reg >= 0)
- return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
- : Hexagon::CMPEQri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t
+ : Hexagon::J4_cmpeqi_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4
- : Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t
+ : Hexagon::J4_cmpeqn1_t_jumpnv_nt;
}
case Hexagon::C2_cmpgt: {
if (secondRegNewified)
- return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
- : Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmplt_t_jumpnv_t
+ : Hexagon::J4_cmplt_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4
- : Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgt_t_jumpnv_t
+ : Hexagon::J4_cmpgt_t_jumpnv_nt;
}
case Hexagon::C2_cmpgti: {
if (reg >= 0)
- return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
- : Hexagon::CMPGTri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgti_t_jumpnv_t
+ : Hexagon::J4_cmpgti_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4
- : Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t
+ : Hexagon::J4_cmpgtn1_t_jumpnv_nt;
}
case Hexagon::C2_cmpgtu: {
if (secondRegNewified)
- return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
- : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpltu_t_jumpnv_t
+ : Hexagon::J4_cmpltu_t_jumpnv_nt;
else
- return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4
- : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t
+ : Hexagon::J4_cmpgtu_t_jumpnv_nt;
}
case Hexagon::C2_cmpgtui:
- return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
- : Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
+ return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t
+ : Hexagon::J4_cmpgtui_t_jumpnv_nt;
default:
llvm_unreachable("Could not find matching New Value Jump instruction.");
@@ -355,19 +353,15 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: "
<< MF.getName() << "\n");
-#if 0
- // for now disable this, if we move NewValueJump before register
- // allocation we need this information.
- LiveVariables &LVs = getAnalysis<LiveVariables>();
-#endif
+ // If we move NewValueJump before register allocation we'll need live variable
+ // analysis here too.
QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
QRI = static_cast<const HexagonRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- if (!QRI->Subtarget.hasV4TOps() ||
- DisableNewValueJumps) {
+ if (DisableNewValueJumps) {
return false;
}
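getNewValueJumpOpcode above folds three choices into one opcode: the compare kind, whether the newified value is the second operand (which selects the cmplt/cmpltu dual so the new value can take the first slot), and the branch-taken hint (_t vs _nt). A distilled sketch of that selection for the C2_cmpgt case, with a hypothetical enum standing in for the generated opcode list:

// Hedged sketch (hypothetical enum) of the choice made above for C2_cmpgt.
enum class NVJ {
  J4_cmpgt_t_jumpnv_t, J4_cmpgt_t_jumpnv_nt,
  J4_cmplt_t_jumpnv_t, J4_cmplt_t_jumpnv_nt,
};

static NVJ selectCmpGt(bool secondRegNewified, bool taken) {
  if (secondRegNewified)  // use the "lt" dual: the new value goes first
    return taken ? NVJ::J4_cmplt_t_jumpnv_t : NVJ::J4_cmplt_t_jumpnv_nt;
  return taken ? NVJ::J4_cmpgt_t_jumpnv_t : NVJ::J4_cmpgt_t_jumpnv_nt;
}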
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
index 5a6de0a..b7f364e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -66,162 +66,131 @@ def nOneImm : Operand<i32>;
// Immediate predicates
//
def s32ImmPred : PatLeaf<(i32 imm), [{
- // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<29,3>(v);
+}]>;
+
+def s22_10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<22,10>(v);
+}]>;
+
+def s8_24ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
+ return isShiftedInt<8,24>(v);
}]>;
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
+def s16_16ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
+ return isShiftedInt<16,16>(v);
}]>;
def s26_6ImmPred : PatLeaf<(i32 imm), [{
- // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<26,6>(v);
}]>;
-
def s16ImmPred : PatLeaf<(i32 imm), [{
- // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<16>(v);
}]>;
-
def s13ImmPred : PatLeaf<(i32 imm), [{
- // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<13>(v);
}]>;
-
def s12ImmPred : PatLeaf<(i32 imm), [{
- // s12ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<12>(v);
}]>;
def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
}]>;
-
def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,1>(v);
}]>;
-
def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
- // sign extended field and is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,2>(v);
}]>;
-
def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
- // sign extended field and is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,3>(v);
}]>;
-
def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<10>(v);
}]>;
-
def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v);
}]>;
def m9ImmPred : PatLeaf<(i32 imm), [{
- // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
- // field. The range of m9 is -255 to 255.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v) && (v != -256);
}]>;
def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<6>(v);
}]>;
-
def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<4>(v);
}]>;
-
def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,1>(v);
}]>;
-
def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,2>(v);
}]>;
-
def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,3>(v);
}]>;
@@ -233,50 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{
}]>;
def u32ImmPred : PatLeaf<(i32 imm), [{
- // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
+def u32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<31,1>(v);
+}]>;
+
+def u30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<30,2>(v);
+}]>;
+
+def u29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<29,3>(v);
+}]>;
+
def u26_6ImmPred : PatLeaf<(i32 imm), [{
- // u26_6ImmPred - True if the immediate fits in a 32-bit field and
- // is a multiple of 64.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<26,6>(v);
}]>;
def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
- // extended s8 field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<16,8>(v);
}]>;
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
def u11_3ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 14-bit unsigned field, and the lowest
- // three bits are 0.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<11,3>(v);
}]>;
def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<8>(v);
}]>;
@@ -288,75 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{
}]>;
def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<7>(v);
}]>;
-
def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field that is 1 bit alinged - multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,1>(v);
}]>;
def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field that is 2 bits alinged - multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,2>(v);
}]>;
def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field that is 3 bits alinged - multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,3>(v);
}]>;
def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<5>(v);
}]>;
+def u4ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<4>(v);
+}]>;
def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<3>(v);
}]>;
-
def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<2>(v);
}]>;
-
def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<1>(v);
}]>;
@@ -499,356 +460,20 @@ let PrintMethod = "printExtOperand" in {
def u6_3Ext : Operand<i32>;
}
-let PrintMethod = "printImmOperand" in
-def u0AlwaysExt : Operand<i32>;
-
-// Predicates for constant extendable operands
-def s16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 16-bit sign extended field.
- return isInt<16>(v);
- else {
- if (isInt<16>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 10-bit sign extended field.
- return isInt<10>(v);
- else {
- if (isInt<10>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit sign extended field.
- return isInt<9>(v);
- else {
- if (isInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit sign extended field.
- return isInt<8>(v);
- else {
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s8_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate fits in a 8-bit sign extended field.
- return isInt<8>(v);
- else {
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
- }
-}]>;
-
-def s6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s6_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate fits in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
- }
-}]>;
-
-def s6_10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit sign extended field.
- return isInt<6>(v);
- else {
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 10-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<10>(v);
- }
-}]>;
-
-def s11_0ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 11-bit sign extended field.
- return isShiftedInt<11,0>(v);
- else {
- if (isInt<11>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
- }
-}]>;
-
-def s11_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 12-bit sign extended field and
- // is 2 byte aligned.
- return isShiftedInt<11,1>(v);
- else {
- if (isInt<12>(v))
- return isShiftedInt<11,1>(v);
-
- // Return true if extending this immediate is profitable and the low 1 bit
- // is zero (2-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
- }
-}]>;
-
-def s11_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 13-bit sign extended field and
- // is 4-byte aligned.
- return isShiftedInt<11,2>(v);
- else {
- if (isInt<13>(v))
- return isShiftedInt<11,2>(v);
-
- // Return true if extending this immediate is profitable and the low 2-bits
- // are zero (4-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
- }
-}]>;
-
-def s11_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 14-bit sign extended field and
- // is 8-byte aligned.
- return isShiftedInt<11,3>(v);
- else {
- if (isInt<14>(v))
- return isShiftedInt<11,3>(v);
-
- // Return true if extending this immediate is profitable and the low 3-bits
- // are zero (8-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
- }
-}]>;
-
-def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
- // Predicate for an unsigned 32-bit value that always needs to be extended.
- if (Subtarget.hasV4TOps()) {
- if (isConstExtProfitable(Node)) {
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
- }
- }
- return false;
-}]>;
-
-def u6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 6-bit unsigned field.
- return isUInt<6>(v);
- else {
- if (isUInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u7ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 7-bit unsigned field.
- return isUInt<7>(v);
- else {
- if (isUInt<7>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit unsigned field.
- return isUInt<8>(v);
- else {
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit unsigned field.
- return isUInt<9>(v);
- else {
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
- }
-}]>;
-
-def u6_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 7-bit unsigned field and
- // is 2-byte aligned.
- return isShiftedUInt<6,1>(v);
- else {
- if (isUInt<7>(v))
- return isShiftedUInt<6,1>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
- }
-}]>;
-
-def u6_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 8-bit unsigned field and
- // is 4-byte aligned.
- return isShiftedUInt<6,2>(v);
- else {
- if (isUInt<8>(v))
- return isShiftedUInt<6,2>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
- }
-}]>;
-
-def u6_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (!Subtarget.hasV4TOps())
- // Return true if the immediate can fit in a 9-bit unsigned field and
- // is 8-byte aligned.
- return isShiftedUInt<6,3>(v);
- else {
- if (isUInt<9>(v))
- return isShiftedUInt<6,3>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
- }
-}]>;
-
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
// in the patterns.
def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
-// Addressing modes.
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+// These complex patterns are not strictly necessary, since global address
+// folding will happen during DAG combining. For distinguishing between GA
+// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
+def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
+def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
let PrintMethod = "printGlobalOperand" in {
def globaladdress : Operand<i32>;
def globaladdressExt : Operand<i32>;
@@ -858,7 +483,9 @@ let PrintMethod = "printJumpTable" in
def jumptablebase : Operand<i32>;
def brtarget : Operand<OtherVT>;
-def brtargetExt : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT> {
+ let PrintMethod = "printExtBrtarget";
+}
def calltarget : Operand<i32>;
def bblabel : Operand<i32>;
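Every predicate in this file reduces to the templates in llvm/Support/MathExtras.h: isInt<N>/isUInt<N> test an N-bit signed/unsigned field, and isShiftedInt<N,S>/isShiftedUInt<N,S> additionally require the low S bits to be zero, i.e. an N-bit value scaled by 2^S. A small self-checking sketch:

// Hedged sketch of the MathExtras helpers behind these PatLeaf predicates.
#include <cassert>
#include "llvm/Support/MathExtras.h"
using namespace llvm;

int main() {
  assert(isInt<8>(127) && !isInt<8>(128));    // s8ImmPred
  assert(isShiftedInt<4, 2>(28));             // s4_2ImmPred: 7 * 4
  assert(!isShiftedInt<4, 2>(32));            // 8 * 4 overflows 4 signed bits
  assert(!isShiftedInt<4, 2>(30));            // low 2 bits must be zero
  assert(isShiftedUInt<6, 3>(504) &&          // u6_3ImmPred: 63 * 8
         !isShiftedUInt<6, 3>(505));          // low 3 bits must be zero
  return 0;
}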
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index e9b2ef6..503bfdb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -112,7 +112,7 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
- QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
MRI = &MF.getRegInfo();
DenseMap<unsigned, unsigned> PeepholeMap;
@@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (Op) {
case Hexagon::C2_mux:
case Hexagon::C2_muxii:
- case Hexagon::TFR_condset_ii:
NewOp = Op;
break;
- case Hexagon::TFR_condset_ri:
- NewOp = Hexagon::TFR_condset_ir;
- break;
- case Hexagon::TFR_condset_ir:
- NewOp = Hexagon::TFR_condset_ri;
- break;
case Hexagon::C2_muxri:
NewOp = Hexagon::C2_muxir;
break;
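When the peephole rewrites a use of a predicate into a use of its negation, the mux operands must swap: mux(p, x, y) == mux(!p, y, x). For the mixed register/immediate forms that exchange is an opcode change, which is why C2_muxri and C2_muxir map to each other while C2_mux and C2_muxii keep their opcode. A distilled sketch, assuming the Hexagon opcode enum is in scope:

// Hedged sketch of the opcode exchange performed above.
static unsigned invertMuxOpcode(unsigned Op) {
  switch (Op) {
  case Hexagon::C2_muxri: return Hexagon::C2_muxir;
  case Hexagon::C2_muxir: return Hexagon::C2_muxri;
  default:                return Op;  // C2_mux/C2_muxii: operands swap only
  }
}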
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index a64c9df..8f255a0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -30,36 +30,58 @@
#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+HexagonRegisterInfo::HexagonRegisterInfo()
+ : HexagonGenRegisterInfo(Hexagon::R31) {}
-HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
- : HexagonGenRegisterInfo(Hexagon::R31),
- Subtarget(st) {
+
+bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const {
+ return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 ||
+ R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1;
+}
+
+bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const {
+ return Hexagon::R16 <= Reg && Reg <= Hexagon::R27;
}
+
const MCPhysReg *
-HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const MCPhysReg CalleeSavedRegsV2[] = {
- Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CallerSavedRegsV4[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, 0
};
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ switch (HST.getHexagonArchVersion()) {
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ return CallerSavedRegsV4;
+ }
+  llvm_unreachable(
+      "Caller saved registers requested for unknown architecture version");
+}
+
+
+const MCPhysReg *
+HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
static const MCPhysReg CalleeSavedRegsV3[] = {
Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
};
- switch(Subtarget.getHexagonArchVersion()) {
- case HexagonSubtarget::V1:
- break;
- case HexagonSubtarget::V2:
- return CalleeSavedRegsV2;
- case HexagonSubtarget::V3:
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegsV3;
@@ -86,212 +108,153 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
}
-const TargetRegisterClass* const*
-HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
- };
-
- switch(Subtarget.getHexagonArchVersion()) {
- case HexagonSubtarget::V1:
- break;
- case HexagonSubtarget::V2:
- return CalleeSavedRegClassesV2;
- case HexagonSubtarget::V3:
- case HexagonSubtarget::V4:
- case HexagonSubtarget::V5:
- return CalleeSavedRegClassesV3;
- }
- llvm_unreachable("Callee saved register classes requested for unknown "
- "architecture version");
-}
-
void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
+ int SPAdj, unsigned FIOp,
RegScavenger *RS) const {
//
// Hexagon_TODO: Do we need to enforce this for Hexagon?
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- // Addressable stack objects are accessed using neg. offsets from %fp.
- MachineFunction &MF = *MI.getParent()->getParent();
- const HexagonInstrInfo &TII =
- *static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineBasicBlock &MB = *MI.getParent();
+ MachineFunction &MF = *MB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HFI = *HST.getFrameLowering();
- unsigned FrameReg = getFrameRegister(MF);
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (!TFI->hasFP(MF)) {
+ int FI = MI.getOperand(FIOp).getIndex();
+ int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm();
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool HasAlign = needsStackRealignment(MF);
+
+ // XXX: Fixed objects cannot be accessed through SP if there are aligned
+ // objects in the local frame, or if there are dynamically allocated objects.
+ // In such cases, there has to be FP available.
+ if (!HFI.hasFP(MF)) {
+    assert(!HasAlloca && !HasAlign && "This function must have a frame pointer");
// We will not reserve space on the stack for the lr and fp registers.
- Offset -= 2 * Hexagon_WordSize;
+ Offset -= 8;
}
- const unsigned FrameSize = MFI.getStackSize();
+ unsigned SP = getStackRegister(), FP = getFrameRegister();
+ unsigned AP = 0;
+ if (MachineInstr *AI = HFI.getAlignaInstr(MF))
+ AP = AI->getOperand(0).getReg();
+ unsigned FrameSize = MFI.getStackSize();
+
+ // Special handling of dbg_value instructions and INLINEASM.
+ if (MI.isDebugValue() || MI.isInlineAsm()) {
+ MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/);
+ MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize);
+ return;
+ }
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
- !TII.isSpillPredRegOp(&MI)) {
- // Replace frame index with a stack pointer reference.
- MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
- false, true);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
+ bool UseFP = false, UseAP = false; // Default: use SP.
+ if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+ UseFP = HasAlloca || HasAlign;
} else {
- // Replace frame index with a frame pointer reference.
- if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
-
- // If the offset overflows, then correct it.
- //
- // For loads, we do not need a reserved register
- // r0 = memw(r30 + #10000) to:
- //
- // r0 = add(r30, #10000)
- // r0 = memw(r0)
- if ( (MI.getOpcode() == Hexagon::L2_loadri_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrd_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrh_io) ||
- (MI.getOpcode() == Hexagon::L2_loadruh_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrb_io) ||
- (MI.getOpcode() == Hexagon::L2_loadrub_io) ||
- (MI.getOpcode() == Hexagon::LDriw_f) ||
- (MI.getOpcode() == Hexagon::LDrid_f)) {
- unsigned dstReg = (MI.getOpcode() == Hexagon::L2_loadrd_io) ?
- getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) :
- MI.getOperand(0).getReg();
-
- // Check if offset can fit in addi.
- if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- dstReg).addReg(FrameReg).addReg(dstReg);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- dstReg).addReg(FrameReg).addImm(Offset);
- }
-
- MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else if ((MI.getOpcode() == Hexagon::S2_storeri_io) ||
- (MI.getOpcode() == Hexagon::S2_storerd_io) ||
- (MI.getOpcode() == Hexagon::S2_storerh_io) ||
- (MI.getOpcode() == Hexagon::S2_storerb_io) ||
- (MI.getOpcode() == Hexagon::STrid_f) ||
- (MI.getOpcode() == Hexagon::STriw_f)) {
- // For stores, we need a reserved register. Change
- // memw(r30 + #10000) = r0 to:
- //
- // rs = add(r30, #10000);
- // memw(rs) = r0
- unsigned resReg = HEXAGON_RESERVED_REG_1;
-
- // Check if offset can fit in addi.
- if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- resReg).addReg(FrameReg).addReg(resReg);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- resReg).addReg(FrameReg).addImm(Offset);
- }
- MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else if (TII.isMemOp(&MI)) {
- // use the constant extender if the instruction provides it
- // and we are V4TOps.
- if (Subtarget.hasV4TOps()) {
- if (TII.isConstExtended(&MI)) {
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
- TII.immediateExtend(&MI);
- } else {
- llvm_unreachable("Need to implement for memops");
- }
- } else {
- // Only V3 and older instructions here.
- unsigned ResReg = HEXAGON_RESERVED_REG_1;
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
- false, false, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
- } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg).
- addReg(ResReg);
- MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- } else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
- addImm(Offset);
- MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- }
- }
- } else {
- unsigned dstReg = MI.getOperand(0).getReg();
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::A2_add),
- dstReg).addReg(FrameReg).addReg(dstReg);
- // Can we delete MI??? r2 = add (r2, #0).
- MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
- }
- } else {
- // If the offset is small enough to fit in the immediate field, directly
- // encode it.
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ if (HasAlloca) {
+ if (HasAlign)
+ UseAP = true;
+ else
+ UseFP = true;
}
}
+ unsigned Opc = MI.getOpcode();
+ bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset);
+ bool ValidFP = HII.isValidOffset(Opc, Offset);
+
+ // Calculate the actual offset in the instruction.
+ int64_t RealOffset = Offset;
+ if (!UseFP && !UseAP)
+ RealOffset = FrameSize+Offset;
+
+ switch (Opc) {
+ case Hexagon::TFR_FIA:
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
+ MI.RemoveOperand(FIOp+1);
+ return;
+ case Hexagon::TFR_FI:
+ // Set up the instruction for updating below.
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ break;
+ }
+
+ unsigned BP = 0;
+ bool Valid = false;
+ if (UseFP) {
+ BP = FP;
+ Valid = ValidFP;
+ } else if (UseAP) {
+ BP = AP;
+ Valid = ValidFP;
+ } else {
+ BP = SP;
+ Valid = ValidSP;
+ }
+
+ if (Valid) {
+ MI.getOperand(FIOp).ChangeToRegister(BP, false);
+ MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
+ return;
+ }
+
+#ifndef NDEBUG
+ const Function *F = MF.getFunction();
+ dbgs() << "In function ";
+ if (F) dbgs() << F->getName();
+ else dbgs() << "<?>";
+ dbgs() << ", BB#" << MB.getNumber() << "\n" << MI;
+#endif
+ llvm_unreachable("Unhandled instruction");
}
+
unsigned HexagonRegisterInfo::getRARegister() const {
return Hexagon::R31;
}
+
unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (TFI->hasFP(MF)) {
+ if (TFI->hasFP(MF))
return Hexagon::R30;
- }
-
return Hexagon::R29;
}
+
unsigned HexagonRegisterInfo::getFrameRegister() const {
return Hexagon::R30;
}
+
unsigned HexagonRegisterInfo::getStackRegister() const {
return Hexagon::R29;
}
+
+bool
+HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return MF.getSubtarget().getFrameLowering()->hasFP(MF);
+}
+
+
+bool
+HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getMaxAlignment() > 8;
+}
+
+
+unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
+ return Hexagon::R6;
+}
+
+
#define GET_REGINFO_TARGET_DESC
#include "HexagonGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index a83b502..7edefee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -37,39 +37,37 @@
#define HEXAGON_RESERVED_REG_2 Hexagon::R11
namespace llvm {
-
-class HexagonSubtarget;
-class HexagonInstrInfo;
-class Type;
-
-struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
- HexagonSubtarget &Subtarget;
-
- HexagonRegisterInfo(HexagonSubtarget &st);
+class HexagonRegisterInfo : public HexagonGenRegisterInfo {
+public:
+ HexagonRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
+ const override;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = nullptr) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
- /// determineFrameLayout - Determine the size of the frame and maximum call
- /// frame size.
- void determineFrameLayout(MachineFunction &MF) const;
-
- /// requiresRegisterScavenging - returns true since we may need scavenging for
- /// a temporary register when generating hardware loop instructions.
+ /// Returns true since we may need scavenging for a temporary register
+ /// when generating hardware loop instructions.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
+ /// Returns true. Spill code for predicate registers might need an extra
+ /// register.
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool needsStackRealignment(const MachineFunction &MF) const override;
+
+ /// Returns true if the frame pointer is valid.
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
@@ -79,6 +77,13 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
+
+ const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const;
+
+ unsigned getFirstCallerSavedNonParamReg() const;
+
+ bool isEHReturnCalleeSaveReg(unsigned Reg) const;
+ bool isCalleeSaveReg(unsigned Reg) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index decd947..edf1c25 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -163,18 +163,20 @@ let Namespace = "Hexagon" in {
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
(add (sequence "R%u", 0, 9),
(sequence "R%u", 12, 28),
R10, R11, R29, R30, R31)> {
}
-def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
-def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+def PredRegs : RegisterClass<"Hexagon",
+ [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
+ (add (sequence "P%u", 0, 3))>
{
let Size = 32;
}
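Adding the short-vector types lets a 32-bit IntReg carry v4i8/v2i16 and a 64-bit DoubleRegs pair carry v8i8/v4i16/v2i32, so small vector arithmetic can stay in ordinary scalar registers instead of being scalarized. A minimal sketch using clang's generic vector extension, under that assumption:

// Hedged sketch: a 4 x i8 vector that fits a single 32-bit IntReg.
typedef unsigned char v4u8 __attribute__((vector_size(4)));

v4u8 add4(v4u8 a, v4u8 b) {
  return a + b;  // may now lower to one 32-bit register operation
}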
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 8fdd493..4efb5f7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -48,12 +48,9 @@ using namespace llvm;
namespace {
class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
- const HexagonTargetMachine &QTM;
-
public:
static char ID;
- HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM)
- : MachineFunctionPass(ID), QTM(TM) {}
+ HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {}
const char *getPassName() const override {
return "Hexagon Split Const32s and Const64s";
@@ -68,13 +65,13 @@ char HexagonSplitConst32AndConst64::ID = 0;
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
const HexagonTargetObjectFile &TLOF =
- (const HexagonTargetObjectFile &)QTM.getSubtargetImpl()
- ->getTargetLowering()
- ->getObjFileLowering();
+ *static_cast<const HexagonTargetObjectFile *>(
+ Fn.getTarget().getObjFileLowering());
if (TLOF.IsSmallDataEnabled())
return true;
- const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -86,7 +83,8 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
while (MII != MIE) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::CONST32_set) {
+ if (Opc == Hexagon::CONST32_Int_Real &&
+ MI->getOperand(1).isBlockAddress()) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
@@ -99,69 +97,53 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_set_jt) {
- int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST32_Label) {
+ else if (Opc == Hexagon::CONST32_Int_Real ||
+ Opc == Hexagon::CONST32_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_label), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_label), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST32_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Int_Real) {
+ else if (Opc == Hexagon::CONST64_Int_Real ||
+ Opc == Hexagon::CONST64_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST64_Int_Real) {
- int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_loreg);
- unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_hireg);
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST64_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
- // Lower Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
- // Lower Registers Higher Half
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
- // Higher Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
- // Higher Registers Higher Half.
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+ TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
MII = MBB->erase (MI);
continue;
- }
+ }
++MII;
}
}
@@ -176,6 +158,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
//===----------------------------------------------------------------------===//
FunctionPass *
-llvm::createHexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) {
- return new HexagonSplitConst32AndConst64(TM);
+llvm::createHexagonSplitConst32AndConst64() {
+ return new HexagonSplitConst32AndConst64();
}
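After this rewrite a CONST32 materializes with a single A2_tfrsi, and a CONST64 is split into two A2_tfrsi transfers into the low and high subregisters, with FP constants first bit-cast to their integer image. A distilled sketch of the word split (names hypothetical):

// Hedged sketch of the 64-bit split performed above.
#include <cstdint>

struct Halves { int32_t Lo, Hi; };  // hypothetical helper type

static Halves splitConst64(int64_t ImmValue) {
  Halves H;
  H.Lo = static_cast<int32_t>(ImmValue & 0xFFFFFFFF);          // subreg_loreg
  H.Hi = static_cast<int32_t>((ImmValue >> 32) & 0xFFFFFFFF);  // subreg_hireg
  return H;
}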
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
deleted file mode 100644
index a304e65..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-//===----------------------------------------------------------------------===//
-// This pass tries to provide opportunities for better optimization of muxes.
-// The default code generated for something like: flag = (a == b) ? 1 : 3;
-// would be:
-//
-// {p0 = cmp.eq(r0,r1)}
-// {r3 = mux(p0,#1,#3)}
-//
-// This requires two packets. If we use .new predicated immediate transfers,
-// then we can do this in a single packet, e.g.:
-//
-// {p0 = cmp.eq(r0,r1)
-// if (p0.new) r3 = #1
-// if (!p0.new) r3 = #3}
-//
-// Note that the conditional assignments are not generated in .new form here.
-// We assume opptimisically that they will be formed later.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "xfer"
-
-namespace llvm {
- void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
-}
-
-
-namespace {
-
-class HexagonSplitTFRCondSets : public MachineFunctionPass {
- const HexagonTargetMachine &QTM;
- const HexagonSubtarget &QST;
-
- public:
- static char ID;
- HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
- initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
- }
-
- const char *getPassName() const override {
- return "Hexagon Split TFRCondSets";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-
-char HexagonSplitTFRCondSets::ID = 0;
-
-
-bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
-
- const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
-
- // Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
- // Traverse the basic block.
- for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
- ++MII) {
- MachineInstr *MI = MII;
- int Opc1, Opc2;
- switch(MI->getOpcode()) {
- case Hexagon::TFR_condset_rr_f:
- case Hexagon::TFR_condset_rr64_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
- Opc1 = Hexagon::A2_tfrt;
- Opc2 = Hexagon::A2_tfrf;
- }
- else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
- Opc1 = Hexagon::A2_tfrpt;
- Opc2 = Hexagon::A2_tfrpf;
- }
-
- // Minor optimization: do not emit the predicated copy if the source
- // and the destination are the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
- DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ri:
- case Hexagon::TFR_condset_ri_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
-
- // Do not emit the predicated copy if the source and the destination
- // are the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrt), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(3).getImm());
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addFPImm(MI->getOperand(3).getFPImm());
- }
-
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ir:
- case Hexagon::TFR_condset_ir_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(2).getImm());
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt_f), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addFPImm(MI->getOperand(2).getFPImm());
- }
-
- // Do not emit the predicated copy if the source and
- // the destination are the same register.
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrf), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ii:
- case Hexagon::TFR_condset_ii_f: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(1).getReg();
-
- if (MI->getOpcode() == Hexagon::TFR_condset_ii ) {
- int Immed1 = MI->getOperand(2).getImm();
- int Immed2 = MI->getOperand(3).getImm();
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit),
- DestReg).addReg(SrcReg1).addImm(Immed1);
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif),
- DestReg).addReg(SrcReg1).addImm(Immed2);
- } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cPt_f), DestReg).
- addReg(SrcReg1).
- addFPImm(MI->getOperand(2).getFPImm());
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
- addReg(SrcReg1).
- addFPImm(MI->getOperand(3).getFPImm());
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- }
- }
- }
- return true;
-}
-
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-static void initializePassOnce(PassRegistry &Registry) {
- const char *Name = "Hexagon Split TFRCondSets";
- PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
- &HexagonSplitTFRCondSets::ID, nullptr, false,
- false);
- Registry.registerPass(*PI, true);
-}
-
-void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializePassOnce)
-}
-
-FunctionPass*
-llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) {
- return new HexagonSplitTFRCondSets(TM);
-}
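
For reference, the pass deleted above expanded each TFR_condset pseudo into at most two predicated transfers. A minimal sketch of that expansion pattern, assuming the A2_tfrt/A2_tfrf opcodes and an MI iterator as in the code above (not the in-tree implementation):

#include "Hexagon.h"                          // For the A2_tfrt/A2_tfrf opcodes.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"

static void expandCondset(llvm::MachineBasicBlock &MBB,
                          llvm::MachineBasicBlock::iterator MI,
                          const llvm::TargetInstrInfo *TII, unsigned DestReg,
                          unsigned PredReg, unsigned TrueReg,
                          unsigned FalseReg) {
  llvm::DebugLoc DL = MI->getDebugLoc();
  // Copy the "true" source when the predicate holds; skip no-op copies.
  if (DestReg != TrueReg)
    BuildMI(MBB, MI, DL, TII->get(llvm::Hexagon::A2_tfrt), DestReg)
        .addReg(PredReg).addReg(TrueReg);
  // Copy the "false" source when the predicate does not hold.
  if (DestReg != FalseReg)
    BuildMI(MBB, MI, DL, TII->get(llvm::Hexagon::A2_tfrf), DestReg)
        .addReg(PredReg).addReg(FalseReg);
  MI->eraseFromParent(); // The pseudo is now fully expanded.
}
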
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 657893f..d61cc54 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,18 +48,17 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
if (CPUString.empty())
CPUString = "hexagonv4";
- if (CPUString == "hexagonv2") {
- HexagonArchVersion = V2;
- } else if (CPUString == "hexagonv3") {
- EnableV3 = true;
- HexagonArchVersion = V3;
- } else if (CPUString == "hexagonv4") {
+ if (CPUString == "hexagonv4") {
HexagonArchVersion = V4;
} else if (CPUString == "hexagonv5") {
HexagonArchVersion = V5;
@@ -73,10 +72,9 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM)
- : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- TSInfo(DL), FrameLowering() {
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TSInfo(*TM.getDataLayout()), FrameLowering() {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -97,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+ if (DisableHexagonMISched.getNumOccurrences())
+ return !DisableHexagonMISched;
+ return true;
+}
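
The getNumOccurrences() check above is the usual cl::opt idiom for a tri-state flag: the subtarget defaults to the MI scheduler, but an explicit -disable-hexagon-misched (in either direction) wins. A stand-alone sketch of the pattern, with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"

// Hypothetical flag; stands in for DisableHexagonMISched above.
static llvm::cl::opt<bool> DisableFeature(
    "disable-my-feature", llvm::cl::Hidden, llvm::cl::init(false),
    llvm::cl::desc("Disable the feature"));

static bool isFeatureEnabled() {
  // getNumOccurrences() distinguishes "left at the default" from
  // "explicitly set on the command line".
  if (DisableFeature.getNumOccurrences())
    return !DisableFeature;
  return true; // Compiled-in default: enabled.
}
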
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 34e327f..780567b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -39,13 +39,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
public:
enum HexagonArchEnum {
- V1, V2, V3, V4, V5
+ V4, V5
};
HexagonArchEnum HexagonArchVersion;
private:
std::string CPUString;
- const DataLayout DL; // Calculates type size & alignment.
HexagonInstrInfo InstrInfo;
HexagonTargetLowering TLInfo;
HexagonSelectionDAGInfo TSInfo;
@@ -74,7 +73,6 @@ public:
const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const DataLayout *getDataLayout() const override { return &DL; }
HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
@@ -83,18 +81,16 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool hasV2TOps () const { return HexagonArchVersion >= V2; }
- bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
- bool hasV3TOps () const { return HexagonArchVersion >= V3; }
- bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
- bool hasV4TOps () const { return HexagonArchVersion >= V4; }
- bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; }
- bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
- bool hasV5TOps () const { return HexagonArchVersion >= V5; }
- bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; }
- bool modeIEEERndNear () const { return ModeIEEERndNear; }
-
- bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ bool useMemOps() const { return UseMemOps; }
+ bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
+ bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+ bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool enableMachineScheduler() const override;
+ // Always use the TargetLowering default scheduler.
+ // FIXME: This will use the vliw scheduler which is probably just hurting
+ // compiler time and will be removed eventually anyway.
+ bool enableMachineSchedDefaultSched() const override { return false; }
+
const std::string &getCPUString () const { return CPUString; }
// Threshold for small data section
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 52aff27..0679866 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -17,8 +17,8 @@
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -27,15 +27,15 @@
using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
- cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Early expansion of MUX"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -59,6 +59,10 @@ static MachineSchedRegistry
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
+namespace llvm {
+ FunctionPass *createHexagonExpandCondsets();
+}
+
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
@@ -69,7 +73,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<HexagonTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -83,14 +88,13 @@ class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
- // HexagonSubtarget::enableMachineScheduler() { return true; }.
- // That will bypass the SelectionDAG VLIW scheduler, which is probably just
- // hurting compile time and will be removed eventually anyway.
- if (DisableHexagonMISched)
- disablePass(&MachineSchedulerID);
- else
- enablePass(&MachineSchedulerID);
+ bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ if (EnableExpandCondsets) {
+ Pass *Exp = createHexagonExpandCondsets();
+ insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+ }
+ }
}
HexagonTargetMachine &getHexagonTargetMachine() const {
@@ -138,33 +142,26 @@ void HexagonPassConfig::addPreRegAlloc() {
}
void HexagonPassConfig::addPostRegAlloc() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
if (getOptLevel() != CodeGenOpt::None)
if (!DisableHexagonCFGOpt)
- addPass(createHexagonCFGOptimizer(TM), false);
+ addPass(createHexagonCFGOptimizer(), false);
}
void HexagonPassConfig::addPreSched2() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
-
addPass(createHexagonCopyToCombine(), false);
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID, false);
- addPass(createHexagonSplitConst32AndConst64(TM));
+ addPass(createHexagonSplitConst32AndConst64());
}
void HexagonPassConfig::addPreEmitPass() {
- const HexagonTargetMachine &TM = getHexagonTargetMachine();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
addPass(createHexagonNewValueJump(), false);
// Expand Spill code for predicate registers.
- addPass(createHexagonExpandPredSpillCode(TM), false);
-
- // Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(TM), false);
+ addPass(createHexagonExpandPredSpillCode(), false);
// Create Packets.
if (!NoOpt) {
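
The insertPass() call above splices the new condset expansion in front of the register coalescer instead of appending it to the end of the pipeline. A minimal sketch of the same pattern, with a hypothetical createMyExpandPass() factory:

#include "llvm/CodeGen/Passes.h"

llvm::Pass *createMyExpandPass(); // Hypothetical pass factory.

class MyPassConfig : public llvm::TargetPassConfig {
public:
  MyPassConfig(llvm::TargetMachine *TM, llvm::legacy::PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Schedule the pass to run immediately before the register coalescer.
    llvm::Pass *Exp = createMyExpandPass();
    insertPass(&llvm::RegisterCoalescerID, llvm::IdentifyingPassPtr(Exp));
  }
};
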
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 4a9f447..5774f7e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -32,8 +32,7 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
-
- const HexagonSubtarget *getSubtargetImpl() const override {
+ const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4ab5e2..4ea0e0d 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -33,14 +33,10 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
- SmallDataSection =
- getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- SmallBSSSection =
- getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getBSS());
+ SmallDataSection = getContext().getELFSection(
+ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
// sdata/sbss support taken largely from the MIPS Backend.
@@ -79,14 +75,13 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
}
return false;
}
-const MCSection *
+MCSection *
HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index c974204..da0eeeb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -16,8 +16,9 @@
namespace llvm {
class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
- const MCSectionELF *SmallDataSection;
- const MCSectionELF *SmallBSSSection;
+ MCSectionELF *SmallDataSection;
+ MCSectionELF *SmallBSSSection;
+
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
@@ -30,9 +31,9 @@ namespace llvm {
const TargetMachine &TM) const;
bool IsSmallDataEnabled () const;
- const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // namespace llvm
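
With the subtarget's private DataLayout gone, IsGlobalInSmallSection sizes globals through the module-level DataLayout. A minimal sketch of that size test, assuming a fixed 8-byte threshold (the in-tree pass reads the threshold from a command-line option):

#include <cstdint>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"

static bool fitsInSmallData(const llvm::GlobalValue *GV,
                            const llvm::DataLayout &DL,
                            uint64_t Threshold = 8) {
  // A global's type is a pointer; size the pointee, not the pointer.
  llvm::Type *Ty = GV->getType()->getElementType();
  return DL.getTypeAllocSize(Ty) <= Threshold;
}
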
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c960527..0cc59bc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -264,8 +264,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
static bool IsIndirectCall(MachineInstr* MI) {
- return ((MI->getOpcode() == Hexagon::J2_callr) ||
- (MI->getOpcode() == Hexagon::CALLRv3));
+ return MI->getOpcode() == Hexagon::J2_callr;
}
// Reserve resources for constant extender. Trigger an assertion if
@@ -371,7 +370,7 @@ static bool IsDirectJump(MachineInstr* MI) {
static bool IsSchedBarrier(MachineInstr* MI) {
switch (MI->getOpcode()) {
- case Hexagon::BARRIER:
+ case Hexagon::Y2_barrier:
return true;
}
return false;
@@ -390,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) {
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ for (const MCPhysReg *CSR =
+ TRI->getCalleeSavedRegs(MI->getParent()->getParent());
+ *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
@@ -402,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
// or new-value store.
bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if ( isCondInst(MI) || QII->mayBeNewStore(MI))
- return true;
- else
- return false;
+ return isCondInst(MI) || QII->mayBeNewStore(MI);
}
bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
@@ -721,10 +719,7 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
MachineBasicBlock::iterator &MII) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
- if (!QRI->Subtarget.hasV4TOps() ||
- !QII->mayBeNewStore(MI))
+ if (!QII->mayBeNewStore(MI))
return false;
MachineInstr *PacketMI = PacketSU->getInstr();
@@ -955,6 +950,9 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
if (MI->isDebugValue())
return true;
+ if (MI->isCFIInstruction())
+ return false;
+
// We must print out inline assembly
if (MI->isInlineAsm())
return false;
@@ -972,11 +970,10 @@ bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
// isSoloInstruction: - Returns true for instructions that must be
// scheduled in their own packet.
bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
-
- if (MI->isInlineAsm())
+ if (MI->isEHLabel() || MI->isCFIInstruction())
return true;
- if (MI->isEHLabel())
+ if (MI->isInlineAsm())
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
@@ -1055,84 +1052,82 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// first store is not in SLOT0. New value store, new value jump,
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
- if (QRI->Subtarget.hasV4TOps()) {
- if (MCIDI.mayStore() && MCIDJ.mayStore() &&
- (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
- Dependence = true;
- return false;
- }
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
- if ((QII->isMemOp(J) && MCIDI.mayStore())
- || (MCIDJ.mayStore() && QII->isMemOp(I))
- || (QII->isMemOp(J) && QII->isMemOp(I))) {
- Dependence = true;
- return false;
- }
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
- //if dealloc_return
- if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
- Dependence = true;
- return false;
- }
+ //if dealloc_return
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
+ Dependence = true;
+ return false;
+ }
- // If an instruction feeds new value jump, glue it.
- MachineBasicBlock::iterator NextMII = I;
- ++NextMII;
- if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
- MachineInstr *NextMI = NextMII;
+ // If an instruction feeds new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+ MachineInstr *NextMI = NextMII;
- bool secondRegMatch = false;
- bool maintainNewValueJump = false;
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
- if (NextMI->getOperand(1).isReg() &&
- I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
- secondRegMatch = true;
- maintainNewValueJump = true;
- }
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
- if (!secondRegMatch &&
- I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
- maintainNewValueJump = true;
- }
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
- for (std::vector<MachineInstr*>::iterator
- VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE && maintainNewValueJump); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
- // NVJ can not be part of the dual jump - Arch Spec: section 7.8
- if (PacketSU->getInstr()->getDesc().isCall()) {
- Dependence = true;
- break;
- }
- // Validate
- // 1. Packet does not have a store in it.
- // 2. If the first operand of the nvj is newified, and the second
- // operand is also a reg, it (second reg) is not defined in
- // the same packet.
- // 3. If the second operand of the nvj is newified, (which means
- // first operand is also a reg), first reg is not defined in
- // the same packet.
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- // Check #2.
- (!secondRegMatch && NextMI->getOperand(1).isReg() &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(1).getReg(), QRI)) ||
- // Check #3.
- (secondRegMatch &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(0).getReg(), QRI))) {
- Dependence = true;
- break;
- }
+ // NVJ can not be part of the dual jump - Arch Spec: section 7.8
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
}
- if (!Dependence)
- GlueToNewValueJump = true;
- else
- return false;
}
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
}
if (SUJ->isSucc(SUI)) {
@@ -1254,9 +1249,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
else if ((DepType == SDep::Order) &&
!I->hasOrderedMemoryRef() &&
!J->hasOrderedMemoryRef()) {
- if (QRI->Subtarget.hasV4TOps() &&
- // hexagonv4 allows dual store.
- MCIDI.mayStore() && MCIDJ.mayStore()) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore()) {
/* do nothing */
}
// store followed by store-- not OK on V2
@@ -1278,7 +1271,6 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// packetized in the same packet. This implies that the store is using
// caller's SP. Hence, offset needs to be updated accordingly.
else if (DepType == SDep::Data
- && QRI->Subtarget.hasV4TOps()
&& J->getOpcode() == Hexagon::S2_allocframe
&& (I->getOpcode() == Hexagon::S2_storerd_io
|| I->getOpcode() == Hexagon::S2_storeri_io
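
DoesModifyCalleeSavedReg now passes the enclosing MachineFunction to getCalleeSavedRegs(), since the callee-saved list became per-function. The iteration over the null-terminated register array, restated as a stand-alone sketch:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"

static bool modifiesAnyCSR(const llvm::MachineInstr *MI,
                           const llvm::TargetRegisterInfo *TRI) {
  const llvm::MachineFunction *MF = MI->getParent()->getParent();
  // getCalleeSavedRegs() returns a null-terminated array of MCPhysReg.
  for (const llvm::MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); *CSR; ++CSR)
    if (MI->modifiesRegister(*CSR, TRI))
      return true;
  return false;
}
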
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
deleted file mode 100644
index edbe29a..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the functions that assign locations to outgoing function
-// arguments. Adapted from the target independent version but this handles
-// calls to varargs functions
-//
-//===----------------------------------------------------------------------===//
-//
-
-
-
-
-static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem);
-
-
-static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem) {
- unsigned ByValSize = 0;
- if (ArgFlags.isByVal() &&
- ((ByValSize = ArgFlags.getByValSize()) >
- (MVT(MVT::i64).getSizeInBits() / 8))) {
- ForceMem = true;
- }
-
-
- // Only assign registers for named (non-varargs) arguments
- if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
- NonVarArgsParams))) {
-
- if (LocVT == MVT::i32 ||
- LocVT == MVT::i16 ||
- LocVT == MVT::i8 ||
- LocVT == MVT::f32) {
- static const unsigned RegList1[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
- };
- if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- if (LocVT == MVT::i64 ||
- LocVT == MVT::f64) {
- static const unsigned RegList2[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2
- };
- if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
- }
-
- const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment = State.getTarget()
- .getSubtargetImpl()
- ->getDataLayout()
- ->getABITypeAlignment(ArgTy);
- unsigned Size =
- State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
- ArgTy) /
- 8;
-
- // If it's passed by value, then we need the size of the aggregate not of
- // the pointer.
- if (ArgFlags.isByVal()) {
- Size = ByValSize;
-
- // Hexagon_TODO: Get the alignment of the contained type here.
- Alignment = 8;
- }
-
- unsigned Offset3 = State.AllocateStack(Size, Alignment);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
- LocVT.getSimpleVT(), LocInfo));
- return false;
-}
-
-
-static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- Hexagon_CCState &State,
- int NonVarArgsParams,
- int CurrentParam,
- bool ForceMem) {
-
- if (LocVT == MVT::i32 ||
- LocVT == MVT::f32) {
- static const unsigned RegList1[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
- };
- if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- if (LocVT == MVT::i64 ||
- LocVT == MVT::f64) {
- static const unsigned RegList2[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2
- };
- if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
- LocVT.getSimpleVT(), LocInfo));
- return false;
- }
- }
-
- const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment = State.getTarget()
- .getSubtargetImpl()
- ->getDataLayout()
- ->getABITypeAlignment(ArgTy);
- unsigned Size =
- State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
- ArgTy) /
- 8;
-
- unsigned Offset3 = State.AllocateStack(Size, Alignment);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
- LocVT.getSimpleVT(), LocInfo));
- return false;
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index bdccf88..155aa9e 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -57,7 +57,7 @@ public:
ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
: HexagonAsmBackend(T), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
StringRef CPU("HexagonV4");
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 8e02f79..6a72f20 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -70,7 +70,7 @@ namespace HexagonII {
PostInc = 6 // Post increment addressing mode
};
- enum MemAccessSize {
+ enum class MemAccessSize {
NoMemAccess = 0, // Not a memory access instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
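
Making MemAccessSize an enum class scopes the enumerators and removes the implicit conversion to int; that is why the code emitter later spells out HexagonII::MemAccessSize::ByteAccess in full. A self-contained illustration (Plain/Scoped are stand-ins, not LLVM types):

enum Plain { ByteAccess = 1 };          // Unscoped: leaks into the namespace.
enum class Scoped { ByteAccess = 1 };   // Scoped: must be qualified.

int main() {
  int a = ByteAccess;                   // OK: unscoped enums convert to int.
  // int b = Scoped::ByteAccess;        // Error: no implicit conversion.
  int b = static_cast<int>(Scoped::ByteAccess);
  return a - b;                         // Always 0.
}
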
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 56c9dc7..fde935b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "hexagon-elf-writer"
@@ -26,8 +27,8 @@ private:
public:
HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
- virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
- bool IsPCRel) const override;
+ unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
};
}
@@ -54,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
return Type;
}
-MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
StringRef CPU) {
MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
-} \ No newline at end of file
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
new file mode 100644
index 0000000..4bbfbec
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -0,0 +1,137 @@
+//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Hexagon {
+enum Fixups {
+ // Branch fixups for R_HEX_B{22,15,7}_PCREL.
+ fixup_Hexagon_B22_PCREL = FirstTargetFixupKind,
+ fixup_Hexagon_B15_PCREL,
+ fixup_Hexagon_B7_PCREL,
+ fixup_Hexagon_LO16,
+ fixup_Hexagon_HI16,
+ fixup_Hexagon_32,
+ fixup_Hexagon_16,
+ fixup_Hexagon_8,
+ fixup_Hexagon_GPREL16_0,
+ fixup_Hexagon_GPREL16_1,
+ fixup_Hexagon_GPREL16_2,
+ fixup_Hexagon_GPREL16_3,
+ fixup_Hexagon_HL16,
+ fixup_Hexagon_B13_PCREL,
+ fixup_Hexagon_B9_PCREL,
+ fixup_Hexagon_B32_PCREL_X,
+ fixup_Hexagon_32_6_X,
+ fixup_Hexagon_B22_PCREL_X,
+ fixup_Hexagon_B15_PCREL_X,
+ fixup_Hexagon_B13_PCREL_X,
+ fixup_Hexagon_B9_PCREL_X,
+ fixup_Hexagon_B7_PCREL_X,
+ fixup_Hexagon_16_X,
+ fixup_Hexagon_12_X,
+ fixup_Hexagon_11_X,
+ fixup_Hexagon_10_X,
+ fixup_Hexagon_9_X,
+ fixup_Hexagon_8_X,
+ fixup_Hexagon_7_X,
+ fixup_Hexagon_6_X,
+ fixup_Hexagon_32_PCREL,
+ fixup_Hexagon_COPY,
+ fixup_Hexagon_GLOB_DAT,
+ fixup_Hexagon_JMP_SLOT,
+ fixup_Hexagon_RELATIVE,
+ fixup_Hexagon_PLT_B22_PCREL,
+ fixup_Hexagon_GOTREL_LO16,
+ fixup_Hexagon_GOTREL_HI16,
+ fixup_Hexagon_GOTREL_32,
+ fixup_Hexagon_GOT_LO16,
+ fixup_Hexagon_GOT_HI16,
+ fixup_Hexagon_GOT_32,
+ fixup_Hexagon_GOT_16,
+ fixup_Hexagon_DTPMOD_32,
+ fixup_Hexagon_DTPREL_LO16,
+ fixup_Hexagon_DTPREL_HI16,
+ fixup_Hexagon_DTPREL_32,
+ fixup_Hexagon_DTPREL_16,
+ fixup_Hexagon_GD_PLT_B22_PCREL,
+ fixup_Hexagon_LD_PLT_B22_PCREL,
+ fixup_Hexagon_GD_GOT_LO16,
+ fixup_Hexagon_GD_GOT_HI16,
+ fixup_Hexagon_GD_GOT_32,
+ fixup_Hexagon_GD_GOT_16,
+ fixup_Hexagon_LD_GOT_LO16,
+ fixup_Hexagon_LD_GOT_HI16,
+ fixup_Hexagon_LD_GOT_32,
+ fixup_Hexagon_LD_GOT_16,
+ fixup_Hexagon_IE_LO16,
+ fixup_Hexagon_IE_HI16,
+ fixup_Hexagon_IE_32,
+ fixup_Hexagon_IE_16,
+ fixup_Hexagon_IE_GOT_LO16,
+ fixup_Hexagon_IE_GOT_HI16,
+ fixup_Hexagon_IE_GOT_32,
+ fixup_Hexagon_IE_GOT_16,
+ fixup_Hexagon_TPREL_LO16,
+ fixup_Hexagon_TPREL_HI16,
+ fixup_Hexagon_TPREL_32,
+ fixup_Hexagon_TPREL_16,
+ fixup_Hexagon_6_PCREL_X,
+ fixup_Hexagon_GOTREL_32_6_X,
+ fixup_Hexagon_GOTREL_16_X,
+ fixup_Hexagon_GOTREL_11_X,
+ fixup_Hexagon_GOT_32_6_X,
+ fixup_Hexagon_GOT_16_X,
+ fixup_Hexagon_GOT_11_X,
+ fixup_Hexagon_DTPREL_32_6_X,
+ fixup_Hexagon_DTPREL_16_X,
+ fixup_Hexagon_DTPREL_11_X,
+ fixup_Hexagon_GD_GOT_32_6_X,
+ fixup_Hexagon_GD_GOT_16_X,
+ fixup_Hexagon_GD_GOT_11_X,
+ fixup_Hexagon_LD_GOT_32_6_X,
+ fixup_Hexagon_LD_GOT_16_X,
+ fixup_Hexagon_LD_GOT_11_X,
+ fixup_Hexagon_IE_32_6_X,
+ fixup_Hexagon_IE_16_X,
+ fixup_Hexagon_IE_GOT_32_6_X,
+ fixup_Hexagon_IE_GOT_16_X,
+ fixup_Hexagon_IE_GOT_11_X,
+ fixup_Hexagon_TPREL_32_6_X,
+ fixup_Hexagon_TPREL_16_X,
+ fixup_Hexagon_TPREL_11_X,
+
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+enum FixupBitmaps : unsigned {
+ Word8 = 0xff,
+ Word16 = 0xffff,
+ Word32 = 0xffffffff,
+ Word32_LO = 0x00c03fff,
+ Word32_HL = 0x0, // Not Implemented
+ Word32_GP = 0x0, // Not Implemented
+ Word32_B7 = 0x00001f18,
+ Word32_B9 = 0x003000fe,
+ Word32_B13 = 0x00202ffe,
+ Word32_B15 = 0x00df20fe,
+ Word32_B22 = 0x01ff3ffe,
+ Word32_R6 = 0x000007e0,
+ Word32_U6 = 0x0, // Not Implemented
+ Word32_U16 = 0x0, // Not Implemented
+ Word32_X26 = 0x0fff3fff
+};
+} // namespace Hexagon
+} // namespace llvm
+
+#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
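
Each FixupBitmaps entry marks which bits of the 32-bit instruction word hold the immediate field; applying a relocation means scattering the value's low-order bits into the mask's set positions. A stand-alone sketch of that scatter (plain C++, not the in-tree writer):

#include <cstdint>

// Deposit the low bits of Value into the set bits of Mask, e.g. Mask =
// Word32_B22 (0x01ff3ffe) for a 22-bit PC-relative branch field.
static uint32_t scatterBits(uint32_t Insn, uint32_t Value, uint32_t Mask) {
  uint32_t Result = Insn & ~Mask;       // Clear the field first.
  for (uint32_t Bit = 1; Bit != 0; Bit <<= 1) {
    if (Mask & Bit) {
      if (Value & 1)
        Result |= Bit;                  // Next value bit lands here.
      Value >>= 1;                      // Consume one value bit.
    }
  }
  return Result;
}
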
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 5c6f036..15cda71 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -14,7 +14,7 @@
#include "HexagonAsmPrinter.h"
#include "Hexagon.h"
#include "HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -73,50 +73,46 @@ StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
-StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
+void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
}
-void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
- printInst((const HexagonMCInst*)(MI), O, Annot);
-}
-
-void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
- StringRef Annot) {
+void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
// Ending a hardware loop is different from ending a regular packet.
- assert(MI->isPacketEnd() && "Loop-end must also end the packet");
+ assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet");
- if (MI->isPacketBegin()) {
+ if (HexagonMCInstrInfo::isPacketBegin(*MI)) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
- HexagonMCInst Nop;
+ MCInst Nop;
StringRef NoAnnot;
Nop.setOpcode (Hexagon::A2_nop);
- Nop.setPacketBegin (MI->isPacketBegin());
- printInst (&Nop, O, NoAnnot);
+ HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI));
+ printInst (&Nop, O, NoAnnot, STI);
}
// Close the packet.
- if (MI->isPacketEnd())
+ if (HexagonMCInstrInfo::isPacketEnd(*MI))
O << PacketPadding << endPacket;
printInstruction(MI, O);
}
else {
// Prefix the insn opening the packet.
- if (MI->isPacketBegin())
+ if (HexagonMCInstrInfo::isPacketBegin(*MI))
O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
// Suffix the insn closing the packet.
- if (MI->isPacketEnd())
+ if (HexagonMCInstrInfo::isPacketEnd(*MI))
// Suffix the packet in a new line always, since the GNU assembler has
// issues with a closing brace on the same line as CONST{32,64}.
O << '\n' << PacketPadding << endPacket;
@@ -130,7 +126,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- O << getRegisterName(MO.getReg());
+ printRegName(O, MO.getReg());
} else if(MO.isExpr()) {
O << *MO.getExpr();
} else if(MO.isImm()) {
@@ -192,7 +188,7 @@ void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
- O << getRegisterName(MO0.getReg());
+ printRegName(O, MO0.getReg());
O << " + #" << MO1.getImm();
}
@@ -201,7 +197,8 @@ void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
const MCOperand& MO0 = MI->getOperand(OpNo);
const MCOperand& MO1 = MI->getOperand(OpNo + 1);
- O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm();
+ printRegName(O, MO0.getReg());
+ O << ", #" << MO1.getImm();
}
void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
@@ -252,3 +249,17 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
O << ')';
}
+
+void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ const MCInstrDesc &MII = getMII().get(MI->getOpcode());
+
+ assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
+ "Expecting an extendable operand");
+
+ if (MO.isExpr() || isExtended(MII.TSFlags)) {
+ O << "##";
+ }
+ printOperand(MI, OpNo, O);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 55ae95c..3fedaed 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -18,20 +18,18 @@
#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
- class HexagonMCInst;
-
class HexagonInstPrinter : public MCInstPrinter {
public:
- explicit HexagonInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
+ explicit HexagonInstPrinter(MCAsmInfo const &MAI,
+ MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
- StringRef getRegName(unsigned RegNo) const;
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
@@ -58,6 +56,7 @@ namespace llvm {
void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
const;
void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
void printConstantPool(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 487872a..ae3953a 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -9,8 +9,9 @@
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonFixupKinds.h"
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -35,10 +36,11 @@ namespace {
/// Possible values for instruction packet parse field.
enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };
/// \brief Returns the packet bits based on instruction position.
-uint32_t getPacketBits(HexagonMCInst const &HMI) {
+uint32_t getPacketBits(MCInst const &HMI) {
unsigned const ParseFieldOffset = 14;
- ParseField Field = HMI.isPacketEnd() ? ParseField::end : ParseField::last0;
- return static_cast <uint32_t> (Field) << ParseFieldOffset;
+ ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end
+ : ParseField::last0;
+ return static_cast<uint32_t>(Field) << ParseFieldOffset;
}
void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff);
@@ -49,20 +51,455 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
}
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
- MCSubtargetInfo const &aMST,
MCContext &aMCT)
- : MST(aMST), MCT(aMCT) {}
+ : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
+ Extended(new bool(false)) {}
-void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
- HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI);
- uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB);
- assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit");
+ uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI);
+ assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 &&
+ "All instructions should be 32bit");
+ (void)&MCII;
emitLittleEndian(Binary, OS);
++MCNumEmitted;
}
+static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) {
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI);
+
+ if (insnType == HexagonII::TypePREFIX) {
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
+ default:
+ if (MCID.isBranch())
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ else
+ return Hexagon::fixup_Hexagon_32_6_X;
+ }
+ } else if (MCID.isBranch())
+ return (Hexagon::fixup_Hexagon_B13_PCREL);
+
+ switch (MCID.getOpcode()) {
+ case Hexagon::HI:
+ case Hexagon::A2_tfrih:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_HI16;
+ default:
+ return Hexagon::fixup_Hexagon_HI16;
+ }
+
+ case Hexagon::LO:
+ case Hexagon::A2_tfril:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_LO16;
+ default:
+ return Hexagon::fixup_Hexagon_LO16;
+ }
+
+ // The only relocs left should be GP relative:
+ default:
+ if (MCID.mayStore() || MCID.mayLoad()) {
+ for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ ++ImpUses) {
+ if (*ImpUses == Hexagon::GP) {
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ return fixup_Hexagon_GPREL16_0;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ return fixup_Hexagon_GPREL16_1;
+ case HexagonII::MemAccessSize::WordAccess:
+ return fixup_Hexagon_GPREL16_2;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ return fixup_Hexagon_GPREL16_3;
+ default:
+ llvm_unreachable("unhandled fixup");
+ }
+ }
+ }
+ } else
+ llvm_unreachable("unhandled fixup");
+ }
+
+ return LastTargetFixupKind;
+}
+
+unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ const MCExpr *ME,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const
+
+{
+ int64_t Res;
+
+ if (ME->EvaluateAsAbsolute(Res))
+ return Res;
+
+ MCExpr::ExprKind MK = ME->getKind();
+ if (MK == MCExpr::Constant) {
+ return cast<MCConstantExpr>(ME)->getValue();
+ }
+ if (MK == MCExpr::Binary) {
+ unsigned Res;
+ Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+ Res +=
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+ return Res;
+ }
+
+ assert(MK == MCExpr::SymbolRef);
+
+ Hexagon::Fixups FixupKind =
+ Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16);
+ const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME);
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) -
+ HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind();
+
+ DEBUG(dbgs() << "----------------------------------------\n");
+ DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI)
+ << "\n");
+ DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n");
+ DEBUG(dbgs() << "Relocation bits: " << bits << "\n");
+ DEBUG(dbgs() << "Addend: " << *Addend << "\n");
+ DEBUG(dbgs() << "----------------------------------------\n");
+
+ switch (bits) {
+ default:
+ DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n');
+ break;
+
+ case 32:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_32_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X
+ : Hexagon::fixup_Hexagon_GOTREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_LD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X
+ : Hexagon::fixup_Hexagon_IE_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_IE_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X
+ : Hexagon::fixup_Hexagon_TPREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X
+ : Hexagon::fixup_Hexagon_DTPREL_32;
+ break;
+ default:
+ FixupKind =
+ *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32;
+ break;
+ }
+ break;
+
+ case 22:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
+ break;
+ default:
+ if (MCID.isBranch() || MCID.isCall()) {
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X
+ : Hexagon::fixup_Hexagon_B22_PCREL;
+ } else {
+ errs() << "unrecognized relocation, bits: " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+ }
+ break;
+
+ case 16:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = Hexagon::fixup_Hexagon_IE_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X;
+ break;
+ }
+ } else
+ switch (kind) {
+ default:
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ if ((MCID.getOpcode() == Hexagon::HI) ||
+ (MCID.getOpcode() == Hexagon::LO_H))
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16;
+ else
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL:
+ FixupKind = Hexagon::fixup_Hexagon_GPREL16_0;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LO16:
+ FixupKind = Hexagon::fixup_Hexagon_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_HI16:
+ FixupKind = Hexagon::fixup_Hexagon_HI16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16;
+ break;
+ }
+ break;
+
+ case 15:
+ if (MCID.isBranch() || MCID.isCall())
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X
+ : Hexagon::fixup_Hexagon_B15_PCREL;
+ break;
+
+ case 13:
+ if (MCID.isBranch())
+ FixupKind = Hexagon::fixup_Hexagon_B13_PCREL;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 12:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_12_X;
+ break;
+ // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 11:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 10:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_10_X;
+ break;
+
+ case 9:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X
+ : Hexagon::fixup_Hexagon_B9_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_9_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 8:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_8_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 7:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X
+ : Hexagon::fixup_Hexagon_B7_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_7_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 6:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_6_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X;
+ break;
+        // This is part of an extender; GOT_11 is a
+        // Word32_U6 unsigned/truncated reloc.
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ }
+ } else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 0:
+ FixupKind = getFixupNoBits(MCII, MI, MO, kind);
+ break;
+ }
+
+ MCFixup fixup =
+ MCFixup::create(*Addend, MO.getExpr(), MCFixupKind(FixupKind));
+ Fixups.push_back(fixup);
+ // All of the information is in the fixup.
+ return (0);
+}
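
The switch above only selects a fixup kind: a symbolic operand contributes no
encoding bits itself, and its final value is left for the assembler or linker
to patch. A minimal sketch of the same pattern, using illustrative names
rather than the in-tree helpers and assuming the LLVM MC headers:

  // Sketch: record a relocation for a symbolic operand instead of encoding
  // bits now. MCFixup::create is the real MC-layer entry point; Offset is
  // where the fixup applies within the instruction or packet.
  static unsigned recordExprFixup(const MCOperand &MO, unsigned FixupKind,
                                  uint32_t Offset,
                                  SmallVectorImpl<MCFixup> &Fixups) {
    assert(MO.isExpr() && "expected a symbolic operand");
    Fixups.push_back(
        MCFixup::create(Offset, MO.getExpr(), MCFixupKind(FixupKind)));
    return 0; // no bits encoded; the fixup carries all the information
  }
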
+
unsigned
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -71,18 +508,16 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
return MCT.getRegisterInfo()->getEncodingValue(MO.getReg());
if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- llvm_unreachable("Only Immediates and Registers implemented right now");
-}
-MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
- return MST;
+  // MO must be an expression (MCExpr).
+ assert(MO.isExpr());
+ return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI);
}
MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT) {
- return new HexagonMCCodeEmitter(MII, MST, MCT);
+ return new HexagonMCCodeEmitter(MII, MCT);
}
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 96048ad..939380a 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -26,16 +26,22 @@
namespace llvm {
class HexagonMCCodeEmitter : public MCCodeEmitter {
- MCSubtargetInfo const &MST;
MCContext &MCT;
+ MCInstrInfo const &MCII;
+ std::unique_ptr<unsigned> Addend;
+ std::unique_ptr<bool> Extended;
+
+ // helper routine for getMachineOpValue()
+ unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
+ const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
public:
- HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
- MCContext &aMCT);
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
MCSubtargetInfo const &getSubtargetInfo() const;
- void EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const override;
@@ -51,8 +57,8 @@ public:
MCSubtargetInfo const &STI) const;
private:
- HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
- void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+ HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) = delete;
+ void operator=(HexagonMCCodeEmitter const &) = delete;
}; // class HexagonMCCodeEmitter
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
deleted file mode 100644
index d8b9a25..0000000
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
+++ /dev/null
@@ -1,223 +0,0 @@
-//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some Hexagon VLIW annotations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "HexagonInstrInfo.h"
-#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInst.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-
-using namespace llvm;
-
-std::unique_ptr <MCInstrInfo const> HexagonMCInst::MCII;
-
-HexagonMCInst::HexagonMCInst() : MCInst() {}
-HexagonMCInst::HexagonMCInst(MCInstrDesc const &mcid) : MCInst() {}
-
-void HexagonMCInst::AppendImplicitOperands(MCInst &MCI) {
- MCI.addOperand(MCOperand::CreateImm(0));
- MCI.addOperand(MCOperand::CreateInst(nullptr));
-}
-
-std::bitset<16> HexagonMCInst::GetImplicitBits(MCInst const &MCI) {
- SanityCheckImplicitOperands(MCI);
- std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
- return Bits;
-}
-
-void HexagonMCInst::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
- SanityCheckImplicitOperands(MCI);
- MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
-}
-
-void HexagonMCInst::setPacketBegin(bool f) {
- std::bitset<16> Bits(GetImplicitBits(*this));
- Bits.set(packetBeginIndex, f);
- SetImplicitBits(*this, Bits);
-}
-
-bool HexagonMCInst::isPacketBegin() const {
- std::bitset<16> Bits(GetImplicitBits(*this));
- return Bits.test(packetBeginIndex);
-}
-
-void HexagonMCInst::setPacketEnd(bool f) {
- std::bitset<16> Bits(GetImplicitBits(*this));
- Bits.set(packetEndIndex, f);
- SetImplicitBits(*this, Bits);
-}
-
-bool HexagonMCInst::isPacketEnd() const {
- std::bitset<16> Bits(GetImplicitBits(*this));
- return Bits.test(packetEndIndex);
-}
-
-void HexagonMCInst::resetPacket() {
- setPacketBegin(false);
- setPacketEnd(false);
-}
-
-// Return the slots used by the insn.
-unsigned HexagonMCInst::getUnits(const HexagonTargetMachine *TM) const {
- const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo();
- const InstrItineraryData *II =
- TM->getSubtargetImpl()->getInstrItineraryData();
- const InstrStage *IS =
- II->beginStage(QII->get(this->getOpcode()).getSchedClass());
-
- return (IS->getUnits());
-}
-
-MCInstrDesc const& HexagonMCInst::getDesc() const { return (MCII->get(getOpcode())); }
-
-// Return the Hexagon ISA class for the insn.
-unsigned HexagonMCInst::getType() const {
- const uint64_t F = getDesc().TSFlags;
-
- return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
-}
-
-// Return whether the insn is an actual insn.
-bool HexagonMCInst::isCanon() const {
- return (!getDesc().isPseudo() && !isPrefix() &&
- getType() != HexagonII::TypeENDLOOP);
-}
-
-// Return whether the insn is a prefix.
-bool HexagonMCInst::isPrefix() const {
- return (getType() == HexagonII::TypePREFIX);
-}
-
-// Return whether the insn is solo, i.e., cannot be in a packet.
-bool HexagonMCInst::isSolo() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
-}
-
-// Return whether the insn is a new-value consumer.
-bool HexagonMCInst::isNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
-}
-
-// Return whether the instruction is a legal new-value producer.
-bool HexagonMCInst::hasNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
-}
-
-// Return the operand that consumes or produces a new value.
-const MCOperand &HexagonMCInst::getNewValue() const {
- const uint64_t F = getDesc().TSFlags;
- const unsigned O =
- (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
- const MCOperand &MCO = getOperand(O);
-
- assert((isNewValue() || hasNewValue()) && MCO.isReg());
- return (MCO);
-}
-
-// Return whether the instruction needs to be constant extended.
-// 1) Always return true if the instruction has 'isExtended' flag set.
-//
-// isExtendable:
-// 2) For immediate extended operands, return true only if the value is
-// out-of-range.
-// 3) For global address, always return true.
-
-bool HexagonMCInst::isConstExtended(void) const {
- if (isExtended())
- return true;
-
- if (!isExtendable())
- return false;
-
- short ExtOpNum = getCExtOpNum();
- int MinValue = getMinValue();
- int MaxValue = getMaxValue();
- const MCOperand &MO = getOperand(ExtOpNum);
-
- // We could be using an instruction with an extendable immediate and shoehorn
- // a global address into it. If it is a global address it will be constant
- // extended. We do this for COMBINE.
- // We currently only handle isGlobal() because it is the only kind of
- // object we are going to end up with here for now.
- // In the future we probably should add isSymbol(), etc.
- if (MO.isExpr())
- return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
-
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
-}
-
-// Return whether the instruction must be always extended.
-bool HexagonMCInst::isExtended(void) const {
- const uint64_t F = getDesc().TSFlags;
- return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
-}
-
-// Return true if the instruction may be extended based on the operand value.
-bool HexagonMCInst::isExtendable(void) const {
- const uint64_t F = getDesc().TSFlags;
- return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
-}
-
-// Return number of bits in the constant extended operand.
-unsigned HexagonMCInst::getBitCount(void) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
-}
-
-// Return constant extended operand number.
-unsigned short HexagonMCInst::getCExtOpNum(void) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-
-// Return whether the operand can be constant extended.
-bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
- const uint64_t F = getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
- OperandNum;
-}
-
-// Return the min value that a constant extendable operand can have
-// without being extended.
-int HexagonMCInst::getMinValue(void) const {
- const uint64_t F = getDesc().TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned) // if value is signed
- return -1U << (bits - 1);
- else
- return 0;
-}
-
-// Return the max value that a constant extendable operand can have
-// without being extended.
-int HexagonMCInst::getMaxValue(void) const {
- const uint64_t F = getDesc().TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
- else
- return ~(-1U << bits);
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
deleted file mode 100644
index ce9a8db..0000000
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
-#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
-
-#include "HexagonTargetMachine.h"
-#include "llvm/MC/MCInst.h"
-#include <memory>
-
-extern "C" void LLVMInitializeHexagonTargetMC();
-namespace llvm {
-class MCOperand;
-
-class HexagonMCInst : public MCInst {
- friend void ::LLVMInitializeHexagonTargetMC();
- // Used to access TSFlags
- static std::unique_ptr <MCInstrInfo const> MCII;
-
-public:
- explicit HexagonMCInst();
- HexagonMCInst(const MCInstrDesc &mcid);
-
- static void AppendImplicitOperands(MCInst &MCI);
- static std::bitset<16> GetImplicitBits(MCInst const &MCI);
- static void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
- static void SanityCheckImplicitOperands(MCInst const &MCI) {
- assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
- assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
- "Implicit bits and flags");
- assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
- "Parent pointer");
- }
-
- void setPacketBegin(bool Y);
- bool isPacketBegin() const;
- static const size_t packetBeginIndex = 0;
- void setPacketEnd(bool Y);
- bool isPacketEnd() const;
- static const size_t packetEndIndex = 1;
- void resetPacket();
-
- // Return the slots used by the insn.
- unsigned getUnits(const HexagonTargetMachine *TM) const;
-
- // Return the Hexagon ISA class for the insn.
- unsigned getType() const;
-
- MCInstrDesc const &getDesc() const;
-
- // Return whether the insn is an actual insn.
- bool isCanon() const;
-
- // Return whether the insn is a prefix.
- bool isPrefix() const;
-
- // Return whether the insn is solo, i.e., cannot be in a packet.
- bool isSolo() const;
-
- // Return whether the instruction needs to be constant extended.
- bool isConstExtended() const;
-
- // Return constant extended operand number.
- unsigned short getCExtOpNum(void) const;
-
- // Return whether the insn is a new-value consumer.
- bool isNewValue() const;
-
- // Return whether the instruction is a legal new-value producer.
- bool hasNewValue() const;
-
- // Return the operand that consumes or produces a new value.
- const MCOperand &getNewValue() const;
-
- // Return number of bits in the constant extended operand.
- unsigned getBitCount(void) const;
-
-private:
- // Return whether the instruction must be always extended.
- bool isExtended() const;
-
- // Return true if the insn may be extended based on the operand value.
- bool isExtendable() const;
-
- // Return true if the operand can be constant extended.
- bool isOperandExtended(const unsigned short OperandNum) const;
-
- // Return the min value that a constant extendable operand can have
- // without being extended.
- int getMinValue() const;
-
- // Return the max value that a constant extendable operand can have
- // without being extended.
- int getMaxValue() const;
-};
-}
-
-#endif
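
The two deletions above retire the HexagonMCInst subclass, whose queries
leaned on a static MCInstrInfo installed from LLVMInitializeHexagonTargetMC
via a friend declaration. The replacement, added in the next two files, keeps
plain MCInst and turns the queries into free functions that take the
MCInstrInfo explicitly, removing the hidden global. Call sites migrate
roughly like this (illustrative, not verbatim from the tree):

  // Before: subclass with a hidden static MCII.
  //   HexagonMCInst HMI;
  //   if (HMI.isConstExtended()) ...
  // After: plain MCInst plus an explicit MCInstrInfo.
  //   MCInst MI;
  //   if (HexagonMCInstrInfo::isConstExtended(MCII, MI)) ...
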
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
new file mode 100644
index 0000000..93c7a0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -0,0 +1,248 @@
+//===- HexagonMCInstrInfo.cpp - Hexagon MCInst utility functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon-specific MCInst queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCInstrInfo.h"
+#include "HexagonBaseInfo.h"
+
+namespace llvm {
+void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) {
+ MCI.addOperand(MCOperand::createImm(0));
+ MCI.addOperand(MCOperand::createInst(nullptr));
+}
+
+HexagonII::MemAccessSize
+HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) &
+ HexagonII::MemAccesSizeMask));
+}
+
+unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (MCII.get(MCI.getOpcode()));
+}
+
+unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask);
+}
+
+unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) {
+ SanityCheckImplicitOperands(MCI);
+ std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm());
+ return Bits;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1U << (bits - 1));
+ else
+ return ~(-1U << bits);
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1U << (bits - 1);
+ else
+ return 0;
+}
+
+char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return MCII.getName(MCI.getOpcode());
+}
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned const O =
+ (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
+ !HexagonMCInstrInfo::isPrefix(MCII, MCI) &&
+ HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For immediate extended operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ if (HexagonMCInstrInfo::isExtended(MCII, MCI))
+ return true;
+
+ if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+ return false;
+
+ short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI);
+ int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ MCOperand const &MO = MCI.getOperand(ExtOpNum);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI,
+ unsigned short OperandNum) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
+ OperandNum;
+}
+
+bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ return Bits.test(packetBeginIndex);
+}
+
+bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ return Bits.test(packetEndIndex);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+void HexagonMCInstrInfo::resetPacket(MCInst &MCI) {
+ setPacketBegin(MCI, false);
+ setPacketEnd(MCI, false);
+}
+
+void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
+ SanityCheckImplicitOperands(MCI);
+ MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
+}
+
+void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ Bits.set(packetBeginIndex, f);
+ SetImplicitBits(MCI, Bits);
+}
+
+void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) {
+ std::bitset<16> Bits(GetImplicitBits(MCI));
+ Bits.set(packetEndIndex, f);
+ SetImplicitBits(MCI, Bits);
+}
+}
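
getMinValue and getMaxValue above derive the usual two's-complement ranges
from the ExtentBits field: a signed N-bit extendable operand spans
[-2^(N-1), 2^(N-1)-1] and an unsigned one spans [0, 2^N-1]. The bit tricks
can be checked standalone; note the -1U expressions are computed in unsigned
and then converted to int, which is implementation-defined in strict C++ but
is what these helpers rely on in practice:

  #include <cassert>

  int main() {
    unsigned bits = 8;
    int smin = -1U << (bits - 1);    // 0xFFFFFF80 as int -> -128
    int smax = ~(-1U << (bits - 1)); // 0x0000007F -> 127
    int umax = ~(-1U << bits);       // 0x000000FF -> 255
    assert(smin == -128 && smax == 127 && umax == 255);
    return 0;
  }
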
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
new file mode 100644
index 0000000..082c80d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -0,0 +1,122 @@
+//===- HexagonMCInstrInfo.h - Hexagon MCInst utility functions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon specific MCInst queries
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+#include <bitset>
+
+namespace llvm {
+class MCInstrDesc;
+class MCInstrInfo;
+class MCInst;
+class MCOperand;
+namespace HexagonII {
+enum class MemAccessSize;
+}
+namespace HexagonMCInstrInfo {
+void AppendImplicitOperands(MCInst &MCI);
+
+// Return memory access size
+HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
+ MCInst const &MCI);
+
+// Return number of bits in the constant extended operand.
+unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return constant extended operand number.
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
+
+MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the implicit alignment of the extendable operand
+unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the number of logical bits of the extendable operand
+unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
+
+std::bitset<16> GetImplicitBits(MCInst const &MCI);
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return instruction name
+char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the Hexagon ISA class for the insn.
+unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction is a legal new-value producer.
+bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction needs to be constant extended.
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the insn may be extended based on the operand value.
+bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction must be always extended.
+bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is a new-value consumer.
+bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the operand can be constant extended.
+bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
+ unsigned short OperandNum);
+
+bool isPacketBegin(MCInst const &MCI);
+
+bool isPacketEnd(MCInst const &MCI);
+
+// Return whether the insn is a prefix.
+bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
+
+static const size_t packetBeginIndex = 0;
+static const size_t packetEndIndex = 1;
+
+void resetPacket(MCInst &MCI);
+
+inline void SanityCheckImplicitOperands(MCInst const &MCI) {
+ assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
+  assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
+         "Parent pointer");
+  assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
+         "Implicit bits and flags");
+}
+
+void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+
+void setPacketBegin(MCInst &MCI, bool Y);
+
+void setPacketEnd(MCInst &MCI, bool Y);
+}
+}
+
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
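
The packet markers live in a 16-bit bitset stored in the second-to-last
(immediate) implicit operand that AppendImplicitOperands attaches, with bit
packetBeginIndex marking the start of a packet and packetEndIndex the end.
A short usage sketch, assuming First and Last are already-built MCInsts:

  HexagonMCInstrInfo::AppendImplicitOperands(First);
  HexagonMCInstrInfo::AppendImplicitOperands(Last);
  HexagonMCInstrInfo::setPacketBegin(First, true); // bitset bit 0
  HexagonMCInstrInfo::setPacketEnd(Last, true);    // bitset bit 1
  assert(HexagonMCInstrInfo::isPacketBegin(First) &&
         HexagonMCInstrInfo::isPacketEnd(Last));
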
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index ae5a22b..59395e2 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -14,7 +14,6 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
-#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -36,7 +35,7 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "HexagonGenRegisterInfo.inc"
-static MCInstrInfo *createHexagonMCInstrInfo() {
+MCInstrInfo *llvm::createHexagonMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitHexagonMCInstrInfo(X);
return X;
@@ -48,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCStreamer *
-createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
- MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
- return ES;
-}
-
-
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -76,32 +66,25 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCSubtargetInfo const &STI, bool RelaxAll) {
- MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new MCTargetStreamer(*ES);
- return ES;
-}
-
-
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ X->initMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new HexagonInstPrinter(MAI, MII, MRI);
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+    return new HexagonInstPrinter(MAI, MII, MRI);
+ else
+ return nullptr;
}
// Force static initialization.
@@ -116,7 +99,6 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
createHexagonMCInstrInfo);
- HexagonMCInst::MCII.reset (createHexagonMCInstrInfo());
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
@@ -137,7 +119,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the asm backend
TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
createHexagonAsmBackend);
-
- // Register the obj streamer
- TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
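
With the bespoke ELF streamer and the HexagonMCInst::MCII initialization
gone, MC setup for the target reduces to the stock TargetRegistry hooks, and
the generic MCELFStreamer is used with no target override. The function this
hunk edits ends up with roughly this shape (abridged, not a verbatim copy):

  extern "C" void LLVMInitializeHexagonTargetMC() {
    // Register the standard MC components; no custom object streamer.
    TargetRegistry::RegisterMCAsmInfo(TheHexagonTarget, createHexagonMCAsmInfo);
    TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
                                        createHexagonMCInstrInfo);
    TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
                                      createHexagonMCRegisterInfo);
    TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget,
                                          createHexagonMCCodeEmitter);
    TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
                                         createHexagonAsmBackend);
  }
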
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 02fd516..de63fd2 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,20 +27,22 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheHexagonTarget;
+MCInstrInfo *createHexagonMCInstrInfo();
+
MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
- StringRef CPU);
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, StringRef CPU);
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index acf1214..6c43d978 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 7fae505..70141a9 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -25,7 +25,8 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 4c70803..6bcfb32 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -54,16 +54,15 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new MSP430InstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 586f5d9..241f1d6 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 22a973e..a99c9a3 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -39,8 +39,8 @@ using namespace llvm;
namespace {
class MSP430AsmPrinter : public AsmPrinter {
public:
- MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
+ MSP430AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "MSP430 Assembly Printer";
@@ -152,7 +152,7 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
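
AsmPrinter now owns its streamer: subclasses accept std::unique_ptr<MCStreamer>
by value and move it into the base, and helpers such as EmitToStreamer
dereference the OutStreamer pointer. The ownership idiom in isolation
(standalone C++14, deliberately not the LLVM types):

  #include <cassert>
  #include <memory>
  #include <utility>

  struct Streamer {};

  struct Printer {
    std::unique_ptr<Streamer> Out;
    // Taking the unique_ptr by value and moving it in transfers ownership;
    // the caller's pointer is left null afterwards.
    explicit Printer(std::unique_ptr<Streamer> S) : Out(std::move(S)) {}
  };

  int main() {
    auto S = std::make_unique<Streamer>();
    Printer P(std::move(S));
    assert(!S && P.Out); // P now owns the streamer
    return 0;
  }
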
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index d6cb9f6..eb72080 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -39,8 +39,9 @@ bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
index 1941af2..48c4dc86 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -27,7 +27,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 81c176b..5ce5013 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -92,14 +92,9 @@ namespace {
///
namespace {
class MSP430DAGToDAGISel : public SelectionDAGISel {
- const MSP430TargetLowering &Lowering;
- const MSP430Subtarget &Subtarget;
-
public:
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) {}
+ : SelectionDAGISel(TM, OptLevel) {}
const char *getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
@@ -109,7 +104,7 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -279,18 +274,18 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, 0,
0/*AM.SymbolFlags*/);
else
- Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
+ Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(N), MVT::i16);
return true;
}
bool MSP430DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (!SelectAddr(Op, Op0, Op1))
return true;
break;
@@ -406,10 +401,10 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
if (Node->hasOneUse())
- return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
- TFI, CurDAG->getTargetConstant(0, MVT::i16));
- return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
- TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI,
+ CurDAG->getTargetConstant(0, dl, MVT::i16));
+ return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16, TFI,
+ CurDAG->getTargetConstant(0, dl, MVT::i16));
}
case ISD::LOAD:
if (SDNode *ResNode = SelectIndexedLoad(Node))
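
SelectInlineAsmMemoryOperand now receives a decoded constraint ID rather than
the raw letter, so the target matches InlineAsm::Constraint_m instead of 'm'.
Condensed shape of the override after this change; returning true signals
failure under the SelectionDAGISel contract:

  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) {
    switch (ConstraintID) {
    default:
      return true; // unhandled constraint kind
    case InlineAsm::Constraint_m: { // memory operand
      SDValue Op0, Op1;
      if (!SelectAddr(Op, Op0, Op1))
        return true; // could not form a base+displacement address
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    }
  }
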
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 04bb6d0..bc51741 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -57,7 +57,8 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
"Assume hardware multiplier cannot be used inside interrupts"),
clEnumValEnd));
-MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
+MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
+ const MSP430Subtarget &STI)
: TargetLowering(TM) {
// Set up the register classes.
@@ -65,7 +66,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
addRegisterClass(MVT::i16, &MSP430::GR16RegClass);
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
// Provide all sorts of operation actions
@@ -224,10 +225,10 @@ MSP430TargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-std::pair<unsigned, const TargetRegisterClass*>
-MSP430TargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+MSP430TargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -240,7 +241,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
//===----------------------------------------------------------------------===//
@@ -328,7 +329,7 @@ static void AnalyzeArguments(CCState &State,
if (!UseStack && Parts <= RegsLeft) {
unsigned FirstVal = ValNo;
for (unsigned j = 0; j < Parts; j++) {
- unsigned Reg = State.AllocateReg(RegList, NbRegs);
+ unsigned Reg = State.AllocateReg(RegList);
State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
RegsLeft--;
}
@@ -592,7 +593,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes,
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, dl,
getPointerTy(), true),
dl);
@@ -633,17 +634,19 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset()));
+ DAG.getIntPtrConstant(VA.getLocMemOffset(),
+ dl));
SDValue MemOp;
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isByVal()) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i16);
MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
Flags.getByValAlign(),
/*isVolatile*/false,
/*AlwaysInline=*/true,
+ /*isTailCall=*/false,
MachinePointerInfo(),
MachinePointerInfo());
} else {
@@ -698,8 +701,9 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
+ DAG.getConstant(NumBytes, dl, getPointerTy(),
+ true),
+ DAG.getConstant(0, dl, getPointerTy(), true),
InFlag, dl);
InFlag = Chain.getValue(1);
@@ -841,7 +845,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
// fold constant into instruction.
if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
LHS = RHS;
- RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ RHS = DAG.getConstant(C->getSExtValue() + 1, dl, C->getValueType(0));
TCC = MSP430CC::COND_LO;
break;
}
@@ -854,7 +858,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
// fold constant into instruction.
if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
LHS = RHS;
- RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ RHS = DAG.getConstant(C->getSExtValue() + 1, dl, C->getValueType(0));
TCC = MSP430CC::COND_HS;
break;
}
@@ -867,7 +871,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
// fold constant into instruction.
if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
LHS = RHS;
- RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ RHS = DAG.getConstant(C->getSExtValue() + 1, dl, C->getValueType(0));
TCC = MSP430CC::COND_L;
break;
}
@@ -880,7 +884,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
// fold constant into instruction.
if (const ConstantSDNode * C = dyn_cast<ConstantSDNode>(LHS)) {
LHS = RHS;
- RHS = DAG.getConstant(C->getSExtValue() + 1, C->getValueType(0));
+ RHS = DAG.getConstant(C->getSExtValue() + 1, dl, C->getValueType(0));
TCC = MSP430CC::COND_GE;
break;
}
@@ -888,7 +892,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
break;
}
- TargetCC = DAG.getConstant(TCC, MVT::i8);
+ TargetCC = DAG.getConstant(TCC, dl, MVT::i8);
return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
}
@@ -965,7 +969,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
break;
}
EVT VT = Op.getValueType();
- SDValue One = DAG.getConstant(1, VT);
+ SDValue One = DAG.getConstant(1, dl, VT);
if (Convert) {
SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SR,
MVT::i16, Flag);
@@ -977,13 +981,9 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
return SR;
} else {
- SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(One);
- Ops.push_back(Zero);
- Ops.push_back(TargetCC);
- Ops.push_back(Flag);
+ SDValue Ops[] = {One, Zero, TargetCC, Flag};
return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
}
}
@@ -1001,11 +1001,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(TrueV);
- Ops.push_back(FalseV);
- Ops.push_back(TargetCC);
- Ops.push_back(Flag);
+ SDValue Ops[] = {TrueV, FalseV, TargetCC, Flag};
return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
}
@@ -1054,7 +1050,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(getDataLayout()->getPointerSize(), MVT::i16);
+ DAG.getConstant(getDataLayout()->getPointerSize(), dl, MVT::i16);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
@@ -1135,7 +1131,7 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return false;
Base = Op->getOperand(0);
- Offset = DAG.getConstant(RHSC, VT);
+ Offset = DAG.getConstant(RHSC, SDLoc(N), VT);
AM = ISD::POST_INC;
return true;
}
@@ -1145,8 +1141,8 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((MSP430ISD::NodeType)Opcode) {
+ case MSP430ISD::FIRST_NUMBER: break;
case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
case MSP430ISD::RRA: return "MSP430ISD::RRA";
@@ -1156,10 +1152,13 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SETCC: return "MSP430ISD::SETCC";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
case MSP430ISD::SHL: return "MSP430ISD::SHL";
case MSP430ISD::SRA: return "MSP430ISD::SRA";
+ case MSP430ISD::SRL: return "MSP430ISD::SRL";
}
+ return nullptr;
}
bool MSP430TargetLowering::isTruncateFree(Type *Ty1,
@@ -1201,8 +1200,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo &TII =
- *getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *F->getSubtarget().getInstrInfo();
unsigned Opc;
const TargetRegisterClass * RC;
@@ -1313,8 +1311,7 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
return EmitShiftInstr(MI, BB);
- const TargetInstrInfo &TII =
- *getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
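
The EmitCMP hunks above fold a constant out of the left-hand side by bumping
it: lhs >= rhs with a constant lhs C is rewritten as rhs < C+1 (COND_LO for
unsigned, COND_L for signed), and the lhs < rhs cases fold symmetrically to
rhs >= C+1 (COND_HS, COND_GE). The unsigned identity is easy to verify
exhaustively in standalone C++; it holds whenever C+1 does not wrap:

  #include <cassert>

  int main() {
    const unsigned C = 5;
    for (unsigned x = 0; x <= 255; ++x)
      // "C >= x" (constant on the left) is the same predicate as
      // "x < C + 1" (constant on the right), which COND_LO implements.
      assert((C >= x) == (x < C + 1));
    return 0;
  }
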
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 073ddc9..80d3ae1 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -21,7 +21,7 @@
namespace llvm {
namespace MSP430ISD {
- enum {
+ enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
/// Return with a flag operand. Operand 0 is the chain operand.
@@ -66,9 +66,11 @@ namespace llvm {
};
}
+ class MSP430Subtarget;
class MSP430TargetLowering : public TargetLowering {
public:
- explicit MSP430TargetLowering(const TargetMachine &TM);
+ explicit MSP430TargetLowering(const TargetMachine &TM,
+ const MSP430Subtarget &STI);
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
@@ -95,8 +97,9 @@ namespace llvm {
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
/// isTruncateFree - Return true if it's free to truncate a value of type
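
Typing the MSP430ISD enum as NodeType : unsigned and switching over that type
in getTargetNodeName, with an explicit FIRST_NUMBER case instead of a
default:, lets the compiler's -Wswitch flag any node that later gains no
name. The pattern in miniature, with illustrative names:

  namespace FakeISD { enum { BUILTIN_OP_END = 1000 }; }

  namespace MyISD {
  enum NodeType : unsigned {
    FIRST_NUMBER = FakeISD::BUILTIN_OP_END, // target nodes start past builtins
    RET_FLAG,
    CMP,
  };
  }

  const char *getName(unsigned Opcode) {
    switch ((MyISD::NodeType)Opcode) {
    case MyISD::FIRST_NUMBER: break; // sentinel, not a real node
    case MyISD::RET_FLAG: return "RET_FLAG";
    case MyISD::CMP:      return "CMP";
    // no default: -Wswitch now warns if a NodeType is missing a case
    }
    return nullptr;
  }
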
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index 77b91b7..b039778 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -26,7 +26,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
MCSymbol *MSP430MCInstLower::
@@ -51,7 +50,7 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetJumpTableSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = Printer.TM.getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_'
@@ -63,12 +62,12 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.getOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = Printer.TM.getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_'
@@ -80,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.getOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
@@ -108,7 +107,7 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
@@ -125,13 +124,13 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index 3f88a69..0cfa4a4 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -26,8 +26,7 @@ public:
MSP430RegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass*
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index cb83b92..3dda3bf 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -25,15 +25,14 @@ using namespace llvm;
void MSP430Subtarget::anchor() { }
-MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+MSP430Subtarget &
+MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
ParseSubtargetFeatures("generic", FS);
return *this;
}
MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : MSP430GenSubtargetInfo(TT, CPU, FS),
- // FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- TSInfo(DL) {}
+ : MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TSInfo(*TM.getDataLayout()) {}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
index 58eb07b..30d46d3 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -32,7 +32,6 @@ class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
virtual void anchor();
bool ExtendedInsts;
- const DataLayout DL; // Calculates type size & alignment
MSP430FrameLowering FrameLowering;
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
@@ -55,7 +54,6 @@ public:
return &FrameLowering;
}
const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 66e7503..d6cc4ae 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -15,8 +15,8 @@
#include "MSP430.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -30,8 +30,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ // FIXME: Check DataLayout string.
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
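The data layout string moves from MSP430Subtarget into the LLVMTargetMachine constructor, where it is now a required argument. A commented reading of the string, per the LLVM data layout grammar:

// "e-m:e-p:16:16-i32:16:32-a:16-n8:16"
//   e          little-endian
//   m:e        ELF-style name mangling
//   p:16:16    16-bit pointers with 16-bit alignment
//   i32:16:32  i32 has 16-bit ABI alignment, 32-bit preferred alignment
//   a:16       aggregates are 16-bit aligned
//   n8:16      native integer widths are 8 and 16 bits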
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index 0e54ed6..6ccd30d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -34,7 +34,7 @@ public:
CodeGenOpt::Level OL);
~MSP430TargetMachine() override;
- const MSP430Subtarget *getSubtargetImpl() const override {
+ const MSP430Subtarget *getSubtargetImpl(const Function &F) const override {
return &Subtarget;
}
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 699dfae..aade12b 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsRegisterInfo.h"
@@ -28,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;
@@ -45,14 +47,20 @@ public:
ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
- ATReg = Opts->getATRegNum();
+ ATReg = Opts->getATRegIndex();
Reorder = Opts->isReorder();
Macro = Opts->isMacro();
Features = Opts->getFeatures();
}
- unsigned getATRegNum() const { return ATReg; }
- bool setATReg(unsigned Reg);
+ unsigned getATRegIndex() const { return ATReg; }
+ bool setATRegIndex(unsigned Reg) {
+ if (Reg > 31)
+ return false;
+
+ ATReg = Reg;
+ return true;
+ }
bool isReorder() const { return Reorder; }
void setReorder() { Reorder = true; }
@@ -70,14 +78,7 @@ public:
// The full table can be found in MipsGenSubtargetInfo.inc (MipsFeatureKV[]).
// The reason we need this mask is explained in the selectArch function.
// FIXME: Ideally we would like TableGen to generate this information.
- static const uint64_t AllArchRelatedMask =
- Mips::FeatureMips1 | Mips::FeatureMips2 | Mips::FeatureMips3 |
- Mips::FeatureMips3_32 | Mips::FeatureMips3_32r2 | Mips::FeatureMips4 |
- Mips::FeatureMips4_32 | Mips::FeatureMips4_32r2 | Mips::FeatureMips5 |
- Mips::FeatureMips5_32r2 | Mips::FeatureMips32 | Mips::FeatureMips32r2 |
- Mips::FeatureMips32r6 | Mips::FeatureMips64 | Mips::FeatureMips64r2 |
- Mips::FeatureMips64r6 | Mips::FeatureCnMips | Mips::FeatureFP64Bit |
- Mips::FeatureGP64Bit | Mips::FeatureNaN2008;
+ static const FeatureBitset AllArchRelatedMask;
private:
unsigned ATReg;
@@ -87,6 +88,17 @@ private:
};
}
+const FeatureBitset MipsAssemblerOptions::AllArchRelatedMask = {
+ Mips::FeatureMips1, Mips::FeatureMips2, Mips::FeatureMips3,
+ Mips::FeatureMips3_32, Mips::FeatureMips3_32r2, Mips::FeatureMips4,
+ Mips::FeatureMips4_32, Mips::FeatureMips4_32r2, Mips::FeatureMips5,
+ Mips::FeatureMips5_32r2, Mips::FeatureMips32, Mips::FeatureMips32r2,
+ Mips::FeatureMips32r3, Mips::FeatureMips32r5, Mips::FeatureMips32r6,
+ Mips::FeatureMips64, Mips::FeatureMips64r2, Mips::FeatureMips64r3,
+ Mips::FeatureMips64r5, Mips::FeatureMips64r6, Mips::FeatureCnMips,
+ Mips::FeatureFP64Bit, Mips::FeatureGP64Bit, Mips::FeatureNaN2008
+};
+
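AllArchRelatedMask changing from a uint64_t to a FeatureBitset reflects LLVM outgrowing 64 subtarget features: tests become indexing and clears become set algebra, as the later hunks in this file show. A minimal sketch of the two idioms:

// Old: bit arithmetic on a uint64_t mask
//   if (STI.getFeatureBits() & Mips::FeatureMips32) ...
// New: FeatureBitset is indexed by feature number
FeatureBitset Bits = STI.getFeatureBits();
if (Bits[Mips::FeatureMips32]) {
  // feature enabled
}
Bits &= ~MipsAssemblerOptions::AllArchRelatedMask; // clear all arch-related bits
STI.setFeatureBits(Bits);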
namespace {
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
@@ -95,6 +107,7 @@ class MipsAsmParser : public MCTargetAsmParser {
}
MCSubtargetInfo &STI;
+ MipsABIInfo ABI;
SmallVector<std::unique_ptr<MipsAssemblerOptions>, 2> AssemblerOptions;
MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
// nullptr, which indicates that no function is currently
@@ -150,6 +163,9 @@ class MipsAsmParser : public MCTargetAsmParser {
parseRegisterPair (OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy
+ parseMovePRegPair(OperandVector &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
parseRegisterList (OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
@@ -163,21 +179,41 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+ bool expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ bool loadImmediate(int64_t ImmValue, unsigned DstReg, unsigned SrcReg,
+ bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+ bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ bool expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+ bool expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
+ void expandLoadAddressSym(const MCOperand &DstRegOp, const MCOperand &SymOp,
+ bool Is32BitSym, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
+
+ bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
+ void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -195,6 +231,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
+ bool parseInsnDirective();
bool parseSetAtDirective();
bool parseSetNoAtDirective();
@@ -221,6 +258,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive);
+ bool parseInternalDirectiveReallowModule();
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool eatComma(StringRef ErrorStr);
@@ -245,7 +284,10 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getGPR(int RegNo);
- int getATReg(SMLoc Loc);
+ /// Returns the internal register number for the current AT. Also checks if
+ /// the current AT is unavailable (set to $0) and gives an error if it is.
+ /// This should be used in pseudo-instruction expansions which need AT.
+ unsigned getATReg(SMLoc Loc);
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -278,7 +320,7 @@ class MipsAsmParser : public MCTargetAsmParser {
// FeatureMipsGP64 | FeatureMips1)
// Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4).
void selectArch(StringRef ArchFeature) {
- uint64_t FeatureBits = STI.getFeatureBits();
+ FeatureBitset FeatureBits = STI.getFeatureBits();
FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask;
STI.setFeatureBits(FeatureBits);
setAvailableFeatures(
@@ -287,7 +329,7 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (!(STI.getFeatureBits() & Feature)) {
+ if (!(STI.getFeatureBits()[Feature])) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
@@ -295,7 +337,7 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (STI.getFeatureBits() & Feature) {
+ if (STI.getFeatureBits()[Feature]) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
@@ -313,9 +355,13 @@ public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti) {
+ : MCTargetAsmParser(), STI(sti),
+ ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()),
+ sti.getCPU(), Options)) {
MCAsmParserExtension::Initialize(parser);
+ parser.addAliasForDirective(".asciiz", ".asciz");
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -329,12 +375,6 @@ public:
getTargetStreamer().updateABIInfo(*this);
- // Assert exactly one ABI was chosen.
- assert((((STI.getFeatureBits() & Mips::FeatureO32) != 0) +
- ((STI.getFeatureBits() & Mips::FeatureEABI) != 0) +
- ((STI.getFeatureBits() & Mips::FeatureN32) != 0) +
- ((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1);
-
if (!isABI_O32() && !useOddSPReg() != 0)
report_fatal_error("-mno-odd-spreg requires the O32 ABI");
@@ -344,55 +384,76 @@ public:
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
- bool isGP64bit() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
- bool isFP64bit() const { return STI.getFeatureBits() & Mips::FeatureFP64Bit; }
- bool isABI_N32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
- bool isABI_N64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
- bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
- bool isABI_FPXX() const { return STI.getFeatureBits() & Mips::FeatureFPXX; }
+ bool isGP64bit() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
+ bool isFP64bit() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
+ const MipsABIInfo &getABI() const { return ABI; }
+ bool isABI_N32() const { return ABI.IsN32(); }
+ bool isABI_N64() const { return ABI.IsN64(); }
+ bool isABI_O32() const { return ABI.IsO32(); }
+ bool isABI_FPXX() const { return STI.getFeatureBits()[Mips::FeatureFPXX]; }
bool useOddSPReg() const {
- return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg);
+ return !(STI.getFeatureBits()[Mips::FeatureNoOddSPReg]);
}
bool inMicroMipsMode() const {
- return STI.getFeatureBits() & Mips::FeatureMicroMips;
+ return STI.getFeatureBits()[Mips::FeatureMicroMips];
}
- bool hasMips1() const { return STI.getFeatureBits() & Mips::FeatureMips1; }
- bool hasMips2() const { return STI.getFeatureBits() & Mips::FeatureMips2; }
- bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
- bool hasMips4() const { return STI.getFeatureBits() & Mips::FeatureMips4; }
- bool hasMips5() const { return STI.getFeatureBits() & Mips::FeatureMips5; }
+ bool hasMips1() const { return STI.getFeatureBits()[Mips::FeatureMips1]; }
+ bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
+ bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
+ bool hasMips4() const { return STI.getFeatureBits()[Mips::FeatureMips4]; }
+ bool hasMips5() const { return STI.getFeatureBits()[Mips::FeatureMips5]; }
bool hasMips32() const {
- return (STI.getFeatureBits() & Mips::FeatureMips32);
+ return STI.getFeatureBits()[Mips::FeatureMips32];
}
bool hasMips64() const {
- return (STI.getFeatureBits() & Mips::FeatureMips64);
+ return STI.getFeatureBits()[Mips::FeatureMips64];
}
bool hasMips32r2() const {
- return (STI.getFeatureBits() & Mips::FeatureMips32r2);
+ return STI.getFeatureBits()[Mips::FeatureMips32r2];
}
bool hasMips64r2() const {
- return (STI.getFeatureBits() & Mips::FeatureMips64r2);
+ return STI.getFeatureBits()[Mips::FeatureMips64r2];
+ }
+ bool hasMips32r3() const {
+ return (STI.getFeatureBits()[Mips::FeatureMips32r3]);
+ }
+ bool hasMips64r3() const {
+ return (STI.getFeatureBits()[Mips::FeatureMips64r3]);
+ }
+ bool hasMips32r5() const {
+ return (STI.getFeatureBits()[Mips::FeatureMips32r5]);
+ }
+ bool hasMips64r5() const {
+ return (STI.getFeatureBits()[Mips::FeatureMips64r5]);
}
bool hasMips32r6() const {
- return (STI.getFeatureBits() & Mips::FeatureMips32r6);
+ return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
bool hasMips64r6() const {
- return (STI.getFeatureBits() & Mips::FeatureMips64r6);
+ return STI.getFeatureBits()[Mips::FeatureMips64r6];
+ }
+
+ bool hasDSP() const { return STI.getFeatureBits()[Mips::FeatureDSP]; }
+ bool hasDSPR2() const { return STI.getFeatureBits()[Mips::FeatureDSPR2]; }
+ bool hasMSA() const { return STI.getFeatureBits()[Mips::FeatureMSA]; }
+ bool hasCnMips() const {
+ return (STI.getFeatureBits()[Mips::FeatureCnMips]);
}
- bool hasDSP() const { return (STI.getFeatureBits() & Mips::FeatureDSP); }
- bool hasDSPR2() const { return (STI.getFeatureBits() & Mips::FeatureDSPR2); }
- bool hasMSA() const { return (STI.getFeatureBits() & Mips::FeatureMSA); }
bool inMips16Mode() const {
- return STI.getFeatureBits() & Mips::FeatureMips16;
+ return STI.getFeatureBits()[Mips::FeatureMips16];
+ }
+
+ bool useSoftFloat() const {
+ return STI.getFeatureBits()[Mips::FeatureSoftFloat];
}
- // TODO: see how can we get this info.
- bool abiUsesSoftFloat() const { return false; }
- /// Warn if RegNo is the current assembler temporary.
- void warnIfAssemblerTemporary(int RegNo, SMLoc Loc);
+ /// Warn if RegIndex is the same as the current AT.
+ void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc);
+
+ void warnIfNoMacro(SMLoc Loc);
};
}
@@ -501,7 +562,7 @@ public:
/// target.
unsigned getGPR32Reg() const {
assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- AsmParser.warnIfAssemblerTemporary(RegIdx.Index, StartLoc);
+ AsmParser.warnIfRegIndexIsAT(RegIdx.Index, StartLoc);
unsigned ClassID = Mips::GPR32RegClassID;
return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
}
@@ -643,11 +704,11 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediate when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -659,17 +720,22 @@ public:
/// is not a k_RegisterIndex compatible with RegKind_GPR
void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getGPR32Reg()));
+ Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
}
void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getGPRMM16Reg()));
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
}
void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getGPRMM16Reg()));
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
+ void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
}
/// Render the operand to an MCInst as a GPR64
@@ -677,22 +743,22 @@ public:
/// is not a k_RegisterIndex compatible with RegKind_GPR
void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getGPR64Reg()));
+ Inst.addOperand(MCOperand::createReg(getGPR64Reg()));
}
void addAFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getAFGR64Reg()));
+ Inst.addOperand(MCOperand::createReg(getAFGR64Reg()));
}
void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getFGR64Reg()));
+ Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
}
void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getFGR32Reg()));
+ Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
// FIXME: We ought to do this for -integrated-as without -via-file-asm too.
if (!AsmParser.useOddSPReg() && RegIdx.Index & 1)
AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
@@ -701,57 +767,57 @@ public:
void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getFGRH32Reg()));
+ Inst.addOperand(MCOperand::createReg(getFGRH32Reg()));
}
void addFCCAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getFCCReg()));
+ Inst.addOperand(MCOperand::createReg(getFCCReg()));
}
void addMSA128AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMSA128Reg()));
+ Inst.addOperand(MCOperand::createReg(getMSA128Reg()));
}
void addMSACtrlAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMSACtrlReg()));
+ Inst.addOperand(MCOperand::createReg(getMSACtrlReg()));
}
void addCOP2AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getCOP2Reg()));
+ Inst.addOperand(MCOperand::createReg(getCOP2Reg()));
}
void addCOP3AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getCOP3Reg()));
+ Inst.addOperand(MCOperand::createReg(getCOP3Reg()));
}
void addACC64DSPAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getACC64DSPReg()));
+ Inst.addOperand(MCOperand::createReg(getACC64DSPReg()));
}
void addHI32DSPAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getHI32DSPReg()));
+ Inst.addOperand(MCOperand::createReg(getHI32DSPReg()));
}
void addLO32DSPAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getLO32DSPReg()));
+ Inst.addOperand(MCOperand::createReg(getLO32DSPReg()));
}
void addCCRAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getCCRReg()));
+ Inst.addOperand(MCOperand::createReg(getCCRReg()));
}
void addHWRegsAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getHWRegsReg()));
+ Inst.addOperand(MCOperand::createReg(getHWRegsReg()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -763,7 +829,7 @@ public:
void addMemOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBase()->getGPR32Reg()));
+ Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR32Reg()));
const MCExpr *Expr = getMemOff();
addExpr(Inst, Expr);
@@ -772,7 +838,7 @@ public:
void addMicroMipsMemOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBase()->getGPRMM16Reg()));
+ Inst.addOperand(MCOperand::createReg(getMemBase()->getGPRMM16Reg()));
const MCExpr *Expr = getMemOff();
addExpr(Inst, Expr);
@@ -782,14 +848,20 @@ public:
assert(N == 1 && "Invalid number of operands!");
for (auto RegNo : getRegList())
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(RegNo));
}
void addRegPairOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
unsigned RegNo = getRegPair();
- Inst.addOperand(MCOperand::CreateReg(RegNo++));
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(RegNo++));
+ Inst.addOperand(MCOperand::createReg(RegNo));
+ }
+
+ void addMovePRegPairOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ for (auto RegNo : getRegList())
+ Inst.addOperand(MCOperand::createReg(RegNo));
}
bool isReg() const override {
@@ -856,6 +928,25 @@ public:
return 1 <= Val && Val <= 4;
}
bool isRegList() const { return Kind == k_RegList; }
+ bool isMovePRegPair() const {
+ if (Kind != k_RegList || RegList.List->size() != 2)
+ return false;
+
+ unsigned R0 = RegList.List->front();
+ unsigned R1 = RegList.List->back();
+
+ if ((R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3) ||
+ (R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3))
+ return true;
+
+ return false;
+ }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
@@ -1009,9 +1100,7 @@ public:
assert (Regs.size() > 0 && "Empty list not allowed");
auto Op = make_unique<MipsOperand>(k_RegList, Parser);
- Op->RegList.List = new SmallVector<unsigned, 10>();
- for (auto Reg : Regs)
- Op->RegList.List->push_back(Reg);
+ Op->RegList.List = new SmallVector<unsigned, 10>(Regs.begin(), Regs.end());
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
return Op;
@@ -1042,6 +1131,12 @@ public:
(RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
RegIdx.Index == 17);
}
+ bool isMM16AsmRegMoveP() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) ||
+ (RegIdx.Index >= 16 && RegIdx.Index <= 20));
+ }
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -1101,14 +1196,14 @@ public:
switch (Kind) {
case k_Immediate:
OS << "Imm<";
- Imm.Val->print(OS);
+ OS << *Imm.Val;
OS << ">";
break;
case k_Memory:
OS << "Mem<";
Mem.Base->print(OS);
OS << ", ";
- Mem.Off->print(OS);
+ OS << *Mem.Off;
OS << ">";
break;
case k_PhysRegister:
@@ -1167,6 +1262,13 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
switch (Opcode) {
default:
break;
+ case Mips::BBIT0:
+ case Mips::BBIT032:
+ case Mips::BBIT1:
+ case Mips::BBIT132:
+ assert(hasCnMips() && "instruction only valid for octeon cpus");
+ // Fall through
+
case Mips::BEQ:
case Mips::BNE:
case Mips::BEQ_MM:
@@ -1229,23 +1331,72 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
"nop instruction");
}
- if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
- // If this instruction has a delay slot and .set reorder is active,
- // emit a NOP after it.
- Instructions.push_back(Inst);
- MCInst NopInst;
- if (hasShortDelaySlot(Inst.getOpcode())) {
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- } else {
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateImm(0));
+ if (hasCnMips()) {
+ const unsigned Opcode = Inst.getOpcode();
+ MCOperand Opnd;
+ int Imm;
+
+ switch (Opcode) {
+ default:
+ break;
+
+ case Mips::BBIT0:
+ case Mips::BBIT032:
+ case Mips::BBIT1:
+ case Mips::BBIT132:
+ assert(MCID.getNumOperands() == 3 && "unexpected number of operands");
+ // The offset is handled above
+ Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > (Opcode == Mips::BBIT0 ||
+ Opcode == Mips::BBIT1 ? 63 : 31))
+ return Error(IDLoc, "immediate operand value out of range");
+ if (Imm > 31) {
+ Inst.setOpcode(Opcode == Mips::BBIT0 ? Mips::BBIT032
+ : Mips::BBIT132);
+ Inst.getOperand(1).setImm(Imm - 32);
+ }
+ break;
+
+ case Mips::CINS:
+ case Mips::CINS32:
+ case Mips::EXTS:
+ case Mips::EXTS32:
+ assert(MCID.getNumOperands() == 4 && "unexpected number of operands");
+ // Check length
+ Opnd = Inst.getOperand(3);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > 31)
+ return Error(IDLoc, "immediate operand value out of range");
+ // Check position
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 0 || Imm > (Opcode == Mips::CINS ||
+ Opcode == Mips::EXTS ? 63 : 31))
+ return Error(IDLoc, "immediate operand value out of range");
+ if (Imm > 31) {
+ Inst.setOpcode(Opcode == Mips::CINS ? Mips::CINS32 : Mips::EXTS32);
+ Inst.getOperand(2).setImm(Imm - 32);
+ }
+ break;
+
+ case Mips::SEQi:
+ case Mips::SNEi:
+ assert(MCID.getNumOperands() == 3 && "unexpected number of operands");
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!isInt<10>(Imm))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
}
- Instructions.push_back(NopInst);
- return false;
}
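// Worked example of the new Octeon-only checks (a sketch): "bbit1 $4, 40, target"
// has Imm == 40, which exceeds 31, so the opcode is rewritten to BBIT132 and the
// bit index rebased: bbit132 $4, 8, target  (40 - 32 == 8).
// CINS/EXTS rebase their position operand onto CINS32/EXTS32 the same way.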
if (MCID.mayLoad() || MCID.mayStore()) {
@@ -1282,8 +1433,38 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
} // for
} // if load/store
- // TODO: Handle this with the AsmOperandClass.PredicateMethod.
if (inMicroMipsMode()) {
+ if (MCID.mayLoad()) {
+ // Try to create 16-bit GP relative load instruction.
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ MCOperand &DstReg = Inst.getOperand(0);
+ MCOperand &BaseReg = Inst.getOperand(1);
+ if (isIntN(9, MemOffset) && (MemOffset % 4 == 0) &&
+ getContext().getRegisterInfo()->getRegClass(
+ Mips::GPRMM16RegClassID).contains(DstReg.getReg()) &&
+ BaseReg.getReg() == Mips::GP) {
+ MCInst TmpInst;
+ TmpInst.setLoc(IDLoc);
+ TmpInst.setOpcode(Mips::LWGP_MM);
+ TmpInst.addOperand(MCOperand::createReg(DstReg.getReg()));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createImm(MemOffset));
+ Instructions.push_back(TmpInst);
+ return false;
+ }
+ }
+ }
+ } // for
+ } // if load
+
+ // TODO: Handle this with the AsmOperandClass.PredicateMethod.
+
MCOperand Opnd;
int Imm;
@@ -1396,24 +1577,43 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (!isUInt<5>(Imm))
return Error(IDLoc, "immediate operand value out of range");
break;
+ case Mips::ADDIUPC_MM:
+ MCOperand Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ int Imm = Opnd.getImm();
+ if ((Imm % 4 != 0) || !isIntN(25, Imm))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
}
}
- if (needsExpansion(Inst))
- return expandInstruction(Inst, IDLoc, Instructions);
- else
+ if (needsExpansion(Inst)) {
+ if (expandInstruction(Inst, IDLoc, Instructions))
+ return true;
+ } else
Instructions.push_back(Inst);
+ // If this instruction has a delay slot and .set reorder is active,
+ // emit a NOP after it.
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
+ createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
+
return false;
}
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch (Inst.getOpcode()) {
- case Mips::LoadImm32Reg:
- case Mips::LoadAddr32Imm:
- case Mips::LoadAddr32Reg:
- case Mips::LoadImm64Reg:
+ case Mips::LoadImm32:
+ case Mips::LoadImm64:
+ case Mips::LoadAddrImm32:
+ case Mips::LoadAddrReg32:
+ case Mips::B_MM_Pseudo:
+ case Mips::LWM_MM:
+ case Mips::SWM_MM:
+ case Mips::JalOneReg:
+ case Mips::JalTwoReg:
return true;
default:
return false;
@@ -1424,256 +1624,326 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
default: llvm_unreachable("unimplemented expansion");
- case Mips::LoadImm32Reg:
- return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadImm64Reg:
- if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a 64-bit architecture");
- return true;
- }
- return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Imm:
- return expandLoadAddressImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Reg:
- return expandLoadAddressReg(Inst, IDLoc, Instructions);
+ case Mips::LoadImm32:
+ return expandLoadImm(Inst, true, IDLoc, Instructions);
+ case Mips::LoadImm64:
+ return expandLoadImm(Inst, false, IDLoc, Instructions);
+ case Mips::LoadAddrImm32:
+ return expandLoadAddressImm(Inst, true, IDLoc, Instructions);
+ case Mips::LoadAddrReg32:
+ return expandLoadAddressReg(Inst, true, IDLoc, Instructions);
+ case Mips::B_MM_Pseudo:
+ return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions);
+ case Mips::SWM_MM:
+ case Mips::LWM_MM:
+ return expandLoadStoreMultiple(Inst, IDLoc, Instructions);
+ case Mips::JalOneReg:
+ case Mips::JalTwoReg:
+ return expandJalWithRegs(Inst, IDLoc, Instructions);
}
}
namespace {
-template <bool PerformShift>
-void createShiftOr(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+template <unsigned ShiftAmount>
+void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
- if (PerformShift) {
+ if (ShiftAmount >= 32) {
+ tmpInst.setOpcode(Mips::DSLL32);
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createImm(ShiftAmount - 32));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ } else if (ShiftAmount > 0) {
tmpInst.setOpcode(Mips::DSLL);
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(MCOperand::CreateImm(16));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createImm(ShiftAmount));
tmpInst.setLoc(IDLoc);
Instructions.push_back(tmpInst);
tmpInst.clear();
}
+ // There's no need for an ORi if the immediate is 0.
+ if (Operand.isImm() && Operand.getImm() == 0)
+ return;
+
tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
tmpInst.addOperand(Operand);
tmpInst.setLoc(IDLoc);
Instructions.push_back(tmpInst);
}
-template <int Shift, bool PerformShift>
-void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- createShiftOr<PerformShift>(
- MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)), RegNo,
- IDLoc, Instructions);
+template <unsigned ShiftAmount>
+void createLShiftOri(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ createLShiftOri<ShiftAmount>(MCOperand::createImm(Value), RegNo, IDLoc,
+ Instructions);
}
}
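createLShiftOri replaces createShiftOr's bool flag with an explicit shift amount and now skips the ORi for zero immediates. A sketch of what one call emits (register and value illustrative):

// createLShiftOri<16>(0x1234, Reg, ...) appends, in order:
//   dsll Reg, Reg, 16          // from the ShiftAmount > 0 branch
//   ori  Reg, Reg, 0x1234      // omitted entirely when the immediate is 0
// A ShiftAmount of 32 or more emits dsll32 with (ShiftAmount - 32) instead.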
-bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // Create a JALR instruction which is going to replace the pseudo-JAL.
+ MCInst JalrInst;
+ JalrInst.setLoc(IDLoc);
+ const MCOperand FirstRegOp = Inst.getOperand(0);
+ const unsigned Opcode = Inst.getOpcode();
+
+ if (Opcode == Mips::JalOneReg) {
+ // jal $rs => jalr $rs
+ if (inMicroMipsMode()) {
+ JalrInst.setOpcode(Mips::JALR16_MM);
+ JalrInst.addOperand(FirstRegOp);
+ } else {
+ JalrInst.setOpcode(Mips::JALR);
+ JalrInst.addOperand(MCOperand::createReg(Mips::RA));
+ JalrInst.addOperand(FirstRegOp);
+ }
+ } else if (Opcode == Mips::JalTwoReg) {
+ // jal $rd, $rs => jalr $rd, $rs
+ JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ JalrInst.addOperand(FirstRegOp);
+ const MCOperand SecondRegOp = Inst.getOperand(1);
+ JalrInst.addOperand(SecondRegOp);
+ }
+ Instructions.push_back(JalrInst);
+
+ // If .set reorder is active, emit a NOP after it.
+ if (AssemblerOptions.back()->isReorder()) {
+ // This is a 32-bit NOP because these 2 pseudo-instructions
+ // do not have a short delay slot.
+ MCInst NopInst;
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::createImm(0));
+ Instructions.push_back(NopInst);
+ }
+
+ return false;
+}
+
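In assembly terms, the two JAL pseudos handled above expand as follows (a sketch):

// jal $rs       =>  jalr $ra, $rs    (jalr16 $rs in microMIPS mode)
// jal $rd, $rs  =>  jalr $rd, $rs    (JALR_MM in microMIPS mode)
// Under ".set reorder", a 32-bit NOP (sll $zero, $zero, 0) fills the delay
// slot, since neither pseudo has a short delay slot.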
+bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
+ unsigned SrcReg, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
+ if (!Is32BitImm && !isGP64bit()) {
+ Error(IDLoc, "instruction requires a 64-bit architecture");
+ return true;
+ }
+
+ bool UseSrcReg = false;
+ if (SrcReg != Mips::NoRegister)
+ UseSrcReg = true;
+
MCInst tmpInst;
- const MCOperand &ImmOp = Inst.getOperand(1);
- assert(ImmOp.isImm() && "expected immediate operand kind");
- const MCOperand &RegOp = Inst.getOperand(0);
- assert(RegOp.isReg() && "expected register operand kind");
- int64_t ImmValue = ImmOp.getImm();
tmpInst.setLoc(IDLoc);
// FIXME: gas has a special case for values that are 000...1111, which
// becomes a li -1 and then a dsrl
if (0 <= ImmValue && ImmValue <= 65535) {
- // For 0 <= j <= 65535.
+ // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
// li d,j => ori d,$zero,j
+ if (!UseSrcReg)
+ SrcReg = isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(SrcReg));
+ tmpInst.addOperand(MCOperand::createImm(ImmValue));
Instructions.push_back(tmpInst);
} else if (ImmValue < 0 && ImmValue >= -32768) {
- // For -32768 <= j < 0.
+ // For negative signed 16-bit values (-32768 <= j < 0):
// li d,j => addiu d,$zero,j
+ if (!UseSrcReg)
+ SrcReg = Mips::ZERO;
tmpInst.setOpcode(Mips::ADDiu);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(SrcReg));
+ tmpInst.addOperand(MCOperand::createImm(ImmValue));
Instructions.push_back(tmpInst);
- } else if ((ImmValue & 0xffffffff) == ImmValue) {
- // For any value of j that is representable as a 32-bit integer, create
- // a sequence of:
+ } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
+ warnIfNoMacro(IDLoc);
+
+ // For all other values which are representable as a 32-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
- Instructions.push_back(tmpInst);
- createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
+ uint16_t Bits15To0 = ImmValue & 0xffff;
+
+ if (!Is32BitImm && !isInt<32>(ImmValue)) {
+ // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
+ // upper 32 bits.
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::createImm(Bits31To16));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+ // Move the value to the upper 16 bits by doing a 16-bit left shift.
+ createLShiftOri<16>(0, DstReg, IDLoc, Instructions);
+ } else {
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createImm(Bits31To16));
+ Instructions.push_back(tmpInst);
+ }
+ createLShiftOri<0>(Bits15To0, DstReg, IDLoc, Instructions);
+
+ if (UseSrcReg)
+ createAddu(DstReg, DstReg, SrcReg, Instructions);
+
} else if ((ImmValue & (0xffffLL << 48)) == 0) {
- if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a 64-bit architecture");
+ if (Is32BitImm) {
+ Error(IDLoc, "instruction requires a 32-bit immediate");
return true;
}
+ warnIfNoMacro(IDLoc);
// <------- lo32 ------>
// <------- hi32 ------>
// <- hi16 -> <- lo16 ->
// _________________________________
// | | | |
- // | 16-bytes | 16-bytes | 16-bytes |
+ // | 16-bits | 16-bits | 16-bits |
// |__________|__________|__________|
//
- // For any value of j that is representable as a 48-bit integer, create
- // a sequence of:
+ // For any 64-bit value that is representable as a 48-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,hi16(lo32(j))
// dsll d,d,16
// ori d,d,lo16(lo32(j))
+ uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
+ uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
+ uint16_t Bits15To0 = ImmValue & 0xffff;
+
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(
- MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createImm(Bits47To32));
Instructions.push_back(tmpInst);
- createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
- createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createLShiftOri<0>(Bits31To16, DstReg, IDLoc, Instructions);
+ createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+
+ if (UseSrcReg)
+ createAddu(DstReg, DstReg, SrcReg, Instructions);
+
} else {
- if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a 64-bit architecture");
+ if (Is32BitImm) {
+ Error(IDLoc, "instruction requires a 32-bit immediate");
return true;
}
+ warnIfNoMacro(IDLoc);
// <------- hi32 ------> <------- lo32 ------>
// <- hi16 -> <- lo16 ->
// ___________________________________________
// | | | | |
- // | 16-bytes | 16-bytes | 16-bytes | 16-bytes |
+ // | 16-bits | 16-bits | 16-bits | 16-bits |
// |__________|__________|__________|__________|
//
- // For any value of j that isn't representable as a 48-bit integer.
+ // For all other values which are representable as a 64-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(hi32(j))
// dsll d,d,16
// ori d,d,hi16(lo32(j))
// dsll d,d,16
// ori d,d,lo16(lo32(j))
+ uint16_t Bits63To48 = (ImmValue >> 48) & 0xffff;
+ uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
+ uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
+ uint16_t Bits15To0 = ImmValue & 0xffff;
+
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(
- MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48));
+ tmpInst.addOperand(MCOperand::createReg(DstReg));
+ tmpInst.addOperand(MCOperand::createImm(Bits63To48));
Instructions.push_back(tmpInst);
- createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
- createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
- createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createLShiftOri<0>(Bits47To32, DstReg, IDLoc, Instructions);
+
+ // When Bits31To16 is 0, do a left shift of 32 bits instead of doing
+ // two left shifts of 16 bits.
+ if (Bits31To16 == 0) {
+ createLShiftOri<32>(Bits15To0, DstReg, IDLoc, Instructions);
+ } else {
+ createLShiftOri<16>(Bits31To16, DstReg, IDLoc, Instructions);
+ createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+ }
+
+ if (UseSrcReg)
+ createAddu(DstReg, DstReg, SrcReg, Instructions);
}
return false;
}
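A worked example of the size classes loadImmediate distinguishes, following the expansion comments above (the trailing addu appears only when a source register is supplied):

// li  $4, 0x8000          =>  ori   $4, $zero, 0x8000      // 0..65535
// li  $4, -100            =>  addiu $4, $zero, -100        // -32768..-1
// li  $4, 0x12345678      =>  lui   $4, 0x1234             // other 32-bit
//                             ori   $4, $4, 0x5678
// dli $4, 0x123456789ABC  =>  lui   $4, 0x1234             // 48-bit
//                             ori   $4, $4, 0x5678
//                             dsll  $4, $4, 16
//                             ori   $4, $4, 0x9ABC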
+bool MipsAsmParser::expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCOperand &ImmOp = Inst.getOperand(1);
+ assert(ImmOp.isImm() && "expected immediate operand kind");
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
+ if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
+ Is32BitImm, IDLoc, Instructions))
+ return true;
+
+ return false;
+}
+
bool
-MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
const MCOperand &ImmOp = Inst.getOperand(2);
assert((ImmOp.isImm() || ImmOp.isExpr()) &&
"expected immediate operand kind");
if (!ImmOp.isImm()) {
- expandLoadAddressSym(Inst, IDLoc, Instructions);
+ expandLoadAddressSym(DstRegOp, ImmOp, Is32BitImm, IDLoc, Instructions);
return false;
}
const MCOperand &SrcRegOp = Inst.getOperand(1);
assert(SrcRegOp.isReg() && "expected register operand kind");
- const MCOperand &DstRegOp = Inst.getOperand(0);
- assert(DstRegOp.isReg() && "expected register operand kind");
- int ImmValue = ImmOp.getImm();
- if (-32768 <= ImmValue && ImmValue <= 65535) {
- // For -32768 <= j <= 65535.
- // la d,j(s) => addiu d,s,j
- tmpInst.setOpcode(Mips::ADDiu);
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
- Instructions.push_back(tmpInst);
- } else {
- // For any other value of j that is representable as a 32-bit integer.
- // la d,j(s) => lui d,hi16(j)
- // ori d,d,lo16(j)
- // addu d,d,s
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
- Instructions.push_back(tmpInst);
- tmpInst.clear();
- tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
- Instructions.push_back(tmpInst);
- tmpInst.clear();
- tmpInst.setOpcode(Mips::ADDu);
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
- Instructions.push_back(tmpInst);
- }
+
+ if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(),
+ Is32BitImm, IDLoc, Instructions))
+ return true;
+
return false;
}
bool
-MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+
const MCOperand &ImmOp = Inst.getOperand(1);
assert((ImmOp.isImm() || ImmOp.isExpr()) &&
"expected immediate operand kind");
if (!ImmOp.isImm()) {
- expandLoadAddressSym(Inst, IDLoc, Instructions);
+ expandLoadAddressSym(DstRegOp, ImmOp, Is32BitImm, IDLoc, Instructions);
return false;
}
- const MCOperand &RegOp = Inst.getOperand(0);
- assert(RegOp.isReg() && "expected register operand kind");
- int ImmValue = ImmOp.getImm();
- if (-32768 <= ImmValue && ImmValue <= 65535) {
- // For -32768 <= j <= 65535.
- // la d,j => addiu d,$zero,j
- tmpInst.setOpcode(Mips::ADDiu);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
- Instructions.push_back(tmpInst);
- } else {
- // For any other value of j that is representable as a 32-bit integer.
- // la d,j => lui d,hi16(j)
- // ori d,d,lo16(j)
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
- Instructions.push_back(tmpInst);
- tmpInst.clear();
- tmpInst.setOpcode(Mips::ORi);
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
- Instructions.push_back(tmpInst);
- }
+
+ if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
+ Is32BitImm, IDLoc, Instructions))
+ return true;
+
return false;
}
-void
-MipsAsmParser::expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- // FIXME: If we do have a valid at register to use, we should generate a
- // slightly shorter sequence here.
+void MipsAsmParser::expandLoadAddressSym(
+ const MCOperand &DstRegOp, const MCOperand &SymOp, bool Is32BitSym,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ warnIfNoMacro(IDLoc);
+
+ if (Is32BitSym && isABI_N64())
+ Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
+
MCInst tmpInst;
- int ExprOperandNo = 1;
- // Sometimes the assembly parser will get the immediate expression as
- // a $zero + an immediate.
- if (Inst.getNumOperands() == 3) {
- assert(Inst.getOperand(1).getReg() ==
- (isGP64bit() ? Mips::ZERO_64 : Mips::ZERO));
- ExprOperandNo = 2;
- }
- const MCOperand &SymOp = Inst.getOperand(ExprOperandNo);
- assert(SymOp.isExpr() && "expected symbol operand kind");
- const MCOperand &RegOp = Inst.getOperand(0);
- unsigned RegNo = RegOp.getReg();
+ unsigned RegNo = DstRegOp.getReg();
const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr());
const MCSymbolRefExpr *HiExpr =
MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
@@ -1681,7 +1951,7 @@ MipsAsmParser::expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
const MCSymbolRefExpr *LoExpr =
MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
- if (isGP64bit()) {
+ if (!Is32BitSym) {
// If it's a 64-bit architecture, expand to:
// la d,sym => lui d,highest(sym)
// ori d,d,higher(sym)
@@ -1697,28 +1967,67 @@ MipsAsmParser::expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(MCOperand::CreateExpr(HighestExpr));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
Instructions.push_back(tmpInst);
- createShiftOr<false>(MCOperand::CreateExpr(HigherExpr), RegNo, SMLoc(),
- Instructions);
- createShiftOr<true>(MCOperand::CreateExpr(HiExpr), RegNo, SMLoc(),
+ createLShiftOri<0>(MCOperand::createExpr(HigherExpr), RegNo, SMLoc(),
+ Instructions);
+ createLShiftOri<16>(MCOperand::createExpr(HiExpr), RegNo, SMLoc(),
Instructions);
- createShiftOr<true>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
+ createLShiftOri<16>(MCOperand::createExpr(LoExpr), RegNo, SMLoc(),
Instructions);
} else {
// Otherwise, expand to:
// la d,sym => lui d,hi16(sym)
// ori d,d,lo16(sym)
tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ tmpInst.addOperand(MCOperand::createReg(RegNo));
+ tmpInst.addOperand(MCOperand::createExpr(HiExpr));
Instructions.push_back(tmpInst);
- createShiftOr<false>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
- Instructions);
+ createLShiftOri<0>(MCOperand::createExpr(LoExpr), RegNo, SMLoc(),
+ Instructions);
+ }
+}
+
+bool MipsAsmParser::expandUncondBranchMMPseudo(
+ MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ assert(getInstDesc(Inst.getOpcode()).getNumOperands() == 1 &&
+ "unexpected number of operands");
+
+ MCOperand Offset = Inst.getOperand(0);
+ if (Offset.isExpr()) {
+ Inst.clear();
+ Inst.setOpcode(Mips::BEQ_MM);
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ Inst.addOperand(MCOperand::createExpr(Offset.getExpr()));
+ } else {
+ assert(Offset.isImm() && "expected immediate operand kind");
+ if (isIntN(11, Offset.getImm())) {
+ // If offset fits into 11 bits then this instruction becomes microMIPS
+ // 16-bit unconditional branch instruction.
+ Inst.setOpcode(Mips::B16_MM);
+ } else {
+ if (!isIntN(17, Offset.getImm()))
+ Error(IDLoc, "branch target out of range");
+ if (OffsetToAlignment(Offset.getImm(), 1LL << 1))
+ Error(IDLoc, "branch to misaligned address");
+ Inst.clear();
+ Inst.setOpcode(Mips::BEQ_MM);
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ Inst.addOperand(MCOperand::createImm(Offset.getImm()));
+ }
}
+ Instructions.push_back(Inst);
+
+ // If .set reorder is active, emit a NOP after the branch instruction.
+ if (AssemblerOptions.back()->isReorder())
+ createNop(true, IDLoc, Instructions);
+
+ return false;
}
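The microMIPS unconditional-branch pseudo selects its encoding by offset width; a sketch of the decision implemented above:

// b offset:
//   fits in 11 signed bits  =>  B16_MM (16-bit encoding)
//   fits in 17 signed bits  =>  BEQ_MM $zero, $zero, offset
//   otherwise               =>  error: branch target out of range
// Odd offsets are rejected as misaligned, and ".set reorder" appends a
// short-delay-slot NOP after the branch.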
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
@@ -1779,29 +2088,27 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
TmpRegNum = RegOpNum;
else {
- int AT = getATReg(IDLoc);
// At this point we need AT to perform the expansions and we exit if it is
// not available.
- if (!AT)
+ TmpRegNum = getATReg(IDLoc);
+ if (!TmpRegNum)
return;
- TmpRegNum = getReg(
- (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
}
TempInst.setOpcode(Mips::LUi);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::createReg(TmpRegNum));
if (isImmOpnd)
- TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ TempInst.addOperand(MCOperand::createImm(HiOffset));
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
getContext());
- TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ TempInst.addOperand(MCOperand::createExpr(HiExpr));
} else {
const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
- TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ TempInst.addOperand(MCOperand::createExpr(HiExpr));
}
}
// Add the instruction to the list.
@@ -1809,34 +2116,86 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
// Prepare TempInst for next instruction.
TempInst.clear();
// Add temp register to base.
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
+ if (BaseRegNum != Mips::ZERO) {
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::createReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::createReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::createReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ }
// And finally, create original instruction with low part
// of offset and new base.
TempInst.setOpcode(Inst.getOpcode());
- TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::createReg(RegOpNum));
+ TempInst.addOperand(MCOperand::createReg(TmpRegNum));
if (isImmOpnd)
- TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ TempInst.addOperand(MCOperand::createImm(LoOffset));
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
getContext());
- TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ TempInst.addOperand(MCOperand::createExpr(LoExpr));
} else {
const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
- TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ TempInst.addOperand(MCOperand::createExpr(LoExpr));
}
}
Instructions.push_back(TempInst);
TempInst.clear();
}
+bool
+MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ unsigned OpNum = Inst.getNumOperands();
+ unsigned Opcode = Inst.getOpcode();
+ unsigned NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM32_MM : Mips::LWM32_MM;
+
+ assert (Inst.getOperand(OpNum - 1).isImm() &&
+ Inst.getOperand(OpNum - 2).isReg() &&
+ Inst.getOperand(OpNum - 3).isReg() && "Invalid instruction operand.");
+
+ if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 &&
+ Inst.getOperand(OpNum - 1).getImm() >= 0 &&
+ Inst.getOperand(OpNum - 2).getReg() == Mips::SP &&
+ Inst.getOperand(OpNum - 3).getReg() == Mips::RA)
+ // It can be implemented as SWM16 or LWM16 instruction.
+ NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM;
+
+ Inst.setOpcode(NewOpcode);
+ Instructions.push_back(Inst);
+ return false;
+}
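The 16-bit LWM/SWM encodings are chosen only under narrow conditions; a sketch of the test above:

// lwm/swm reglist, offset(base) selects LWM16_MM/SWM16_MM only when the
// register list has fewer than six entries and ends in $ra, the base is
// $sp, and 0 <= offset <= 60; anything else uses LWM32_MM/SWM32_MM.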
+
+void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst NopInst;
+ if (hasShortDelaySlot) {
+ NopInst.setOpcode(Mips::MOVE16_MM);
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ } else {
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::createImm(0));
+ }
+ Instructions.push_back(NopInst);
+}
+
+void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
+ unsigned TrgReg,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst AdduInst;
+ AdduInst.setOpcode(Mips::ADDu);
+ AdduInst.addOperand(MCOperand::createReg(DstReg));
+ AdduInst.addOperand(MCOperand::createReg(SrcReg));
+ AdduInst.addOperand(MCOperand::createReg(TrgReg));
+ Instructions.push_back(AdduInst);
+}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
@@ -1893,15 +2252,15 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-void MipsAsmParser::warnIfAssemblerTemporary(int RegIndex, SMLoc Loc) {
- if ((RegIndex != 0) &&
- ((int)AssemblerOptions.back()->getATRegNum() == RegIndex)) {
- if (RegIndex == 1)
- Warning(Loc, "used $at without \".set noat\"");
- else
- Warning(Loc, Twine("used $") + Twine(RegIndex) + " with \".set at=$" +
- Twine(RegIndex) + "\"");
- }
+void MipsAsmParser::warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc) {
+ if (RegIndex != 0 && AssemblerOptions.back()->getATRegIndex() == RegIndex)
+ Warning(Loc, "used $at (currently $" + Twine(RegIndex) +
+ ") without \".set noat\"");
+}
+
+void MipsAsmParser::warnIfNoMacro(SMLoc Loc) {
+ if (!AssemblerOptions.back()->isMacro())
+ Warning(Loc, "macro instruction expanded into multiple instructions");
}
void
@@ -2075,19 +2434,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
-bool MipsAssemblerOptions::setATReg(unsigned Reg) {
- if (Reg > 31)
- return false;
-
- ATReg = Reg;
- return true;
-}
-
-int MipsAsmParser::getATReg(SMLoc Loc) {
- int AT = AssemblerOptions.back()->getATRegNum();
- if (AT == 0)
+unsigned MipsAsmParser::getATReg(SMLoc Loc) {
+ unsigned ATIndex = AssemblerOptions.back()->getATRegIndex();
+ if (ATIndex == 0) {
reportParseError(Loc,
"pseudo-instruction requires $at, which is not available");
+ return 0;
+ }
+ unsigned AT = getReg(
+ (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex);
return AT;
}
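getATReg now resolves the configured AT index to a physical register (GPR32 or GPR64 class as appropriate) and reports the diagnostic itself, so callers reduce to a sketch like:

unsigned ATReg = getATReg(IDLoc); // 0 if ".set noat" made $at unavailable
if (!ATReg)
  return;                         // error already reported by getATReg
// ... use ATReg directly as a physical register ...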
@@ -2147,7 +2502,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
+ MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier);
// Otherwise create a symbol reference.
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
@@ -2271,7 +2626,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
if (Tok.isNot(AsmToken::Identifier))
return true;
- std::string Str = Tok.getIdentifier().str();
+ std::string Str = Tok.getIdentifier();
Parser.Lex(); // Eat the identifier.
// Now make an expression from the rest of the operand.
@@ -2448,7 +2803,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
SMLoc S = Parser.getTok().getLoc();
const MCExpr *Expr;
@@ -2791,6 +3146,45 @@ MipsAsmParser::parseRegisterPair(OperandVector &Operands) {
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseMovePRegPair(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
+ SmallVector<unsigned, 10> Regs;
+
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
+ unsigned RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
+ Regs.push_back(RegNo);
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(E, "',' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ // Remove comma.
+ Parser.Lex();
+
+ if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
+ RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
+ Regs.push_back(RegNo);
+
+ Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
+
+ return MatchOperand_Success;
+}
+
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK =
@@ -2953,67 +3347,84 @@ bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
bool MipsAsmParser::parseSetNoAtDirective() {
MCAsmParser &Parser = getParser();
// Line should look like: ".set noat".
- // set at reg to 0.
- AssemblerOptions.back()->setATReg(0);
- // eat noat
- Parser.Lex();
+
+ // Set the $at register to $0.
+ AssemblerOptions.back()->setATRegIndex(0);
+
+ Parser.Lex(); // Eat "noat".
+
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}
+
+ getTargetStreamer().emitDirectiveSetNoAt();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetAtDirective() {
+ // Line can be: ".set at", which sets $at to $1
+ // or ".set at=$reg", which sets $at to $reg.
MCAsmParser &Parser = getParser();
- // Line can be .set at - defaults to $1
- // or .set at=$reg
- int AtRegNo;
- getParser().Lex();
+ Parser.Lex(); // Eat "at".
+
if (getLexer().is(AsmToken::EndOfStatement)) {
- AssemblerOptions.back()->setATReg(1);
+ // No register was specified, so we set $at to $1.
+ AssemblerOptions.back()->setATRegIndex(1);
+
+ getTargetStreamer().emitDirectiveSetAt();
Parser.Lex(); // Consume the EndOfStatement.
return false;
- } else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); // Eat the '='.
- if (getLexer().isNot(AsmToken::Dollar)) {
- reportParseError("unexpected token, expected dollar sign '$'");
+ }
+
+ if (getLexer().isNot(AsmToken::Equal)) {
+ reportParseError("unexpected token, expected equals sign");
+ return false;
+ }
+ Parser.Lex(); // Eat "=".
+
+ if (getLexer().isNot(AsmToken::Dollar)) {
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ reportParseError("no register specified");
return false;
- }
- Parser.Lex(); // Eat the '$'.
- const AsmToken &Reg = Parser.getTok();
- if (Reg.is(AsmToken::Identifier)) {
- AtRegNo = matchCPURegisterName(Reg.getIdentifier());
- } else if (Reg.is(AsmToken::Integer)) {
- AtRegNo = Reg.getIntVal();
} else {
- reportParseError("unexpected token, expected identifier or integer");
- return false;
- }
-
- if (AtRegNo < 0 || AtRegNo > 31) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected dollar sign '$'");
return false;
}
+ }
+ Parser.Lex(); // Eat "$".
- if (!AssemblerOptions.back()->setATReg(AtRegNo)) {
- reportParseError("invalid register");
- return false;
- }
- getParser().Lex(); // Eat the register.
+ // Find out what "reg" is.
+ unsigned AtRegNo;
+ const AsmToken &Reg = Parser.getTok();
+ if (Reg.is(AsmToken::Identifier)) {
+ AtRegNo = matchCPURegisterName(Reg.getIdentifier());
+ } else if (Reg.is(AsmToken::Integer)) {
+ AtRegNo = Reg.getIntVal();
+ } else {
+ reportParseError("unexpected token, expected identifier or integer");
+ return false;
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token, expected end of statement");
- return false;
- }
- Parser.Lex(); // Consume the EndOfStatement.
+ // Check if $reg is a valid register. If it is, set $at to $reg.
+ if (!AssemblerOptions.back()->setATRegIndex(AtRegNo)) {
+ reportParseError("invalid register");
return false;
- } else {
- reportParseError("unexpected token in statement");
+ }
+ Parser.Lex(); // Eat "reg".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
return false;
}
+
+ getTargetStreamer().emitDirectiveSetAtWithArg(AtRegNo);
+
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
}
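For reference, the three directive forms this parser accepts (standard GAS-style syntax):

    .set noat       # $at unavailable; pseudo-instructions that need it error out
    .set at         # restore the default assembler temporary, $1
    .set at=$2      # let macro expansions use $2 as the assembler temporary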
bool MipsAsmParser::parseSetReorderDirective() {
@@ -3053,6 +3464,7 @@ bool MipsAsmParser::parseSetMacroDirective() {
return false;
}
AssemblerOptions.back()->setMacro();
+ getTargetStreamer().emitDirectiveSetMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
@@ -3070,6 +3482,7 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
return false;
}
AssemblerOptions.back()->setNoMacro();
+ getTargetStreamer().emitDirectiveSetNoMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
@@ -3223,11 +3636,7 @@ bool MipsAsmParser::parseSetAssignment() {
if (Parser.parseExpression(Value))
return reportParseError("expected valid expression after comma");
- // Check if the Name already exists as a symbol.
- MCSymbol *Sym = getContext().LookupSymbol(Name);
- if (Sym)
- return reportParseError("symbol already defined");
- Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
Sym->setVariableValue(Value);
return false;
@@ -3267,9 +3676,13 @@ bool MipsAsmParser::parseSetArchDirective() {
.Case("mips5", "mips5")
.Case("mips32", "mips32")
.Case("mips32r2", "mips32r2")
+ .Case("mips32r3", "mips32r3")
+ .Case("mips32r5", "mips32r5")
.Case("mips32r6", "mips32r6")
.Case("mips64", "mips64")
.Case("mips64r2", "mips64r2")
+ .Case("mips64r3", "mips64r3")
+ .Case("mips64r5", "mips64r5")
.Case("mips64r6", "mips64r6")
.Case("cnmips", "cnmips")
.Case("r4000", "mips3") // This is an implementation of Mips3.
@@ -3327,6 +3740,14 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
selectArch("mips32r2");
getTargetStreamer().emitDirectiveSetMips32R2();
break;
+ case Mips::FeatureMips32r3:
+ selectArch("mips32r3");
+ getTargetStreamer().emitDirectiveSetMips32R3();
+ break;
+ case Mips::FeatureMips32r5:
+ selectArch("mips32r5");
+ getTargetStreamer().emitDirectiveSetMips32R5();
+ break;
case Mips::FeatureMips32r6:
selectArch("mips32r6");
getTargetStreamer().emitDirectiveSetMips32R6();
@@ -3339,6 +3760,14 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
selectArch("mips64r2");
getTargetStreamer().emitDirectiveSetMips64R2();
break;
+ case Mips::FeatureMips64r3:
+ selectArch("mips64r3");
+ getTargetStreamer().emitDirectiveSetMips64R3();
+ break;
+ case Mips::FeatureMips64r5:
+ selectArch("mips64r5");
+ getTargetStreamer().emitDirectiveSetMips64R5();
+ break;
case Mips::FeatureMips64r6:
selectArch("mips64r6");
getTargetStreamer().emitDirectiveSetMips64R6();
@@ -3443,12 +3872,20 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
if (!eatComma("unexpected token, expected comma"))
return true;
- StringRef Name;
- if (Parser.parseIdentifier(Name))
- reportParseError("expected identifier");
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Expr;
+ if (Parser.parseExpression(Expr)) {
+ reportParseError("expected expression");
+ return false;
+ }
- getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, *Sym, SaveIsReg);
+ if (Expr->getKind() != MCExpr::SymbolRef) {
+ reportParseError("expected symbol");
+ return false;
+ }
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
+
+ getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, Ref->getSymbol(),
+ SaveIsReg);
return false;
}
@@ -3524,12 +3961,20 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetFeature(Mips::FeatureMips32);
} else if (Tok.getString() == "mips32r2") {
return parseSetFeature(Mips::FeatureMips32r2);
+ } else if (Tok.getString() == "mips32r3") {
+ return parseSetFeature(Mips::FeatureMips32r3);
+ } else if (Tok.getString() == "mips32r5") {
+ return parseSetFeature(Mips::FeatureMips32r5);
} else if (Tok.getString() == "mips32r6") {
return parseSetFeature(Mips::FeatureMips32r6);
} else if (Tok.getString() == "mips64") {
return parseSetFeature(Mips::FeatureMips64);
} else if (Tok.getString() == "mips64r2") {
return parseSetFeature(Mips::FeatureMips64r2);
+ } else if (Tok.getString() == "mips64r3") {
+ return parseSetFeature(Mips::FeatureMips64r3);
+ } else if (Tok.getString() == "mips64r5") {
+ return parseSetFeature(Mips::FeatureMips64r5);
} else if (Tok.getString() == "mips64r6") {
return parseSetFeature(Mips::FeatureMips64r6);
} else if (Tok.getString() == "dsp") {
@@ -3652,6 +4097,23 @@ bool MipsAsmParser::parseDirectiveOption() {
return false;
}
+/// parseInsnDirective
+/// ::= .insn
+bool MipsAsmParser::parseInsnDirective() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ // The actual label marking happens in
+ // MipsELFStreamer::createPendingLabelRelocs().
+ getTargetStreamer().emitDirectiveInsn();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
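For context, `.insn` tells the assembler that the data emitted at the current point is actually an instruction, so the pending label is annotated as code (used for microMIPS/MIPS16 symbol annotation). A typical use, with the encoding here being the classic `jr $ra`:

    trampoline:
            .insn
            .word 0x03e00008   # raw jr $ra encoding, treated as code, not data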
+
/// parseDirectiveModule
/// ::= .module oddspreg
/// ::= .module nooddspreg
@@ -3836,7 +4298,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
- MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
getTargetStreamer().emitDirectiveEnt(*Sym);
CurrentFn = Sym;
@@ -4042,9 +4504,28 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".module")
return parseDirectiveModule();
+ if (IDVal == ".llvm_internal_mips_reallow_module_directive")
+ return parseInternalDirectiveReallowModule();
+
+ if (IDVal == ".insn")
+ return parseInsnDirective();
+
return true;
}
+bool MipsAsmParser::parseInternalDirectiveReallowModule() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().reallowModuleDirective();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index d42b948..eb97c93 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -30,41 +30,22 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
-/// A disassembler class for Mips.
-class MipsDisassemblerBase : public MCDisassembler {
+class MipsDisassembler : public MCDisassembler {
+ bool IsMicroMips;
+ bool IsBigEndian;
public:
- MipsDisassemblerBase(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool IsBigEndian)
+ MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool IsBigEndian)
: MCDisassembler(STI, Ctx),
- IsGP64Bit(STI.getFeatureBits() & Mips::FeatureGP64Bit),
+ IsMicroMips(STI.getFeatureBits()[Mips::FeatureMicroMips]),
IsBigEndian(IsBigEndian) {}
- virtual ~MipsDisassemblerBase() {}
-
- bool isGP64Bit() const { return IsGP64Bit; }
-
-private:
- bool IsGP64Bit;
-protected:
- bool IsBigEndian;
-};
-
-/// A disassembler class for Mips32.
-class MipsDisassembler : public MipsDisassemblerBase {
- bool IsMicroMips;
-public:
- MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian)
- : MipsDisassemblerBase(STI, Ctx, bigEndian) {
- IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
- }
-
- bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
- bool hasMips32() const { return STI.getFeatureBits() & Mips::FeatureMips32; }
+ bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
+ bool hasMips32() const { return STI.getFeatureBits()[Mips::FeatureMips32]; }
bool hasMips32r6() const {
- return STI.getFeatureBits() & Mips::FeatureMips32r6;
+ return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
- bool isGP64() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
+ bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
bool hasCOP3() const {
// Only present in MIPS-I and MIPS-II
@@ -77,19 +58,6 @@ public:
raw_ostream &CStream) const override;
};
-/// A disassembler class for Mips64.
-class Mips64Disassembler : public MipsDisassemblerBase {
-public:
- Mips64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool bigEndian) :
- MipsDisassemblerBase(STI, Ctx, bigEndian) {}
-
- DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
- raw_ostream &CStream) const override;
-};
-
} // end anonymous namespace
// Forward declare these because the autogenerated code will reference them.
@@ -114,6 +82,11 @@ static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -235,6 +208,13 @@ static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTarget10MM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
@@ -287,6 +267,16 @@ static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -375,6 +365,9 @@ static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeUImm5lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
@@ -419,6 +412,10 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -438,20 +435,6 @@ static MCDisassembler *createMipselDisassembler(
return new MipsDisassembler(STI, Ctx, false);
}
-static MCDisassembler *createMips64Disassembler(
- const Target &T,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new Mips64Disassembler(STI, Ctx, true);
-}
-
-static MCDisassembler *createMips64elDisassembler(
- const Target &T,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new Mips64Disassembler(STI, Ctx, false);
-}
-
extern "C" void LLVMInitializeMipsDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(TheMipsTarget,
@@ -459,15 +442,15 @@ extern "C" void LLVMInitializeMipsDisassembler() {
TargetRegistry::RegisterMCDisassembler(TheMipselTarget,
createMipselDisassembler);
TargetRegistry::RegisterMCDisassembler(TheMips64Target,
- createMips64Disassembler);
+ createMipsDisassembler);
TargetRegistry::RegisterMCDisassembler(TheMips64elTarget,
- createMips64elDisassembler);
+ createMipselDisassembler);
}
#include "MipsGenDisassemblerTables.inc"
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
- const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
+ const MipsDisassembler *Dis = static_cast<const MipsDisassembler*>(D);
const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
return *(RegInfo->getRegClass(RC).begin() + RegNo);
}
@@ -507,13 +490,13 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
return MCDisassembler::Fail;
// $n
tmp = fieldFromInstruction(insn, 16, NSize);
- MI.addOperand(MCOperand::CreateImm(tmp));
+ MI.addOperand(MCOperand::createImm(tmp));
// $ws
tmp = fieldFromInstruction(insn, 11, 5);
if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail)
return MCDisassembler::Fail;
// $n2
- MI.addOperand(MCOperand::CreateImm(0));
+ MI.addOperand(MCOperand::createImm(0));
return MCDisassembler::Success;
}
@@ -547,12 +530,12 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BEQZALC);
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -586,12 +569,12 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BNEZALC);
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -628,13 +611,13 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
}
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -672,13 +655,13 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
}
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -720,14 +703,14 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
}
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
if (HasRt)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -764,12 +747,12 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
}
if (HasRs)
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
- MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
- MI.addOperand(MCOperand::CreateImm(Imm));
+ MI.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -854,10 +837,17 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
- DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
- this, STI);
+ if (hasMips32r6()) {
+ DEBUG(dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32r632, Instr, Insn, Address,
+ this, STI);
+ } else {
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
+ this, STI);
+ }
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -899,39 +889,19 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
}
- DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
- // Calling the auto-generated decoder function.
- Result =
- decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
+ if (isGP64()) {
+ DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n");
+ Result = decodeInstruction(DecoderTableMips6432, Instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
}
- return MCDisassembler::Fail;
-}
-
-DecodeStatus Mips64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size,
- ArrayRef<uint8_t> Bytes,
- uint64_t Address,
- raw_ostream &VStream,
- raw_ostream &CStream) const {
- uint32_t Insn;
-
- DecodeStatus Result =
- readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
- if (Result == MCDisassembler::Fail)
- return MCDisassembler::Fail;
-
+ DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result =
- decodeInstruction(DecoderTableMips6432, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
- // If we fail to decode in Mips64 decoder space we can try in Mips32
- Result =
decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -959,7 +929,7 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPR64RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -970,7 +940,7 @@ static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -981,7 +951,18 @@ static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, Mips::GPRMM16MovePRegClassID, RegNo);
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -992,7 +973,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1000,7 +981,7 @@ static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
- if (static_cast<const MipsDisassembler *>(Decoder)->isGP64Bit())
+ if (static_cast<const MipsDisassembler *>(Decoder)->isGP64())
return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder);
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
@@ -1021,7 +1002,7 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1033,7 +1014,7 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1044,7 +1025,7 @@ static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1055,7 +1036,7 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::FCCRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1066,7 +1047,7 @@ static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1083,12 +1064,12 @@ static DecodeStatus DecodeMem(MCInst &Inst,
if(Inst.getOpcode() == Mips::SC ||
Inst.getOpcode() == Mips::SCD){
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
}
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1103,9 +1084,9 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
- Inst.addOperand(MCOperand::CreateImm(Hint));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Hint));
return MCDisassembler::Success;
}
@@ -1120,9 +1101,9 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
- Inst.addOperand(MCOperand::CreateImm(Hint));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Hint));
return MCDisassembler::Success;
}
@@ -1137,9 +1118,9 @@ static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
- Inst.addOperand(MCOperand::CreateImm(Hint));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Hint));
return MCDisassembler::Success;
}
@@ -1153,8 +1134,8 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1168,8 +1149,8 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
Reg = getReg(Decoder, Mips::MSA128BRegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
// The immediate field of an LD/ST instruction is scaled which means it must
// be multiplied (when decoding) by the size (in bytes) of the instructions'
@@ -1186,19 +1167,19 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
break;
case Mips::LD_B:
case Mips::ST_B:
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::LD_H:
case Mips::ST_H:
- Inst.addOperand(MCOperand::CreateImm(Offset * 2));
+ Inst.addOperand(MCOperand::createImm(Offset * 2));
break;
case Mips::LD_W:
case Mips::ST_W:
- Inst.addOperand(MCOperand::CreateImm(Offset * 4));
+ Inst.addOperand(MCOperand::createImm(Offset * 4));
break;
case Mips::LD_D:
case Mips::ST_D:
- Inst.addOperand(MCOperand::CreateImm(Offset * 8));
+ Inst.addOperand(MCOperand::createImm(Offset * 8));
break;
}
@@ -1237,20 +1218,20 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
switch (Inst.getOpcode()) {
case Mips::LBU16_MM:
if (Offset == 0xf)
- Inst.addOperand(MCOperand::CreateImm(-1));
+ Inst.addOperand(MCOperand::createImm(-1));
else
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::SB16_MM:
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::LHU16_MM:
case Mips::SH16_MM:
- Inst.addOperand(MCOperand::CreateImm(Offset << 1));
+ Inst.addOperand(MCOperand::createImm(Offset << 1));
break;
case Mips::LW16_MM:
case Mips::SW16_MM:
- Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+ Inst.addOperand(MCOperand::createImm(Offset << 2));
break;
}
@@ -1266,9 +1247,41 @@ static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Mips::SP));
- Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Mips::SP));
+ Inst.addOperand(MCOperand::createImm(Offset << 2));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Offset = Insn & 0x7F;
+ unsigned Reg = fieldFromInstruction(Insn, 7, 3);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createImm(Offset << 2));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<4>(Insn & 0xf);
+
+ if (DecodeRegListOperand16(Inst, Insn, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(Mips::SP));
+ Inst.addOperand(MCOperand::createImm(Offset << 2));
return MCDisassembler::Success;
}
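The two decoders above differ only in how the immediate field is treated: the GP-relative form scales an unsigned 7-bit field by 4, while the register-list form sign-extends a 4-bit field before scaling. A standalone sketch of that arithmetic, independent of the LLVM helpers:

    #include <cstdint>

    // Unsigned 7-bit word offset, as in DecodeMemMMGPImm7Lsl2.
    int32_t gpImm7Lsl2(uint32_t insn) { return (insn & 0x7F) << 2; }

    // Signed 4-bit word offset, as in DecodeMemMMReglistImm4Lsl2.
    int32_t reglistImm4Lsl2(uint32_t insn) {
      int32_t off = static_cast<int32_t>(insn & 0xF);
      if (off & 0x8)
        off -= 16;    // sign-extend from bit 3
      return off * 4; // scale by the word size; avoids shifting a negative
    }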
@@ -1290,19 +1303,19 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
if (DecodeRegListOperand(Inst, Insn, Address, Decoder)
== MCDisassembler::Fail)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::SC_MM:
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
// fallthrough
default:
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
if (Inst.getOpcode() == Mips::LWP_MM || Inst.getOpcode() == Mips::SWP_MM)
- Inst.addOperand(MCOperand::CreateReg(Reg+1));
+ Inst.addOperand(MCOperand::createReg(Reg+1));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
}
return MCDisassembler::Success;
@@ -1319,9 +1332,9 @@ static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1337,9 +1350,9 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1355,9 +1368,9 @@ static DecodeStatus DecodeFMem2(MCInst &Inst,
Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1373,9 +1386,9 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
Reg = getReg(Decoder, Mips::COP3RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1391,9 +1404,9 @@ static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1409,12 +1422,12 @@ static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
if(Inst.getOpcode() == Mips::SC_R6 || Inst.getOpcode() == Mips::SCD_R6){
- Inst.addOperand(MCOperand::CreateReg(Rt));
+ Inst.addOperand(MCOperand::createReg(Rt));
}
- Inst.addOperand(MCOperand::CreateReg(Rt));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::createReg(Rt));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
}
@@ -1426,7 +1439,7 @@ static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
// Currently only hardware register 29 is supported.
if (RegNo != 29)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(Mips::HWR29));
+ Inst.addOperand(MCOperand::createReg(Mips::HWR29));
return MCDisassembler::Success;
}
@@ -1439,7 +1452,7 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
;
unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo /2);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1451,7 +1464,7 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1463,7 +1476,7 @@ static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1475,7 +1488,7 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1487,7 +1500,7 @@ static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1499,7 +1512,7 @@ static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1511,7 +1524,7 @@ static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1523,7 +1536,7 @@ static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1535,7 +1548,7 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::MSACtrlRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1547,7 +1560,7 @@ static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -1556,7 +1569,7 @@ static DecodeStatus DecodeBranchTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4;
- Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1566,7 +1579,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
const void *Decoder) {
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
- Inst.addOperand(MCOperand::CreateImm(JumpOffset));
+ Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
}
@@ -1576,7 +1589,7 @@ static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
const void *Decoder) {
int32_t BranchOffset = SignExtend32<21>(Offset) * 4;
- Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1586,7 +1599,7 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
const void *Decoder) {
int32_t BranchOffset = SignExtend32<26>(Offset) * 4;
- Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1595,7 +1608,16 @@ static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int32_t BranchOffset = SignExtend32<7>(Offset) << 1;
- Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<10>(Offset) << 1;
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
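All of the branch-target decoders here follow one pattern: sign-extend an N-bit field, then scale by the instruction granularity (2 bytes for microMIPS, 4 otherwise). A generic sketch mirroring llvm::SignExtend32:

    #include <cstdint>

    // Sign-extend the low B bits of x, as llvm::SignExtend32<B> does.
    // (Relies on arithmetic right shift of signed values, as LLVM's does.)
    template <unsigned B> int32_t signExtend32(uint32_t x) {
      return static_cast<int32_t>(x << (32 - B)) >> (32 - B);
    }

    // e.g. the 10-bit microMIPS form above: halfword-scaled.
    int32_t branchTarget10MM(uint32_t field) {
      return signExtend32<10>(field) * 2;
    }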
@@ -1604,7 +1626,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int32_t BranchOffset = SignExtend32<16>(Offset) * 2;
- Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1613,7 +1635,7 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 1;
- Inst.addOperand(MCOperand::CreateImm(JumpOffset));
+ Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
}
@@ -1622,11 +1644,11 @@ static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
if (Value == 0)
- Inst.addOperand(MCOperand::CreateImm(1));
+ Inst.addOperand(MCOperand::createImm(1));
else if (Value == 0x7)
- Inst.addOperand(MCOperand::CreateImm(-1));
+ Inst.addOperand(MCOperand::createImm(-1));
else
- Inst.addOperand(MCOperand::CreateImm(Value << 2));
+ Inst.addOperand(MCOperand::createImm(Value << 2));
return MCDisassembler::Success;
}
@@ -1634,7 +1656,7 @@ static DecodeStatus DecodeUImm6Lsl2(MCInst &Inst,
unsigned Value,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Value << 2));
+ Inst.addOperand(MCOperand::createImm(Value << 2));
return MCDisassembler::Success;
}
@@ -1643,9 +1665,9 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
if (Value == 0x7F)
- Inst.addOperand(MCOperand::CreateImm(-1));
+ Inst.addOperand(MCOperand::createImm(-1));
else
- Inst.addOperand(MCOperand::CreateImm(Value));
+ Inst.addOperand(MCOperand::createImm(Value));
return MCDisassembler::Success;
}
@@ -1653,7 +1675,7 @@ static DecodeStatus DecodeSimm4(MCInst &Inst,
unsigned Value,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<4>(Value)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<4>(Value)));
return MCDisassembler::Success;
}
@@ -1661,7 +1683,7 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Insn)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Insn)));
return MCDisassembler::Success;
}
@@ -1670,7 +1692,7 @@ static DecodeStatus DecodeLSAImm(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
// We add one to the immediate field as it was encoded as 'imm - 1'.
- Inst.addOperand(MCOperand::CreateImm(Insn + 1));
+ Inst.addOperand(MCOperand::createImm(Insn + 1));
return MCDisassembler::Success;
}
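The LSA shift amount is stored off-by-one: the 2-bit field encodes shifts of 1 through 4, hence the `Insn + 1` above. A trivial encode/decode sketch:

    #include <cstdint>

    uint32_t decodeLsaImm(uint32_t field) { return field + 1; } // 0..3 -> 1..4
    uint32_t encodeLsaImm(uint32_t shift) { return shift - 1; } // 1..4 -> 0..3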
@@ -1681,7 +1703,7 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
// First we need to grab the pos(lsb) from MCInst.
int Pos = Inst.getOperand(2).getImm();
int Size = (int) Insn - Pos + 1;
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
return MCDisassembler::Success;
}
@@ -1690,19 +1712,19 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
int Size = (int) Insn + 1;
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size)));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) * 4));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) * 8));
+ Inst.addOperand(MCOperand::createImm(SignExtend32<18>(Insn) * 8));
return MCDisassembler::Success;
}
@@ -1716,7 +1738,7 @@ static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
case 511: DecodedValue = -257; break;
default: DecodedValue = SignExtend32<9>(Insn); break;
}
- Inst.addOperand(MCOperand::CreateImm(DecodedValue * 4));
+ Inst.addOperand(MCOperand::createImm(DecodedValue * 4));
return MCDisassembler::Success;
}
@@ -1726,13 +1748,13 @@ static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
assert(Insn < 16);
int32_t DecodedValues[] = {128, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64,
255, 32768, 65535};
- Inst.addOperand(MCOperand::CreateImm(DecodedValues[Insn]));
+ Inst.addOperand(MCOperand::createImm(DecodedValues[Insn]));
return MCDisassembler::Success;
}
static DecodeStatus DecodeUImm5lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Insn << 2));
+ Inst.addOperand(MCOperand::createImm(Insn << 2));
return MCDisassembler::Success;
}
@@ -1751,10 +1773,10 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst,
RegNum = RegLst & 0xf;
for (unsigned i = 0; i < RegNum; i++)
- Inst.addOperand(MCOperand::CreateReg(Regs[i]));
+ Inst.addOperand(MCOperand::createReg(Regs[i]));
if (RegLst & 0x10)
- Inst.addOperand(MCOperand::CreateReg(Mips::RA));
+ Inst.addOperand(MCOperand::createReg(Mips::RA));
return MCDisassembler::Success;
}
@@ -1763,18 +1785,64 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3};
- unsigned RegNum;
-
unsigned RegLst = fieldFromInstruction(Insn, 4, 2);
- // Empty register lists are not allowed.
- if (RegLst == 0)
- return MCDisassembler::Fail;
+ unsigned RegNum = RegLst & 0x3;
+
+ for (unsigned i = 0; i <= RegNum; i++)
+ Inst.addOperand(MCOperand::createReg(Regs[i]));
+
+ Inst.addOperand(MCOperand::createReg(Mips::RA));
+
+ return MCDisassembler::Success;
+}
- RegNum = RegLst & 0x3;
- for (unsigned i = 0; i < RegNum - 1; i++)
- Inst.addOperand(MCOperand::CreateReg(Regs[i]));
+static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateReg(Mips::RA));
+ unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
+ switch (RegPair) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ Inst.addOperand(MCOperand::createReg(Mips::A1));
+ Inst.addOperand(MCOperand::createReg(Mips::A2));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::createReg(Mips::A1));
+ Inst.addOperand(MCOperand::createReg(Mips::A3));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::createReg(Mips::A2));
+ Inst.addOperand(MCOperand::createReg(Mips::A3));
+ break;
+ case 3:
+ Inst.addOperand(MCOperand::createReg(Mips::A0));
+ Inst.addOperand(MCOperand::createReg(Mips::S5));
+ break;
+ case 4:
+ Inst.addOperand(MCOperand::createReg(Mips::A0));
+ Inst.addOperand(MCOperand::createReg(Mips::S6));
+ break;
+ case 5:
+ Inst.addOperand(MCOperand::createReg(Mips::A0));
+ Inst.addOperand(MCOperand::createReg(Mips::A1));
+ break;
+ case 6:
+ Inst.addOperand(MCOperand::createReg(Mips::A0));
+ Inst.addOperand(MCOperand::createReg(Mips::A2));
+ break;
+ case 7:
+ Inst.addOperand(MCOperand::createReg(Mips::A0));
+ Inst.addOperand(MCOperand::createReg(Mips::A3));
+ break;
+ }
+
+ return MCDisassembler::Success;
+}
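The eight `movep` destination pairs above form a fixed table indexed by the 3-bit field at bits [9:7]; the same mapping as data rather than a switch (register names as strings, purely illustrative):

    #include <cstdint>
    #include <utility>

    // Pair selected by fieldFromInstruction(Insn, 7, 3) in DecodeMovePRegPair.
    std::pair<const char *, const char *> movePRegPair(uint32_t insn) {
      static const std::pair<const char *, const char *> Pairs[8] = {
          {"$a1", "$a2"}, {"$a1", "$a3"}, {"$a2", "$a3"}, {"$a0", "$s5"},
          {"$a0", "$s6"}, {"$a0", "$a1"}, {"$a0", "$a2"}, {"$a0", "$a3"}};
      return Pairs[(insn >> 7) & 0x7];
    }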
+
+static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(SignExtend32<23>(Insn) << 2));
return MCDisassembler::Success;
}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 61743ff..e80a47b 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
switch (MI->getOpcode()) {
default:
break;
@@ -290,6 +290,7 @@ bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
switch (MI.getOpcode()) {
case Mips::BEQ:
+ case Mips::BEQ_MM:
// beq $zero, $zero, $L2 => b $L2
// beq $r0, $zero, $L2 => beqz $r0, $L2
return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 468dc07..713f35c 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -86,7 +86,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 5b0f950..70b9cca 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -14,15 +14,18 @@ using namespace llvm;
uint8_t MipsABIFlagsSection::getFpABIValue() {
switch (FpABI) {
case FpABIKind::ANY:
- return Val_GNU_MIPS_ABI_FP_ANY;
+ return Mips::Val_GNU_MIPS_ABI_FP_ANY;
+ case FpABIKind::SOFT:
+ return Mips::Val_GNU_MIPS_ABI_FP_SOFT;
case FpABIKind::XX:
- return Val_GNU_MIPS_ABI_FP_XX;
+ return Mips::Val_GNU_MIPS_ABI_FP_XX;
case FpABIKind::S32:
- return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ return Mips::Val_GNU_MIPS_ABI_FP_DOUBLE;
case FpABIKind::S64:
if (Is32BitABI)
- return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A;
- return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ return OddSPReg ? Mips::Val_GNU_MIPS_ABI_FP_64
+ : Mips::Val_GNU_MIPS_ABI_FP_64A;
+ return Mips::Val_GNU_MIPS_ABI_FP_DOUBLE;
}
llvm_unreachable("unexpected fp abi value");
@@ -43,7 +46,7 @@ StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) {
uint8_t MipsABIFlagsSection::getCPR1SizeValue() {
if (FpABI == FpABIKind::XX)
- return (uint8_t)AFL_REG_32;
+ return (uint8_t)Mips::AFL_REG_32;
return (uint8_t)CPR1Size;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 8bcfb0f..b078cd3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -11,74 +11,16 @@
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MipsABIFlags.h"
namespace llvm {
class MCStreamer;
struct MipsABIFlagsSection {
- // Values for the xxx_size bytes of an ABI flags structure.
- enum AFL_REG {
- AFL_REG_NONE = 0x00, // No registers.
- AFL_REG_32 = 0x01, // 32-bit registers.
- AFL_REG_64 = 0x02, // 64-bit registers.
- AFL_REG_128 = 0x03 // 128-bit registers.
- };
-
- // Masks for the ases word of an ABI flags structure.
- enum AFL_ASE {
- AFL_ASE_DSP = 0x00000001, // DSP ASE.
- AFL_ASE_DSPR2 = 0x00000002, // DSP R2 ASE.
- AFL_ASE_EVA = 0x00000004, // Enhanced VA Scheme.
- AFL_ASE_MCU = 0x00000008, // MCU (MicroController) ASE.
- AFL_ASE_MDMX = 0x00000010, // MDMX ASE.
- AFL_ASE_MIPS3D = 0x00000020, // MIPS-3D ASE.
- AFL_ASE_MT = 0x00000040, // MT ASE.
- AFL_ASE_SMARTMIPS = 0x00000080, // SmartMIPS ASE.
- AFL_ASE_VIRT = 0x00000100, // VZ ASE.
- AFL_ASE_MSA = 0x00000200, // MSA ASE.
- AFL_ASE_MIPS16 = 0x00000400, // MIPS16 ASE.
- AFL_ASE_MICROMIPS = 0x00000800, // MICROMIPS ASE.
- AFL_ASE_XPA = 0x00001000 // XPA ASE.
- };
-
- // Values for the isa_ext word of an ABI flags structure.
- enum AFL_EXT {
- AFL_EXT_XLR = 1, // RMI Xlr instruction.
- AFL_EXT_OCTEON2 = 2, // Cavium Networks Octeon2.
- AFL_EXT_OCTEONP = 3, // Cavium Networks OcteonP.
- AFL_EXT_LOONGSON_3A = 4, // Loongson 3A.
- AFL_EXT_OCTEON = 5, // Cavium Networks Octeon.
- AFL_EXT_5900 = 6, // MIPS R5900 instruction.
- AFL_EXT_4650 = 7, // MIPS R4650 instruction.
- AFL_EXT_4010 = 8, // LSI R4010 instruction.
- AFL_EXT_4100 = 9, // NEC VR4100 instruction.
- AFL_EXT_3900 = 10, // Toshiba R3900 instruction.
- AFL_EXT_10000 = 11, // MIPS R10000 instruction.
- AFL_EXT_SB1 = 12, // Broadcom SB-1 instruction.
- AFL_EXT_4111 = 13, // NEC VR4111/VR4181 instruction.
- AFL_EXT_4120 = 14, // NEC VR4120 instruction.
- AFL_EXT_5400 = 15, // NEC VR5400 instruction.
- AFL_EXT_5500 = 16, // NEC VR5500 instruction.
- AFL_EXT_LOONGSON_2E = 17, // ST Microelectronics Loongson 2E.
- AFL_EXT_LOONGSON_2F = 18 // ST Microelectronics Loongson 2F.
- };
-
- // Values for the fp_abi word of an ABI flags structure.
- enum Val_GNU_MIPS_ABI {
- Val_GNU_MIPS_ABI_FP_ANY = 0,
- Val_GNU_MIPS_ABI_FP_DOUBLE = 1,
- Val_GNU_MIPS_ABI_FP_XX = 5,
- Val_GNU_MIPS_ABI_FP_64 = 6,
- Val_GNU_MIPS_ABI_FP_64A = 7
- };
-
- enum AFL_FLAGS1 {
- AFL_FLAGS1_ODDSPREG = 1
- };
-
- // Internal representation of the values used in .module fp=value
- enum class FpABIKind { ANY, XX, S32, S64 };
+ // Internal representation of the fp_abi related values used in .module.
+ enum class FpABIKind { ANY, XX, S32, S64, SOFT };
// Version of flags structure.
uint16_t Version;
@@ -87,11 +29,11 @@ struct MipsABIFlagsSection {
// The revision of ISA: 0 for MIPS V and below, 1-n otherwise.
uint8_t ISARevision;
// The size of general purpose registers.
- AFL_REG GPRSize;
+ Mips::AFL_REG GPRSize;
// The size of co-processor 1 registers.
- AFL_REG CPR1Size;
+ Mips::AFL_REG CPR1Size;
// The size of co-processor 2 registers.
- AFL_REG CPR2Size;
+ Mips::AFL_REG CPR2Size;
// Processor-specific extension.
uint32_t ISAExtensionSet;
// Mask of ASEs used.
@@ -107,9 +49,10 @@ protected:
public:
MipsABIFlagsSection()
- : Version(0), ISALevel(0), ISARevision(0), GPRSize(AFL_REG_NONE),
- CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), ISAExtensionSet(0),
- ASESet(0), OddSPReg(false), Is32BitABI(false), FpABI(FpABIKind::ANY) {}
+ : Version(0), ISALevel(0), ISARevision(0), GPRSize(Mips::AFL_REG_NONE),
+ CPR1Size(Mips::AFL_REG_NONE), CPR2Size(Mips::AFL_REG_NONE),
+ ISAExtensionSet(0), ASESet(0), OddSPReg(false), Is32BitABI(false),
+ FpABI(FpABIKind::ANY) {}
uint16_t getVersionValue() { return (uint16_t)Version; }
uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
@@ -125,7 +68,7 @@ public:
uint32_t Value = 0;
if (OddSPReg)
- Value |= (uint32_t)AFL_FLAGS1_ODDSPREG;
+ Value |= (uint32_t)Mips::AFL_FLAGS1_ODDSPREG;
return Value;
}
@@ -145,6 +88,10 @@ public:
ISALevel = 64;
if (P.hasMips64r6())
ISARevision = 6;
+ else if (P.hasMips64r5())
+ ISARevision = 5;
+ else if (P.hasMips64r3())
+ ISARevision = 3;
else if (P.hasMips64r2())
ISARevision = 2;
else
@@ -153,6 +100,10 @@ public:
ISALevel = 32;
if (P.hasMips32r6())
ISARevision = 6;
+ else if (P.hasMips32r5())
+ ISARevision = 5;
+ else if (P.hasMips32r3())
+ ISARevision = 3;
else if (P.hasMips32r2())
ISARevision = 2;
else
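Both hunks extend the same cascade: the highest supported revision wins, with the new r5 and r3 checks slotted between r6 and r2. A minimal standalone model (the predicate struct is a stand-in, and the final fallback is a guess since the hunk is truncated here):

    #include <cstdio>

    // Hypothetical stand-in for the PredicateLibrary queries used above.
    struct Preds { bool R6, R5, R3, R2; };

    static unsigned pickISARevision(const Preds &P) {
      if (P.R6) return 6;
      if (P.R5) return 5;
      if (P.R3) return 3;
      if (P.R2) return 2;
      return 1; // assumed fallback; the true else-branch is cut off above
    }

    int main() {
      Preds P{false, true, true, true}; // r5 is the newest supported revision
      std::printf("revision=%u\n", pickISARevision(P)); // prints revision=5
    }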
@@ -176,32 +127,32 @@ public:
template <class PredicateLibrary>
void setGPRSizeFromPredicates(const PredicateLibrary &P) {
- GPRSize = P.isGP64bit() ? AFL_REG_64 : AFL_REG_32;
+ GPRSize = P.isGP64bit() ? Mips::AFL_REG_64 : Mips::AFL_REG_32;
}
template <class PredicateLibrary>
void setCPR1SizeFromPredicates(const PredicateLibrary &P) {
- if (P.abiUsesSoftFloat())
- CPR1Size = AFL_REG_NONE;
+ if (P.useSoftFloat())
+ CPR1Size = Mips::AFL_REG_NONE;
else if (P.hasMSA())
- CPR1Size = AFL_REG_128;
+ CPR1Size = Mips::AFL_REG_128;
else
- CPR1Size = P.isFP64bit() ? AFL_REG_64 : AFL_REG_32;
+ CPR1Size = P.isFP64bit() ? Mips::AFL_REG_64 : Mips::AFL_REG_32;
}
template <class PredicateLibrary>
void setASESetFromPredicates(const PredicateLibrary &P) {
ASESet = 0;
if (P.hasDSP())
- ASESet |= AFL_ASE_DSP;
+ ASESet |= Mips::AFL_ASE_DSP;
if (P.hasDSPR2())
- ASESet |= AFL_ASE_DSPR2;
+ ASESet |= Mips::AFL_ASE_DSPR2;
if (P.hasMSA())
- ASESet |= AFL_ASE_MSA;
+ ASESet |= Mips::AFL_ASE_MSA;
if (P.inMicroMipsMode())
- ASESet |= AFL_ASE_MICROMIPS;
+ ASESet |= Mips::AFL_ASE_MICROMIPS;
if (P.inMips16Mode())
- ASESet |= AFL_ASE_MIPS16;
+ ASESet |= Mips::AFL_ASE_MIPS16;
}
template <class PredicateLibrary>
@@ -209,7 +160,9 @@ public:
Is32BitABI = P.isABI_O32();
FpABI = FpABIKind::ANY;
- if (P.isABI_N32() || P.isABI_N64())
+ if (P.useSoftFloat())
+ FpABI = FpABIKind::SOFT;
+ else if (P.isABI_N32() || P.isABI_N64())
FpABI = FpABIKind::S64;
else if (P.isABI_O32()) {
if (P.isABI_FPXX())
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index f885369..b1f7c2f 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -9,6 +9,9 @@
#include "MipsABIInfo.h"
#include "MipsRegisterInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCTargetOptions.h"
using namespace llvm;
@@ -43,3 +46,79 @@ unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
return 0;
llvm_unreachable("Unhandled ABI");
}
+
+MipsABIInfo MipsABIInfo::computeTargetABI(Triple TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ if (Options.getABIName().startswith("o32"))
+ return MipsABIInfo::O32();
+ else if (Options.getABIName().startswith("n32"))
+ return MipsABIInfo::N32();
+ else if (Options.getABIName().startswith("n64"))
+ return MipsABIInfo::N64();
+ else if (Options.getABIName().startswith("eabi"))
+ return MipsABIInfo::EABI();
+ else if (!Options.getABIName().empty())
+ llvm_unreachable("Unknown ABI option for MIPS");
+
+  // FIXME: This duplicates logic from the selectMipsCPU routine, which is
+  // used (but not shared) in a couple of other places. This needs unifying
+  // at some level.
+ if (CPU.empty() || CPU == "generic") {
+ if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel)
+ CPU = "mips32";
+ else
+ CPU = "mips64";
+ }
+
+ return StringSwitch<MipsABIInfo>(CPU)
+ .Case("mips1", MipsABIInfo::O32())
+ .Case("mips2", MipsABIInfo::O32())
+ .Case("mips32", MipsABIInfo::O32())
+ .Case("mips32r2", MipsABIInfo::O32())
+ .Case("mips32r3", MipsABIInfo::O32())
+ .Case("mips32r5", MipsABIInfo::O32())
+ .Case("mips32r6", MipsABIInfo::O32())
+ .Case("mips16", MipsABIInfo::O32())
+ .Case("mips3", MipsABIInfo::N64())
+ .Case("mips4", MipsABIInfo::N64())
+ .Case("mips5", MipsABIInfo::N64())
+ .Case("mips64", MipsABIInfo::N64())
+ .Case("mips64r2", MipsABIInfo::N64())
+ .Case("mips64r3", MipsABIInfo::N64())
+ .Case("mips64r5", MipsABIInfo::N64())
+ .Case("mips64r6", MipsABIInfo::N64())
+ .Case("octeon", MipsABIInfo::N64())
+ .Default(MipsABIInfo::Unknown());
+}
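The net effect of the StringSwitch is a family rule: 32-bit ISA names (plus mips16) default to O32, 64-bit names (plus octeon) to N64, and anything else falls through to Unknown. A rough standalone replica with std::map and an illustrative ABI enum in place of MipsABIInfo:

    #include <cstdio>
    #include <map>
    #include <string>

    enum class ABI { Unknown, O32, N64 };

    static ABI defaultABIForCPU(const std::string &CPU) {
      // Abbreviated table; the real switch lists every revision explicitly.
      static const std::map<std::string, ABI> Table = {
          {"mips32", ABI::O32}, {"mips32r6", ABI::O32}, {"mips16", ABI::O32},
          {"mips3", ABI::N64},  {"mips64r6", ABI::N64}, {"octeon", ABI::N64}};
      auto It = Table.find(CPU);
      return It == Table.end() ? ABI::Unknown : It->second;
    }

    int main() {
      std::printf("%d %d\n", (int)defaultABIForCPU("octeon"),
                  (int)defaultABIForCPU("mips32r6")); // prints 2 1
    }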
+
+unsigned MipsABIInfo::GetStackPtr() const {
+ return ArePtrs64bit() ? Mips::SP_64 : Mips::SP;
+}
+
+unsigned MipsABIInfo::GetFramePtr() const {
+ return ArePtrs64bit() ? Mips::FP_64 : Mips::FP;
+}
+
+unsigned MipsABIInfo::GetNullPtr() const {
+ return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO;
+}
+
+unsigned MipsABIInfo::GetPtrAdduOp() const {
+ return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu;
+}
+
+unsigned MipsABIInfo::GetPtrAddiuOp() const {
+ return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu;
+}
+
+unsigned MipsABIInfo::GetEhDataReg(unsigned I) const {
+ static const unsigned EhDataReg[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned EhDataReg64[] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
+ };
+
+ return IsN64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index eccb50d8..9a6ba94 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -7,15 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSABIINFO_H
-#define MIPSABIINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIINFO_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIINFO_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
+class MCTargetOptions;
+class StringRef;
+class TargetRegisterClass;
+
class MipsABIInfo {
public:
enum class ABI { Unknown, O32, N32, N64, EABI };
@@ -31,6 +36,8 @@ public:
static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); }
static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); }
static MipsABIInfo EABI() { return MipsABIInfo(ABI::EABI); }
+ static MipsABIInfo computeTargetABI(Triple TT, StringRef CPU,
+ const MCTargetOptions &Options);
bool IsKnown() const { return ThisABI != ABI::Unknown; }
bool IsO32() const { return ThisABI == ABI::O32; }
@@ -55,6 +62,15 @@ public:
bool operator<(const MipsABIInfo Other) const {
return ThisABI < Other.GetEnumValue();
}
+
+ unsigned GetStackPtr() const;
+ unsigned GetFramePtr() const;
+ unsigned GetNullPtr() const;
+ unsigned GetPtrAdduOp() const;
+ unsigned GetPtrAddiuOp() const;
+ inline bool ArePtrs64bit() const { return IsN64(); }
+
+ unsigned GetEhDataReg(unsigned I) const;
};
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 6670dc2..d823ffc 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -68,14 +68,14 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 16-bit signed immediate.
if (!isIntN(16, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
case Mips::fixup_MIPS_PC19_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 19-bit signed immediate.
if (!isIntN(19, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC19 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC19 fixup");
break;
case Mips::fixup_Mips_26:
// So far we are only using this type for jumps.
@@ -109,7 +109,15 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 7-bit signed immediate.
if (!isIntN(7, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC7 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC7 fixup");
+ break;
+ case Mips::fixup_MICROMIPS_PC10_S1:
+ Value -= 2;
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t) Value / 2;
+ // We now check if Value can be encoded as a 10-bit signed immediate.
+ if (!isIntN(10, Value) && Ctx)
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC10 fixup");
break;
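To make the PC10 arithmetic concrete: the value is biased by -2, halved (targets are halfword aligned), then range-checked as a signed 10-bit quantity. A standalone check, with isIntN re-implemented locally so the sketch builds without LLVM headers:

    #include <cstdint>
    #include <cstdio>

    // Local equivalent of llvm::isIntN: does V fit in an N-bit signed integer?
    static bool fitsIntN(unsigned N, std::int64_t V) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    }

    int main() {
      std::int64_t Value = 64; // raw byte distance to the branch target
      Value -= 2;              // PC bias applied by the PC10 case above
      Value /= 2;              // scale: microMIPS targets are halfword aligned
      std::printf("encoded=%lld in-range=%d\n", (long long)Value,
                  fitsIntN(10, Value)); // encoded=31 in-range=1
    }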
case Mips::fixup_MICROMIPS_PC16_S1:
Value -= 4;
@@ -117,14 +125,14 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 16-bit signed immediate.
if (!isIntN(16, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
case Mips::fixup_MIPS_PC18_S3:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
      // We now check if Value can be encoded as an 18-bit signed immediate.
if (!isIntN(18, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC18 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup");
break;
case Mips::fixup_MIPS_PC21_S2:
Value -= 4;
@@ -132,7 +140,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
if (!isIntN(21, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC21 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup");
break;
case Mips::fixup_MIPS_PC26_S2:
Value -= 4;
@@ -140,14 +148,15 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
if (!isIntN(26, Value) && Ctx)
- Ctx->FatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
break;
}
return Value;
}
-MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const {
+MCObjectWriter *
+MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createMipsELFObjectWriter(OS,
MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
}
@@ -157,7 +166,8 @@ MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const {
// microMIPS: x | x | a | b
static bool needsMMLEByteOrder(unsigned Kind) {
- return Kind >= Mips::fixup_MICROMIPS_26_S1 &&
+ return Kind != Mips::fixup_MICROMIPS_PC10_S1 &&
+ Kind >= Mips::fixup_MICROMIPS_26_S1 &&
Kind < Mips::LastTargetFixupKind;
}
@@ -190,6 +200,7 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
switch ((unsigned)Kind) {
case FK_Data_2:
case Mips::fixup_Mips_16:
+ case Mips::fixup_MICROMIPS_PC10_S1:
FullSize = 2;
break;
case FK_Data_8:
@@ -280,6 +291,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_LO16", 0, 16, 0 },
{ "fixup_MICROMIPS_GOT16", 0, 16, 0 },
{ "fixup_MICROMIPS_PC7_S1", 0, 7, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_PC10_S1", 0, 10, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_PC16_S1", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_CALL16", 0, 16, 0 },
{ "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 },
@@ -344,6 +356,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_LO16", 16, 16, 0 },
{ "fixup_MICROMIPS_GOT16", 16, 16, 0 },
{ "fixup_MICROMIPS_PC7_S1", 9, 7, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_PC10_S1", 6, 10, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_CALL16", 16, 16, 0 },
{ "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 },
@@ -381,12 +394,7 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
- for (uint64_t i = 0, e = Count % 4; i != e; ++i)
- OW->Write8(0);
-
- uint64_t NumNops = Count / 4;
- for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0);
+ OW->WriteZeros(Count);
return true;
}
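The WriteZeros rewrite is behavior-preserving because the canonical MIPS nop word is all zeros, so the old remainder loop plus word loop emitted exactly Count zero bytes anyway. A small standalone demonstration, with byte vectors standing in for the object writer:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const std::uint64_t Count = 10;

      // New behaviour: OW->WriteZeros(Count) emits Count zero bytes.
      std::vector<std::uint8_t> A(Count, 0);

      // Old behaviour: Count % 4 zero bytes, then Count / 4 zero words.
      std::vector<std::uint8_t> B;
      for (std::uint64_t I = 0, E = Count % 4; I != E; ++I)
        B.push_back(0);                      // OW->Write8(0)
      for (std::uint64_t I = 0, E = Count / 4; I != E; ++I)
        for (int Byte = 0; Byte < 4; ++Byte)
          B.push_back(0);                    // OW->Write32(0), byte by byte
      std::printf("identical=%d\n", A == B); // prints identical=1
    }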
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index dd0e54c..b3d5a49 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -32,12 +32,11 @@ class MipsAsmBackend : public MCAsmBackend {
bool Is64Bit; // 32 or 64 bit words
public:
- MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle,
- bool _is64Bit)
- : MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle),
- Is64Bit(_is64Bit) {}
+ MipsAsmBackend(const Target &T, Triple::OSType OSType, bool IsLittle,
+ bool Is64Bit)
+ : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 56aac4e..8d9e3e3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -22,25 +23,41 @@
using namespace llvm;
namespace {
+// A helper structure based on ELFRelocationEntry, used for sorting entries in
+// the relocation table.
+struct MipsRelocationEntry {
+ MipsRelocationEntry(const ELFRelocationEntry &R)
+ : R(R), SortOffset(R.Offset), HasMatchingHi(false) {}
+ const ELFRelocationEntry R;
+ // SortOffset equals R.Offset except for the *HI16 relocations, for which it
+ // will be set based on the R.Offset of the matching *LO16 relocation.
+ int64_t SortOffset;
+ // True when this is a *LO16 relocation chosen as a match for a *HI16
+ // relocation.
+ bool HasMatchingHi;
+};
+
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian);
- virtual ~MipsELFObjectWriter();
+ ~MipsELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const override;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) override;
};
}
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
- : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ (_isN64) ? true : false,
- /*IsN64*/ _isN64) {}
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ _isN64,
+ /*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
@@ -54,9 +71,11 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
switch (Kind) {
default:
llvm_unreachable("invalid fixup kind!");
+ case Mips::fixup_Mips_32:
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case Mips::fixup_Mips_64:
case FK_Data_8:
Type = ELF::R_MIPS_64;
break;
@@ -165,6 +184,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_MICROMIPS_PC7_S1:
Type = ELF::R_MICROMIPS_PC7_S1;
break;
+ case Mips::fixup_MICROMIPS_PC10_S1:
+ Type = ELF::R_MICROMIPS_PC10_S1;
+ break;
case Mips::fixup_MICROMIPS_PC16_S1:
Type = ELF::R_MICROMIPS_PC16_S1;
break;
@@ -220,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
+// Sort entries by SortOffset in descending order.
+// When multiple *HI16 relocs are paired with one *LO16 reloc, the second
+// rule sorts them in ascending order of R.Offset.
+static int cmpRelMips(const MipsRelocationEntry *AP,
+ const MipsRelocationEntry *BP) {
+ const MipsRelocationEntry &A = *AP;
+ const MipsRelocationEntry &B = *BP;
+ if (A.SortOffset != B.SortOffset)
+ return B.SortOffset - A.SortOffset;
+ if (A.R.Offset != B.R.Offset)
+ return A.R.Offset - B.R.Offset;
+ if (B.R.Type != A.R.Type)
+ return B.R.Type - A.R.Type;
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
+}
+
+// For the given Reloc.Type, return the matching relocation type, as in the
+// table below.
+static unsigned getMatchingLoType(const MCAssembler &Asm,
+ const ELFRelocationEntry &Reloc) {
+ unsigned Type = Reloc.Type;
+ if (Type == ELF::R_MIPS_HI16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_HI16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_HI16)
+ return ELF::R_MIPS16_LO16;
+
+ const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol);
+
+ if (MCELF::GetBinding(SD) != ELF::STB_LOCAL)
+ return ELF::R_MIPS_NONE;
+
+ if (Type == ELF::R_MIPS_GOT16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_GOT16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_GOT16)
+ return ELF::R_MIPS16_LO16;
+
+ return ELF::R_MIPS_NONE;
+}
+
+// Return true if First needs a matching *LO16, its matching *LO16 type
+// equals Second's type, and both relocations are against the same symbol.
+static bool areMatchingHiAndLo(const MCAssembler &Asm,
+ const ELFRelocationEntry &First,
+ const ELFRelocationEntry &Second) {
+ return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE &&
+ getMatchingLoType(Asm, First) == Second.Type &&
+ First.Symbol && First.Symbol == Second.Symbol;
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16.
+static bool
+isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() - 1 &&
+ areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R);
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16
+// and not chosen by a *HI16 as a match.
+static bool isFreeLo(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi &&
+ !isPrecededByMatchingHi(Asm, Index, MipsRelocs);
+}
+
+// Lo has been chosen as a match for Hi; set their fields accordingly.
+// Mips instructions are at least two bytes long (two for micromips/mips16,
+// four for mips32/64), so we can set HI's SortOffset to the matching LO's
+// Offset minus one to simplify the sorting function.
+static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
+ Lo.HasMatchingHi = true;
+ Hi.SortOffset = Lo.R.Offset - 1;
+}
+
+// We sort relocation table entries by offset, except for one additional rule
+// required by the MIPS ABI: every *HI16 relocation must be immediately
+// followed by the corresponding *LO16 relocation. We also support a GNU
+// extension that allows multiple *HI16s to be paired with one *LO16.
+//
+// *HI16 relocations and their matching *LO16 are:
+//
+// +---------------------------------------------+-------------------+
+// | *HI16 | matching *LO16 |
+// |---------------------------------------------+-------------------|
+// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 |
+// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 |
+// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 |
+// +---------------------------------------------+-------------------+
+//
+// (local R_*_GOT16 meaning R_*_GOT16 against a local symbol.)
+//
+// To handle *HI16 and *LO16 relocations, the linker needs a combined addend
+// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations:
+// AHL = (AHI << 16) + (short)ALO;
+//
+// We reuse the GNU as sorting algorithm, so the relocation table is emitted
+// in the same order GNU as would produce, which makes the generated .o files
+// easier to compare.
+//
+// The logic is:
+// Search the table (starting from the highest offset and going back to zero)
+// for all *HI16 relocations that don't have a matching *LO16.
+// For every such HI, find the matching LO with the highest offset that isn't
+// already matched with another HI. If there are no free LOs, match it with
+// the first one found (starting from the lowest offset).
+// When multiple HIs are matched with one LO, sort them in descending order
+// by offset.
+//
+// In other words, when searching for a matching LO:
+// - don't look for a 'better' match for the HIs that are already followed by a
+// matching LO;
+// - prefer LOs without a pair;
+// - prefer LOs with higher offset;
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ if (Relocs.size() < 2)
+ return;
+
+ // The default function sorts entries by Offset in descending order.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ // Init MipsRelocs from Relocs.
+ std::vector<MipsRelocationEntry> MipsRelocs;
+ for (unsigned I = 0, E = Relocs.size(); I != E; ++I)
+ MipsRelocs.push_back(MipsRelocationEntry(Relocs[I]));
+
+ // Find a matching LO for all HIs that need it.
+ for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) {
+ if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE ||
+ (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs)))
+ continue;
+
+ int32_t MatchedLoIndex = -1;
+
+    // Search the list in ascending order of Offset.
+ for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) {
+ // check for a match
+ if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) &&
+ (MatchedLoIndex == -1 || // first match
+ // or we already have a match,
+ // but this one is with higher offset and it's free
+ (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs))))
+ MatchedLoIndex = J;
+ }
+
+ if (MatchedLoIndex != -1)
+ // We have a match.
+ setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]);
+ }
+
+ // SortOffsets are calculated, call the sorting function.
+ array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips);
+
+ // Copy sorted MipsRelocs back to Relocs.
+ for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I)
+ Relocs[I] = MipsRelocs[I].R;
+}
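A toy run of this pass may help. Take three relocations in descending offset order: a free LO at 24, an unrelated entry at 16, and a HI at 8. The match gives the HI SortOffset 23, so the descending re-sort places it directly after its LO. The sketch below uses simplified records rather than ELFRelocationEntry and collapses the matching heuristics to 'first free LO in descending order':

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    enum Kind { HI, LO, OTHER };
    struct Rel {
      int Offset;
      Kind K;
      long SortOffset;    // starts equal to Offset, as in MipsRelocationEntry
      bool HasMatchingHi;
    };

    int main() {
      // State after the default descending sort.
      std::vector<Rel> R = {{24, LO, 24, false},
                            {16, OTHER, 16, false},
                            {8, HI, 8, false}};

      // Matching pass: each HI takes the first free LO (highest offset,
      // since the vector is descending) and inherits its offset minus one.
      for (Rel &Hi : R) {
        if (Hi.K != HI)
          continue;
        for (Rel &Lo : R)
          if (Lo.K == LO && !Lo.HasMatchingHi) {
            Lo.HasMatchingHi = true;
            Hi.SortOffset = Lo.Offset - 1;
            break;
          }
      }

      // Descending sort by SortOffset puts the HI right next to its LO.
      std::stable_sort(R.begin(), R.end(), [](const Rel &A, const Rel &B) {
        return A.SortOffset > B.SortOffset;
      });
      for (const Rel &E : R)
        std::printf("%d ", E.Offset); // prints: 24 8 16
      std::printf("\n");
    }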
+
bool
MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
@@ -259,12 +444,11 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
- MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
- (Is64Bit) ? true : false,
- IsLittleEndian);
+ MCELFObjectTargetWriter *MOTW =
+ new MipsELFObjectWriter(Is64Bit, OSABI, Is64Bit, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 18c4a20..d2b5183 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
MCContext &Context = getContext();
const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
- MipsTargetELFStreamer *ELFTargetStreamer =
- static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
const MCOperand &Op = Inst.getOperand(OpIndex);
@@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
}
+ createPendingLabelRelocs();
+}
+
+void MipsELFStreamer::createPendingLabelRelocs() {
+ MipsTargetELFStreamer *ELFTargetStreamer =
+ static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
+
+ // FIXME: Also mark labels when in MIPS16 mode.
if (ELFTargetStreamer->isMicroMipsEnabled()) {
for (auto Label : Labels) {
MCSymbolData &Data = getOrCreateSymbolData(Label);
@@ -52,7 +58,7 @@ void MipsELFStreamer::EmitLabel(MCSymbol *Symbol) {
Labels.push_back(Symbol);
}
-void MipsELFStreamer::SwitchSection(const MCSection * Section,
+void MipsELFStreamer::SwitchSection(MCSection *Section,
const MCExpr *Subsection) {
MCELFStreamer::SwitchSection(Section, Subsection);
Labels.clear();
@@ -69,11 +75,10 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
I->EmitMipsOptionRecord();
}
-namespace llvm {
-MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll) {
- return new MipsELFStreamer(Context, MAB, OS, Emitter, STI);
-}
+MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new MipsELFStreamer(Context, MAB, OS, Emitter);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 136146b..af9311f 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -33,11 +33,11 @@ class MipsELFStreamer : public MCELFStreamer {
public:
- MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
- RegInfoRecord = new MipsRegInfoRecord(this, Context, STI);
+ RegInfoRecord = new MipsRegInfoRecord(this, Context);
MipsOptionRecords.push_back(
std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
}
@@ -55,7 +55,7 @@ public:
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .section directive is processed.
- void SwitchSection(const MCSection *Section,
+ void SwitchSection(MCSection *Section,
const MCExpr *Subsection = nullptr) override;
/// Overriding this function allows us to dismiss all labels that are
@@ -65,10 +65,13 @@ public:
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
+
+ /// Mark labels as microMIPS, if necessary for the subtarget.
+ void createPendingLabelRelocs();
};
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 71c11d7..e601963 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -18,7 +18,7 @@ namespace Mips {
// one can have multiple fixup types for a given relocation and thus need
// to be uniquely named.
//
- // This table *must* be in the save order of
+  // This table *must* be in the same order as
// MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
// in MipsAsmBackend.cpp.
//
@@ -161,6 +161,9 @@ namespace Mips {
// resulting in - R_MICROMIPS_PC7_S1
fixup_MICROMIPS_PC7_S1,
+ // resulting in - R_MICROMIPS_PC10_S1
+ fixup_MICROMIPS_PC10_S1,
+
// resulting in - R_MICROMIPS_PC16_S1
fixup_MICROMIPS_PC16_S1,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index a54a2eb..a0d9e15 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -35,14 +36,12 @@
namespace llvm {
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, false);
}
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
@@ -114,7 +113,11 @@ static void LowerDextDins(MCInst& InstIn) {
}
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits() & Mips::FeatureMicroMips;
+ return STI.getFeatureBits()[Mips::FeatureMicroMips];
+}
+
+bool MipsMCCodeEmitter::isMips32r6(const MCSubtargetInfo &STI) const {
+ return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
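The move from getFeatureBits() & Mips::FeatureMicroMips to getFeatureBits()[Mips::FeatureMicroMips] tracks LLVM's switch from a 64-bit feature mask to a FeatureBitset indexed by feature number, which lifts the 64-feature ceiling. A standalone analogue with std::bitset (the indices below are invented; the real ones come from tablegen):

    #include <bitset>
    #include <cstdio>

    enum Feature { FeatureMicroMips = 3, FeatureMips32r6 = 17, NumFeatures = 64 };

    int main() {
      std::bitset<NumFeatures> Bits;
      Bits.set(FeatureMicroMips);
      std::printf("micromips=%d r6=%d\n", (int)Bits.test(FeatureMicroMips),
                  (int)Bits.test(FeatureMips32r6)); // prints micromips=1 r6=0
    }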
void MipsMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const {
@@ -139,10 +142,10 @@ void MipsMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size,
}
}
-/// EncodeInstruction - Emit the instruction.
+/// encodeInstruction - Emit the instruction.
/// Size the instruction with Desc.getSize().
void MipsMCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const
{
@@ -175,13 +178,22 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned Opcode = TmpInst.getOpcode();
if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) &&
(Opcode != Mips::SLL_MM) && !Binary)
- llvm_unreachable("unimplemented opcode in EncodeInstruction()");
+ llvm_unreachable("unimplemented opcode in encodeInstruction()");
+
+ int NewOpcode = -1;
+ if (isMicroMips(STI)) {
+ if (isMips32r6(STI)) {
+ NewOpcode = Mips::MipsR62MicroMipsR6(Opcode, Mips::Arch_micromipsr6);
+ if (NewOpcode == -1)
+ NewOpcode = Mips::Std2MicroMipsR6(Opcode, Mips::Arch_micromipsr6);
+ }
+ else
+ NewOpcode = Mips::Std2MicroMips(Opcode, Mips::Arch_micromips);
- if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
- int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
if (NewOpcode != -1) {
if (Fixups.size() > N)
Fixups.pop_back();
+
Opcode = NewOpcode;
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
@@ -215,7 +227,7 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
"getBranchTargetOpValue expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_Mips_PC16)));
return 0;
}
@@ -237,11 +249,33 @@ getBranchTarget7OpValueMM(const MCInst &MI, unsigned OpNo,
"getBranchTargetOpValueMM expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MICROMIPS_PC7_S1)));
return 0;
}
+/// getBranchTargetOpValueMMPC10 - Return binary encoding of the microMIPS
+/// 10-bit branch target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValueMMPC10(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValuePC10 expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::create(0, Expr,
+ MCFixupKind(Mips::fixup_MICROMIPS_PC10_S1)));
+ return 0;
+}
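The immediate path stores the byte offset shifted right by one, consistent with halfword-aligned microMIPS targets; the expression path defers to the new PC10 fixup instead. A trivial standalone check of the immediate case:

    #include <cstdio>

    // Immediate case of getBranchTargetOpValueMMPC10: offset >> 1.
    static unsigned encodePC10Imm(long Imm) { return (unsigned)(Imm >> 1); }

    int main() {
      std::printf("%u\n", encodePC10Imm(40)); // byte offset 40 encodes as 20
    }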
+
/// getBranchTargetOpValue - Return binary encoding of the microMIPS branch
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -259,7 +293,7 @@ getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
"getBranchTargetOpValueMM expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::
fixup_MICROMIPS_PC16_S1)));
return 0;
@@ -282,7 +316,7 @@ getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
"getBranchTarget21OpValue expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MIPS_PC21_S2)));
return 0;
}
@@ -304,7 +338,7 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
"getBranchTarget26OpValue expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MIPS_PC26_S2)));
return 0;
}
@@ -344,7 +378,7 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
"getJumpTargetOpValue expects only expressions or an immediate");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_Mips_26)));
return 0;
}
@@ -362,7 +396,7 @@ getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
"getJumpTargetOpValueMM expects only expressions or an immediate");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MICROMIPS_26_S1)));
return 0;
}
@@ -429,7 +463,7 @@ getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
-getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
+getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
@@ -468,7 +502,7 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
: Mips::fixup_Mips_LO16;
break;
}
- Fixups.push_back(MCFixup::Create(0, MipsExpr, MCFixupKind(FixupKind)));
+ Fixups.push_back(MCFixup::create(0, MipsExpr, MCFixupKind(FixupKind)));
return 0;
}
@@ -478,6 +512,9 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
default: llvm_unreachable("Unknown fixup kind!");
break;
+ case MCSymbolRefExpr::VK_None:
+ FixupKind = Mips::fixup_Mips_32; // FIXME: This is ok for O32/N32 but not N64.
+ break;
case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
FixupKind = Mips::fixup_Mips_GPOFF_HI;
break;
@@ -572,7 +609,7 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
break;
} // switch
- Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
+ Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind)));
return 0;
}
return 0;
@@ -714,6 +751,21 @@ getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Register is encoded in bits 9-7, offset is encoded in bits 6-0.
+ assert(MI.getOperand(OpNo).isReg() &&
+ MI.getOperand(OpNo).getReg() == Mips::GP &&
+ "Unexpected base register!");
+
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
+ Fixups, STI) >> 2;
+
+ return OffBits & 0x7F;
+}
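The returned field keeps bits 8..2 of the GP-relative byte offset: scale by four, truncate to seven bits. A standalone check of the boundary case (the function name here is a local stand-in, not LLVM's):

    #include <cstdio>

    static unsigned encodeGPImm7Lsl2(unsigned OffBytes) {
      return (OffBytes >> 2) & 0x7F; // word-scaled 7-bit field
    }

    int main() {
      std::printf("0x%x\n", encodeGPImm7Lsl2(0x1FC)); // 0x7f, largest encodable
    }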
+
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -808,7 +860,7 @@ MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
"getSimm19Lsl2Encoding expects only expressions or an immediate");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MIPS_PC19_S2)));
return 0;
}
@@ -829,7 +881,7 @@ MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
"getSimm18Lsl2Encoding expects only expressions or an immediate");
const MCExpr *Expr = MO.getExpr();
- Fixups.push_back(MCFixup::Create(0, Expr,
+ Fixups.push_back(MCFixup::create(0, Expr,
MCFixupKind(Mips::fixup_MIPS_PC18_S3)));
return 0;
}
@@ -905,4 +957,50 @@ MipsMCCodeEmitter::getRegisterPairOpValue(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
}
+unsigned
+MipsMCCodeEmitter::getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned res = 0;
+
+ if (MI.getOperand(0).getReg() == Mips::A1 &&
+ MI.getOperand(1).getReg() == Mips::A2)
+ res = 0;
+ else if (MI.getOperand(0).getReg() == Mips::A1 &&
+ MI.getOperand(1).getReg() == Mips::A3)
+ res = 1;
+ else if (MI.getOperand(0).getReg() == Mips::A2 &&
+ MI.getOperand(1).getReg() == Mips::A3)
+ res = 2;
+ else if (MI.getOperand(0).getReg() == Mips::A0 &&
+ MI.getOperand(1).getReg() == Mips::S5)
+ res = 3;
+ else if (MI.getOperand(0).getReg() == Mips::A0 &&
+ MI.getOperand(1).getReg() == Mips::S6)
+ res = 4;
+ else if (MI.getOperand(0).getReg() == Mips::A0 &&
+ MI.getOperand(1).getReg() == Mips::A1)
+ res = 5;
+ else if (MI.getOperand(0).getReg() == Mips::A0 &&
+ MI.getOperand(1).getReg() == Mips::A2)
+ res = 6;
+ else if (MI.getOperand(0).getReg() == Mips::A0 &&
+ MI.getOperand(1).getReg() == Mips::A3)
+ res = 7;
+
+ return res;
+}
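The chain above is effectively an eight-entry lookup table in which the encoded value is the pair's index. A table-driven equivalent, sketched with a local Reg enum (the pair list itself is copied from the code above):

    #include <cstdio>
    #include <utility>

    enum Reg { A0, A1, A2, A3, S5, S6 };

    static unsigned movePairValue(Reg Dst, Reg Src) {
      static const std::pair<Reg, Reg> Pairs[] = {
          {A1, A2}, {A1, A3}, {A2, A3}, {A0, S5},
          {A0, S6}, {A0, A1}, {A0, A2}, {A0, A3}};
      for (unsigned I = 0; I != 8; ++I)
        if (Pairs[I].first == Dst && Pairs[I].second == Src)
          return I;
      return 0; // unmatched pairs fall back to 0, as in the original
    }

    int main() {
      std::printf("%u\n", movePairValue(A0, S6)); // prints 4
    }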
+
+unsigned
+MipsMCCodeEmitter::getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm() && "getSimm23Lsl2Encoding expects only an immediate");
+ // The immediate is encoded as 'immediate >> 2'.
+ unsigned Res = static_cast<unsigned>(MO.getImm());
+ assert((Res & 3) == 0);
+ return Res >> 2;
+}
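The encoder drops the two always-zero low bits of a 4-byte-aligned immediate; decoding shifts them back in. A quick round-trip check:

    #include <cassert>
    #include <cstdio>

    int main() {
      unsigned Imm = 0x100;        // must be 4-byte aligned, per the assert
      assert((Imm & 3) == 0);
      unsigned Encoded = Imm >> 2; // what getSimm23Lsl2Encoding would return
      std::printf("%u -> %u -> %u\n", Imm, Encoded,
                  Encoded << 2);   // prints 256 -> 64 -> 256
    }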
+
#include "MipsGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 0f0f49d..911cc2f 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -31,26 +31,27 @@ class MCSubtargetInfo;
class raw_ostream;
class MipsMCCodeEmitter : public MCCodeEmitter {
- MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &) = delete;
+ void operator=(const MipsMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
bool IsLittleEndian;
bool isMicroMips(const MCSubtargetInfo &STI) const;
+ bool isMips32r6(const MCSubtargetInfo &STI) const;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle)
: MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
- ~MipsMCCodeEmitter() {}
+ ~MipsMCCodeEmitter() override {}
void EmitByte(unsigned char C, raw_ostream &OS) const;
void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
raw_ostream &OS) const;
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -108,6 +109,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getBranchTargetOpValueMMPC10 - Return binary encoding of the microMIPS
+ // 10-bit branch target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValueMMPC10(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getBranchTargetOpValue - Return binary encoding of the microMIPS branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -161,6 +169,9 @@ public:
unsigned getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -198,6 +209,14 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMovePRegPairOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getSimm23Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 2b8f0c8..ee11461 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -51,7 +51,7 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override {
+ MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index e756b47..687b800 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -23,10 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
// This function creates an MCELFStreamer for Mips NaCl.
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index bab4254..2e3179a 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -43,7 +43,7 @@ using namespace llvm;
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsSubtarget.cpp
-static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
+StringRef MIPS_MC::selectMipsCPU(StringRef TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::mips ||
@@ -69,7 +69,7 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) {
static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
- CPU = selectMipsCPU(TT, CPU);
+ CPU = MIPS_MC::selectMipsCPU(TT, CPU);
MCSubtargetInfo *X = new MCSubtargetInfo();
InitMipsMCSubtargetInfo(X, TT, CPU, FS);
return X;
@@ -93,108 +93,85 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
RM = Reloc::Static;
else if (RM == Reloc::Default)
RM = Reloc::PIC_;
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+static MCInstPrinter *createMipsMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new MipsInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
MCStreamer *S;
- if (!Triple(TT).isOSNaCl())
- S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
+ if (!T.isOSNaCl())
+ S = createMipsELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
else
- S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
- new MipsTargetELFStreamer(*S, STI);
+ S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
return S;
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new MipsTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createMipsAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new MipsTargetAsmStreamer(S, OS);
}
-static MCStreamer *createMipsNullStreamer(MCContext &Ctx) {
- MCStreamer *S = llvm::createNullStreamer(Ctx);
- new MipsTargetStreamer(*S);
- return S;
+static MCTargetStreamer *createMipsNullTargetStreamer(MCStreamer &S) {
+ return new MipsTargetStreamer(S);
+}
+
+static MCTargetStreamer *
+createMipsObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new MipsTargetELFStreamer(S, STI);
}
extern "C" void LLVMInitializeMipsTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
- createMipsMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
- createMipsMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
- createMipsMCRegisterInfo);
+ for (Target *T : {&TheMipsTarget, &TheMipselTarget, &TheMips64Target,
+ &TheMips64elTarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createMipsMCRegisterInfo);
+
+ // Register the elf streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createMipsAsmTargetStreamer);
+
+ TargetRegistry::RegisterNullTargetStreamer(*T,
+ createMipsNullTargetStreamer);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createMipsMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createMipsMCInstPrinter);
+
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createMipsObjectTargetStreamer);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
- createMipsMCCodeEmitterEL);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
- createMipsMCCodeEmitterEL);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
-
- TargetRegistry::RegisterNullStreamer(TheMipsTarget, createMipsNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheMipselTarget, createMipsNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheMips64Target, createMipsNullStreamer);
- TargetRegistry::RegisterNullStreamer(TheMips64elTarget,
- createMipsNullStreamer);
+ for (Target *T : {&TheMipsTarget, &TheMips64Target})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEB);
+
+ for (Target *T : {&TheMipselTarget, &TheMips64elTarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEL);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
@@ -206,23 +183,4 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
createMipsAsmBackendEL64);
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
- createMipsMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
- createMipsMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index f08a8f4..577a8b3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
@@ -35,11 +36,9 @@ extern Target TheMips64elTarget;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackendEB32(const Target &T,
@@ -55,10 +54,13 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian,
- bool Is64Bit);
+MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+ bool IsLittleEndian, bool Is64Bit);
+
+namespace MIPS_MC {
+StringRef selectMipsCPU(StringRef TT, StringRef CPU);
+}
+
} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 92b8455..aef9bd3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7;
class MipsNaClELFStreamer : public MipsELFStreamer {
public:
- MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
- : MipsELFStreamer(Context, TAB, OS, Emitter, STI), PendingCall(false) {}
+ MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
+ : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
- ~MipsNaClELFStreamer() {}
+ ~MipsNaClELFStreamer() override {}
private:
// Whether we started the sandboxing sequence for calls. Calls are bundled
@@ -94,9 +94,9 @@ private:
const MCSubtargetInfo &STI) {
MCInst MaskInst;
MaskInst.setOpcode(Mips::AND);
- MaskInst.addOperand(MCOperand::CreateReg(AddrReg));
- MaskInst.addOperand(MCOperand::CreateReg(AddrReg));
- MaskInst.addOperand(MCOperand::CreateReg(MaskReg));
+ MaskInst.addOperand(MCOperand::createReg(AddrReg));
+ MaskInst.addOperand(MCOperand::createReg(AddrReg));
+ MaskInst.addOperand(MCOperand::createReg(MaskReg));
MipsELFStreamer::EmitInstruction(MaskInst, STI);
}
@@ -252,12 +252,10 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) {
}
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
bool RelaxAll) {
- MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter,
- STI);
+ MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 0ef2208..4911632 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -9,14 +9,15 @@
#include "MipsOptionRecord.h"
#include "MipsELFStreamer.h"
+#include "MipsTargetStreamer.h"
#include "llvm/MC/MCSectionELF.h"
using namespace llvm;
void MipsRegInfoRecord::EmitMipsOptionRecord() {
MCAssembler &MCA = Streamer->getAssembler();
- Triple T(STI.getTargetTriple());
- uint64_t Features = STI.getFeatureBits();
+ MipsTargetStreamer *MTS =
+ static_cast<MipsTargetStreamer *>(Streamer->getTargetStreamer());
Streamer->PushSection();
@@ -24,17 +25,17 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
// we don't emit .Mips.options for other ELFs other than N64.
// Since .reginfo has the same information as .Mips.options (ODK_REGINFO),
// we can use the same abstraction (MipsRegInfoRecord class) to handle both.
- if (Features & Mips::FeatureN64) {
+ if (MTS->getABI().IsN64()) {
// The EntrySize value of 1 seems strange since the records are neither
// 1-byte long nor fixed length but it matches the value GAS emits.
- const MCSectionELF *Sec =
+ MCSectionELF *Sec =
Context.getELFSection(".MIPS.options", ELF::SHT_MIPS_OPTIONS,
- ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP,
- SectionKind::getMetadata(), 1, "");
- MCA.getOrCreateSectionData(*Sec).setAlignment(8);
+ ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1, "");
+ MCA.registerSection(*Sec);
+ Sec->setAlignment(8);
Streamer->SwitchSection(Sec);
- Streamer->EmitIntValue(1, 1); // kind
+ Streamer->EmitIntValue(ELF::ODK_REGINFO, 1); // kind
Streamer->EmitIntValue(40, 1); // size
Streamer->EmitIntValue(0, 2); // section
Streamer->EmitIntValue(0, 4); // info
@@ -46,11 +47,10 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
Streamer->EmitIntValue(ri_cprmask[3], 4);
Streamer->EmitIntValue(ri_gp_value, 8);
} else {
- const MCSectionELF *Sec =
- Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC,
- SectionKind::getMetadata(), 24, "");
- MCA.getOrCreateSectionData(*Sec)
- .setAlignment(Features & Mips::FeatureN32 ? 8 : 4);
+ MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO,
+ ELF::SHF_ALLOC, 24, "");
+ MCA.registerSection(*Sec);
+ Sec->setAlignment(MTS->getABI().IsN32() ? 8 : 4);
Streamer->SwitchSection(Sec);
Streamer->EmitIntValue(ri_gprmask, 4);
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 1e092f2..8e6f047 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -43,6 +43,9 @@ void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) {
+ forbidModuleDirective();
+}
void MipsTargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {}
void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {}
@@ -51,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {}
void MipsTargetStreamer::emitDirectiveNaNLegacy() {}
void MipsTargetStreamer::emitDirectiveOptionPic0() {}
void MipsTargetStreamer::emitDirectiveOptionPic2() {}
+void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); }
void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {}
void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
@@ -59,7 +63,7 @@ void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
forbidModuleDirective();
}
-void MipsTargetStreamer::emitDirectiveSetMips0() {}
+void MipsTargetStreamer::emitDirectiveSetMips0() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); }
@@ -67,12 +71,16 @@ void MipsTargetStreamer::emitDirectiveSetMips4() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips32() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips32R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R3() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips32R6() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R3() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
-void MipsTargetStreamer::emitDirectiveSetPop() {}
-void MipsTargetStreamer::emitDirectiveSetPush() {}
+void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
@@ -84,6 +92,10 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
if (!Enabled && !IsO32ABI)
report_fatal_error("+nooddspreg is only valid for O32");
}
+void MipsTargetStreamer::emitDirectiveSetFp(
+ MipsABIFlagsSection::FpABIKind Value) {
+ forbidModuleDirective();
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -144,6 +156,11 @@ void MipsTargetAsmStreamer::emitDirectiveSetAt() {
MipsTargetStreamer::emitDirectiveSetAt();
}
+void MipsTargetAsmStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) {
+ OS << "\t.set\tat=$" << Twine(RegNo) << "\n";
+ MipsTargetStreamer::emitDirectiveSetAtWithArg(RegNo);
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetNoAt() {
OS << "\t.set\tnoat\n";
MipsTargetStreamer::emitDirectiveSetNoAt();
@@ -173,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() {
OS << "\t.option\tpic2\n";
}
+void MipsTargetAsmStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ OS << "\t.insn\n";
+}
+
void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {
OS << "\t.frame\t$"
@@ -186,7 +208,10 @@ void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) {
MipsTargetStreamer::emitDirectiveSetArch(Arch);
}
-void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetMips0() {
+ OS << "\t.set\tmips0\n";
+ MipsTargetStreamer::emitDirectiveSetMips0();
+}
void MipsTargetAsmStreamer::emitDirectiveSetMips1() {
OS << "\t.set\tmips1\n";
@@ -223,6 +248,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
MipsTargetStreamer::emitDirectiveSetMips32R2();
}
+void MipsTargetAsmStreamer::emitDirectiveSetMips32R3() {
+ OS << "\t.set\tmips32r3\n";
+ MipsTargetStreamer::emitDirectiveSetMips32R3();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips32R5() {
+ OS << "\t.set\tmips32r5\n";
+ MipsTargetStreamer::emitDirectiveSetMips32R5();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetMips32R6() {
OS << "\t.set\tmips32r6\n";
MipsTargetStreamer::emitDirectiveSetMips32R6();
@@ -238,6 +273,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
MipsTargetStreamer::emitDirectiveSetMips64R2();
}
+void MipsTargetAsmStreamer::emitDirectiveSetMips64R3() {
+ OS << "\t.set\tmips64r3\n";
+ MipsTargetStreamer::emitDirectiveSetMips64R3();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips64R5() {
+ OS << "\t.set\tmips64r5\n";
+ MipsTargetStreamer::emitDirectiveSetMips64R5();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetMips64R6() {
OS << "\t.set\tmips64r6\n";
MipsTargetStreamer::emitDirectiveSetMips64R6();
@@ -253,9 +298,15 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
MipsTargetStreamer::emitDirectiveSetNoDsp();
}
-void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPop() {
+ OS << "\t.set\tpop\n";
+ MipsTargetStreamer::emitDirectiveSetPop();
+}
-void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPush() {
+ OS << "\t.set\tpush\n";
+ MipsTargetStreamer::emitDirectiveSetPush();
+}
// Print a 32 bit hex number, including all leading zeros.
static void printHex32(unsigned Value, raw_ostream &OS) {
@@ -314,15 +365,13 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP(
void MipsTargetAsmStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
+ MipsTargetStreamer::emitDirectiveSetFp(Value);
+
StringRef ModuleValue;
OS << "\t.set\tfp=";
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
}
-void MipsTargetAsmStreamer::emitMipsAbiFlags() {
- // No action required for text output.
-}
-
void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
bool IsO32ABI) {
MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
@@ -335,61 +384,58 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
: MipsTargetStreamer(S), MicroMipsEnabled(false), STI(STI) {
MCAssembler &MCA = getStreamer().getAssembler();
- uint64_t Features = STI.getFeatureBits();
- Triple T(STI.getTargetTriple());
- Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
- ? true
- : false;
-
- // Update e_header flags
- unsigned EFlags = 0;
+ Pic = MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
+
+ const FeatureBitset &Features = STI.getFeatureBits();
+
+ // Set the header flags that we can set in the constructor.
+ // FIXME: This is a fairly terrible hack. We set the rest
+ // of these in finish() below. The problem here is two-fold:
+ //
+ // a: Some of the eflags can be set/reset by directives.
+ // b: There aren't any usage paths that initialize the ABI
+ // pointer until after we initialize either an assembler
+ // or the target machine.
+ // We can fix this by making the target streamer construct
+ // the ABI, but this is fraught with wide-ranging dependency
+ // issues as well.
+ unsigned EFlags = MCA.getELFHeaderEFlags();
// Architecture
- if (Features & Mips::FeatureMips64r6)
+ if (Features[Mips::FeatureMips64r6])
EFlags |= ELF::EF_MIPS_ARCH_64R6;
- else if (Features & Mips::FeatureMips64r2)
+ else if (Features[Mips::FeatureMips64r2] ||
+ Features[Mips::FeatureMips64r3] ||
+ Features[Mips::FeatureMips64r5])
EFlags |= ELF::EF_MIPS_ARCH_64R2;
- else if (Features & Mips::FeatureMips64)
+ else if (Features[Mips::FeatureMips64])
EFlags |= ELF::EF_MIPS_ARCH_64;
- else if (Features & Mips::FeatureMips5)
+ else if (Features[Mips::FeatureMips5])
EFlags |= ELF::EF_MIPS_ARCH_5;
- else if (Features & Mips::FeatureMips4)
+ else if (Features[Mips::FeatureMips4])
EFlags |= ELF::EF_MIPS_ARCH_4;
- else if (Features & Mips::FeatureMips3)
+ else if (Features[Mips::FeatureMips3])
EFlags |= ELF::EF_MIPS_ARCH_3;
- else if (Features & Mips::FeatureMips32r6)
+ else if (Features[Mips::FeatureMips32r6])
EFlags |= ELF::EF_MIPS_ARCH_32R6;
- else if (Features & Mips::FeatureMips32r2)
+ else if (Features[Mips::FeatureMips32r2] ||
+ Features[Mips::FeatureMips32r3] ||
+ Features[Mips::FeatureMips32r5])
EFlags |= ELF::EF_MIPS_ARCH_32R2;
- else if (Features & Mips::FeatureMips32)
+ else if (Features[Mips::FeatureMips32])
EFlags |= ELF::EF_MIPS_ARCH_32;
- else if (Features & Mips::FeatureMips2)
+ else if (Features[Mips::FeatureMips2])
EFlags |= ELF::EF_MIPS_ARCH_2;
else
EFlags |= ELF::EF_MIPS_ARCH_1;
- // ABI
- // N64 does not require any ABI bits.
- if (Features & Mips::FeatureO32)
- EFlags |= ELF::EF_MIPS_ABI_O32;
- else if (Features & Mips::FeatureN32)
- EFlags |= ELF::EF_MIPS_ABI2;
-
- if (Features & Mips::FeatureGP64Bit) {
- if (Features & Mips::FeatureO32)
- EFlags |= ELF::EF_MIPS_32BITMODE; /* Compatibility Mode */
- } else if (Features & Mips::FeatureMips64r2 || Features & Mips::FeatureMips64)
- EFlags |= ELF::EF_MIPS_32BITMODE;
-
// Other options.
- if (Features & Mips::FeatureNaN2008)
+ if (Features[Mips::FeatureNaN2008])
EFlags |= ELF::EF_MIPS_NAN2008;
// -mabicalls and -mplt are not implemented but we should act as if they were
// given.
EFlags |= ELF::EF_MIPS_CPIC;
- if (Features & Mips::FeatureN64)
- EFlags |= ELF::EF_MIPS_PIC;
MCA.setELFHeaderEFlags(EFlags);
}
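The switch from a uint64_t feature mask to FeatureBitset is the interesting part of this hunk: LLVM had outgrown 64 subtarget features, and FeatureBitset is essentially a std::bitset indexed by feature enum, so tests change from bit-ANDs to indexing. A toy illustration with a stand-in enum (the real IDs are tablegen-generated):

    #include <bitset>
    #include <cstdint>

    enum { FeatureMips32r2, FeatureMips32r3, FeatureMips32r5, NumFeatures };

    // Old style: one bit per feature in a fixed 64-bit word.
    bool hasR2Old(uint64_t Features) {
      return Features & (1ULL << FeatureMips32r2);
    }

    // New style: a bitset removes the 64-feature cap; note how the r3/r5
    // additions above fold into the same EF_MIPS_ARCH_32R2 bucket.
    bool isAtLeastR2(const std::bitset<NumFeatures> &Features) {
      return Features[FeatureMips32r2] || Features[FeatureMips32r3] ||
             Features[FeatureMips32r5];
    }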
@@ -413,16 +459,42 @@ void MipsTargetELFStreamer::finish() {
const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo();
// .bss, .text and .data are always at least 16-byte aligned.
- MCSectionData &TextSectionData =
- MCA.getOrCreateSectionData(*OFI.getTextSection());
- MCSectionData &DataSectionData =
- MCA.getOrCreateSectionData(*OFI.getDataSection());
- MCSectionData &BSSSectionData =
- MCA.getOrCreateSectionData(*OFI.getBSSSection());
+ MCSection &TextSection = *OFI.getTextSection();
+ MCA.registerSection(TextSection);
+ MCSection &DataSection = *OFI.getDataSection();
+ MCA.registerSection(DataSection);
+ MCSection &BSSSection = *OFI.getBSSSection();
+ MCA.registerSection(BSSSection);
+
+ TextSection.setAlignment(std::max(16u, TextSection.getAlignment()));
+ DataSection.setAlignment(std::max(16u, DataSection.getAlignment()));
+ BSSSection.setAlignment(std::max(16u, BSSSection.getAlignment()));
+
+ const FeatureBitset &Features = STI.getFeatureBits();
+
+ // Update e_header flags. See the FIXME and comment above in
+ // the constructor for a full rundown on this.
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ // ABI
+ // N64 does not require any ABI bits.
+ if (getABI().IsO32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+ else if (getABI().IsN32())
+ EFlags |= ELF::EF_MIPS_ABI2;
+
+ if (Features[Mips::FeatureGP64Bit]) {
+ if (getABI().IsO32())
+ EFlags |= ELF::EF_MIPS_32BITMODE; /* Compatibility Mode */
+ } else if (Features[Mips::FeatureMips64r2] || Features[Mips::FeatureMips64])
+ EFlags |= ELF::EF_MIPS_32BITMODE;
- TextSectionData.setAlignment(std::max(16u, TextSectionData.getAlignment()));
- DataSectionData.setAlignment(std::max(16u, DataSectionData.getAlignment()));
- BSSSectionData.setAlignment(std::max(16u, BSSSectionData.getAlignment()));
+ // If we've set the CPIC eflag and the ABI is N64, set the PIC eflag
+ // as well.
+ if (EFlags & ELF::EF_MIPS_CPIC && getABI().IsN64())
+ EFlags |= ELF::EF_MIPS_PIC;
+
+ MCA.setELFHeaderEFlags(EFlags);
// Emit all the option records.
// At the moment we are only emitting .Mips.options (ODK_REGINFO) and
@@ -441,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
const MCSymbol &RhsSym =
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
- uint8_t Type = MCELF::GetType(Data);
- if ((Type != ELF::STT_FUNC) ||
- !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+
+ if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
return;
MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
@@ -493,15 +564,14 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
MCContext &Context = MCA.getContext();
MCStreamer &OS = getStreamer();
- const MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHT_REL,
- SectionKind::getMetadata());
+ MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHT_REL);
const MCSymbolRefExpr *ExprRef =
MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context);
- MCSectionData &SecData = MCA.getOrCreateSectionData(*Sec);
- SecData.setAlignment(4);
+ MCA.registerSection(*Sec);
+ Sec->setAlignment(4);
OS.PushSection();
@@ -572,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() {
MCA.setELFHeaderEFlags(Flags);
}
+void MipsTargetELFStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer);
+ MEF.createPendingLabelRelocs();
+}
+
void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg_) {
MCContext &Context = getStreamer().getAssembler().getContext();
@@ -604,7 +680,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
// addiu $gp, $gp, %lo(_gp_disp)
// addu $gp, $gp, $reg
// when support for position independent code is enabled.
- if (!Pic || (isN32() || isN64()))
+ if (!Pic || (getABI().IsN32() || getABI().IsN64()))
return;
// There's a GNU extension controlled by -mno-shared that allows
@@ -616,33 +692,33 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
StringRef SymName("_gp_disp");
MCAssembler &MCA = getStreamer().getAssembler();
- MCSymbol *GP_Disp = MCA.getContext().GetOrCreateSymbol(SymName);
+ MCSymbol *GP_Disp = MCA.getContext().getOrCreateSymbol(SymName);
MCA.getOrCreateSymbolData(*GP_Disp);
MCInst TmpInst;
TmpInst.setOpcode(Mips::LUi);
- TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create(
"_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext());
- TmpInst.addOperand(MCOperand::CreateExpr(HiSym));
+ TmpInst.addOperand(MCOperand::createExpr(HiSym));
getStreamer().EmitInstruction(TmpInst, STI);
TmpInst.clear();
TmpInst.setOpcode(Mips::ADDiu);
- TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
- TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create(
"_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext());
- TmpInst.addOperand(MCOperand::CreateExpr(LoSym));
+ TmpInst.addOperand(MCOperand::createExpr(LoSym));
getStreamer().EmitInstruction(TmpInst, STI);
TmpInst.clear();
TmpInst.setOpcode(Mips::ADDu);
- TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
- TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
- TmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
forbidModuleDirective();
@@ -653,7 +729,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
const MCSymbol &Sym,
bool IsReg) {
// Only N32 and N64 emit anything for .cpsetup, and only when PIC is set.
- if (!Pic || !(isN32() || isN64()))
+ if (!Pic || !(getABI().IsN32() || getABI().IsN64()))
return;
MCAssembler &MCA = getStreamer().getAssembler();
@@ -663,43 +739,44 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
if (IsReg) {
// move $save, $gpreg
Inst.setOpcode(Mips::DADDu);
- Inst.addOperand(MCOperand::CreateReg(RegOrOffset));
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ Inst.addOperand(MCOperand::createReg(RegOrOffset));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
} else {
// sd $gpreg, offset($sp)
Inst.setOpcode(Mips::SD);
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateReg(Mips::SP));
- Inst.addOperand(MCOperand::CreateImm(RegOrOffset));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::SP));
+ Inst.addOperand(MCOperand::createImm(RegOrOffset));
}
getStreamer().EmitInstruction(Inst, STI);
Inst.clear();
const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
- Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext());
+ &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext());
const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
- Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext());
+ &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext());
+
// lui $gp, %hi(%neg(%gp_rel(funcSym)))
Inst.setOpcode(Mips::LUi);
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateExpr(HiExpr));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createExpr(HiExpr));
getStreamer().EmitInstruction(Inst, STI);
Inst.clear();
// addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
Inst.setOpcode(Mips::ADDiu);
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateExpr(LoExpr));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createExpr(LoExpr));
getStreamer().EmitInstruction(Inst, STI);
Inst.clear();
// daddu $gp, $gp, $funcreg
Inst.setOpcode(Mips::DADDu);
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateReg(Mips::GP));
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(RegNo));
getStreamer().EmitInstruction(Inst, STI);
forbidModuleDirective();
@@ -709,11 +786,10 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
MCAssembler &MCA = getStreamer().getAssembler();
MCContext &Context = MCA.getContext();
MCStreamer &OS = getStreamer();
- const MCSectionELF *Sec =
- Context.getELFSection(".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS,
- ELF::SHF_ALLOC, SectionKind::getMetadata(), 24, "");
- MCSectionData &ABIShndxSD = MCA.getOrCreateSectionData(*Sec);
- ABIShndxSD.setAlignment(8);
+ MCSectionELF *Sec = Context.getELFSection(
+ ".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24, "");
+ MCA.registerSection(*Sec);
+ Sec->setAlignment(8);
OS.SwitchSection(Sec);
OS << ABIFlagsSection;
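The 24 passed to getELFSection above is the entry size of .MIPS.abiflags, which holds exactly one fixed 24-byte record (streamed by the operator<< on ABIFlagsSection). A sketch of that record per the MIPS ABI Flags specification; the struct itself is not part of this patch:

    #include <cstdint>

    // Layout of the 24-byte .MIPS.abiflags record.
    struct MipsAbiFlagsV0 {
      uint16_t version;    // version of this structure (0)
      uint8_t  isa_level;  // e.g. 32 or 64
      uint8_t  isa_rev;    // ISA revision, e.g. 6 for r6
      uint8_t  gpr_size;   // size of general purpose registers
      uint8_t  cpr1_size;  // size of co-processor 1 registers
      uint8_t  cpr2_size;  // size of co-processor 2 registers
      uint8_t  fp_abi;     // floating-point ABI in use
      uint32_t isa_ext;    // processor-specific extensions
      uint32_t ases;       // ASEs in use (DSP, MSA, microMIPS, ...)
      uint32_t flags1;     // e.g. odd single-precision registers used
      uint32_t flags2;     // reserved
    };
    static_assert(sizeof(MipsAbiFlagsV0) == 24, "matches the entry size");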
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
new file mode 100644
index 0000000..7350b97
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -0,0 +1,223 @@
+//=- MicroMips32r6InstrFormats.td - MicroMips32r6 Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes microMIPS32r6 instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MMR6Arch<string opstr> {
+ string Arch = "micromipsr6";
+ string BaseOpcode = opstr;
+}
+
+class POOL32A_BITSWAP_FM_MMR6<bits<6> funct> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class CACHE_PREF_FM_MMR6<bits<6> opgroup, bits<4> funct> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> hint;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = opgroup;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = funct;
+ let Inst{11-0} = addr{11-0};
+}
+
+class ARITH_FM_MMR6<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = funct;
+}
+
+class ADDI_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm16;
+}
+
+class SIGN_EXTEND_FM_MMR6<string instr_asm, bits<10> funct>
+ : MMR6Arch<instr_asm> {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class PCREL19_FM_MMR6<bits<2> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<19> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011110;
+ let Inst{25-21} = rt;
+ let Inst{20-19} = funct;
+ let Inst{18-0} = imm;
+}
+
+class PCREL16_FM_MMR6<bits<5> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011110;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = imm;
+}
+
+class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = funct;
+}
+
+class POOL32A_2R_FM_MMR6<bits<10> funct> : MipsR6Inst {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class SPECIAL_2R_FM_MMR6<bits<6> funct> : MipsR6Inst {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-11} = rt;
+ let Inst{10-6} = 0b00001;
+ let Inst{5-0} = funct;
+}
+
+class POOL32A_ALIGN_FM_MMR6<bits<6> funct> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<2> bp;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-9} = bp;
+ let Inst{8-6} = 0b000;
+ let Inst{5-0} = funct;
+}
+
+class AUI_FM_MMR6 : MipsR6Inst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000100;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm;
+}
+
+class POOL32A_LSA_FM<bits<6> funct> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<2> imm2;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10-9} = imm2;
+ let Inst{8-6} = 0b000;
+ let Inst{5-0} = funct;
+}
+
+class CMP_BRANCH_1R_RT_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-0} = offset;
+}
+
+class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
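Each format class in this new file is a declarative bit-packing of operand fields into a 32-bit instruction word. Written out by hand, the packing that ARITH_FM_MMR6 describes looks like the following sketch (not code from the patch):

    #include <cstdint>

    // Hand-rolled equivalent of ARITH_FM_MMR6: POOL32A major opcode 0,
    // three 5-bit register fields, then a 10-bit minor opcode (funct).
    uint32_t encodeArithMMR6(unsigned rt, unsigned rs, unsigned rd,
                             unsigned funct) {
      return (0u << 26) |           // Inst{31-26} = 0b000000
             ((rt & 0x1f) << 21) |  // Inst{25-21} = rt
             ((rs & 0x1f) << 16) |  // Inst{20-16} = rs
             ((rd & 0x1f) << 11) |  // Inst{15-11} = rd
             (0u << 10) |           // Inst{10}    = 0
             (funct & 0x3ff);       // Inst{9-0}   = funct
    }
    // e.g. funct = 0x150 yields a microMIPS32r6 ADDU encoding, matching
    // ADDU_MMR6_ENC in the companion MicroMips32r6InstrInfo.td file.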
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
new file mode 100644
index 0000000..2259d5d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -0,0 +1,329 @@
+//=- MicroMips32r6InstrInfo.td - MicroMips32r6 Instruction Information -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes microMIPS32r6 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+class ADD_MMR6_ENC : ARITH_FM_MMR6<"add", 0x110>;
+class ADDIU_MMR6_ENC : ADDI_FM_MMR6<"addiu", 0xc>;
+class ADDU_MMR6_ENC : ARITH_FM_MMR6<"addu", 0x150>;
+class ADDIUPC_MMR6_ENC : PCREL19_FM_MMR6<0b00>;
+class ALUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11111>;
+class AND_MMR6_ENC : ARITH_FM_MMR6<"and", 0x250>;
+class ANDI_MMR6_ENC : ADDI_FM_MMR6<"andi", 0x34>;
+class AUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11110>;
+class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>;
+class AUI_MMR6_ENC : AUI_FM_MMR6;
+class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
+class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
+class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
+class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
+class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011111>;
+class BGTZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b111000>;
+class BLTZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<0b111000>;
+class BGEZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<0b110000>;
+class BLEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b110000>;
+class CACHE_MMR6_ENC : CACHE_PREF_FM_MMR6<0b001000, 0b0110>;
+class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>;
+class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
+class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
+class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
+class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
+class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
+class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
+class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>;
+class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>;
+class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>;
+class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>;
+class MUH_MMR6_ENC : ARITH_FM_MMR6<"muh", 0x58>;
+class MULU_MMR6_ENC : ARITH_FM_MMR6<"mulu", 0x98>;
+class MUHU_MMR6_ENC : ARITH_FM_MMR6<"muhu", 0xd8>;
+class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>;
+class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>;
+class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>;
+class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>;
+class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>;
+class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>;
+class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>;
+class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>;
+class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>;
+class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>;
+class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
+class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
+
+class CMP_CBR_RT_Z_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
+ RegisterOperand GPROpnd>
+ : BRANCH_DESC_BASE, MMR6Arch<instr_asm> {
+ dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $offset");
+ list<Register> Defs = [AT];
+}
+
+class BEQZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"beqzalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BGEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BGTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BLEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BLTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm,
+ GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd>;
+class ADDIU_MMR6_DESC : ArithLogicI<"addiu", simm16, GPR32Opnd>;
+class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd>;
+class MUL_MMR6_DESC : ArithLogicR<"mul", GPR32Opnd>;
+class MUH_MMR6_DESC : ArithLogicR<"muh", GPR32Opnd>;
+class MULU_MMR6_DESC : ArithLogicR<"mulu", GPR32Opnd>;
+class MUHU_MMR6_DESC : ArithLogicR<"muhu", GPR32Opnd>;
+
+class BC_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd>
+ : BRANCH_DESC_BASE, MMR6Arch<instr_asm> {
+ dag InOperandList = (ins opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$offset");
+ bit isBarrier = 1;
+}
+
+class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26> {
+ bit isCall = 1;
+ list<Register> Defs = [RA];
+}
+class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26>;
+class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>;
+class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>;
+
+class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
+ list<dag> Pattern = [];
+}
+
+class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
+
+class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
+ string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeCacheOpMM";
+}
+
+class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd>;
+class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>;
+
+class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+}
+
+class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>;
+class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
+
+class JMP_MMR6_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
+ RegisterOperand GPROpnd>
+ : MMR6Arch<opstr> {
+ dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
+ string AsmString = !strconcat(opstr, "\t$rt, $offset");
+ list<dag> Pattern = [];
+ bit isTerminator = 1;
+ bit hasDelaySlot = 0;
+}
+
+class JIALC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
+ GPR32Opnd> {
+ bit isCall = 1;
+ list<Register> Defs = [RA];
+}
+
+class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16,
+ GPR32Opnd> {
+ bit isBarrier = 1;
+ list<Register> Defs = [AT];
+}
+
+class ALIGN_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
+ list<dag> Pattern = [];
+}
+
+class ALIGN_MMR6_DESC : ALIGN_MMR6_DESC_BASE<"align", GPR32Opnd, uimm2>;
+
+class AUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins GPROpnd:$rs, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
+ list<dag> Pattern = [];
+}
+
+class AUI_MMR6_DESC : AUI_MMR6_DESC_BASE<"aui", GPR32Opnd>;
+
+class SEB_MMR6_DESC : SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>;
+class SEH_MMR6_DESC : SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>;
+class ALUIPC_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ list<dag> Pattern = [];
+}
+
+class ALUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"aluipc", GPR32Opnd>;
+class AUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"auipc", GPR32Opnd>;
+
+class LSA_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $rd, $imm2");
+ list<dag> Pattern = [];
+}
+
+class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+
+class PCREL_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins ImmOpnd:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ list<dag> Pattern = [];
+}
+
+class ADDIUPC_MMR6_DESC : PCREL_MMR6_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2>;
+class LWPC_MMR6_DESC: PCREL_MMR6_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>;
+
+class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [];
+}
+
+class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
+class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
+class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>;
+class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>;
+class MOD_MMR6_DESC : ArithLogicR<"mod", GPR32Opnd>;
+class MODU_MMR6_DESC : ArithLogicR<"modu", GPR32Opnd>;
+class AND_MMR6_DESC : ArithLogicR<"and", GPR32Opnd>;
+class ANDI_MMR6_DESC : ArithLogicI<"andi", simm16, GPR32Opnd>;
+class NOR_MMR6_DESC : ArithLogicR<"nor", GPR32Opnd>;
+class OR_MMR6_DESC : ArithLogicR<"or", GPR32Opnd>;
+class ORI_MMR6_DESC : ArithLogicI<"ori", simm16, GPR32Opnd>;
+class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd>;
+class XORI_MMR6_DESC : ArithLogicI<"xori", simm16, GPR32Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let DecoderNamespace = "MicroMips32r6" in {
+def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6;
+def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6;
+def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6;
+def ADDIUPC_MMR6 : R6MMR6Rel, ADDIUPC_MMR6_ENC, ADDIUPC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ALUIPC_MMR6 : R6MMR6Rel, ALUIPC_MMR6_ENC, ALUIPC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def AND_MMR6 : StdMMR6Rel, AND_MMR6_DESC, AND_MMR6_ENC, ISA_MICROMIPS32R6;
+def ANDI_MMR6 : StdMMR6Rel, ANDI_MMR6_DESC, ANDI_MMR6_ENC, ISA_MICROMIPS32R6;
+def AUIPC_MMR6 : R6MMR6Rel, AUIPC_MMR6_ENC, AUIPC_MMR6_DESC, ISA_MICROMIPS32R6;
+def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6;
+def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6;
+def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BGEZALC_MMR6 : R6MMR6Rel, BGEZALC_MMR6_ENC, BGEZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BGTZALC_MMR6 : R6MMR6Rel, BGTZALC_MMR6_ENC, BGTZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BLEZALC_MMR6 : R6MMR6Rel, BLEZALC_MMR6_ENC, BLEZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6;
+def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6;
+def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6;
+def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
+def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
+def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6;
+def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6;
+def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6;
+def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6;
+def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6;
+def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6;
+def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6;
+def MUH_MMR6 : R6MMR6Rel, MUH_MMR6_DESC, MUH_MMR6_ENC, ISA_MICROMIPS32R6;
+def MULU_MMR6 : R6MMR6Rel, MULU_MMR6_DESC, MULU_MMR6_ENC, ISA_MICROMIPS32R6;
+def MUHU_MMR6 : R6MMR6Rel, MUHU_MMR6_DESC, MUHU_MMR6_ENC, ISA_MICROMIPS32R6;
+def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6;
+def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6;
+def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6;
+def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6;
+def SEB_MMR6 : StdMMR6Rel, SEB_MMR6_DESC, SEB_MMR6_ENC, ISA_MICROMIPS32R6;
+def SEH_MMR6 : StdMMR6Rel, SEH_MMR6_DESC, SEH_MMR6_ENC, ISA_MICROMIPS32R6;
+def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6;
+def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6;
+def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
+def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index fae7059..004b0d5 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -95,7 +95,7 @@ def FNEG_MM : MMRel, ABSS_FT<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>,
ABS_FM_MM<1, 0x2d>;
def FMOV_D32_MM : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
- ABS_FM_MM<1, 0x1>, AdditionalRequires<[NotFP64bit]>;
+ ABS_FM_MM<1, 0x1>, FGR_32;
def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd,
II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>;
@@ -124,9 +124,9 @@ def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd,
def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd,
II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>;
def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
- MFC1_FM_MM<0xc0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+ MFC1_FM_MM<0xc0>, ISA_MIPS32R2, FGR_32;
def MTHC1_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM_MM<0xe0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+ MFC1_FM_MM<0xe0>, ISA_MIPS32R2, FGR_32;
def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM_MM<0x1>;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 51b5c0c..560afa4 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -131,6 +131,17 @@ class LOAD_STORE_SP_FM_MM16<bits<6> op> {
let Inst{4-0} = offset;
}
+class LOAD_GP_FM_MM16<bits<6> op> {
+ bits<3> rt;
+ bits<7> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-7} = rt;
+ let Inst{6-0} = offset;
+}
+
class ADDIUS5_FM_MM16 {
bits<5> rd;
bits<4> imm;
@@ -238,6 +249,29 @@ class BEQNEZ_FM_MM16<bits<6> op> {
let Inst{6-0} = offset;
}
+class B16_FM {
+ bits<10> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x33;
+ let Inst{9-0} = offset;
+}
+
+class MOVEP_FM_MM16 {
+ bits<3> dst_regs;
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x21;
+ let Inst{9-7} = dst_regs;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rs;
+ let Inst{0} = 0;
+}
+
//===----------------------------------------------------------------------===//
// MicroMIPS 32-bit Instruction Formats
//===----------------------------------------------------------------------===//
@@ -898,3 +932,14 @@ class BARRIER_FM_MM<bits<5> op> : MMArch {
let Inst{10-6} = 0x0;
let Inst{5-0} = 0x0;
}
+
+class ADDIUPC_FM_MM {
+ bits<3> rs;
+ bits<23> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1e;
+ let Inst{25-23} = rs;
+ let Inst{22-0} = imm;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 241f452..2aab739 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,8 +1,10 @@
def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrMM", [frameindex]>;
+def addrimm4lsl2 : ComplexPattern<iPTR, 2, "selectIntAddrLSL2MM", [frameindex]>;
def simm4 : Operand<i32> {
let DecoderMethod = "DecodeSimm4";
}
+def simm7 : Operand<i32>;
def li_simm7 : Operand<i32> {
let DecoderMethod = "DecodeLiSimm7";
}
@@ -64,7 +66,7 @@ def MicroMipsMemGPRMM16AsmOperand : AsmOperandClass {
class mem_mm_4_generic : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops ptr_rc, simm4);
+ let MIOperandInfo = (ops GPRMM16, simm4);
let OperandType = "OPERAND_MEMORY";
let ParserMatchClass = MicroMipsMemGPRMM16AsmOperand;
}
@@ -96,6 +98,13 @@ def mem_mm_sp_imm5_lsl2 : Operand<i32> {
let EncoderMethod = "getMemEncodingMMSPImm5Lsl2";
}
+def mem_mm_gp_imm7_lsl2 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPRMM16:$base, simm7:$offset);
+ let OperandType = "OPERAND_MEMORY";
+ let EncoderMethod = "getMemEncodingMMGPImm7Lsl2";
+}
+
def mem_mm_12 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm12);
@@ -135,10 +144,23 @@ def brtarget7_mm : Operand<OtherVT> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
+def brtarget10_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValueMMPC10";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget10MM";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
def brtarget_mm : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValueMM";
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTargetMM";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def simm23_lsl2 : Operand<i32> {
+ let EncoderMethod = "getSimm23Lsl2Encoding";
+ let DecoderMethod = "DecodeSimm23Lsl2";
}
class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
@@ -170,6 +192,28 @@ class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
let DecoderMethod = "DecodeMemMMImm12";
}
+/// A register pair used by the movep instruction.
+def MovePRegPairAsmOperand : AsmOperandClass {
+ let Name = "MovePRegPair";
+ let ParserMethod = "parseMovePRegPair";
+ let PredicateMethod = "isMovePRegPair";
+}
+
+def movep_regpair : Operand<i32> {
+ let EncoderMethod = "getMovePRegPairOpValue";
+ let ParserMatchClass = MovePRegPairAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeMovePRegPair";
+ let MIOperandInfo = (ops GPR32Opnd, GPR32Opnd);
+}
+
+class MovePMM16<string opstr, RegisterOperand RO> :
+MicroMipsInst16<(outs movep_regpair:$dst_regs), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$dst_regs, $rs, $rt"), [],
+ NoItinerary, FrmR> {
+ let isReMaterializable = 1;
+}
+
/// A register pair used by load/store pair instructions.
def RegPairAsmOperand : AsmOperandClass {
let Name = "RegPair";
@@ -294,6 +338,15 @@ class StoreSPMM16<string opstr, DAGOperand RO, InstrItinClass Itin,
let mayStore = 1;
}
+class LoadGPMM16<string opstr, DAGOperand RO, InstrItinClass Itin,
+ Operand MemOpnd> :
+ MicroMipsInst16<(outs RO:$rt), (ins MemOpnd:$offset),
+ !strconcat(opstr, "\t$rt, $offset"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMGPImm7Lsl2";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+}
+
class AddImmUR2<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, simm3_lsa2:$imm),
!strconcat(opstr, "\t$rd, $rs, $imm"),
@@ -422,6 +475,10 @@ class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+class AddImmUPC<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm),
+ !strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>;
+
/// A list of registers used by load/store multiple instructions.
def RegListAsmOperand : AsmOperandClass {
let Name = "RegList";
@@ -470,6 +527,7 @@ class StoreMultMM16<string opstr,
ComplexPattern Addr = addr> :
MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayStore = 1;
}
@@ -478,9 +536,22 @@ class LoadMultMM16<string opstr,
ComplexPattern Addr = addr> :
MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayLoad = 1;
}
+class UncondBranchMM16<string opstr> :
+ MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
+ !strconcat(opstr, "\t$offset"),
+ [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC, InMicroMips];
+ let Defs = [AT];
+}
+
def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
ARITH_FM_MM16<0>;
def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
@@ -510,6 +581,8 @@ def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16,
LOAD_STORE_FM_MM16<0x2a>;
def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW,
mem_mm_4_lsl2>, LOAD_STORE_FM_MM16<0x3a>;
+def LWGP_MM : LoadGPMM16<"lw", GPRMM16Opnd, II_LW, mem_mm_gp_imm7_lsl2>,
+ LOAD_GP_FM_MM16<0x19>;
def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
LOAD_STORE_SP_FM_MM16<0x12>;
def SWSP_MM : StoreSPMM16<"sw", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
@@ -521,6 +594,7 @@ def ADDIUSP_MM : AddImmUSP<"addiusp">, ADDIUSP_FM_MM16;
def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>;
def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
+def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16;
def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>;
@@ -532,6 +606,7 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>,
BEQNEZ_FM_MM16<0x23>;
def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>,
BEQNEZ_FM_MM16<0x2b>;
+def B16_MM : UncondBranchMM16<"b16">, B16_FM;
def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>;
def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>;
@@ -567,8 +642,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
LW_FM_MM<0xc>;
/// Arithmetic Instructions (3-Operand, R-Type)
- def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd>, ADD_FM_MM<0, 0x150>;
- def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd>, ADD_FM_MM<0, 0x1d0>;
+ def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+ ADD_FM_MM<0, 0x150>;
+ def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
+ ADD_FM_MM<0, 0x1d0>;
def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd>, ADD_FM_MM<0, 0x210>;
def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM_MM<0, 0x110>;
def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM_MM<0, 0x190>;
@@ -591,6 +668,9 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def UDIV_MM : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>,
MULT_FM_MM<0x2ec>;
+ /// Arithmetic Instructions with PC and Immediate
+ def ADDIUPC_MM : AddImmUPC<"addiupc", GPRMM16Opnd>, ADDIUPC_FM_MM;
+
/// Shift Instructions
def SLL_MM : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>,
SRA_FM_MM<0, 0>;
@@ -645,6 +725,19 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>;
def LWP_MM : LoadPairMM<"lwp">, LWM_FM_MM<0x1>;
+ /// Load and store multiple pseudo-instructions
+ class LoadWordMultMM<string instr_asm> :
+ MipsAsmPseudoInst<(outs reglist:$rt), (ins mem_mm_12:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")>;
+
+ class StoreWordMultMM<string instr_asm> :
+ MipsAsmPseudoInst<(outs), (ins reglist:$rt, mem_mm_12:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")>;
+
+ def SWM_MM : StoreWordMultMM<"swm">;
+ def LWM_MM : LoadWordMultMM<"lwm">;
+
/// Move Conditional
def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd,
NoItinerary>, ADD_FM_MM<0, 0x58>;
@@ -697,6 +790,7 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def J_MM : MMRel, JumpFJ<jmptarget_mm, "j", br, bb, "j">,
J_FM_MM<0x35>;
def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>;
+ def JALX_MM : MMRel, JumpLink<"jalx", calltarget>, J_FM_MM<0x3c>;
}
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
@@ -791,6 +885,8 @@ def : MipsPat<(i32 immSExt16:$imm),
(ADDiu_MM ZERO, immSExt16:$imm)>;
def : MipsPat<(i32 immZExt16:$imm),
(ORi_MM ZERO, immZExt16:$imm)>;
+def : MipsPat<(not GPR32:$in),
+ (NOR_MM GPR32Opnd:$in, ZERO)>;
def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm),
(ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>;
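The (not GPR32:$in) pattern added just above rests on the identity not x = nor(x, 0), so a single NOR_MM covers bitwise negation without a dedicated NOT instruction. In plain C++ terms:

    #include <cassert>
    #include <cstdint>

    // nor(a, b) = ~(a | b); with b = 0 this degenerates to plain bitwise
    // not, which is what (not $in) -> (NOR_MM $in, ZERO) exploits.
    uint32_t nor(uint32_t a, uint32_t b) { return ~(a | b); }

    int main() {
      for (uint32_t x : {0u, 1u, 0xdeadbeefu, ~0u})
        assert(nor(x, 0) == ~x);
    }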
@@ -814,10 +910,26 @@ def : MipsPat<(srl GPRMM16:$src, immZExt2Shift:$imm),
def : MipsPat<(srl GPR32:$src, immZExt5:$imm),
(SRL_MM GPR32:$src, immZExt5:$imm)>;
+def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr),
+ (SW16_MM GPRMM16:$src, addrimm4lsl2:$addr)>;
+def : MipsPat<(store GPR32:$src, addr:$addr),
+ (SW_MM GPR32:$src, addr:$addr)>;
+
+def : MipsPat<(load addrimm4lsl2:$addr),
+ (LW16_MM addrimm4lsl2:$addr)>;
+def : MipsPat<(load addr:$addr),
+ (LW_MM addr:$addr)>;
+
//===----------------------------------------------------------------------===//
// MicroMips instruction aliases
//===----------------------------------------------------------------------===//
+class UncondBranchMMPseudo<string opstr> :
+ MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
+ !strconcat(opstr, "\t$offset")>;
+
+ def B_MM_Pseudo : UncondBranchMMPseudo<"b">;
+
def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
def : MipsInstAlias<"nop", (MOVE16_MM ZERO, ZERO), 1>;
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index 87f1b04..671d7a8 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -20,9 +20,13 @@
namespace llvm {
class MipsTargetMachine;
+ class ModulePass;
class FunctionPass;
- FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ ModulePass *createMipsOs16Pass(MipsTargetMachine &TM);
+ ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM);
+
+ FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM);
FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index 5ad5683..dbb5f7b 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -28,12 +28,15 @@ class PredicateControl {
list<Predicate> FGRPredicates = [];
// Predicates for the instruction group membership such as ISA's and ASE's
list<Predicate> InsnPredicates = [];
+ // Predicate for marking the instruction as usable in hard-float mode only.
+ list<Predicate> HardFloatPredicate = [];
// Predicates for anything else
list<Predicate> AdditionalPredicates = [];
list<Predicate> Predicates = !listconcat(EncodingPredicates,
GPRPredicates,
FGRPredicates,
InsnPredicates,
+ HardFloatPredicate,
AdditionalPredicates);
}
@@ -69,14 +72,8 @@ def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
-def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
- "Enable o32 ABI">;
-def FeatureN32 : SubtargetFeature<"n32", "ABI", "MipsABIInfo::N32()",
- "Enable n32 ABI">;
-def FeatureN64 : SubtargetFeature<"n64", "ABI", "MipsABIInfo::N64()",
- "Enable n64 ABI">;
-def FeatureEABI : SubtargetFeature<"eabi", "ABI", "MipsABIInfo::EABI()",
- "Enable eabi ABI">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "IsSoftFloat", "true",
+ "Does not support floating point instructions">;
def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
@@ -122,10 +119,16 @@ def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
[FeatureMips3_32r2, FeatureMips4_32r2,
FeatureMips5_32r2, FeatureMips32]>;
+def FeatureMips32r3 : SubtargetFeature<"mips32r3", "MipsArchVersion",
+ "Mips32r3", "Mips32r3 ISA Support",
+ [FeatureMips32r2]>;
+def FeatureMips32r5 : SubtargetFeature<"mips32r5", "MipsArchVersion",
+ "Mips32r5", "Mips32r5 ISA Support",
+ [FeatureMips32r3]>;
def FeatureMips32r6 : SubtargetFeature<"mips32r6", "MipsArchVersion",
"Mips32r6",
"Mips32r6 ISA Support [experimental]",
- [FeatureMips32r2, FeatureFP64Bit,
+ [FeatureMips32r5, FeatureFP64Bit,
FeatureNaN2008]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
@@ -133,10 +136,16 @@ def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
+def FeatureMips64r3 : SubtargetFeature<"mips64r3", "MipsArchVersion",
+ "Mips64r3", "Mips64r3 ISA Support",
+ [FeatureMips64r2, FeatureMips32r3]>;
+def FeatureMips64r5 : SubtargetFeature<"mips64r5", "MipsArchVersion",
+ "Mips64r5", "Mips64r5 ISA Support",
+ [FeatureMips64r3, FeatureMips32r5]>;
def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion",
"Mips64r6",
"Mips64r6 ISA Support [experimental]",
- [FeatureMips32r6, FeatureMips64r2,
+ [FeatureMips32r6, FeatureMips64r5,
FeatureNaN2008]>;
def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
@@ -162,20 +171,24 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips1", [FeatureMips1, FeatureO32]>;
-def : Proc<"mips2", [FeatureMips2, FeatureO32]>;
-def : Proc<"mips32", [FeatureMips32, FeatureO32]>;
-def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>;
-def : Proc<"mips32r6", [FeatureMips32r6, FeatureO32]>;
-
-def : Proc<"mips3", [FeatureMips3, FeatureN64]>;
-def : Proc<"mips4", [FeatureMips4, FeatureN64]>;
-def : Proc<"mips5", [FeatureMips5, FeatureN64]>;
-def : Proc<"mips64", [FeatureMips64, FeatureN64]>;
-def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>;
-def : Proc<"mips64r6", [FeatureMips64r6, FeatureN64]>;
-def : Proc<"mips16", [FeatureMips16, FeatureO32]>;
-def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>;
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips32r3", [FeatureMips32r3]>;
+def : Proc<"mips32r5", [FeatureMips32r5]>;
+def : Proc<"mips32r6", [FeatureMips32r6]>;
+
+def : Proc<"mips3", [FeatureMips3]>;
+def : Proc<"mips4", [FeatureMips4]>;
+def : Proc<"mips5", [FeatureMips5]>;
+def : Proc<"mips64", [FeatureMips64]>;
+def : Proc<"mips64r2", [FeatureMips64r2]>;
+def : Proc<"mips64r3", [FeatureMips64r3]>;
+def : Proc<"mips64r5", [FeatureMips64r5]>;
+def : Proc<"mips64r6", [FeatureMips64r6]>;
+def : Proc<"mips16", [FeatureMips16]>;
+def : Proc<"octeon", [FeatureMips64r2, FeatureCnMips]>;
def MipsAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 6070276..db2a924 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -32,11 +32,12 @@ using namespace llvm;
Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI)
: MipsFrameLowering(STI, STI.stackAlignment()) {}
-void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -84,7 +85,7 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -143,25 +144,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void Mips16FrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -174,7 +156,7 @@ void Mips16FrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
const MipsRegisterInfo &RI = TII.getRegisterInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
bool SaveS2 = Reserved[Mips::S2];
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
index 012d558..f281c92 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
@@ -23,13 +23,9 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 32dc90a..893fc7c 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips16HardFloat.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
@@ -19,38 +19,51 @@
#include <algorithm>
#include <string>
-#define DEBUG_TYPE "mips16-hard-float"
+using namespace llvm;
-static void inlineAsmOut
- (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) {
- std::vector<llvm::Type *> AsmArgTypes;
- std::vector<llvm::Value*> AsmArgs;
- llvm::FunctionType *AsmFTy =
- llvm::FunctionType::get(Type::getVoidTy(C),
- AsmArgTypes, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
- /* IsAlignStack */ false,
- llvm::InlineAsm::AD_ATT);
- CallInst::Create(IA, AsmArgs, "", BB);
-}
+#define DEBUG_TYPE "mips16-hard-float"
namespace {
+ class Mips16HardFloat : public ModulePass {
+ public:
+ static char ID;
-class InlineAsmHelper {
- LLVMContext &C;
- BasicBlock *BB;
-public:
- InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
- C(C_), BB(BB_) {
- }
+ Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
- void Out(StringRef AsmString) {
- inlineAsmOut(C, AsmString, BB);
- }
+ const char *getPassName() const override {
+ return "MIPS16 Hard Float Pass";
+ }
-};
+ bool runOnModule(Module &M) override;
+
+ protected:
+ const MipsTargetMachine &TM;
+ };
+
+ class InlineAsmHelper {
+ LLVMContext &C;
+ BasicBlock *BB;
+ public:
+ InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
+ C(C_), BB(BB_) {
+ }
+
+ void Out(StringRef AsmString) {
+ std::vector<llvm::Type *> AsmArgTypes;
+ std::vector<llvm::Value*> AsmArgs;
+
+ llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C),
+ AsmArgTypes, false);
+ llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
+ /* IsAlignStack */ false,
+ llvm::InlineAsm::AD_ATT);
+ CallInst::Create(IA, AsmArgs, "", BB);
+ }
+ };
+
+ char Mips16HardFloat::ID = 0;
}
+
//
// Return types that matter for hard float are:
// float, double, complex float, and complex double
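As a sketch of the classification this comment describes: the variant names below are inferred from the Helper table in fixupFPReturnAndCall further down ("__mips16_ret_sf/df/sc/dc" indexed by Helper[RV]) and should be checked against the pass source rather than taken as the exact declaration.

    // A minimal sketch, assuming the variant order matches the helper table:
    // float -> FRet, double -> DRet, complex float -> CFRet,
    // complex double -> CDRet, anything else -> NoFPRet.
    enum FPReturnVariant { FRet, DRet, CFRet, CDRet, NoFPRet };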
@@ -154,11 +167,11 @@ static bool needsFPStubFromParams(Function &F) {
if (F.arg_size() >=1) {
Type *ArgType = F.getFunctionType()->getParamType(0);
switch (ArgType->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ break;
}
}
return false;
@@ -182,10 +195,8 @@ static bool needsFPHelperFromSig(Function &F) {
// We swap between FP and Integer registers to allow Mips16 and Mips32 to
// interoperate
//
-
-static void swapFPIntParams
- (FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
- bool LE, bool ToFP) {
+static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
+ bool LE, bool ToFP) {
//LLVMContext &Context = M->getContext();
std::string MI = ToFP? "mtc1 ": "mfc1 ";
switch (PV) {
@@ -242,6 +253,7 @@ static void swapFPIntParams
return;
}
}
+
//
// Make sure that we know we already need a stub for this function.
// Having called needsFPHelperFromSig
@@ -297,8 +309,8 @@ static void assureFPCallStub(Function &F, Module *M,
break;
case CFRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ IAH.Out("mfc1 $$2,$$f0");
+ IAH.Out("mfc1 $$3,$$f2");
} else {
IAH.Out("mfc1 $$3,$$f0");
IAH.Out("mfc1 $$3,$$f2");
@@ -331,28 +343,27 @@ static void assureFPCallStub(Function &F, Module *M,
//
// Functions that are llvm intrinsics and don't need helpers.
//
-static const char *IntrinsicInline[] =
- {"fabs",
- "fabsf",
- "llvm.ceil.f32", "llvm.ceil.f64",
- "llvm.copysign.f32", "llvm.copysign.f64",
- "llvm.cos.f32", "llvm.cos.f64",
- "llvm.exp.f32", "llvm.exp.f64",
- "llvm.exp2.f32", "llvm.exp2.f64",
- "llvm.fabs.f32", "llvm.fabs.f64",
- "llvm.floor.f32", "llvm.floor.f64",
- "llvm.fma.f32", "llvm.fma.f64",
- "llvm.log.f32", "llvm.log.f64",
- "llvm.log10.f32", "llvm.log10.f64",
- "llvm.nearbyint.f32", "llvm.nearbyint.f64",
- "llvm.pow.f32", "llvm.pow.f64",
- "llvm.powi.f32", "llvm.powi.f64",
- "llvm.rint.f32", "llvm.rint.f64",
- "llvm.round.f32", "llvm.round.f64",
- "llvm.sin.f32", "llvm.sin.f64",
- "llvm.sqrt.f32", "llvm.sqrt.f64",
- "llvm.trunc.f32", "llvm.trunc.f64",
- };
+static const char *IntrinsicInline[] = {
+ "fabs", "fabsf",
+ "llvm.ceil.f32", "llvm.ceil.f64",
+ "llvm.copysign.f32", "llvm.copysign.f64",
+ "llvm.cos.f32", "llvm.cos.f64",
+ "llvm.exp.f32", "llvm.exp.f64",
+ "llvm.exp2.f32", "llvm.exp2.f64",
+ "llvm.fabs.f32", "llvm.fabs.f64",
+ "llvm.floor.f32", "llvm.floor.f64",
+ "llvm.fma.f32", "llvm.fma.f64",
+ "llvm.log.f32", "llvm.log.f64",
+ "llvm.log10.f32", "llvm.log10.f64",
+ "llvm.nearbyint.f32", "llvm.nearbyint.f64",
+ "llvm.pow.f32", "llvm.pow.f64",
+ "llvm.powi.f32", "llvm.powi.f64",
+ "llvm.rint.f32", "llvm.rint.f64",
+ "llvm.round.f32", "llvm.round.f64",
+ "llvm.sin.f32", "llvm.sin.f64",
+ "llvm.sqrt.f32", "llvm.sqrt.f64",
+ "llvm.trunc.f32", "llvm.trunc.f64",
+};
static bool isIntrinsicInline(Function *F) {
return std::binary_search(std::begin(IntrinsicInline),
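This lookup only works because the IntrinsicInline table above is kept in sorted order; std::binary_search on unsorted data is undefined behaviour. A self-contained sketch of the same idea (hypothetical table; the C-string comparator is spelled out because comparing const char* values directly would compare pointers, not names):

    #include <algorithm>
    #include <cstring>
    #include <iterator>

    static const char *Names[] = {"fabs", "fabsf", "llvm.ceil.f32"}; // must stay sorted
    static bool isListed(const char *N) {
      return std::binary_search(std::begin(Names), std::end(Names), N,
                                [](const char *A, const char *B) {
                                  return std::strcmp(A, B) < 0; // lexicographic order
                                });
    }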
@@ -384,9 +395,10 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Type *T = RVal->getType();
FPReturnVariant RV = whichFPReturnVariant(T);
if (RV == NoFPRet) continue;
- static const char* Helper[NoFPRet] =
- {"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
- "__mips16_ret_dc"};
+ static const char* Helper[NoFPRet] = {
+ "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
+ "__mips16_ret_dc"
+ };
const char *Name = Helper[RV];
AttributeSet A;
Value *Params[] = {RVal};
@@ -406,33 +418,33 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- const Value* V = CI->getCalledValue();
- const Type* T = nullptr;
- if (V) T = V->getType();
- const PointerType *PFT=nullptr;
- if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=nullptr;
- if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
- Function *F_ = CI->getCalledFunction();
- if (FT && needsFPReturnHelper(*FT) &&
- !(F_ && isIntrinsicInline(F_))) {
+ const Value* V = CI->getCalledValue();
+ const Type* T = nullptr;
+ if (V) T = V->getType();
+ const PointerType *PFT=nullptr;
+ if (T) PFT = dyn_cast<PointerType>(T);
+ const FunctionType *FT=nullptr;
+ if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
+ Function *F_ = CI->getCalledFunction();
+ if (FT && needsFPReturnHelper(*FT) &&
+ !(F_ && isIntrinsicInline(F_))) {
+ Modified=true;
+ F.addFnAttr("saveS2");
+ }
+ if (F_ && !isIntrinsicInline(F_)) {
+      // PIC mode calls are handled by already-defined helper functions.
+ if (needsFPReturnHelper(*F_)) {
Modified=true;
F.addFnAttr("saveS2");
}
- if (F_ && !isIntrinsicInline(F_)) {
- // pic mode calls are handled by already defined
- // helper functions
- if (needsFPReturnHelper(*F_)) {
+ if (TM.getRelocationModel() != Reloc::PIC_ ) {
+ if (needsFPHelperFromSig(*F_)) {
+ assureFPCallStub(*F_, M, TM);
Modified=true;
- F.addFnAttr("saveS2");
- }
- if (TM.getRelocationModel() != Reloc::PIC_ ) {
- if (needsFPHelperFromSig(*F_)) {
- assureFPCallStub(*F_, M, TM);
- Modified=true;
- }
}
}
+ }
}
}
return Modified;
@@ -489,7 +501,6 @@ static void removeUseSoftFloat(Function &F) {
F.addAttributes(AttributeSet::FunctionIndex, A);
}
-namespace llvm {
//
// This pass only makes sense when the underlying chip has floating point but
@@ -530,11 +541,7 @@ bool Mips16HardFloat::runOnModule(Module &M) {
return Modified;
}
-char Mips16HardFloat::ID = 0;
-
-}
-ModulePass *llvm::createMips16HardFloat(MipsTargetMachine &TM) {
+ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) {
return new Mips16HardFloat(TM);
}
-
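With the pass class now private to this file, createMips16HardFloatPass is the sole public entry point. A hedged usage sketch, assuming the legacy pass manager (as the Mips pass config uses in this release) and that the factory is declared via the Mips target headers; the wrapper name addMips16IRPasses is illustrative:

    #include "MipsTargetMachine.h" // assumed to declare createMips16HardFloatPass
    #include "llvm/IR/LegacyPassManager.h"

    // Schedule the Mips16 hard-float interoperability rewrite before codegen.
    static void addMips16IRPasses(llvm::legacy::PassManagerBase &PM,
                                  llvm::MipsTargetMachine &TM) {
      PM.add(llvm::createMips16HardFloatPass(TM));
    }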
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.h b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.h
deleted file mode 100644
index 586cc25..0000000
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===---- Mips16HardFloat.h for Mips16 Hard Float --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a phase which implements part of the floating point
-// interoperability between Mips16 and Mips32 code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class Mips16HardFloat : public ModulePass {
-public:
- static char ID;
-
- Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
-
- const char *getPassName() const override { return "MIPS16 Hard Float Pass"; }
- bool runOnModule(Module &M) override;
-
-protected:
- const MipsTargetMachine &TM;
-};
-
-ModulePass *createMips16HardFloat(MipsTargetMachine &TM);
-
-}
-#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 36b3ac9..7b6a2a1 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
if (!Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
@@ -72,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
@@ -102,7 +102,7 @@ void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
@@ -134,7 +134,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ AliasReg = Subtarget->getFrameLowering()->hasFP(*MF)
? AliasFPReg
: getMips16SPAliasReg();
return;
@@ -146,7 +146,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ AliasReg = Subtarget->getFrameLowering()->hasFP(*MF)
? AliasFPReg
: getMips16SPAliasReg();
return;
@@ -163,14 +163,15 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
bool Mips16DAGToDAGISel::selectAddr16(
SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
SDValue &Alias) {
+ SDLoc DL(Addr);
EVT ValTy = Addr.getValueType();
- Alias = CurDAG->getTargetConstant(0, ValTy);
+ Alias = CurDAG->getTargetConstant(0, DL, ValTy);
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
+ Offset = CurDAG->getTargetConstant(0, DL, ValTy);
getMips16SPRefReg(Parent, Alias);
return true;
}
@@ -199,7 +200,7 @@ bool Mips16DAGToDAGISel::selectAddr16(
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), DL, ValTy);
return true;
}
}
@@ -235,7 +236,7 @@ bool Mips16DAGToDAGISel::selectAddr16(
}
}
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
+ Offset = CurDAG->getTargetConstant(0, DL, ValTy);
return true;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 7479695..846e3c9 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -127,7 +127,7 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (!TM.Options.UseSoftFloat)
+ if (!Subtarget.useSoftFloat())
setMips16HardFloatLibCalls();
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
@@ -149,7 +149,7 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
}
const MipsTargetLowering *
@@ -522,8 +522,7 @@ MachineBasicBlock *Mips16TargetLowering::
emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -580,13 +579,12 @@ emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
return BB;
}
-MachineBasicBlock *Mips16TargetLowering::emitSelT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
+MachineBasicBlock *
+Mips16TargetLowering::emitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI,
+ MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -645,13 +643,13 @@ MachineBasicBlock *Mips16TargetLowering::emitSelT16
}
-MachineBasicBlock *Mips16TargetLowering::emitSeliT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
+MachineBasicBlock *
+Mips16TargetLowering::emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -710,14 +708,13 @@ MachineBasicBlock *Mips16TargetLowering::emitSeliT16
}
-MachineBasicBlock
- *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const {
+MachineBasicBlock *
+Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
unsigned regY = MI->getOperand(1).getReg();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -729,12 +726,11 @@ MachineBasicBlock
}
MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, bool ImmSigned,
- MachineInstr *MI, MachineBasicBlock *BB) const {
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, bool ImmSigned,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
int64_t imm = MI->getOperand(1).getImm();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -763,13 +759,12 @@ static unsigned Mips16WhichOp8uOr16simm
llvm_unreachable("immediate field not usable");
}
-MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
+MachineBasicBlock *
+Mips16TargetLowering::emitFEXT_CCRX16_ins(unsigned SltOpc, MachineInstr *MI,
+ MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
unsigned regY = MI->getOperand(2).getReg();
@@ -781,13 +776,13 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
return BB;
}
-MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const {
+MachineBasicBlock *
+Mips16TargetLowering::emitFEXT_CCRXI16_ins(unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
int64_t Imm = MI->getOperand(2).getImm();
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 7e1fbfd..a49572e 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -1,4 +1,3 @@
-
//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
@@ -25,6 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -32,7 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
+ : MipsInstrInfo(STI, Mips::Bimm16), RI() {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
index e7d0c07..6540b40 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -18,7 +18,7 @@
#include "MipsInstrInfo.h"
namespace llvm {
-
+class MipsSubtarget;
class Mips16InstrInfo : public MipsInstrInfo {
const Mips16RegisterInfo RI;
@@ -77,7 +77,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate.
// This is to adjust some FrameReg. We return the new register to be used
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index 0bb452a..ebd51d7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -41,8 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {}
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
@@ -140,8 +139,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned NewImm;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(
- MBB.getParent()->getSubtarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
Offset = SignExtend64<16>(NewImm);
IsKill = true;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h
index 3cdf836..d67a79b 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h
@@ -21,7 +21,7 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget);
+ Mips16RegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index e9a4289..13216be 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -11,6 +11,25 @@
//
//===----------------------------------------------------------------------===//
+class R6MMR6Rel;
+
+def MipsR62MicroMipsR6 : InstrMapping {
+ let FilterClass = "R6MMR6Rel";
+  // Instructions with the same BaseOpcode value form a row.
+  let RowFields = ["BaseOpcode"];
+  // Instructions with the same Arch value form a column.
+  let ColFields = ["Arch"];
+  // The key column holds the MIPSR6 encodings.
+  let KeyCol = ["mipsr6"];
+  // Value columns are Arch=mipsr6 and Arch=micromipsr6.
+ let ValueCols = [["mipsr6"], ["micromipsr6"]];
+}
+
+class MipsR6Arch<string opstr> {
+ string Arch = "mipsr6";
+ string BaseOpcode = opstr;
+}
+
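The InstrMapping above asks TableGen to pair each MIPSR6 instruction with its microMIPSR6 encoding by BaseOpcode. A conceptual C++ sketch of the derived table, not the generated API; the names and opcode values are illustrative only:

    #include <cstdint>
    #include <map>
    #include <string>

    enum class Arch { MipsR6, MicroMipsR6 }; // the ColFields axis

    // Rows keyed by BaseOpcode, columns keyed by Arch; the key column is MipsR6.
    static const std::map<std::string, std::map<Arch, uint16_t>> R6ToMMR6 = {
        {"addiupc", {{Arch::MipsR6, 0x0001},          // hypothetical opcodes
                     {Arch::MicroMipsR6, 0x0002}}},
    };

    static uint16_t toMicroMipsR6(const std::string &BaseOpcode) {
      return R6ToMMR6.at(BaseOpcode).at(Arch::MicroMipsR6);
    }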
class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
PredicateControl {
let DecoderNamespace = "Mips32r6_64r6";
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 49c6322..d6ab8a6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -188,52 +188,52 @@ multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
RegisterOperand FGROpnd>{
def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
- ISA_MIPS32R6;
+ ISA_MIPS32R6, HARDFLOAT;
}
//===----------------------------------------------------------------------===//
@@ -243,7 +243,7 @@ multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
//===----------------------------------------------------------------------===//
class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> {
+ Operand ImmOpnd> : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rs);
dag InOperandList = (ins ImmOpnd:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
@@ -255,7 +255,7 @@ class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>;
class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2>;
class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> {
+ Operand ImmOpnd> : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
@@ -264,7 +264,8 @@ class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
class ALIGN_DESC : ALIGN_DESC_BASE<"align", GPR32Opnd, uimm2>;
-class ALUIPC_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class ALUIPC_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rs);
dag InOperandList = (ins simm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
@@ -274,7 +275,8 @@ class ALUIPC_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class ALUIPC_DESC : ALUIPC_DESC_BASE<"aluipc", GPR32Opnd>;
class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd>;
-class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rs);
dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
@@ -289,7 +291,8 @@ class BRANCH_DESC_BASE {
bit hasDelaySlot = 0;
}
-class BC_DESC_BASE<string instr_asm, DAGOperand opnd> : BRANCH_DESC_BASE {
+class BC_DESC_BASE<string instr_asm, DAGOperand opnd> : BRANCH_DESC_BASE,
+ MipsR6Arch<instr_asm> {
dag InOperandList = (ins opnd:$offset);
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$offset");
@@ -313,7 +316,8 @@ class CMP_CBR_EQNE_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
}
class CMP_CBR_RT_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
- RegisterOperand GPROpnd> : BRANCH_DESC_BASE {
+ RegisterOperand GPROpnd>
+ : BRANCH_DESC_BASE, MipsR6Arch<instr_asm> {
dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$rt, $offset");
@@ -373,7 +377,8 @@ class BOVC_DESC : CMP_BC_DESC_BASE<"bovc", brtarget, GPR32Opnd>;
class BNVC_DESC : CMP_BC_DESC_BASE<"bnvc", brtarget, GPR32Opnd>;
class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
- RegisterOperand GPROpnd> {
+ RegisterOperand GPROpnd>
+ : MipsR6Arch<opstr> {
dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
string AsmString = !strconcat(opstr, "\t$rt, $offset");
list<dag> Pattern = [];
@@ -400,7 +405,8 @@ class JR_HB_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
bit isBarrier=1;
}
-class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
@@ -410,7 +416,8 @@ class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>;
class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- SDPatternOperator Op=null_frag> {
+ SDPatternOperator Op=null_frag>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
@@ -451,7 +458,7 @@ class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> {
}
class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- SDPatternOperator Op=null_frag> {
+ SDPatternOperator Op=null_frag> : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
@@ -479,7 +486,8 @@ class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
}
class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
-class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
@@ -544,7 +552,7 @@ class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd> {
+ RegisterOperand GPROpnd> : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
@@ -580,7 +588,7 @@ class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>;
class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> {
+ Operand ImmOpnd> : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2");
@@ -610,7 +618,8 @@ class SC_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class SC_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd>;
-class CLO_CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class CLO_CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
@@ -642,90 +651,90 @@ class SDBBP_R6_DESC {
//
//===----------------------------------------------------------------------===//
-def ADDIUPC : ADDIUPC_ENC, ADDIUPC_DESC, ISA_MIPS32R6;
-def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
-def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
-def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6;
-def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
+def ADDIUPC : R6MMR6Rel, ADDIUPC_ENC, ADDIUPC_DESC, ISA_MIPS32R6;
+def ALIGN : R6MMR6Rel, ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
+def ALUIPC : R6MMR6Rel, ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
+def AUI : R6MMR6Rel, AUI_ENC, AUI_DESC, ISA_MIPS32R6;
+def AUIPC : R6MMR6Rel, AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
def BAL : BAL_ENC, BAL_DESC, ISA_MIPS32R6;
-def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6;
-def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6;
-def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6;
+def BALC : R6MMR6Rel, BALC_ENC, BALC_DESC, ISA_MIPS32R6;
+def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6, HARDFLOAT;
+def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6, HARDFLOAT;
def BC2EQZ : BC2EQZ_ENC, BC2EQZ_DESC, ISA_MIPS32R6;
def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6;
-def BC : BC_ENC, BC_DESC, ISA_MIPS32R6;
+def BC : R6MMR6Rel, BC_ENC, BC_DESC, ISA_MIPS32R6;
def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
-def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
+def BEQZALC : R6MMR6Rel, BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
def BGEC : BGEC_ENC, BGEC_DESC, ISA_MIPS32R6;
def BGEUC : BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6;
-def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
+def BGEZALC : R6MMR6Rel, BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
-def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
+def BGTZALC : R6MMR6Rel, BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
-def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
-def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
+def BITSWAP : R6MMR6Rel, BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
+def BLEZALC : R6MMR6Rel, BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6;
def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6;
-def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
+def BLTZALC : R6MMR6Rel, BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
-def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
+def BNEZALC : R6MMR6Rel, BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
-def CACHE_R6 : CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
-def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6;
-def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6;
-def CLO_R6 : CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
-def CLZ_R6 : CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
+def CACHE_R6 : R6MMR6Rel, CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
+def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
+def CLZ_R6 : R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
-def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6;
-def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
-def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
-def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6;
+def DIV : R6MMR6Rel, DIV_ENC, DIV_DESC, ISA_MIPS32R6;
+def DIVU : R6MMR6Rel, DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
+def JIALC : R6MMR6Rel, JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
+def JIC : R6MMR6Rel, JIC_ENC, JIC_DESC, ISA_MIPS32R6;
def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6;
def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6;
def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6;
-def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
+def LSA_R6 : R6MMR6Rel, LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
-def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
+def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
-def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6;
-def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6;
-def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6;
-def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6;
-def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6;
-def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6;
-def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6;
-def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6;
-def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6;
-def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6;
-def MOD : MOD_ENC, MOD_DESC, ISA_MIPS32R6;
-def MODU : MODU_ENC, MODU_DESC, ISA_MIPS32R6;
-def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6;
-def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6;
-def MUH : MUH_ENC, MUH_DESC, ISA_MIPS32R6;
-def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
-def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
-def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6;
+def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MOD : R6MMR6Rel, MOD_ENC, MOD_DESC, ISA_MIPS32R6;
+def MODU : R6MMR6Rel, MODU_ENC, MODU_DESC, ISA_MIPS32R6;
+def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def MUH : R6MMR6Rel, MUH_ENC, MUH_DESC, ISA_MIPS32R6;
+def MUHU : R6MMR6Rel, MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
+def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
+def MULU : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6;
def NAL; // BAL with rd=0
-def PREF_R6 : PREF_ENC, PREF_DESC, ISA_MIPS32R6;
-def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6;
-def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6;
+def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6;
+def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6;
def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6;
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
-def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6;
-def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6;
-def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6;
-def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6;
-def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6;
-def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6;
+def SELEQZ : R6MMR6Rel, SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
+def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def SELNEZ : R6MMR6Rel, SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
+def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index d6628d4..272933f 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -16,6 +16,10 @@
//===----------------------------------------------------------------------===//
// Unsigned Operand
+def uimm5_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+}
+
def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
}
@@ -41,6 +45,38 @@ def immSExt10_64 : PatLeaf<(i64 imm),
def immZExt16_64 : PatLeaf<(i64 imm),
[{ return isInt<16>(N->getZExtValue()); }]>;
+def immZExt5_64 : ImmLeaf<i64, [{ return Imm == (Imm & 0x1f); }]>;
+
+// Transformation function: get log2 of low 32 bits of immediate
+def Log2LO : SDNodeXForm<imm, [{
+ return getImm(N, Log2_64((unsigned) N->getZExtValue()));
+}]>;
+
+// Transformation function: get log2 of high 32 bits of immediate
+def Log2HI : SDNodeXForm<imm, [{
+ return getImm(N, Log2_64((unsigned) (N->getZExtValue() >> 32)));
+}]>;
+
+// Predicate: True if the immediate is a power of 2 whose set bit lies in the low 32 bits
+def PowerOf2LO : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t Imm = N->getZExtValue();
+ return isPowerOf2_64(Imm) && (Imm & 0xffffffff) == Imm;
+ }
+ else
+ return false;
+}]>;
+
+// Predicate: True if the immediate is a power of 2 whose set bit lies in the high 32 bits
+def PowerOf2HI : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i64) {
+ uint64_t Imm = N->getZExtValue();
+ return isPowerOf2_64(Imm) && (Imm & 0xffffffff00000000) == Imm;
+ }
+ else
+ return false;
+}]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -305,12 +341,34 @@ class SetCC64_I<string opstr, PatFrag cond_op>:
let TwoOperandAliasConstraint = "$rt = $rs";
}
+class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO, bits<64> shift = 1> :
+ InstSE<(outs), (ins RO:$rs, uimm5_64:$p, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $p, $offset"),
+ [(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)),
+ bb:$offset)], IIBranch, FrmI, opstr> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+}
+
// Unsigned Byte Add
let Pattern = [(set GPR64Opnd:$rd,
(and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in
def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>,
ADD_FM<0x1c, 0x28>;
+// Branch on Bit Clear /+32
+def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd>, BBIT_FM<0x32>;
+def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, 0x100000000>,
+ BBIT_FM<0x36>;
+
+// Branch on Bit Set /+32
+def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd>, BBIT_FM<0x3a>;
+def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, 0x100000000>,
+ BBIT_FM<0x3e>;
+
// Multiply Doubleword to GPR
let Defs = [HI0, LO0, P0, P1, P2] in
def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
@@ -361,6 +419,14 @@ def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>,
}
+/// Move between CPU and coprocessor registers
+let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
+def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
+def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
+def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
+}
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -428,6 +494,24 @@ def : MipsPat<(trunc (assertzext GPR64:$src)),
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
+// Bypass trunc nodes for bitwise ops.
+def : MipsPat<(i32 (trunc (and GPR64:$lhs, GPR64:$rhs))),
+ (EXTRACT_SUBREG (AND64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
+def : MipsPat<(i32 (trunc (or GPR64:$lhs, GPR64:$rhs))),
+ (EXTRACT_SUBREG (OR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
+def : MipsPat<(i32 (trunc (xor GPR64:$lhs, GPR64:$rhs))),
+ (EXTRACT_SUBREG (XOR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
+
+// Variable shift instruction patterns.
+def : MipsPat<(shl GPR64:$rt, (i32 (trunc GPR64:$rs))),
+ (DSLLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;
+def : MipsPat<(srl GPR64:$rt, (i32 (trunc GPR64:$rs))),
+ (DSRLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;
+def : MipsPat<(sra GPR64:$rt, (i32 (trunc GPR64:$rs))),
+ (DSRAV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;
+def : MipsPat<(rotr GPR64:$rt, (i32 (trunc GPR64:$rs))),
+ (DROTRV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;
+
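These patterns can drop the explicit trunc because the MIPS64 variable shifts (DSLLV and friends) read only the low six bits of the amount register, so taking the 32-bit subregister cannot change the result. A quick check of the arithmetic:

    #include <cstdint>

    // What DSLLV computes: hardware masks the shift amount to 6 bits.
    static uint64_t dsllv(uint64_t Rt, uint64_t Rs) { return Rt << (Rs & 63); }

    // The pattern's version: go through the low 32 bits (EXTRACT_SUBREG sub_32).
    static uint64_t dsllvViaSub32(uint64_t Rt, uint64_t Rs) {
      uint32_t Lo = static_cast<uint32_t>(Rs);
      return Rt << (Lo & 63); // (Rs & 63) == (Lo & 63), so the results agree
    }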
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>;
def : MipsPat<(i64 (zext GPR32:$src)), (DSRL (DSLL64_32 GPR32:$src), 32)>;
@@ -450,6 +534,18 @@ let AdditionalPredicates = [NotDSP] in {
(DADDiu GPR64:$lhs, imm:$imm)>;
}
+// Octeon bbit0/bbit1 selection patterns.
+let Predicates = [HasMips64, HasCnMips] in {
+def : MipsPat<(brcond (i32 (seteq (and i64:$lhs, PowerOf2LO:$mask), 0)), bb:$dst),
+ (BBIT0 i64:$lhs, (Log2LO PowerOf2LO:$mask), bb:$dst)>;
+def : MipsPat<(brcond (i32 (seteq (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst),
+ (BBIT032 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>;
+def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2LO:$mask), 0)), bb:$dst),
+ (BBIT1 i64:$lhs, (Log2LO PowerOf2LO:$mask), bb:$dst)>;
+def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst),
+ (BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>;
+}
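A sketch of the selection arithmetic these patterns encode: a power-of-two mask picks bbit0/bbit1 when its bit sits in the low word (PowerOf2LO) and the 032/132 forms when it sits in the high word (PowerOf2HI), with Log2LO/Log2HI yielding the bit-index immediate:

    #include <cassert>
    #include <cstdint>

    struct BBitSel { bool HighHalf; unsigned P; }; // P is the $p immediate

    static BBitSel selectBBit(uint64_t Mask) {
      assert(Mask != 0 && (Mask & (Mask - 1)) == 0 && "mask must be a power of two");
      unsigned Bit = 0;
      while (!(Mask & (1ULL << Bit))) ++Bit;      // Log2_64 of a power of two
      return Bit < 32 ? BBitSel{false, Bit}       // PowerOf2LO -> bbit0/bbit1
                      : BBitSel{true, Bit - 32};  // PowerOf2HI -> bbit032/bbit132
    }
    // e.g. Mask == 0x100000000 gives {HighHalf=true, P=0}: bbit032 tests bit 32.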
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -471,6 +567,15 @@ def : MipsInstAlias<"dadd $rs, $imm",
def : MipsInstAlias<"dsll $rd, $rt, $rs",
(DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
+def : MipsInstAlias<"dneg $rt, $rs",
+ (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
+ ISA_MIPS3;
+def : MipsInstAlias<"dneg $rt",
+ (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 0>,
+ ISA_MIPS3;
+def : MipsInstAlias<"dnegu $rt, $rs",
+ (DSUBu GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
+ ISA_MIPS3;
def : MipsInstAlias<"dsubu $rt, $rs, $imm",
(DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
InvertedImOperand64:$imm), 0>, ISA_MIPS3;
@@ -501,19 +606,6 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs",
(DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
-class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> :
- MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
- !strconcat(instr_asm, "\t$rt, $imm64")> ;
-def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>;
-
-/// Move between CPU and coprocessor registers
-let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
-def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
-def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
-def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
-def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
-}
-
// Two operand (implicit 0 selector) versions:
def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
@@ -526,3 +618,12 @@ def : MipsInstAlias<"syncs", (SYNC 0x6), 0>;
def : MipsInstAlias<"syncw", (SYNC 0x4), 0>;
def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
}
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
+                    !strconcat(instr_asm, "\t$rt, $imm64")>;
+def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 50c0441..a3995b8c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -19,9 +19,9 @@
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
+#include "MipsTargetMachine.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -54,11 +54,11 @@ using namespace llvm;
#define DEBUG_TYPE "mips-asm-printer"
MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
- return static_cast<MipsTargetStreamer &>(*OutStreamer.getTargetStreamer());
+ return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
}
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ Subtarget = &MF.getSubtarget<MipsSubtarget>();
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
@@ -120,7 +120,7 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
if (HasLinkReg) {
unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
- TmpInst0.addOperand(MCOperand::CreateReg(ZeroReg));
+ TmpInst0.addOperand(MCOperand::createReg(ZeroReg));
}
lowerOperand(MI->getOperand(0), MCOp);
@@ -143,7 +143,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != Mips::CONSTPOOL_ENTRY) {
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
if (MI->getOpcode() == Mips::CONSTPOOL_ENTRY) {
@@ -159,11 +159,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
- OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ OutStreamer->EmitDataRegion(MCDR_DataRegion);
InConstantPool = true;
}
- OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+ OutStreamer->EmitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
if (MCPE.isMachineConstantPoolEntry())
@@ -179,14 +179,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
do {
// Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, &*I))
+ if (emitPseudoExpansionLowering(*OutStreamer, &*I))
continue;
if (I->getOpcode() == Mips::PseudoReturn ||
I->getOpcode() == Mips::PseudoReturn64 ||
I->getOpcode() == Mips::PseudoIndirectBranch ||
I->getOpcode() == Mips::PseudoIndirectBranch64) {
- emitPseudoIndirectBranch(OutStreamer, &*I);
+ emitPseudoIndirectBranch(*OutStreamer, &*I);
continue;
}
@@ -203,7 +203,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst0;
MCInstLowering.Lower(I, TmpInst0);
- EmitToStreamer(OutStreamer, TmpInst0);
+ EmitToStreamer(*OutStreamer, TmpInst0);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
@@ -251,6 +251,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set the CPU and FPU Bitmasks
const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
unsigned CPURegSize = Mips::GPR32RegClass.getSize();
@@ -258,33 +259,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
bool HasAFGR64Reg = false;
unsigned CSFPRegsSize = 0;
- unsigned i, e = CSI.size();
- // Set FPU Bitmask.
- for (i = 0; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Mips::GPR32RegClass.contains(Reg))
- break;
+ for (const auto &I : CSI) {
+ unsigned Reg = I.getReg();
+ unsigned RegNum = TRI->getEncodingValue(Reg);
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
- if (Mips::AFGR64RegClass.contains(Reg)) {
+ // If it's a floating point register, set the FPU Bitmask.
+ // If it's a general purpose register, set the CPU Bitmask.
+ if (Mips::FGR32RegClass.contains(Reg)) {
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ } else if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
HasAFGR64Reg = true;
- continue;
- }
-
- FPUBitmask |= (1 << RegNum);
- CSFPRegsSize += FGR32RegSize;
- }
-
- // Set CPU Bitmask.
- for (; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
- CPUBitmask |= (1 << RegNum);
+ } else if (Mips::GPR32RegClass.contains(Reg))
+ CPUBitmask |= (1 << RegNum);
}
// FP Regs are saved right below where the virtual frame pointer points to.
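The rewritten loop above folds the old two-pass scan (FPU registers first, then GPRs) into a single pass over the callee-saved list. A minimal standalone sketch of the resulting bitmask logic, with illustrative register encodings rather than the real MIPS tables:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum class RegClass { GPR32, FGR32, AFGR64 };
    struct SavedReg { RegClass RC; unsigned Encoding; };

    int main() {
      // Assumed example: $s0 (GPR), $f20 (single FP), $f22/$f23 (FP pair).
      std::vector<SavedReg> CSI = {
          {RegClass::GPR32, 16}, {RegClass::FGR32, 20}, {RegClass::AFGR64, 22}};
      uint32_t CPUBitmask = 0, FPUBitmask = 0;
      for (const auto &I : CSI) {
        if (I.RC == RegClass::FGR32)
          FPUBitmask |= 1u << I.Encoding;  // one single-precision register
        else if (I.RC == RegClass::AFGR64)
          FPUBitmask |= 3u << I.Encoding;  // even/odd register pair
        else
          CPUBitmask |= 1u << I.Encoding;  // general purpose register
      }
      std::printf("CPU=0x%08x FPU=0x%08x\n", CPUBitmask, FPUBitmask);
      return 0;
    }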
@@ -308,7 +298,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
/// Frame Directive
void MipsAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
@@ -319,7 +309,7 @@ void MipsAsmPrinter::emitFrameDirective() {
/// Emit Set directives.
const char *MipsAsmPrinter::getCurrentABIString() const {
- switch (Subtarget->getABI().GetEnumValue()) {
+ switch (static_cast<MipsTargetMachine &>(TM).getABI().GetEnumValue()) {
case MipsABIInfo::ABI::O32: return "abi32";
case MipsABIInfo::ABI::N32: return "abiN32";
case MipsABIInfo::ABI::N64: return "abi64";
@@ -347,7 +337,7 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
TS.emitDirectiveSetNoMips16();
TS.emitDirectiveEnt(*CurrentFnSym);
- OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer->EmitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
@@ -357,10 +347,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
MCInstLowering.Initialize(&MF->getContext());
- bool IsNakedFunction =
- MF->getFunction()->
- getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked);
+ bool IsNakedFunction = MF->getFunction()->hasFnAttribute(Attribute::Naked);
if (!IsNakedFunction)
emitFrameDirective();
@@ -393,7 +380,13 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
if (!InConstantPool)
return;
InConstantPool = false;
- OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+void MipsAsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ MipsTargetStreamer &TS = getTargetStreamer();
+ if (MBB.size() == 0)
+ TS.emitDirectiveInsn();
}
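The new EmitBasicBlockEnd override above handles empty basic blocks. Presumably (a reading of the code, not verified output) a label with no instructions now gets a trailing .insn directive, so the assembler still marks it as code rather than data, which matters for the ISA bit on microMIPS/MIPS16 labels:

    $BB0_2:             # an otherwise-empty basic block
            .insn       # emitted by the override above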
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
@@ -440,7 +433,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,const char *ExtraCode,
+ unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -454,12 +447,12 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'X': // hex const int
if ((MO.getType()) != MachineOperand::MO_Immediate)
return true;
- O << "0x" << StringRef(utohexstr(MO.getImm())).lower();
+ O << "0x" << Twine::utohexstr(MO.getImm());
return false;
case 'x': // hex const int (low 16 bits)
if ((MO.getType()) != MachineOperand::MO_Immediate)
return true;
- O << "0x" << StringRef(utohexstr(MO.getImm() & 0xffff)).lower();
+ O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff);
return false;
case 'd': // decimal const int
if ((MO.getType()) != MachineOperand::MO_Immediate)
@@ -542,25 +535,31 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- int Offset = 0;
+ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
+ const MachineOperand &BaseMO = MI->getOperand(OpNum);
+ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1);
+ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand.");
+ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand.");
+ int Offset = OffsetMO.getImm();
+
// Currently we are expecting either no ExtraCode or 'D'
if (ExtraCode) {
if (ExtraCode[0] == 'D')
- Offset = 4;
+ Offset += 4;
else
return true; // Unknown modifier.
+ // FIXME: M = high order bits
+ // FIXME: L = low order bits
}
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ O << Offset << "($" << MipsInstPrinter::getRegisterName(BaseMO.getReg()) << ")";
return false;
}
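A hedged usage sketch (an assumed example, not taken from the patch) of the two-operand memory constraint this rewrite handles: the base register and immediate offset now arrive as an operand pair, and the GCC-style 'D' modifier addresses the second word of a doubleword in memory:

    // Compile for MIPS; '%D2' prints the same memory operand with offset + 4.
    void load_u64_words(unsigned long long *p, unsigned *w0, unsigned *w1) {
      unsigned a, b;
      asm("lw %0, %2\n\t"
          "lw %1, %D2"
          : "=r"(a), "=r"(b)
          : "m"(*p));
      *w0 = a; // which word is the low half depends on endianness
      *w1 = b;
    }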
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
bool closeP = false;
@@ -689,7 +688,21 @@ printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) {
}
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
- bool IsABICalls = Subtarget->isABICalls();
+
+ // Compute MIPS architecture attributes based on the default subtarget
+ // that we'd have constructed. Module level directives aren't LTO
+ // clean anyhow.
+ // FIXME: For ifunc related functions we could iterate over and look
+ // for a feature string that doesn't match the default one.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU =
+ MIPS_MC::selectMipsCPU(TM.getTargetTriple(), TM.getTargetCPU());
+ StringRef FS = TM.getTargetFeatureString();
+ const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
+ const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM);
+
+ bool IsABICalls = STI.isABICalls();
+ const MipsABIInfo &ABI = MTM.getABI();
if (IsABICalls) {
getTargetStreamer().emitDirectiveAbiCalls();
Reloc::Model RM = TM.getRelocationModel();
@@ -697,54 +710,49 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Ideally it should test for properties of the ABI and not the ABI
// itself.
// For the moment, I'm only correcting enough to make MIPS-IV work.
- if (RM == Reloc::Static && !Subtarget->isABI_N64())
+ if (RM == Reloc::Static && !ABI.IsN64())
getTargetStreamer().emitDirectiveOptionPic0();
}
// Tell the assembler which ABI we are using
std::string SectionName = std::string(".mdebug.") + getCurrentABIString();
- OutStreamer.SwitchSection(OutContext.getELFSection(
- SectionName, ELF::SHT_PROGBITS, 0, SectionKind::getDataRel()));
+ OutStreamer->SwitchSection(
+ OutContext.getELFSection(SectionName, ELF::SHT_PROGBITS, 0));
// NaN: At the moment we only support:
// 1. .nan legacy (default)
// 2. .nan 2008
- Subtarget->isNaN2008() ? getTargetStreamer().emitDirectiveNaN2008()
- : getTargetStreamer().emitDirectiveNaNLegacy();
+ STI.isNaN2008() ? getTargetStreamer().emitDirectiveNaN2008()
+ : getTargetStreamer().emitDirectiveNaNLegacy();
// TODO: handle O64 ABI
- if (Subtarget->isABI_EABI()) {
- if (Subtarget->isGP32bit())
- OutStreamer.SwitchSection(
- OutContext.getELFSection(".gcc_compiled_long32", ELF::SHT_PROGBITS, 0,
- SectionKind::getDataRel()));
+ if (ABI.IsEABI()) {
+ if (STI.isGP32bit())
+ OutStreamer->SwitchSection(OutContext.getELFSection(".gcc_compiled_long32",
+ ELF::SHT_PROGBITS, 0));
else
- OutStreamer.SwitchSection(
- OutContext.getELFSection(".gcc_compiled_long64", ELF::SHT_PROGBITS, 0,
- SectionKind::getDataRel()));
+ OutStreamer->SwitchSection(OutContext.getELFSection(".gcc_compiled_long64",
+ ELF::SHT_PROGBITS, 0));
}
- getTargetStreamer().updateABIInfo(*Subtarget);
+ getTargetStreamer().updateABIInfo(STI);
// We should always emit a '.module fp=...' but binutils 2.24 does not accept
// it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
// -mfp64) and omit it otherwise.
- if (Subtarget->isABI_O32() && (Subtarget->isABI_FPXX() ||
- Subtarget->isFP64bit()))
+ if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit()))
getTargetStreamer().emitDirectiveModuleFP();
// We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
// accept it. We therefore emit it when it contradicts the default or an
// option has changed the default (i.e. FPXX) and omit it otherwise.
- if (Subtarget->isABI_O32() && (!Subtarget->useOddSPReg() ||
- Subtarget->isABI_FPXX()))
- getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(),
- Subtarget->isABI_O32());
+ if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX()))
+ getTargetStreamer().emitDirectiveModuleOddSPReg(STI.useOddSPReg(),
+ ABI.IsO32());
}
-void MipsAsmPrinter::emitInlineAsmStart(
- const MCSubtargetInfo &StartInfo) const {
+void MipsAsmPrinter::emitInlineAsmStart() const {
MipsTargetStreamer &TS = getTargetStreamer();
// GCC's choice of assembler options for inline assembly code ('at', 'macro'
@@ -757,31 +765,33 @@ void MipsAsmPrinter::emitInlineAsmStart(
TS.emitDirectiveSetAt();
TS.emitDirectiveSetMacro();
TS.emitDirectiveSetReorder();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
void MipsAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const {
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
getTargetStreamer().emitDirectiveSetPop();
}
-void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) {
+void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) {
MCInst I;
I.setOpcode(Mips::JAL);
I.addOperand(
- MCOperand::CreateExpr(MCSymbolRefExpr::Create(Symbol, OutContext)));
- OutStreamer.EmitInstruction(I, getSubtargetInfo());
+ MCOperand::createExpr(MCSymbolRefExpr::Create(Symbol, OutContext)));
+ OutStreamer->EmitInstruction(I, STI);
}
-void MipsAsmPrinter::EmitInstrReg(unsigned Opcode, unsigned Reg) {
+void MipsAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode,
+ unsigned Reg) {
MCInst I;
I.setOpcode(Opcode);
- I.addOperand(MCOperand::CreateReg(Reg));
- OutStreamer.EmitInstruction(I, getSubtargetInfo());
+ I.addOperand(MCOperand::createReg(Reg));
+ OutStreamer->EmitInstruction(I, STI);
}
-void MipsAsmPrinter::EmitInstrRegReg(unsigned Opcode, unsigned Reg1,
+void MipsAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI,
+ unsigned Opcode, unsigned Reg1,
unsigned Reg2) {
MCInst I;
//
@@ -795,22 +805,24 @@ void MipsAsmPrinter::EmitInstrRegReg(unsigned Opcode, unsigned Reg1,
Reg2 = Temp;
}
I.setOpcode(Opcode);
- I.addOperand(MCOperand::CreateReg(Reg1));
- I.addOperand(MCOperand::CreateReg(Reg2));
- OutStreamer.EmitInstruction(I, getSubtargetInfo());
+ I.addOperand(MCOperand::createReg(Reg1));
+ I.addOperand(MCOperand::createReg(Reg2));
+ OutStreamer->EmitInstruction(I, STI);
}
-void MipsAsmPrinter::EmitInstrRegRegReg(unsigned Opcode, unsigned Reg1,
+void MipsAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI,
+ unsigned Opcode, unsigned Reg1,
unsigned Reg2, unsigned Reg3) {
MCInst I;
I.setOpcode(Opcode);
- I.addOperand(MCOperand::CreateReg(Reg1));
- I.addOperand(MCOperand::CreateReg(Reg2));
- I.addOperand(MCOperand::CreateReg(Reg3));
- OutStreamer.EmitInstruction(I, getSubtargetInfo());
+ I.addOperand(MCOperand::createReg(Reg1));
+ I.addOperand(MCOperand::createReg(Reg2));
+ I.addOperand(MCOperand::createReg(Reg3));
+ OutStreamer->EmitInstruction(I, STI);
}
-void MipsAsmPrinter::EmitMovFPIntPair(unsigned MovOpc, unsigned Reg1,
+void MipsAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI,
+ unsigned MovOpc, unsigned Reg1,
unsigned Reg2, unsigned FPReg1,
unsigned FPReg2, bool LE) {
if (!LE) {
@@ -818,59 +830,60 @@ void MipsAsmPrinter::EmitMovFPIntPair(unsigned MovOpc, unsigned Reg1,
Reg1 = Reg2;
Reg2 = temp;
}
- EmitInstrRegReg(MovOpc, Reg1, FPReg1);
- EmitInstrRegReg(MovOpc, Reg2, FPReg2);
+ EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1);
+ EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2);
}
-void MipsAsmPrinter::EmitSwapFPIntParams(Mips16HardFloatInfo::FPParamVariant PV,
+void MipsAsmPrinter::EmitSwapFPIntParams(const MCSubtargetInfo &STI,
+ Mips16HardFloatInfo::FPParamVariant PV,
bool LE, bool ToFP) {
using namespace Mips16HardFloatInfo;
unsigned MovOpc = ToFP ? Mips::MTC1 : Mips::MFC1;
switch (PV) {
case FSig:
- EmitInstrRegReg(MovOpc, Mips::A0, Mips::F12);
+ EmitInstrRegReg(STI, MovOpc, Mips::A0, Mips::F12);
break;
case FFSig:
- EmitMovFPIntPair(MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F14, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F14, LE);
break;
case FDSig:
- EmitInstrRegReg(MovOpc, Mips::A0, Mips::F12);
- EmitMovFPIntPair(MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE);
+ EmitInstrRegReg(STI, MovOpc, Mips::A0, Mips::F12);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE);
break;
case DSig:
- EmitMovFPIntPair(MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
break;
case DDSig:
- EmitMovFPIntPair(MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
- EmitMovFPIntPair(MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE);
break;
case DFSig:
- EmitMovFPIntPair(MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
- EmitInstrRegReg(MovOpc, Mips::A2, Mips::F14);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE);
+ EmitInstrRegReg(STI, MovOpc, Mips::A2, Mips::F14);
break;
case NoSig:
return;
}
}
-void
-MipsAsmPrinter::EmitSwapFPIntRetval(Mips16HardFloatInfo::FPReturnVariant RV,
- bool LE) {
+void MipsAsmPrinter::EmitSwapFPIntRetval(
+ const MCSubtargetInfo &STI, Mips16HardFloatInfo::FPReturnVariant RV,
+ bool LE) {
using namespace Mips16HardFloatInfo;
unsigned MovOpc = Mips::MFC1;
switch (RV) {
case FRet:
- EmitInstrRegReg(MovOpc, Mips::V0, Mips::F0);
+ EmitInstrRegReg(STI, MovOpc, Mips::V0, Mips::F0);
break;
case DRet:
- EmitMovFPIntPair(MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
break;
case CFRet:
- EmitMovFPIntPair(MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
break;
case CDRet:
- EmitMovFPIntPair(MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
- EmitMovFPIntPair(MovOpc, Mips::A0, Mips::A1, Mips::F2, Mips::F3, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE);
+ EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F2, Mips::F3, LE);
break;
case NoFPRet:
break;
@@ -879,13 +892,20 @@ MipsAsmPrinter::EmitSwapFPIntRetval(Mips16HardFloatInfo::FPReturnVariant RV,
void MipsAsmPrinter::EmitFPCallStub(
const char *Symbol, const Mips16HardFloatInfo::FuncSignature *Signature) {
- MCSymbol *MSymbol = OutContext.GetOrCreateSymbol(StringRef(Symbol));
+ MCSymbol *MSymbol = OutContext.getOrCreateSymbol(StringRef(Symbol));
using namespace Mips16HardFloatInfo;
- bool LE = Subtarget->isLittle();
+ bool LE = getDataLayout().isLittleEndian();
+ // Construct a local MCSubtargetInfo here.
+ // This is because the MachineFunction won't exist (but has not yet been
+ // freed) and, since we're at the global level, we can use the default-
+ // constructed subtarget.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+
//
// .global xxxx
//
- OutStreamer.EmitSymbolAttribute(MSymbol, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(MSymbol, MCSA_Global);
const char *RetType;
//
// make the comment field identifying the return and parameter
@@ -933,23 +953,23 @@ void MipsAsmPrinter::EmitFPCallStub(
Parms = "";
break;
}
- OutStreamer.AddComment("\t# Stub function to call " + Twine(RetType) + " " +
- Twine(Symbol) + " (" + Twine(Parms) + ")");
+ OutStreamer->AddComment("\t# Stub function to call " + Twine(RetType) + " " +
+ Twine(Symbol) + " (" + Twine(Parms) + ")");
//
// probably not necessary but we save and restore the current section state
//
- OutStreamer.PushSection();
+ OutStreamer->PushSection();
//
// .section mips16.call.fpxxxx,"ax",@progbits
//
- const MCSectionELF *M = OutContext.getELFSection(
+ MCSectionELF *M = OutContext.getELFSection(
".mips16.call.fp." + std::string(Symbol), ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_EXECINSTR, SectionKind::getText());
- OutStreamer.SwitchSection(M, nullptr);
+ ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+ OutStreamer->SwitchSection(M, nullptr);
//
// .align 2
//
- OutStreamer.EmitValueToAlignment(4);
+ OutStreamer->EmitValueToAlignment(4);
MipsTargetStreamer &TS = getTargetStreamer();
//
// .set nomips16
@@ -963,19 +983,16 @@ void MipsAsmPrinter::EmitFPCallStub(
// __call_stub_fp_xxxx:
//
std::string x = "__call_stub_fp_" + std::string(Symbol);
- MCSymbol *Stub = OutContext.GetOrCreateSymbol(StringRef(x));
+ MCSymbol *Stub = OutContext.getOrCreateSymbol(StringRef(x));
TS.emitDirectiveEnt(*Stub);
MCSymbol *MType =
- OutContext.GetOrCreateSymbol("__call_stub_fp_" + Twine(Symbol));
- OutStreamer.EmitSymbolAttribute(MType, MCSA_ELF_TypeFunction);
- OutStreamer.EmitLabel(Stub);
- //
- // we just handle non pic for now. these function will not be
- // called otherwise. when the full stub generation is moved here
- // we need to deal with pic.
- //
- if (TM.getRelocationModel() == Reloc::PIC_)
- llvm_unreachable("should not be here if we are compiling pic");
+ OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol));
+ OutStreamer->EmitSymbolAttribute(MType, MCSA_ELF_TypeFunction);
+ OutStreamer->EmitLabel(Stub);
+
+ // Only handle non-pic for now.
+ assert(TM.getRelocationModel() != Reloc::PIC_ &&
+ "should not be here if we are compiling pic");
TS.emitDirectiveSetReorder();
//
// We need to add a MipsMCExpr class to MCTargetDesc to fully implement
@@ -992,31 +1009,31 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// Mov $18, $31
- EmitInstrRegRegReg(Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO);
+ EmitInstrRegRegReg(*STI, Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO);
- EmitSwapFPIntParams(Signature->ParamSig, LE, true);
+ EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true);
// Jal xxxx
//
- EmitJal(MSymbol);
+ EmitJal(*STI, MSymbol);
// fix return values
- EmitSwapFPIntRetval(Signature->RetSig, LE);
+ EmitSwapFPIntRetval(*STI, Signature->RetSig, LE);
//
// do the return
// if (Signature->RetSig == NoFPRet)
// llvm_unreachable("should not be any stubs here with no return value");
// else
- EmitInstrReg(Mips::JR, Mips::S2);
+ EmitInstrReg(*STI, Mips::JR, Mips::S2);
- MCSymbol *Tmp = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(Tmp);
+ MCSymbol *Tmp = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(Tmp);
const MCSymbolRefExpr *E = MCSymbolRefExpr::Create(Stub, OutContext);
const MCSymbolRefExpr *T = MCSymbolRefExpr::Create(Tmp, OutContext);
const MCExpr *T_min_E = MCBinaryExpr::CreateSub(T, E, OutContext);
- OutStreamer.EmitELFSize(Stub, T_min_E);
+ OutStreamer->EmitELFSize(Stub, T_min_E);
TS.emitDirectiveEnd(x);
- OutStreamer.PopSection();
+ OutStreamer->PopSection();
}
void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
@@ -1032,7 +1049,7 @@ void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
EmitFPCallStub(Symbol, Signature);
}
// return to the text section
- OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
+ OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
}
void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
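Taken together, for a hypothetical callee foo with an FF signature returning float, the streamer calls in EmitFPCallStub should produce a stub roughly like the following (assembled from reading the code above, not verified compiler output):

        .globl  foo
        # Stub function to call float foo (float, float)
        .section .mips16.call.fp.foo,"ax",@progbits
        .align  2
        .set    nomips16
        .ent    __call_stub_fp_foo
        .type   __call_stub_fp_foo, @function
    __call_stub_fp_foo:
        .set    reorder
        addu    $18, $31, $zero    # save $ra in $s2
        mtc1    $4, $f12           # move integer arg regs into FP arg regs
        mtc1    $5, $f14
        jal     foo
        mfc1    $2, $f0            # move the FP return value back to $v0
        jr      $18
        .end    __call_stub_fp_foo
        .size   __call_stub_fp_foo, . - __call_stub_fp_foo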
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 723155f..a7f3304 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -60,27 +60,31 @@ private:
std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
- void emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const override;
+ void emitInlineAsmStart() const override;
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const override;
- void EmitJal(MCSymbol *Symbol);
+ void EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol);
- void EmitInstrReg(unsigned Opcode, unsigned Reg);
+ void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg);
- void EmitInstrRegReg(unsigned Opcode, unsigned Reg1, unsigned Reg2);
+ void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode,
+ unsigned Reg1, unsigned Reg2);
- void EmitInstrRegRegReg(unsigned Opcode, unsigned Reg1, unsigned Reg2,
- unsigned Reg3);
+ void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode,
+ unsigned Reg1, unsigned Reg2, unsigned Reg3);
- void EmitMovFPIntPair(unsigned MovOpc, unsigned Reg1, unsigned Reg2,
- unsigned FPReg1, unsigned FPReg2, bool LE);
+ void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc,
+ unsigned Reg1, unsigned Reg2, unsigned FPReg1,
+ unsigned FPReg2, bool LE);
- void EmitSwapFPIntParams(Mips16HardFloatInfo::FPParamVariant, bool LE,
+ void EmitSwapFPIntParams(const MCSubtargetInfo &STI,
+ Mips16HardFloatInfo::FPParamVariant, bool LE,
bool ToFP);
- void EmitSwapFPIntRetval(Mips16HardFloatInfo::FPReturnVariant, bool LE);
+ void EmitSwapFPIntRetval(const MCSubtargetInfo &STI,
+ Mips16HardFloatInfo::FPReturnVariant, bool LE);
void EmitFPCallStub(const char *, const Mips16HardFloatInfo::FuncSignature *);
@@ -94,14 +98,10 @@ public:
const MipsFunctionInfo *MipsFI;
MipsMCInstLower MCInstLowering;
- // We initialize the subtarget here and in runOnMachineFunction
- // since there are certain target specific flags (ABI) that could
- // reside on the TargetMachine, but are on the subtarget currently
- // and we need them for the beginning of file output before we've
- // seen a single function.
- explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCP(nullptr), InConstantPool(false),
- Subtarget(&TM.getSubtarget<MipsSubtarget>()), MCInstLowering(*this) {}
+ explicit MipsAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCP(nullptr),
+ InConstantPool(false), MCInstLowering(*this) {}
const char *getPassName() const override {
return "Mips Assembly Printer";
@@ -124,6 +124,7 @@ public:
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
+ void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override;
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock* MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.h b/contrib/llvm/lib/Target/Mips/MipsCCState.h
index 9a08ade..081c393 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.h
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.h
@@ -85,10 +85,10 @@ public:
// provide a means of accessing ArgListEntry::IsFixed. Delete them from this
// class. This doesn't stop them being used via the base class though.
void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+ CCAssignFn Fn) = delete;
void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
- CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+ CCAssignFn Fn) = delete;
void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
index dcd88f2..93e1908 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -64,9 +64,9 @@ def RetCC_F128HardFloat : CallingConv<[
// Handle F128 specially since we can't identify the original type during the
// tablegen-erated code.
def RetCC_F128 : CallingConv<[
- CCIfSubtarget<"abiUsesSoftFloat()",
+ CCIfSubtarget<"useSoftFloat()",
CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
- CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIfSubtargetNot<"useSoftFloat()",
CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
]>;
@@ -90,6 +90,9 @@ def CC_MipsO32 : CallingConv<[
// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
+ // Promote i1/i8/i16 return values to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
// i32 are returned in registers V0, V1, A0, A1
CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
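A hedged example (assumed, not from the patch) of what the new promotion rule affects: a sub-word return value is widened to i32 before the register assignment above picks $v0.

    // i8 result: promoted to i32 by the rule above, then returned in $v0.
    signed char ret_minus_one(void) { return -1; }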
@@ -131,7 +134,7 @@ def CC_MipsN : CallingConv<[
CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat<CCPromoteToType<i64>>>,
// The only i32's we have left are soft-float arguments.
- CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
+ CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
// Integer arguments are passed in integer registers.
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
@@ -369,7 +372,7 @@ def CC_Mips_FixedArg : CallingConv<[
// f128 should only occur for the N64 ABI where long double is 128-bit. On
// N32, long double is equivalent to double.
CCIfType<[i64],
- CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIfSubtargetNot<"useSoftFloat()",
CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
index af10cd4..2d96d9b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCondMov.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
@@ -27,7 +27,8 @@ class CMov_I_I_FT<string opstr, RegisterOperand CRC, RegisterOperand DRC,
class CMov_I_F_FT<string opstr, RegisterOperand CRC, RegisterOperand DRC,
InstrItinClass Itin> :
InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
- !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR, opstr> {
+ !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR, opstr>,
+ HARDFLOAT {
let Constraints = "$F = $fd";
}
@@ -37,7 +38,7 @@ class CMov_F_I_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
InstSE<(outs RC:$rd), (ins RC:$rs, FCCRegsOpnd:$fcc, RC:$F),
!strconcat(opstr, "\t$rd, $rs, $fcc"),
[(set RC:$rd, (OpNode RC:$rs, FCCRegsOpnd:$fcc, RC:$F))],
- Itin, FrmFR, opstr> {
+ Itin, FrmFR, opstr>, HARDFLOAT {
let Constraints = "$F = $rd";
}
@@ -47,7 +48,7 @@ class CMov_F_F_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
InstSE<(outs RC:$fd), (ins RC:$fs, FCCRegsOpnd:$fcc, RC:$F),
!strconcat(opstr, "\t$fd, $fs, $fcc"),
[(set RC:$fd, (OpNode RC:$fs, FCCRegsOpnd:$fcc, RC:$F))],
- Itin, FrmFR, opstr> {
+ Itin, FrmFR, opstr>, HARDFLOAT {
let Constraints = "$F = $fd";
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index c4e5ac0..96553d2 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -448,14 +448,12 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// FIXME:
MF = &mf;
MCP = mf.getConstantPool();
- STI = &mf.getTarget().getSubtarget<MipsSubtarget>();
+ STI = &static_cast<const MipsSubtarget &>(mf.getSubtarget());
DEBUG(dbgs() << "constant island machine function " << "\n");
if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) {
return false;
}
- TII = (const Mips16InstrInfo *)MF->getTarget()
- .getSubtargetImpl()
- ->getInstrInfo();
+ TII = (const Mips16InstrInfo *)STI->getInstrInfo();
MFI = MF->getInfo<MipsFunctionInfo>();
DEBUG(dbgs() << "constant island processing " << "\n");
//
@@ -562,7 +560,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getSubtarget().getDataLayout();
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index d5027b9..4faeb33 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -69,7 +69,7 @@ namespace {
class RegDefsUses {
public:
- RegDefsUses(TargetMachine &TM);
+ RegDefsUses(const TargetRegisterInfo &TRI);
void init(const MachineInstr &MI);
/// This function sets all caller-saved registers in Defs.
@@ -140,7 +140,7 @@ namespace {
/// memory instruction can be moved to a delay slot.
class MemDefsUses : public InspectMemInstr {
public:
- MemDefsUses(const MachineFrameInfo *MFI);
+ MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI);
private:
typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
@@ -158,6 +158,7 @@ namespace {
const MachineFrameInfo *MFI;
SmallPtrSet<ValueType, 4> Uses, Defs;
+ const DataLayout &DL;
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
@@ -199,6 +200,9 @@ namespace {
Iter replaceWithCompactBranch(MachineBasicBlock &MBB,
Iter Branch, DebugLoc DL);
+ Iter replaceWithCompactJump(MachineBasicBlock &MBB,
+ Iter Jump, DebugLoc DL);
+
/// This function checks if it is valid to move Candidate to the delay slot
/// and returns true if it isn't. It also updates memory and register
/// dependence information.
@@ -209,8 +213,8 @@ namespace {
/// moved to the delay slot. Returns true on success.
template<typename IterTy>
bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr &IM,
- IterTy &Filler, Iter Slot) const;
+ RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot,
+ IterTy &Filler) const;
/// This function searches in the backward direction for an instruction that
/// can be moved to the delay slot. Returns true on success.
@@ -278,11 +282,7 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
#ifndef NDEBUG
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getTarget()
- .getSubtargetImpl()
- ->getRegisterInfo()
- ->getAllocatableSet(MF)
- .test(R) &&
+ assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
"Shouldn't move an instruction with unallocatable registers across "
"basic block boundaries.");
#endif
@@ -292,9 +292,8 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
}
}
-RegDefsUses::RegDefsUses(TargetMachine &TM)
- : TRI(*TM.getSubtargetImpl()->getRegisterInfo()),
- Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
+RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI)
+ : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
void RegDefsUses::init(const MachineInstr &MI) {
// Add all register operands which are explicit and non-variadic.
@@ -316,13 +315,22 @@ void RegDefsUses::init(const MachineInstr &MI) {
void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
assert(MI.isCall());
+ // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into
+ // the delay slot. The reason is that RA/RA_64 must not be changed
+ // in the delay slot so that the callee can return to the caller.
+ if (MI.definesRegister(Mips::RA) || MI.definesRegister(Mips::RA_64)) {
+ Defs.set(Mips::RA);
+ Defs.set(Mips::RA_64);
+ }
+
// If MI is a call, add all caller-saved registers to Defs.
BitVector CallerSavedRegs(TRI.getNumRegs(), true);
CallerSavedRegs.reset(Mips::ZERO);
CallerSavedRegs.reset(Mips::ZERO_64);
- for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent());
+ *R; ++R)
for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
CallerSavedRegs.reset(*AI);
@@ -429,9 +437,9 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return true;
}
-MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
- SeenNoObjStore(false) {}
+MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -484,7 +492,7 @@ getUnderlyingObjects(const MachineInstr &MI,
const Value *V = (*MI.memoperands_begin())->getValue();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
I != E; ++I) {
@@ -500,8 +508,8 @@ getUnderlyingObjects(const MachineInstr &MI,
// Replace Branch with the compact branch instruction.
Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB,
Iter Branch, DebugLoc DL) {
- const MipsInstrInfo *TII= static_cast<const MipsInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ const MipsInstrInfo *TII =
+ MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
unsigned NewOpcode =
(((unsigned) Branch->getOpcode()) == Mips::BEQ) ? Mips::BEQZC_MM
@@ -520,6 +528,24 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB,
return Branch;
}
+// Replace Jumps with the compact jump instruction.
+Iter Filler::replaceWithCompactJump(MachineBasicBlock &MBB,
+ Iter Jump, DebugLoc DL) {
+ const MipsInstrInfo *TII =
+ MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
+
+ const MCInstrDesc &NewDesc = TII->get(Mips::JRC16_MM);
+ MachineInstrBuilder MIB = BuildMI(MBB, Jump, DL, NewDesc);
+
+ MIB.addReg(Jump->getOperand(0).getReg());
+
+ Iter tmpIter = Jump;
+ Jump = std::prev(Jump);
+ MBB.erase(tmpIter);
+
+ return Jump;
+}
+
// For given opcode returns opcode of corresponding instruction with short
// delay slot.
static int getEquivalentCallShort(int Opcode) {
@@ -543,9 +569,9 @@ static int getEquivalentCallShort(int Opcode) {
/// We assume there is only one delay slot per delayed instruction.
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- bool InMicroMipsMode = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
- const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
+ bool InMicroMipsMode = STI.inMicroMipsMode();
+ const MipsInstrInfo *TII = STI.getInstrInfo();
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
if (!hasUnoccupiedSlot(&*I))
@@ -587,17 +613,29 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// adding NOP replace this instruction with the corresponding compact
// branch instruction, i.e. BEQZC or BNEZC.
unsigned Opcode = I->getOpcode();
- if (InMicroMipsMode &&
- (Opcode == Mips::BEQ || Opcode == Mips::BNE) &&
- ((unsigned) I->getOperand(1).getReg()) == Mips::ZERO) {
-
- I = replaceWithCompactBranch(MBB, I, I->getDebugLoc());
-
- } else {
- // Bundle the NOP to the instruction with the delay slot.
- BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
- MIBundleBuilder(MBB, I, std::next(I, 2));
+ if (InMicroMipsMode) {
+ switch (Opcode) {
+ case Mips::BEQ:
+ case Mips::BNE:
+ if (((unsigned) I->getOperand(1).getReg()) == Mips::ZERO) {
+ I = replaceWithCompactBranch(MBB, I, I->getDebugLoc());
+ continue;
+ }
+ break;
+ case Mips::JR:
+ case Mips::PseudoReturn:
+ case Mips::PseudoIndirectBranch:
+ // For microMIPS the PseudoReturn and PseudoIndirectBranch are always
+ // expanded to JR_MM, so they can be replaced with JRC16_MM.
+ I = replaceWithCompactJump(MBB, I, I->getDebugLoc());
+ continue;
+ default:
+ break;
+ }
}
+ // Bundle the NOP to the instruction with the delay slot.
+ BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ MIBundleBuilder(MBB, I, std::next(I, 2));
}
return Changed;
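A minimal standalone sketch (not part of the patch) of the microMIPS decision the reworked loop above encodes: BEQ/BNE against $zero and register jumps/returns have compact forms that need no delay slot, while everything else still gets a bundled NOP.

    enum Opc { BEQ, BNE, JR, PseudoReturn, PseudoIndirectBranch, Other };

    // True when a compact replacement exists (BEQZC_MM/BNEZC_MM or JRC16_MM)
    // and the unfilled delay slot can simply be dropped.
    bool canUseCompactForm(Opc Opcode, bool SecondOpIsZeroReg) {
      switch (Opcode) {
      case BEQ:
      case BNE:
        return SecondOpIsZeroReg;
      case JR:
      case PseudoReturn:
      case PseudoIndirectBranch:
        return true;
      default:
        return false; // bundle a NOP into the slot instead
      }
    }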
@@ -611,45 +649,60 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr& IM,
- IterTy &Filler, Iter Slot) const {
- for (IterTy I = Begin; I != End; ++I) {
+ RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
+ IterTy &Filler) const {
+ bool IsReverseIter = std::is_convertible<IterTy, ReverseIter>::value;
+
+ for (IterTy I = Begin; I != End;) {
+ IterTy CurrI = I;
+ ++I;
+
// skip debug value
- if (I->isDebugValue())
+ if (CurrI->isDebugValue())
continue;
- if (terminateSearch(*I))
+ if (terminateSearch(*CurrI))
break;
- assert((!I->isCall() && !I->isReturn() && !I->isBranch()) &&
+ assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) &&
"Cannot put calls, returns or branches in delay slot.");
- if (delayHasHazard(*I, RegDU, IM))
+ if (CurrI->isKill()) {
+ CurrI->eraseFromParent();
+
+ // This special case is needed for reverse iterators, because when we
+ // erase an instruction, the iterators are updated to point to the next
+ // instruction.
+ if (IsReverseIter && I != End)
+ I = CurrI;
+ continue;
+ }
+
+ if (delayHasHazard(*CurrI, RegDU, IM))
continue;
- if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
+ if (STI.isTargetNaCl()) {
// In NaCl, instructions that must be masked are forbidden in delay slots.
// We only check for loads, stores and SP changes. Calls, returns and
// branches are not checked because non-NaCl targets never put them in
// delay slots.
unsigned AddrIdx;
- if ((isBasePlusOffsetMemoryAccess(I->getOpcode(), &AddrIdx) &&
- baseRegNeedsLoadStoreMask(I->getOperand(AddrIdx).getReg())) ||
- I->modifiesRegister(Mips::SP,
- TM.getSubtargetImpl()->getRegisterInfo()))
+ if ((isBasePlusOffsetMemoryAccess(CurrI->getOpcode(), &AddrIdx) &&
+ baseRegNeedsLoadStoreMask(CurrI->getOperand(AddrIdx).getReg())) ||
+ CurrI->modifiesRegister(Mips::SP, STI.getRegisterInfo()))
continue;
}
- bool InMicroMipsMode = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
- const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
+ bool InMicroMipsMode = STI.inMicroMipsMode();
+ const MipsInstrInfo *TII = STI.getInstrInfo();
unsigned Opcode = (*Slot).getOpcode();
- if (InMicroMipsMode && TII->GetInstSizeInBytes(&(*I)) == 2 &&
+ if (InMicroMipsMode && TII->GetInstSizeInBytes(&(*CurrI)) == 2 &&
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
Opcode == Mips::PseudoReturn))
continue;
- Filler = I;
+ Filler = CurrI;
return true;
}
@@ -660,14 +713,14 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
if (DisableBackwardSearch)
return false;
- RegDefsUses RegDU(TM);
- MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
+ MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
ReverseIter Filler;
RegDU.init(*Slot);
- if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler,
- Slot))
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot,
+ Filler))
return false;
MBB.splice(std::next(Slot), &MBB, std::next(Filler).base());
@@ -681,13 +734,13 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
if (DisableForwardSearch || !Slot->isCall())
return false;
- RegDefsUses RegDU(TM);
+ RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
NoMemInstr NM;
Iter Filler;
RegDU.setCallerSaved(*Slot);
- if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler, Slot))
+ if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler))
return false;
MBB.splice(std::next(Slot), &MBB, Filler);
@@ -705,7 +758,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
if (!SuccBB)
return false;
- RegDefsUses RegDU(TM);
+ RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
bool HasMultipleSuccs = false;
BB2BrMap BrMap;
std::unique_ptr<InspectMemInstr> IM;
@@ -727,11 +780,11 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
IM.reset(new LoadFromStackOrConst());
} else {
const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
- IM.reset(new MemDefsUses(MFI));
+ IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
}
- if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler,
- Slot))
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
+ Filler))
return false;
insertDelayFiller(Filler, BrMap);
@@ -758,7 +811,7 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
SmallVector<MachineInstr*, 2> BranchInstrs;
SmallVector<MachineOperand, 2> Cond;
@@ -814,7 +867,10 @@ bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
InspectMemInstr &IM) const {
- bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+ assert(!Candidate.isKill() &&
+ "KILL instructions should have been eliminated at this point.");
+
+ bool HasHazard = Candidate.isImplicitDef();
HasHazard |= IM.hasHazard(Candidate);
HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index 4588f40..4faee10 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1,19 +1,22 @@
//===-- MipsFastISel.cpp - Mips FastISel implementation ------------------===//
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "MipsCCState.h"
+#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -43,6 +46,7 @@ class MipsFastISel final : public FastISel {
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
+ bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
@@ -51,6 +55,15 @@ class MipsFastISel final : public FastISel {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+ void setFI(unsigned FI) {
+ assert(isFIBase() && "Invalid base frame index access!");
+ Base.FI = FI;
+ }
+ unsigned getFI() const {
+ assert(isFIBase() && "Invalid base frame index access!");
+ return Base.FI;
+ }
+
void setOffset(int64_t Offset_) { Offset = Offset_; }
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
@@ -60,9 +73,9 @@ class MipsFastISel final : public FastISel {
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
const TargetMachine &TM;
+ const MipsSubtarget *Subtarget;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const MipsSubtarget *Subtarget;
MipsFunctionInfo *MFI;
// Convenience variables to avoid some queries.
@@ -77,6 +90,7 @@ class MipsFastISel final : public FastISel {
private:
// Selection routines.
+ bool selectLogicalOp(const Instruction *I);
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBranch(const Instruction *I);
@@ -87,12 +101,15 @@ private:
bool selectRet(const Instruction *I);
bool selectTrunc(const Instruction *I);
bool selectIntExt(const Instruction *I);
+ bool selectShift(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isTypeSupported(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool computeAddress(const Value *Obj, Address &Addr);
bool computeCallAddress(const Value *V, Address &Addr);
+ void simplifyAddress(Address &Addr);
// Emit helper routines.
bool emitCmp(unsigned DestReg, const CmpInst *CI);
@@ -116,6 +133,9 @@ private:
unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned);
+ unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+ const Value *RHS);
+
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
unsigned materializeGV(const GlobalValue *GV, MVT VT);
unsigned materializeInt(const Constant *C, MVT VT);
@@ -157,17 +177,18 @@ public:
explicit MipsFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()),
- TII(*TM.getSubtargetImpl()->getInstrInfo()),
- TLI(*TM.getSubtargetImpl()->getTargetLowering()),
- Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
+ Subtarget(&funcInfo.MF->getSubtarget<MipsSubtarget>()),
+ TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
- TargetSupported = ((TM.getRelocationModel() == Reloc::PIC_) &&
- ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
- (Subtarget->isABI_O32())));
+ TargetSupported =
+ ((TM.getRelocationModel() == Reloc::PIC_) &&
+ ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
+ (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32())));
UnsupportedFPMode = Subtarget->isFP64bit();
}
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
unsigned fastMaterializeConstant(const Constant *C) override;
bool fastSelectInstruction(const Instruction *I) override;
@@ -185,9 +206,9 @@ static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT,
llvm_unreachable("should not be called");
}
-bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State) {
+static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
llvm_unreachable("should not be called");
}
@@ -197,6 +218,62 @@ CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const {
return CC_MipsO32;
}
+unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+ const Value *LHS, const Value *RHS) {
+ // Canonicalize immediates to the RHS first.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ std::swap(LHS, RHS);
+
+ unsigned Opc;
+ if (ISDOpc == ISD::AND) {
+ Opc = Mips::AND;
+ } else if (ISDOpc == ISD::OR) {
+ Opc = Mips::OR;
+ } else if (ISDOpc == ISD::XOR) {
+ Opc = Mips::XOR;
+ } else
+ llvm_unreachable("unexpected opcode");
+
+ unsigned LHSReg = getRegForValue(LHS);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return 0;
+
+ unsigned RHSReg;
+ if (!LHSReg)
+ return 0;
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ RHSReg = materializeInt(C, MVT::i32);
+ else
+ RHSReg = getRegForValue(RHS);
+
+ if (!RHSReg)
+ return 0;
+
+ emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg);
+ return ResultReg;
+}
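Roughly, for IR like 'and i32 7, %x' the helper above first swaps the constant to the RHS so the materializeInt path is taken in exactly one place, then emits a three-register AND. A compact standalone sketch of that canonicalization (illustrative types, not the real classes):

    #include <utility>

    struct Operand { bool IsConstInt; int Value; };

    // AND/OR/XOR are commutative, so a constant LHS can always be moved right.
    void canonicalizeCommutative(Operand &LHS, Operand &RHS) {
      if (LHS.IsConstInt && !RHS.IsConstInt)
        std::swap(LHS, RHS); // (7 & x) -> (x & 7)
    }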
+
+unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
+ assert(TLI.getValueType(AI->getType(), true) == MVT::i32 &&
+ "Alloca should always return a pointer.");
+
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LEA_ADDiu),
+ ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
+}
+
unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -303,21 +380,91 @@ unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
}
bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
- // This construct looks a big awkward but it is how other ports handle this
- // and as this function is more fully completed, these cases which
- // return false will have additional code in them.
- //
- if (isa<Instruction>(Obj))
- return false;
- else if (isa<ConstantExpr>(Obj))
- return false;
+
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return computeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ uint64_t TmpOffset = Addr.getOffset();
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
+ ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+ // Try to grab the base operand now.
+ Addr.setOffset(TmpOffset);
+ if (computeAddress(U->getOperand(0), Addr))
+ return true;
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.setKind(Address::FrameIndexBase);
+ Addr.setFI(SI->second);
+ return true;
+ }
+ break;
+ }
+ }
Addr.setReg(getRegForValue(Obj));
return Addr.getReg() != 0;
}
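A standalone sketch (not part of the patch) of the constant folding computeAddress now performs over a GEP: struct field offsets come from the layout, array indices scale by the element size. The sizes below are assumptions for illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Models: %p = getelementptr %struct.S, %struct.S* %base, i64 2, i32 1
      // where sizeof(S) == 16 and field 1 sits at offset 8.
      const uint64_t StructSize = 16, FieldOffset = 8;
      const int64_t ArrayIndex = 2;
      uint64_t Offset = 0;
      Offset += static_cast<uint64_t>(ArrayIndex) * StructSize; // array step
      Offset += FieldOffset;                                    // field step
      std::printf("folded offset = %llu\n",
                  static_cast<unsigned long long>(Offset));     // prints 40
      return 0;
    }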
bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
const GlobalValue *GV = dyn_cast<GlobalValue>(V);
- if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic())
+ if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
return false;
if (!GV)
return false;
@@ -340,6 +487,21 @@ bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
+bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) {
+ if (Ty->isVectorTy())
+ return false;
+
+ if (isTypeLegal(Ty, VT))
+ return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT))
return true;
@@ -516,8 +678,26 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
default:
return false;
}
- emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset());
- return true;
+ if (Addr.isRegBase()) {
+ simplifyAddress(Addr);
+ emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset());
+ return true;
+ }
+ if (Addr.isFIBase()) {
+ unsigned FI = Addr.getFI();
+ unsigned Align = 4;
+ unsigned Offset = Addr.getOffset();
+ MachineFrameInfo &MFI = *MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ return true;
+ }
+ return false;
}
bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
@@ -549,7 +729,53 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
default:
return false;
}
- emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset());
+ if (Addr.isRegBase()) {
+ simplifyAddress(Addr);
+ emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset());
+ return true;
+ }
+ if (Addr.isFIBase()) {
+ unsigned FI = Addr.getFI();
+ unsigned Align = 4;
+ unsigned Offset = Addr.getOffset();
+ MachineFrameInfo &MFI = *MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ return true;
+ }
+ return false;
+}
+
+bool MipsFastISel::selectLogicalOp(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::And:
+ ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Or:
+ ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Xor:
+ ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
return true;
}
@@ -775,7 +1001,9 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
}
}
}
- if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) {
+ if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32) || (ArgVT == MVT::i16) ||
+ (ArgVT == MVT::i8)) &&
+ VA.isMemLoc()) {
switch (VA.getLocMemOffset()) {
case 0:
VA.convertToReg(Mips::A0);
@@ -889,6 +1117,8 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
CopyVT = MVT::i32;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+ if (!ResultReg)
+ return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(RVLocs[0].getLocReg());
@@ -919,7 +1149,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
MVT RetVT;
if (CLI.RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(CLI.RetTy, RetVT))
+ else if (!isTypeSupported(CLI.RetTy, RetVT))
return false;
for (auto Flag : CLI.OutFlags)
@@ -965,32 +1195,96 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
- // Add implicit physical register uses to the call.
- for (auto Reg : CLI.OutRegs)
- MIB.addReg(Reg, RegState::Implicit);
-
- // Add a register mask with the call-preserved registers. Proper
- // defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
-
- CLI.Call = MIB;
// Finish off the call including any return values.
return finishCall(CLI, RetVT, NumBytes);
}
bool MipsFastISel::selectRet(const Instruction *I) {
+ const Function &F = *I->getParent()->getParent();
const ReturnInst *Ret = cast<ReturnInst>(I);
if (!FuncInfo.CanLowerReturn)
return false;
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
if (Ret->getNumOperands() > 0) {
- return false;
+ CallingConv::ID CC = F.getCallingConv();
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs,
+ I->getContext());
+ CCAssignFn *RetCC = RetCC_Mips;
+ CCInfo.AnalyzeReturn(Outs, RetCC);
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+ const Value *RV = Ret->getOperand(0);
+
+ // Don't bother handling odd stuff for now.
+ if ((VA.getLocInfo() != CCValAssign::Full) &&
+ (VA.getLocInfo() != CCValAssign::BCvt))
+ return false;
+
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DestReg = VA.getLocReg();
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!MRI.getRegClass(SrcReg)->contains(DestReg))
+ return false;
+
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+
+ if (RVEVT.isVector())
+ return false;
+
+ MVT RVVT = RVEVT.getSimpleVT();
+ if (RVVT == MVT::f128)
+ return false;
+
+ MVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ bool IsZExt = Outs[0].Flags.isZExt();
+ SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
+ if (SrcReg == 0)
+ return false;
+ }
+ }
+
+ // Make the copy.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
+
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
- emitInst(Mips::RetRA);
+ MachineInstrBuilder MIB = emitInst(Mips::RetRA);
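+ // Attach the copied return registers as implicit uses so they stay live
+ // through the return.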
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
@@ -1107,6 +1401,13 @@ bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg, bool IsZExt) {
+ // FastISel does not have plumbing to deal with extensions where the SrcVT or
+ // DestVT are odd things, so test to make sure that they are both types we can
+ // handle (i1/i8/i16 for SrcVT and i8/i16/i32 for DestVT), otherwise
+ // bail out to SelectionDAG.
+ if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && (DestVT != MVT::i32)) ||
+ ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && (SrcVT != MVT::i16)))
+ return false;
if (IsZExt)
return emitIntZExt(SrcVT, SrcReg, DestVT, DestReg);
return emitIntSExt(SrcVT, SrcReg, DestVT, DestReg);
@@ -1115,7 +1416,83 @@ bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
- return emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt);
+ bool Success = emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt);
+ return Success ? DestReg : 0;
+}
+
+bool MipsFastISel::selectShift(const Instruction *I) {
+ MVT RetVT;
+
+ if (!isTypeSupported(I->getType(), RetVT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return false;
+
+ unsigned Opcode = I->getOpcode();
+ const Value *Op0 = I->getOperand(0);
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (!Op0Reg)
+ return false;
+
+ // For AShr, operand 0 must first be sign-extended to i32; for LShr it must
+ // be zero-extended.
+ if (Opcode == Instruction::AShr || Opcode == Instruction::LShr) {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ if (!TempReg)
+ return false;
+
+ MVT Op0MVT = TLI.getValueType(Op0->getType(), true).getSimpleVT();
+ bool IsZExt = Opcode == Instruction::LShr;
+ if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt))
+ return false;
+
+ Op0Reg = TempReg;
+ }
+
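+ // A constant shift amount selects the immediate forms (SLL/SRA/SRL);
+ // otherwise the register forms (SLLV/SRAV/SRLV) are emitted below.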
+ if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftVal = C->getZExtValue();
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ Opcode = Mips::SLL;
+ break;
+ case Instruction::AShr:
+ Opcode = Mips::SRA;
+ break;
+ case Instruction::LShr:
+ Opcode = Mips::SRL;
+ break;
+ }
+
+ emitInst(Opcode, ResultReg).addReg(Op0Reg).addImm(ShiftVal);
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (!Op1Reg)
+ return false;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ Opcode = Mips::SLLV;
+ break;
+ case Instruction::AShr:
+ Opcode = Mips::SRAV;
+ break;
+ case Instruction::LShr:
+ Opcode = Mips::SRLV;
+ break;
+ }
+
+ emitInst(Opcode, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ updateValueMap(I, ResultReg);
+ return true;
}
bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
@@ -1128,6 +1505,14 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return selectShift(I);
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return selectLogicalOp(I);
case Instruction::Br:
return selectBranch(I);
case Instruction::Ret:
@@ -1167,6 +1552,17 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
return VReg;
}
+void MipsFastISel::simplifyAddress(Address &Addr) {
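+ // MIPS load/store offsets are signed 16-bit immediates; fold anything
+ // larger into the base register so the final offset is 0.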
+ if (!isInt<16>(Addr.getOffset())) {
+ unsigned TempReg =
+ materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg());
+ Addr.setReg(DestReg);
+ Addr.setOffset(0);
+ }
+}
+
namespace llvm {
FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
index 3014a0d..826fbaf 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -100,7 +100,7 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
int64_t Offset = 0;
@@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
return RoundUpToAlignment(Offset, getStackAlignment());
}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP;
+
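+ // Without a reserved call frame, the pseudo carries the adjustment size;
+ // apply it to the stack pointer before erasing the pseudo.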
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
index 90a8d2a..0b51830 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -32,6 +32,13 @@ public:
bool hasFP(const MachineFunction &MF) const override;
+ bool isFPCloseToIncomingSP() const override { return false; }
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 0bdabf3..2c9868a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -47,7 +47,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
processFunctionAfterISel(MF);
@@ -95,6 +95,12 @@ bool MipsDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
bool MipsDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
llvm_unreachable("Unimplemented function.");
@@ -107,7 +113,8 @@ bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
return false;
}
-bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
+ unsigned MinSizeInBits) const {
llvm_unreachable("Unimplemented function.");
return false;
}
@@ -224,18 +231,18 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
bool MipsDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- OutOps.push_back(Op);
- return false;
-}
-
-/// createMipsISelDag - This pass converts a legalized DAG into a
-/// MIPS-specific DAG, ready for instruction scheduling.
-FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- if (TM.getSubtargetImpl()->inMips16Mode())
- return llvm::createMips16ISelDag(TM);
-
- return llvm::createMipsSEISelDag(TM);
+ // All memory constraints can at least accept raw pointers.
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_ZC:
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index ff8760d..1426d0f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -73,6 +73,9 @@ private:
virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
/// Match addr+simm10 and addr
virtual bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
@@ -81,7 +84,8 @@ private:
SDValue &Offset, SDValue &Alias);
/// \brief Select constant vector splats.
- virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ virtual bool selectVSplat(SDNode *N, APInt &Imm,
+ unsigned MinSizeInBits) const;
/// \brief Select constant vector splats whose value fits in a uimm1.
virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
/// \brief Select constant vector splats whose value fits in a uimm2.
@@ -116,20 +120,15 @@ private:
// getImm - Return a target constant with the specified value.
inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
}
virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
};
-
-/// createMipsISelDag - This pass converts a legalized DAG into a
-/// MIPS-specific DAG, ready for instruction scheduling.
-FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
-
}
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 37fc784..6c7f089 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -70,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_64(I);
+ Size = countPopulation(I);
Pos = countTrailingZeros(I);
return true;
}
@@ -112,7 +112,8 @@ SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
}
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
+ switch ((MipsISD::NodeType)Opcode) {
+ case MipsISD::FIRST_NUMBER: break;
case MipsISD::JmpLink: return "MipsISD::JmpLink";
case MipsISD::TailCall: return "MipsISD::TailCall";
case MipsISD::Hi: return "MipsISD::Hi";
@@ -142,6 +143,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::Wrapper: return "MipsISD::Wrapper";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
@@ -161,6 +163,28 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W";
case MipsISD::SHILO: return "MipsISD::SHILO";
case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
+ case MipsISD::MULSAQ_S_W_PH: return "MipsISD::MULSAQ_S_W_PH";
+ case MipsISD::MAQ_S_W_PHL: return "MipsISD::MAQ_S_W_PHL";
+ case MipsISD::MAQ_S_W_PHR: return "MipsISD::MAQ_S_W_PHR";
+ case MipsISD::MAQ_SA_W_PHL: return "MipsISD::MAQ_SA_W_PHL";
+ case MipsISD::MAQ_SA_W_PHR: return "MipsISD::MAQ_SA_W_PHR";
+ case MipsISD::DPAU_H_QBL: return "MipsISD::DPAU_H_QBL";
+ case MipsISD::DPAU_H_QBR: return "MipsISD::DPAU_H_QBR";
+ case MipsISD::DPSU_H_QBL: return "MipsISD::DPSU_H_QBL";
+ case MipsISD::DPSU_H_QBR: return "MipsISD::DPSU_H_QBR";
+ case MipsISD::DPAQ_S_W_PH: return "MipsISD::DPAQ_S_W_PH";
+ case MipsISD::DPSQ_S_W_PH: return "MipsISD::DPSQ_S_W_PH";
+ case MipsISD::DPAQ_SA_L_W: return "MipsISD::DPAQ_SA_L_W";
+ case MipsISD::DPSQ_SA_L_W: return "MipsISD::DPSQ_SA_L_W";
+ case MipsISD::DPA_W_PH: return "MipsISD::DPA_W_PH";
+ case MipsISD::DPS_W_PH: return "MipsISD::DPS_W_PH";
+ case MipsISD::DPAQX_S_W_PH: return "MipsISD::DPAQX_S_W_PH";
+ case MipsISD::DPAQX_SA_W_PH: return "MipsISD::DPAQX_SA_W_PH";
+ case MipsISD::DPAX_W_PH: return "MipsISD::DPAX_W_PH";
+ case MipsISD::DPSX_W_PH: return "MipsISD::DPSX_W_PH";
+ case MipsISD::DPSQX_S_W_PH: return "MipsISD::DPSQX_S_W_PH";
+ case MipsISD::DPSQX_SA_W_PH: return "MipsISD::DPSQX_SA_W_PH";
+ case MipsISD::MULSA_W_PH: return "MipsISD::MULSA_W_PH";
case MipsISD::MULT: return "MipsISD::MULT";
case MipsISD::MULTU: return "MipsISD::MULTU";
case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
@@ -197,13 +221,13 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::PCKEV: return "MipsISD::PCKEV";
case MipsISD::PCKOD: return "MipsISD::PCKOD";
case MipsISD::INSVE: return "MipsISD::INSVE";
- default: return nullptr;
}
+ return nullptr;
}
MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI)
- : TargetLowering(TM), Subtarget(STI) {
+ : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) {
// Mips does not have i1 type, so use i32 for
// setcc operation results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
@@ -221,9 +245,23 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
}
- // MIPS doesn't have extending float->double load/store
- for (MVT VT : MVT::fp_valuetypes())
+ // MIPS doesn't have extending float->double load/store. Set the
+ // LoadExtAction for f32 and f16 source types to Expand.
+ for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ }
+
+ // Set LoadExtAction for f16 vectors to Expand
+ for (MVT VT : MVT::fp_vector_valuetypes()) {
+ MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements());
+ if (F16VT.isValid())
+ setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand);
+ }
+
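+ // There is no f16 truncating store either, so expand those as well.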
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Used by legalize types to correctly generate the setcc result.
@@ -339,6 +377,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ // Lower f16 conversion operations into library calls
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -393,14 +437,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
// The arguments on the stack are defined in terms of 4-byte slots on O32
// and 8-byte slots on N32/N64.
- setMinStackArgumentAlignment(
- (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4);
+ setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? 8 : 4);
- setStackPointerRegisterToSaveRestore(Subtarget.isABI_N64() ? Mips::SP_64
- : Mips::SP);
+ setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP);
- setExceptionPointerRegister(Subtarget.isABI_N64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(Subtarget.isABI_N64() ? Mips::A1_64 : Mips::A1);
+ setExceptionPointerRegister(ABI.IsN64() ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(ABI.IsN64() ? Mips::A1_64 : Mips::A1);
MaxStoresPerMemcpy = 16;
@@ -526,7 +568,7 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
- DAG.getConstant(condCodeToFCC(CC), MVT::i32));
+ DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32));
}
// Creates and returns a CMovFPT/F node.
@@ -681,9 +723,11 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, SDLoc(N), ValTy,
- ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32),
- DAG.getConstant(SMSize, MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(MipsISD::Ext, DL, ValTy,
+ ShiftRight.getOperand(0),
+ DAG.getConstant(Pos, DL, MVT::i32),
+ DAG.getConstant(SMSize, DL, MVT::i32));
}
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
@@ -735,9 +779,11 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
- return DAG.getNode(MipsISD::Ins, SDLoc(N), ValTy, Shl.getOperand(0),
- DAG.getConstant(SMPos0, MVT::i32),
- DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
+ SDLoc DL(N);
+ return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0),
+ DAG.getConstant(SMPos0, DL, MVT::i32),
+ DAG.getConstant(SMSize0, DL, MVT::i32),
+ And0.getOperand(0));
}
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
@@ -969,16 +1015,14 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::DIVU:
case Mips::MOD:
case Mips::MODU:
- return insertDivByZeroTrap(
- MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), false);
+ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false);
case Mips::PseudoDSDIV:
case Mips::PseudoDUDIV:
case Mips::DDIV:
case Mips::DDIVU:
case Mips::DMOD:
case Mips::DMODU:
- return insertDivByZeroTrap(
- MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), true);
+ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true);
case Mips::SEL_D:
return emitSEL_D(MI, BB);
@@ -1014,8 +1058,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
@@ -1098,8 +1141,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
unsigned SrcReg) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
if (Subtarget.hasMips32r2() && Size == 1) {
@@ -1135,8 +1177,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1288,8 +1329,7 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
@@ -1371,8 +1411,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1509,10 +1548,8 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock::iterator II(MI);
@@ -1547,7 +1584,7 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
- DAG.getConstant(EntrySize, PTy));
+ DAG.getConstant(EntrySize, DL, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
@@ -1556,8 +1593,7 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
false, 0);
Chain = Addr.getValue(1);
- if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ||
- Subtarget.isABI_N64()) {
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || ABI.IsN64()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -1586,7 +1622,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
- SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
+ SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32);
SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32);
return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
FCC0, Dest, CondRes);
@@ -1627,10 +1663,11 @@ SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
assert(Cond.getOpcode() == MipsISD::FPCmp &&
"Floating point operand expected.");
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
+ SDLoc DL(Op);
+ SDValue True = DAG.getConstant(1, DL, MVT::i32);
+ SDValue False = DAG.getConstant(0, DL, MVT::i32);
- return createCMovFP(DAG, Cond, True, False, SDLoc(Op));
+ return createCMovFP(DAG, Cond, True, False, DL);
}
SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
@@ -1639,12 +1676,11 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget.isABI_N64()) {
- const MipsTargetObjectFile &TLOF =
- (const MipsTargetObjectFile&)getObjFileLowering();
-
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !ABI.IsN64()) {
+ const MipsTargetObjectFile *TLOF =
+ static_cast<const MipsTargetObjectFile *>(
+ getTargetMachine().getObjFileLowering());
+ if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
@@ -1653,8 +1689,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
}
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
- return getAddrLocal(N, SDLoc(N), Ty, DAG,
- Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
if (LargeGOT)
return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
@@ -1662,9 +1697,8 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
MachinePointerInfo::getGOT());
return getAddrGlobal(N, SDLoc(N), Ty, DAG,
- (Subtarget.isABI_N32() || Subtarget.isABI_N64())
- ? MipsII::MO_GOT_DISP
- : MipsII::MO_GOT16,
+ (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP
+ : MipsII::MO_GOT16,
DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
@@ -1673,12 +1707,10 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget.isABI_N64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !ABI.IsN64())
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
- return getAddrLocal(N, SDLoc(N), Ty, DAG,
- Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
SDValue MipsTargetLowering::
@@ -1766,12 +1798,10 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget.isABI_N64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !ABI.IsN64())
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
- return getAddrLocal(N, SDLoc(N), Ty, DAG,
- Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
SDValue MipsTargetLowering::
@@ -1780,20 +1810,19 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget.isABI_N64()) {
- const MipsTargetObjectFile &TLOF =
- (const MipsTargetObjectFile&)getObjFileLowering();
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !ABI.IsN64()) {
+ const MipsTargetObjectFile *TLOF =
+ static_cast<const MipsTargetObjectFile *>(
+ getTargetMachine().getObjFileLowering());
- if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
+ if (TLOF->IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
}
- return getAddrLocal(N, SDLoc(N), Ty, DAG,
- Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1819,8 +1848,7 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
unsigned Align = Node->getConstantOperandVal(3);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc DL(Node);
- unsigned ArgSlotSizeInBytes =
- (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4;
+ unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4;
SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr,
MachinePointerInfo(SV), false, false, false,
@@ -1838,19 +1866,19 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
- DAG.getConstant(Align - 1,
- VAList.getValueType()));
+ DAG.getConstant(Align - 1, DL, VAList.getValueType()));
VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList,
- DAG.getConstant(-(int64_t)Align,
+ DAG.getConstant(-(int64_t)Align, DL,
VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg.
unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
- DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes),
- VAList.getValueType()));
+ DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes,
+ ArgSlotSizeInBytes),
+ DL, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr,
MachinePointerInfo(SV), false, false, 0);
@@ -1863,7 +1891,7 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) {
unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes;
VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList,
- DAG.getIntPtrConstant(Adjustment));
+ DAG.getIntPtrConstant(Adjustment, DL));
}
// Load the actual argument out of the pointer VAList
return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo(), false, false,
@@ -1874,9 +1902,9 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
bool HasExtractInsert) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
- SDValue Const1 = DAG.getConstant(1, MVT::i32);
- SDValue Const31 = DAG.getConstant(31, MVT::i32);
SDLoc DL(Op);
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i32);
+ SDValue Const31 = DAG.getConstant(31, DL, MVT::i32);
SDValue Res;
// If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
@@ -1912,7 +1940,8 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res);
SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
@@ -1921,8 +1950,8 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
- SDValue Const1 = DAG.getConstant(1, MVT::i32);
SDLoc DL(Op);
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i32);
// Bitcast to integer nodes.
SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
@@ -1932,7 +1961,7 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
// ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
// ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
- DAG.getConstant(WidthY - 1, MVT::i32), Const1);
+ DAG.getConstant(WidthY - 1, DL, MVT::i32), Const1);
if (WidthX > WidthY)
E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
@@ -1940,7 +1969,8 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);
SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
- DAG.getConstant(WidthX - 1, MVT::i32), Const1, X);
+ DAG.getConstant(WidthX - 1, DL, MVT::i32), Const1,
+ X);
return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
}
@@ -1952,7 +1982,7 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
- DAG.getConstant(WidthY - 1, MVT::i32));
+ DAG.getConstant(WidthY - 1, DL, MVT::i32));
if (WidthX > WidthY)
SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
@@ -1960,7 +1990,7 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);
SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
- DAG.getConstant(WidthX - 1, MVT::i32));
+ DAG.getConstant(WidthX - 1, DL, MVT::i32));
SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
}
@@ -1983,9 +2013,8 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FrameAddr =
- DAG.getCopyFromReg(DAG.getEntryNode(), DL,
- Subtarget.isABI_N64() ? Mips::FP_64 : Mips::FP, VT);
+ SDValue FrameAddr = DAG.getCopyFromReg(
+ DAG.getEntryNode(), DL, ABI.IsN64() ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
@@ -2001,7 +2030,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MVT VT = Op.getSimpleValueType();
- unsigned RA = Subtarget.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
@@ -2023,12 +2052,12 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc DL(Op);
- EVT Ty = Subtarget.isABI_N64() ? MVT::i64 : MVT::i32;
+ EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32;
// Store stack offset in V1, store jump target in V0. Glue CopyToReg and
// EH_RETURN nodes, so that instructions are emitted back-to-back.
- unsigned OffsetReg = Subtarget.isABI_N64() ? Mips::V1_64 : Mips::V1;
- unsigned AddrReg = Subtarget.isABI_N64() ? Mips::V0_64 : Mips::V0;
+ unsigned OffsetReg = ABI.IsN64() ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = ABI.IsN64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
@@ -2044,7 +2073,7 @@ SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
unsigned SType = 0;
SDLoc DL(Op);
return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(SType, MVT::i32));
+ DAG.getConstant(SType, DL, MVT::i32));
}
SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
@@ -2061,17 +2090,17 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
// lo = 0
// hi = (shl lo, shamt[4:0])
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
- DAG.getConstant(-1, MVT::i32));
+ DAG.getConstant(-1, DL, MVT::i32));
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not);
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond,
- DAG.getConstant(0, VT), ShiftLeftLo);
+ DAG.getConstant(0, DL, VT), ShiftLeftLo);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or);
SDValue Ops[2] = {Lo, Hi};
@@ -2099,21 +2128,21 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// lo = (srl hi, shamt[4:0])
// hi = 0
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
- DAG.getConstant(-1, MVT::i32));
+ DAG.getConstant(-1, DL, MVT::i32));
SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL,
DL, VT, Hi, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
- DAG.getConstant(VT.getSizeInBits() - 1, VT));
+ DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
- IsSRA ? Ext : DAG.getConstant(0, VT), ShiftRightHi);
+ IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
@@ -2129,7 +2158,7 @@ static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
if (Offset)
Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
- DAG.getConstant(Offset, BasePtrVT));
+ DAG.getConstant(Offset, DL, BasePtrVT));
SDValue Ops[] = { Chain, Ptr, Src };
return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT,
@@ -2194,7 +2223,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// (set tmp2, (shl tmp1, 32))
// (set dst, (srl tmp2, 32))
SDLoc DL(LD);
- SDValue Const32 = DAG.getConstant(32, MVT::i32);
+ SDValue Const32 = DAG.getConstant(32, DL, MVT::i32);
SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32);
SDValue Ops[] = { SRL, LWR.getValue(1) };
@@ -2210,7 +2239,7 @@ static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
if (Offset)
Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
- DAG.getConstant(Offset, BasePtrVT));
+ DAG.getConstant(Offset, DL, BasePtrVT));
SDValue Ops[] = { Chain, Value, Ptr };
return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT,
@@ -2292,8 +2321,9 @@ SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op->getValueType(0);
int FI = MFI->CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false);
SDValue InArgsAddr = DAG.getFrameIndex(FI, ValTy);
- return DAG.getNode(ISD::ADD, SDLoc(Op), ValTy, InArgsAddr,
- DAG.getConstant(0, ValTy));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::ADD, DL, ValTy, InArgsAddr,
+ DAG.getConstant(0, DL, ValTy));
}
SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
@@ -2325,12 +2355,9 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, const MCPhysReg *F64Regs) {
- const MipsSubtarget &Subtarget =
- State.getMachineFunction().getTarget()
- .getSubtarget<const MipsSubtarget>();
-
- static const unsigned IntRegsSize = 4, FloatRegsSize = 2;
+ CCState &State, ArrayRef<MCPhysReg> F64Regs) {
+ const MipsSubtarget &Subtarget = static_cast<const MipsSubtarget &>(
+ State.getMachineFunction().getSubtarget());
static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
@@ -2368,39 +2395,39 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
// f32 and f64 are allocated in A0, A1, A2, A3 when any of the following
// is true: the function is vararg, the argument is the 3rd or higher, or a
// previous argument is not f32 or f64.
- bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1
- || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo;
+ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 ||
+ State.getFirstUnallocated(F32Regs) != ValNo;
unsigned OrigAlign = ArgFlags.getOrigAlign();
bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
- Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ Reg = State.AllocateReg(IntRegs);
// If this is the first part of an i64 arg,
// the allocated register must be either A0 or A2.
if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3))
- Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ Reg = State.AllocateReg(IntRegs);
LocVT = MVT::i32;
} else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) {
// Allocate int register and shadow next int register. If first
// available register is Mips::A1 or Mips::A3, shadow it too.
- Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ Reg = State.AllocateReg(IntRegs);
if (Reg == Mips::A1 || Reg == Mips::A3)
- Reg = State.AllocateReg(IntRegs, IntRegsSize);
- State.AllocateReg(IntRegs, IntRegsSize);
+ Reg = State.AllocateReg(IntRegs);
+ State.AllocateReg(IntRegs);
LocVT = MVT::i32;
} else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) {
// We are guaranteed to find an available float register.
if (ValVT == MVT::f32) {
- Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ Reg = State.AllocateReg(F32Regs);
// Shadow int register
- State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs);
} else {
- Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ Reg = State.AllocateReg(F64Regs);
// Shadow int registers
- unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize);
+ unsigned Reg2 = State.AllocateReg(IntRegs);
if (Reg2 == Mips::A1 || Reg2 == Mips::A3)
- State.AllocateReg(IntRegs, IntRegsSize);
- State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs);
+ State.AllocateReg(IntRegs);
}
} else
llvm_unreachable("Cannot handle this ValVT.");
@@ -2453,7 +2480,7 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
bool IsTailCall, SelectionDAG &DAG) const {
if (!IsTailCall) {
SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, DL));
return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false,
false, 0);
}
@@ -2482,8 +2509,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// used for the function (that is, Mips linker doesn't generate lazy binding
// stub for a function whose address is taken in the program).
if (IsPICCall && !InternalLinkage && IsCallReloc) {
- unsigned GPReg = Subtarget.isABI_N64() ? Mips::GP_64 : Mips::GP;
- EVT Ty = Subtarget.isABI_N64() ? MVT::i64 : MVT::i32;
+ unsigned GPReg = ABI.IsN64() ? Mips::GP_64 : Mips::GP;
+ EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
@@ -2506,9 +2533,9 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
if (Subtarget.inMips16HardFloat()) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
@@ -2543,7 +2570,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+ const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
@@ -2555,7 +2582,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Allocate the reserved argument area. It seems strange to do this from the
// caller side but removing it breaks the frame size calculation.
- const MipsABIInfo &ABI = Subtarget.getABI();
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode());
@@ -2580,14 +2606,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// byval arguments to the stack.
unsigned StackAlignment = TFL->getStackAlignment();
NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
- SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
+ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true);
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL);
SDValue StackPtr = DAG.getCopyFromReg(
- Chain, DL, Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP,
- getPointerTy());
+ Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy());
// With EABI it is possible to have 16 args in registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
@@ -2633,9 +2658,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Arg, DAG.getConstant(0, MVT::i32));
+ Arg, DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Arg, DAG.getConstant(1, MVT::i32));
+ Arg, DAG.getConstant(1, DL, MVT::i32));
if (!Subtarget.isLittle())
std::swap(Lo, Hi);
unsigned LocRegLo = VA.getLocReg();
@@ -2674,7 +2699,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
Arg = DAG.getNode(
ISD::SHL, DL, VA.getLocVT(), Arg,
- DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
}
// Arguments that can be passed on register must be kept at
@@ -2701,9 +2726,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- bool IsPICCall =
- (Subtarget.isABI_N64() || IsPIC); // true if calls are translated to
- // jalr $25
+ bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to
+ // jalr $25
bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
@@ -2714,8 +2738,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
- Callee = getAddrLocal(G, DL, Ty, DAG,
- Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64());
else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
@@ -2734,7 +2757,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
+ if (!ABI.IsN64() && !IsPIC) // !N64 && static
Callee =
DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG);
else if (LargeGOT) {
@@ -2765,7 +2788,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
- DAG.getIntPtrConstant(0, true), InFlag, DL);
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
@@ -2804,7 +2827,7 @@ SDValue MipsTargetLowering::LowerCallResult(
VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
Val = DAG.getNode(
Shift, DL, VA.getLocVT(), Val,
- DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
}
switch (VA.getLocInfo()) {
@@ -2857,7 +2880,7 @@ static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
Val = DAG.getNode(
Opcode, DL, VA.getLocVT(), Val,
- DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
break;
}
}
@@ -2919,7 +2942,6 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
- const MipsABIInfo &ABI = Subtarget.getABI();
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
@@ -2975,7 +2997,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
(RegVT == MVT::i64 && ValVT == MVT::f64) ||
(RegVT == MVT::f64 && ValVT == MVT::i64))
ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- else if (Subtarget.isABI_O32() && RegVT == MVT::i32 &&
+ else if (ABI.IsO32() && RegVT == MVT::i32 &&
ValVT == MVT::f64) {
unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
@@ -2990,12 +3012,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
} else { // VA.isRegLoc()
MVT LocVT = VA.getLocVT();
- if (Subtarget.isABI_O32()) {
+ if (ABI.IsO32()) {
// We ought to be able to use LocVT directly but O32 sets it to i32
// when allocating floating point values to integer registers.
// This shouldn't influence how we load the value into registers unless
// we are targeting softfloat.
- if (VA.getValVT().isFloatingPoint() && !Subtarget.abiUsesSoftFloat())
+ if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat())
LocVT = VA.getValVT();
}
@@ -3027,7 +3049,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(
- getRegClassFor(Subtarget.isABI_N64() ? MVT::i64 : MVT::i32));
+ getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
@@ -3065,7 +3087,7 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
bool
MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
- if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
+ if (Subtarget.hasMips3() && Subtarget.useSoftFloat()) {
if (Type == MVT::i32)
return true;
}
@@ -3132,7 +3154,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
Val = DAG.getNode(
ISD::SHL, DL, VA.getLocVT(), Val,
- DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
@@ -3153,7 +3175,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
- unsigned V0 = Subtarget.isABI_N64() ? Mips::V0_64 : Mips::V0;
+ unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
@@ -3204,6 +3226,10 @@ getConstraintType(const std::string &Constraint) const
return C_Memory;
}
}
+
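+ // "ZC" names a memory operand (an address suitable for ll/sc-style
+ // instructions), so classify it as a memory constraint.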
+ if (Constraint == "ZC")
+ return C_Memory;
+
return TargetLowering::getConstraintType(Constraint);
}
@@ -3288,7 +3314,7 @@ parsePhysicalReg(StringRef C, std::string &Prefix,
std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ Subtarget.getRegisterInfo();
const TargetRegisterClass *RC;
std::string Prefix;
unsigned long long Reg;
@@ -3362,9 +3388,10 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
/// Given a register class constraint, like 'r', if this corresponds directly
/// to an LLVM register class, return a register of 0 and the register class
/// pointer.
-std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
-{
+std::pair<unsigned, const TargetRegisterClass *>
+MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
@@ -3420,7 +3447,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
if (R.second)
return R;
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -3429,6 +3456,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
SDValue Result;
// Only support length 1 constraints for now.
@@ -3443,7 +3471,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getSExtValue();
if (isInt<16>(Val)) {
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3453,7 +3481,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getZExtValue();
if (Val == 0) {
- Result = DAG.getTargetConstant(0, Type);
+ Result = DAG.getTargetConstant(0, DL, Type);
break;
}
}
@@ -3463,7 +3491,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
uint64_t Val = (uint64_t)C->getZExtValue();
if (isUInt<16>(Val)) {
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3473,7 +3501,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getSExtValue();
if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3483,7 +3511,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getSExtValue();
if ((Val >= -65535) && (Val <= -1)) {
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3493,7 +3521,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getSExtValue();
if ((isInt<15>(Val))) {
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3503,7 +3531,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
EVT Type = Op.getValueType();
int64_t Val = C->getSExtValue();
if ((Val <= 65535) && (Val >= 1)) {
- Result = DAG.getTargetConstant(Val, Type);
+ Result = DAG.getTargetConstant(Val, DL, Type);
break;
}
}
@@ -3564,12 +3592,16 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (Subtarget.isABI_N64())
+ if (ABI.IsN64())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
}
+bool MipsTargetLowering::useSoftFloat() const {
+ return Subtarget.useSoftFloat();
+}
+
void MipsTargetLowering::copyByValRegs(
SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains, SelectionDAG &DAG,
const ISD::ArgFlagsTy &Flags, SmallVectorImpl<SDValue> &InVals,
@@ -3582,7 +3614,6 @@ void MipsTargetLowering::copyByValRegs(
unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
int FrameObjOffset;
- const MipsABIInfo &ABI = Subtarget.getABI();
ArrayRef<MCPhysReg> ByValArgRegs = ABI.GetByValArgRegs();
if (RegAreaSize)
@@ -3610,7 +3641,7 @@ void MipsTargetLowering::copyByValRegs(
unsigned VReg = addLiveIn(MF, ArgReg, RC);
unsigned Offset = I * GPRSizeInBytes;
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
- DAG.getConstant(Offset, PtrTy));
+ DAG.getConstant(Offset, DL, PtrTy));
SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
StorePtr, MachinePointerInfo(FuncArg, Offset),
false, false, 0);
@@ -3634,14 +3665,14 @@ void MipsTargetLowering::passByValArg(
unsigned NumRegs = LastReg - FirstReg;
if (NumRegs) {
- const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetByValArgRegs();
+ const ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
// Copy words to registers.
for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(OffsetInBytes, PtrTy));
+ DAG.getConstant(OffsetInBytes, DL, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
MachinePointerInfo(), false, false, false,
Alignment);
@@ -3667,7 +3698,8 @@ void MipsTargetLowering::passByValArg(
// Load subword.
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(OffsetInBytes, PtrTy));
+ DAG.getConstant(OffsetInBytes, DL,
+ PtrTy));
SDValue LoadVal = DAG.getExtLoad(
ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, false,
@@ -3683,7 +3715,7 @@ void MipsTargetLowering::passByValArg(
Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
- DAG.getConstant(Shamt, MVT::i32));
+ DAG.getConstant(Shamt, DL, MVT::i32));
if (Val.getNode())
Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift);
@@ -3704,11 +3736,13 @@ void MipsTargetLowering::passByValArg(
// Copy remainder of byval arg to it with memcpy.
unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(OffsetInBytes, PtrTy));
+ DAG.getConstant(OffsetInBytes, DL, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset()));
- Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
+ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+ Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
+ DAG.getConstant(MemCpySize, DL, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
MemOpChains.push_back(Chain);
}
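// (Sketch) passByValArg splits a byval argument: whole words are loaded into
// the ABI's argument GPRs, a partial trailing word is assembled from subword
// loads shifted into place, and whatever remains is copied to the stack with
// a target memcpy node.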
@@ -3717,8 +3751,8 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
SDValue Chain, SDLoc DL,
SelectionDAG &DAG,
CCState &State) const {
- const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetVarArgRegs();
- unsigned Idx = State.getFirstUnallocated(ArgRegs.data(), ArgRegs.size());
+ const ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
+ unsigned Idx = State.getFirstUnallocated(ArgRegs);
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
@@ -3733,7 +3767,6 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
VaArgOffset =
RoundUpToAlignment(State.getNextStackOffset(), RegSizeInBytes);
else {
- const MipsABIInfo &ABI = Subtarget.getABI();
VaArgOffset =
(int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
(int)(RegSizeInBytes * (ArgRegs.size() - Idx));
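// (Sketch) writeVarArgRegs spills the still-unallocated variadic argument
// GPRs to the register save area so va_arg can walk them in memory;
// VaArgOffset is where that area starts relative to the ABI's
// callee-allocated argument region.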
@@ -3764,8 +3797,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
- MachineFunction &MF = State->getMachineFunction();
- const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+ const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
assert(Size && "Byval argument's size shouldn't be 0.");
@@ -3776,10 +3808,10 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
if (State->getCallingConv() != CallingConv::Fast) {
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
- const ArrayRef<MCPhysReg> IntArgRegs = Subtarget.getABI().GetByValArgRegs();
+ const ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
// FIXME: The O32 case actually describes no shadow registers.
const MCPhysReg *ShadowRegs =
- Subtarget.isABI_O32() ? IntArgRegs.data() : Mips64DPRegs;
+ ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs;
// We used to check the size as well but we can't do that anymore since
// CCState::HandleByVal() rounds up the size after calling this function.
@@ -3787,7 +3819,7 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
"Byval argument's alignment should be a multiple of"
"RegSizeInBytes.");
- FirstReg = State->getFirstUnallocated(IntArgRegs.data(), IntArgRegs.size());
+ FirstReg = State->getFirstUnallocated(IntArgRegs);
// If Align > RegSizeInBytes, the first arg register must be even.
// FIXME: This condition happens to do the right thing but it's not the
@@ -3816,7 +3848,7 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB,
"conditional-move instructions.");
const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ Subtarget.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT instruction, we actually have to insert the
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 4da337a..6ea14b5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
#define LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -26,7 +27,7 @@
namespace llvm {
namespace MipsISD {
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -361,6 +362,8 @@ namespace llvm {
// Subtarget Info
const MipsSubtarget &Subtarget;
+  // Cache the ABI from the TargetMachine; we use it everywhere.
+ const MipsABIInfo &ABI;
private:
// Create a TargetGlobalAddress node.
@@ -488,9 +491,10 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass *>
parseRegForInlineAsmConstraint(StringRef C, MVT VT) const;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
@@ -501,6 +505,15 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "R")
+ return InlineAsm::Constraint_R;
+ else if (ConstraintCode == "ZC")
+ return InlineAsm::Constraint_ZC;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
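+  // Usage sketch (hypothetical caller, not in this patch): generic inline-asm
+  // lowering can map a memory-constraint string to an ID once, e.g.
+  //   unsigned ID = TLI.getInlineAsmMemConstraint("ZC"); // Constraint_ZC
+  // rather than re-parsing the constraint string at every use.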
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -517,6 +530,7 @@ namespace llvm {
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
unsigned getJumpTableEncoding() const override;
+ bool useSoftFloat() const override;
/// Emit a sign-extension using sll/sra, seb, or seh appropriately.
MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr *MI,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index ed97cb4..cb91225 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -65,6 +65,8 @@ def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">,
AssemblerPredicate<"FeatureSingleFloat">;
def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">,
AssemblerPredicate<"!FeatureSingleFloat">;
+def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">,
+ AssemblerPredicate<"!FeatureSoftFloat">;
//===----------------------------------------------------------------------===//
// Mips FGR size adjectives.
@@ -73,6 +75,7 @@ def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">,
class FGR_32 { list<Predicate> FGRPredicates = [NotFP64bit]; }
class FGR_64 { list<Predicate> FGRPredicates = [IsFP64bit]; }
+class HARDFLOAT { list<Predicate> HardFloatPredicate = [IsNotSoftFloat]; }
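+// Usage sketch (illustrative): the FPU instruction classes below mix this in,
+// e.g. "class ADDS_FT<...> : InstSE<...>, HARDFLOAT", so their patterns and
+// encodings are rejected whenever FeatureSoftFloat is enabled.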
//===----------------------------------------------------------------------===//
@@ -98,22 +101,19 @@ def fpimm0neg : PatLeaf<(fpimm), [{
//
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-
class ADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin, bit IsComm,
SDPatternOperator OpNode= null_frag> :
InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft),
!strconcat(opstr, "\t$fd, $fs, $ft"),
- [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR, opstr> {
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR, opstr>,
+ HARDFLOAT {
let isCommutable = IsComm;
}
multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
SDPatternOperator OpNode = null_frag> {
- def _D32 : MMRel, ADDS_FT<opstr, AFGR64Opnd, Itin, IsComm, OpNode>,
- AdditionalRequires<[NotFP64bit]>;
- def _D64 : ADDS_FT<opstr, FGR64Opnd, Itin,
- IsComm, OpNode>,
- AdditionalRequires<[IsFP64bit]> {
+ def _D32 : MMRel, ADDS_FT<opstr, AFGR64Opnd, Itin, IsComm, OpNode>, FGR_32;
+ def _D64 : ADDS_FT<opstr, FGR64Opnd, Itin, IsComm, OpNode>, FGR_64 {
string DecoderNamespace = "Mips64";
}
}
@@ -122,23 +122,21 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
[(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>,
+ HARDFLOAT,
NeverHasSideEffects;
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
- AdditionalRequires<[NotFP64bit]>;
- def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
- AdditionalRequires<[IsFP64bit]> {
+ FGR_32;
+ def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
string DecoderNamespace = "Mips64";
}
}
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
- def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>,
- AdditionalRequires<[NotFP64bit]>;
- def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>,
- AdditionalRequires<[IsFP64bit]> {
+ def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>, FGR_32;
+ def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>, FGR_64 {
let DecoderNamespace = "Mips64";
}
}
@@ -146,17 +144,17 @@ multiclass ROUND_M<string opstr, InstrItinClass Itin> {
class MFC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
- [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>;
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>, HARDFLOAT;
class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
- [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>;
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>, HARDFLOAT;
class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstrItinClass Itin> :
InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt),
- !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr> {
+ !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr>, HARDFLOAT {
// $fs_in is part of a white lie to work around a widespread bug in the FPU
// implementation. See expandBuildPairF64 for details.
let Constraints = "$fs = $fs_in";
@@ -165,7 +163,8 @@ class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> :
InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr>,
+ HARDFLOAT {
let DecoderMethod = "DecodeFMem";
let mayLoad = 1;
}
@@ -173,7 +172,7 @@ class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> :
InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr>, HARDFLOAT {
let DecoderMethod = "DecodeFMem";
let mayStore = 1;
}
@@ -183,21 +182,21 @@ class MADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
!strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
[(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin,
- FrmFR, opstr>;
+ FrmFR, opstr>, HARDFLOAT;
class NMADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode = null_frag> :
InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
!strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
[(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
- Itin, FrmFR, opstr>;
+ Itin, FrmFR, opstr>, HARDFLOAT;
class LWXC1_FT<string opstr, RegisterOperand DRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fd, ${index}(${base})"),
[(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin,
- FrmFI, opstr> {
+ FrmFI, opstr>, HARDFLOAT {
let AddedComplexity = 20;
}
@@ -206,7 +205,7 @@ class SWXC1_FT<string opstr, RegisterOperand DRC,
InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fs, ${index}(${base})"),
[(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin,
- FrmFI, opstr> {
+ FrmFI, opstr>, HARDFLOAT {
let AddedComplexity = 20;
}
@@ -215,7 +214,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset),
!strconcat(opstr, "\t$fcc, $offset"),
[(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin,
- FrmFI, opstr> {
+ FrmFI, opstr>, HARDFLOAT {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = DelaySlot;
@@ -227,7 +226,7 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond),
!strconcat("c.$cond.", typestr, "\t$fs, $ft"),
[(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR,
- !strconcat("c.$cond.", typestr)> {
+ !strconcat("c.$cond.", typestr)>, HARDFLOAT {
let Defs = [FCC0];
let isCodeGenOnly = 1;
}
@@ -236,7 +235,7 @@ class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC,
InstrItinClass itin> :
InstSE<(outs), (ins RC:$fs, RC:$ft),
!strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin,
- FrmFR>;
+ FrmFR>, HARDFLOAT;
multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
InstrItinClass itin> {
@@ -260,10 +259,10 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
- AdditionalRequires<[NotFP64bit]>;
+ FGR_32;
let DecoderNamespace = "Mips64" in
defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
- AdditionalRequires<[IsFP64bit]>;
+ FGR_64;
//===----------------------------------------------------------------------===//
// Floating Point Instructions
@@ -363,15 +362,15 @@ def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
- MFC1_FM<3>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+ MFC1_FM<3>, ISA_MIPS32R2, FGR_32;
def MFHC1_D64 : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>,
- MFC1_FM<3>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
+ MFC1_FM<3>, ISA_MIPS32R2, FGR_64 {
let DecoderNamespace = "Mips64";
}
def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+ MFC1_FM<7>, ISA_MIPS32R2, FGR_32;
def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
+ MFC1_FM<7>, ISA_MIPS32R2, FGR_64 {
let DecoderNamespace = "Mips64";
}
def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1,
@@ -382,9 +381,9 @@ def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1,
def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>,
ABSS_FM<0x6, 16>;
def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, AdditionalRequires<[NotFP64bit]>;
+ ABSS_FM<0x6, 17>, FGR_32;
def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, AdditionalRequires<[IsFP64bit]> {
+ ABSS_FM<0x6, 17>, FGR_64 {
let DecoderNamespace = "Mips64";
}
@@ -513,36 +512,14 @@ def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>,
BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6;
-//===----------------------------------------------------------------------===//
-// Floating Point Flag Conditions
-//===----------------------------------------------------------------------===//
-// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
-// They must be kept in synch.
-def MIPS_FCOND_F : PatLeaf<(i32 0)>;
-def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
-def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
-def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
-def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
-def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
-def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
-def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
-def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
-def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
-def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
-def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
-def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
-def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
-def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
-def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
-
/// Floating Point Compare
def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
ISA_MIPS1_NOT_32R6_64R6;
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[NotFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[IsFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -554,10 +531,8 @@ class BuildPairF64Base<RegisterOperand RO> :
PseudoSE<(outs RO:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
[(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
-def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>,
- AdditionalRequires<[NotFP64bit]>;
-def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>,
- AdditionalRequires<[IsFP64bit]>;
+def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>, FGR_32, HARDFLOAT;
+def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>, FGR_64, HARDFLOAT;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
@@ -567,22 +542,20 @@ class ExtractElementF64Base<RegisterOperand RO> :
PseudoSE<(outs GPR32Opnd:$dst), (ins RO:$src, i32imm:$n),
[(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>;
-def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>,
- AdditionalRequires<[NotFP64bit]>;
-def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
- AdditionalRequires<[IsFP64bit]>;
+def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>, FGR_32, HARDFLOAT;
+def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>, FGR_64, HARDFLOAT;
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6;
+ ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6;
+ ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6;
+ ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6;
+ ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index d8dae25..02ecf32 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -50,6 +50,20 @@ def Std2MicroMips : InstrMapping {
let ValueCols = [["se"], ["micromips"]];
}
+class StdMMR6Rel;
+
+def Std2MicroMipsR6 : InstrMapping {
+ let FilterClass = "StdMMR6Rel";
+  // Instructions with the same BaseOpcode form a row.
+  let RowFields = ["BaseOpcode"];
+  // Instructions with the same Arch value form a column.
+  let ColFields = ["Arch"];
+  // The key column holds the standard ("se") encodings.
+  let KeyCol = ["se"];
+  // The value columns are the standard and microMIPS32r6 encodings.
+  let ValueCols = [["se"], ["micromipsr6"]];
+}
+
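+// Illustrative example (not part of this change): a standard "se" instruction
+// tagged StdMMR6Rel, such as ADDiu further below, pairs with a microMIPS32r6
+// definition sharing its BaseOpcode, and TableGen emits a table mapping the
+// standard encoding to the microMIPS32r6 one.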
class StdArch {
string Arch = "se";
}
@@ -297,6 +311,19 @@ class BGEZ_FM<bits<6> op, bits<5> funct> : StdArch {
let Inst{15-0} = offset;
}
+class BBIT_FM<bits<6> op> : StdArch {
+ bits<5> rs;
+ bits<5> p;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = p;
+ let Inst{15-0} = offset;
+}
+
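+// Encoding sketch: BBIT_FM packs a cnMIPS bbit0/bbit1-style I-type word as
+// opcode (bits 31-26), source register rs (25-21), tested bit position p
+// (20-16), and a 16-bit branch offset (15-0).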
class SLTI_FM<bits<6> op> : StdArch {
bits<5> rt;
bits<5> rs;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
index db149d4..4589535 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -29,7 +29,7 @@
#include "MipsGenInstrInfo.inc"
namespace llvm {
-
+class MipsSubtarget;
class MipsInstrInfo : public MipsGenInstrInfo {
virtual void anchor();
protected:
@@ -117,6 +117,10 @@ public:
const TargetRegisterInfo *TRI,
int64_t Offset) const = 0;
+ virtual void adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const = 0;
+
/// Create an instruction which has the same operands and memory operands
/// as MI but has a new opcode.
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index aef1039..58791cf 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -182,14 +182,14 @@ def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
AssemblerPredicate<"FeatureMips64r6">;
def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
+def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">,
+ AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
-def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
- AssemblerPredicate<"FeatureMips32">;
-def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
- AssemblerPredicate<"FeatureMips32">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
@@ -249,6 +249,9 @@ class ISA_MIPS64_NOT_64R6 {
class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+class ISA_MICROMIPS32R6 {
+ list<Predicate> InsnPredicates = [HasMicroMips32r6];
+}
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
@@ -1000,7 +1003,7 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
SDPatternOperator Op = null_frag>:
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
@@ -1008,7 +1011,7 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR, opstr>, ISA_MIPS32R2 {
+ II_INS, FrmR, opstr>, ISA_MIPS32R2 {
let Constraints = "$src = $rt";
}
@@ -1120,8 +1123,9 @@ def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
/// Arithmetic Instructions (ALU Immediate)
let AdditionalPredicates = [NotInMicroMips] in {
-def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16,
- add>, ADDI_FM<0x9>, IsAsCheapAsAMove;
+def ADDiu : MMRel, StdMMR6Rel, ArithLogicI<"addiu", simm16, GPR32Opnd,
+ II_ADDIU, immSExt16, add>,
+ ADDI_FM<0x9>, IsAsCheapAsAMove;
}
def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>,
ISA_MIPS1_NOT_32R6_64R6;
@@ -1130,36 +1134,40 @@ def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xb>;
let AdditionalPredicates = [NotInMicroMips] in {
-def ANDi : MMRel, ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, immZExt16,
- and>, ADDI_FM<0xc>;
+def ANDi : MMRel, StdMMR6Rel,
+ ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, immZExt16, and>,
+ ADDI_FM<0xc>;
}
-def ORi : MMRel, ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16,
- or>,
+def ORi : MMRel, StdMMR6Rel,
+ ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16, or>,
ADDI_FM<0xd>;
-def XORi : MMRel, ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16,
- xor>,
+def XORi : MMRel, StdMMR6Rel,
+ ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>,
ADDI_FM<0xe>;
def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16>, LUI_FM;
-
+let AdditionalPredicates = [NotInMicroMips] in {
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+def ADDu : MMRel, StdMMR6Rel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
ADD_FM<0, 0x21>;
def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
+}
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
-def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
+def ADD : MMRel, StdMMR6Rel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>;
def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
def SLTu : MMRel, SetCC_R<"sltu", setult, GPR32Opnd>, ADD_FM<0, 0x2b>;
-def AND : MMRel, ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>,
+let AdditionalPredicates = [NotInMicroMips] in {
+def AND : MMRel, StdMMR6Rel, ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>,
ADD_FM<0, 0x24>;
-def OR : MMRel, ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>,
+def OR : MMRel, StdMMR6Rel, ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>,
ADD_FM<0, 0x25>;
-def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
+def XOR : MMRel, StdMMR6Rel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
ADD_FM<0, 0x26>;
-def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
+}
+def NOR : MMRel, StdMMR6Rel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
let AdditionalPredicates = [NotInMicroMips] in {
@@ -1192,11 +1200,15 @@ def LBu : Load<"lbu", GPR32Opnd, zextloadi8, II_LBU, addrDefault>, MMRel,
def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel,
LW_FM<0x21>;
def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>;
+let AdditionalPredicates = [NotInMicroMips] in {
def LW : Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel,
LW_FM<0x23>;
+}
def SB : Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, LW_FM<0x28>;
def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>;
+let AdditionalPredicates = [NotInMicroMips] in {
def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>;
+}
/// load/store left/right
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
@@ -1211,6 +1223,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
ISA_MIPS1_NOT_32R6_64R6;
}
+let AdditionalPredicates = [NotInMicroMips] in {
// COP2 Memory Instructions
def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
ISA_MIPS1_NOT_32R6_64R6;
@@ -1230,6 +1243,7 @@ let DecoderNamespace = "COP3_" in {
def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
ISA_MIPS2;
}
+}
def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
@@ -1308,8 +1322,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
}
-// FIXME: JALX really requires either MIPS16 or microMIPS in addition to MIPS32.
-def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6;
+def JALX : MMRel, JumpLink<"jalx", calltarget>, FJ<0x1D>,
+ ISA_MIPS32_NOT_32R6_64R6;
def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>,
ISA_MIPS1_NOT_32R6_64R6;
def BGEZALL : MMRel, BGEZAL_FT<"bgezall", brtarget, GPR32Opnd, 0>,
@@ -1396,9 +1410,9 @@ def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>,
}
/// Sign Ext In Register Instructions.
-def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>,
+def SEB : MMRel, StdMMR6Rel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>,
SEB_FM<0x10, 0x20>, ISA_MIPS32R2;
-def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
+def SEH : MMRel, StdMMR6Rel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
SEB_FM<0x18, 0x20>, ISA_MIPS32R2;
/// Count Leading
@@ -1556,8 +1570,6 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
}
-def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>;
-def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
def : MipsInstAlias<"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
@@ -1573,6 +1585,8 @@ def : MipsInstAlias<"sltu $rt, $rs, $imm",
(SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"xor $rs, $rt, $imm",
(XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"xor $rs, $imm",
+ (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $imm",
@@ -1582,11 +1596,17 @@ def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
+}
def : MipsInstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bnezl $rs,$offset",
+ (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"beqz $rs,$offset",
(BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"beqzl $rs,$offset",
+ (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
@@ -1631,20 +1651,26 @@ def : MipsInstAlias<"sync",
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>;
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
+ RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>;
+def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>;
-class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>;
+def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>;
+
+def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
+ "jal\t$rd, $rs"> ;
+def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
+ "jal\t$rs"> ;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1748,9 +1774,11 @@ def : WrapperPat<tblockaddress, ADDiu, GPR32>;
def : WrapperPat<tjumptable, ADDiu, GPR32>;
def : WrapperPat<tglobaltlsaddr, ADDiu, GPR32>;
+let AdditionalPredicates = [NotInMicroMips] in {
// Mips does not have "not", so we expand our way
def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
+}
// extended loads
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
@@ -1853,7 +1881,9 @@ def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>;
let AddedComplexity = 40 in {
def : LoadRegImmPat<LBu, i32, zextloadi8>;
def : LoadRegImmPat<LH, i32, sextloadi16>;
+ let AdditionalPredicates = [NotInMicroMips] in {
def : LoadRegImmPat<LW, i32, load>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1885,3 +1915,7 @@ include "MipsMSAInstrInfo.td"
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
include "MicroMipsInstrFPU.td"
+
+// MicroMips32r6
+include "MicroMips32r6InstrFormats.td"
+include "MicroMips32r6InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
index aae0922..90f8cc0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -63,11 +63,9 @@ namespace {
public:
static char ID;
MipsLongBranch(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI.IsN64() ? 10 :
- (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}
+ : MachineFunctionPass(ID), TM(tm),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {}
const char *getPassName() const override {
return "Mips Long Branch";
@@ -170,7 +168,7 @@ void MipsLongBranch::initMBBInfo() {
MBBInfos.resize(MF->size());
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ static_cast<const MipsInstrInfo *>(MF->getSubtarget().getInstrInfo());
for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(I);
@@ -216,8 +214,8 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
// MachineBasicBlock operand MBBOpnd.
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
DebugLoc DL, MachineBasicBlock *MBBOpnd) {
- const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode());
const MCInstrDesc &NewDesc = TII->get(NewOpc);
@@ -258,9 +256,10 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB);
MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB);
-
+ const MipsSubtarget &Subtarget =
+ static_cast<const MipsSubtarget &>(MF->getSubtarget());
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ static_cast<const MipsInstrInfo *>(Subtarget.getInstrInfo());
MF->insert(FallThroughMBB, LongBrMBB);
MBB->removeSuccessor(TgtMBB);
@@ -275,8 +274,6 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
// We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal
// instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is a
// pseudo-instruction wrapping BGEZAL).
-
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;
if (!ABI.IsN64()) {
@@ -333,7 +330,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
.addReg(Mips::SP).addImm(0);
- if (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ if (!Subtarget.isTargetNaCl()) {
MIBundleBuilder(*BalTgtMBB, Pos)
.append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
.append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
@@ -452,14 +449,17 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
}
bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(F.getSubtarget());
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
+ static_cast<const MipsInstrInfo *>(STI.getInstrInfo());
+ LongBranchSeqSize =
+ !IsPIC ? 2 : (ABI.IsN64() ? 10 : (!STI.isTargetNaCl() ? 9 : 10));
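+  // (Sketch) Sequence length in instructions: a non-PIC long branch expands
+  // to 2, the PIC sequence to 10 on N64 and 9 elsewhere, except under NaCl
+  // where extra sandboxing code brings it back to 10.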
- const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
if (STI.inMips16Mode() || !STI.enableLongBranchPass())
return false;
if ((TM.getRelocationModel() == Reloc::PIC_) &&
- TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
+ static_cast<const MipsTargetMachine &>(TM).getABI().IsO32() &&
F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
emitGPDisp(F, TII);
@@ -481,10 +481,10 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
if (!I->Br || I->HasLongBranch)
continue;
- int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+ int ShVal = STI.inMicroMipsMode() ? 2 : 4;
int64_t Offset = computeOffset(I->Br) / ShVal;
- if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ if (STI.isTargetNaCl()) {
// The offset calculation does not include sandboxing instructions
// that will be added later in the MC layer. Since at this point we
// don't know the exact amount of code that "sandboxing" will add, we
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 821392e..9e61180 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -103,14 +104,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
if (!Offset)
- return MCOperand::CreateExpr(MCSym);
+ return MCOperand::createExpr(MCSym);
// Assume offset is never negative.
assert(Offset > 0);
const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
- return MCOperand::CreateExpr(Add);
+ return MCOperand::createExpr(Add);
}
/*
@@ -134,9 +135,9 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) break;
- return MCOperand::CreateReg(MO.getReg());
+ return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm() + offset);
+ return MCOperand::createImm(MO.getImm() + offset);
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
@@ -158,7 +159,7 @@ MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1,
const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx);
const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx);
- return MCOperand::CreateExpr(MipsMCExpr::Create(Kind, Sub, *Ctx));
+ return MCOperand::createExpr(MipsMCExpr::Create(Kind, Sub, *Ctx));
}
void MipsMCInstLower::
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 68230e6..970e98e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -63,6 +63,9 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT",
def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
+def immZExt6Ptr : ImmLeaf<iPTR, [{return isUInt<6>(Imm);}]>;
+
// Operands
// The immediate of an LSA instruction needs special handling
@@ -84,6 +87,14 @@ def uimm4 : Operand<i32> {
let PrintMethod = "printUnsignedImm8";
}
+def uimm4_ptr : Operand<iPTR> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def uimm6_ptr : Operand<iPTR> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
def uimm8 : Operand<i32> {
let PrintMethod = "printUnsignedImm8";
}
@@ -364,7 +375,7 @@ def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
APInt Imm;
EVT EltTy = N->getValueType(0).getVectorElementType();
- return selectVSplat (N, Imm) &&
+ return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
}]>;
@@ -373,7 +384,7 @@ def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
SDNode *BV = N->getOperand(0).getNode();
EVT EltTy = N->getValueType(0).getVectorElementType();
- return selectVSplat (BV, Imm) &&
+ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
}]>;
@@ -1273,9 +1284,9 @@ class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
RegisterOperand ROWS,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROD:$rd);
- dag InOperandList = (ins ROWS:$ws, uimm4:$n);
+ dag InOperandList = (ins ROWS:$ws, uimm4_ptr:$n);
string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]");
- list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4:$n))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4Ptr:$n))];
InstrItinClass Itinerary = itin;
}
@@ -1293,8 +1304,8 @@ class MSA_ELM_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class MSA_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
RegisterClass RCD, RegisterClass RCWS> :
- MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n),
- [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> {
+ MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4_ptr:$n),
+ [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4Ptr:$n))]> {
bit usesCustomInserter = 1;
}
@@ -1479,29 +1490,30 @@ class MSA_INSERT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
RegisterOperand ROWD, RegisterOperand ROS,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6:$n);
+ dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6_ptr:$n);
string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs");
list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
ROS:$rs,
- immZExt6:$n))];
+ immZExt6Ptr:$n))];
InstrItinClass Itinerary = itin;
string Constraints = "$wd = $wd_in";
}
class MSA_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
RegisterOperand ROWD, RegisterOperand ROFS> :
- MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs),
+ MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6_ptr:$n, ROFS:$fs),
[(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
- immZExt6:$n))]> {
+ immZExt6Ptr:$n))]> {
bit usesCustomInserter = 1;
string Constraints = "$wd = $wd_in";
}
class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
- RegisterOperand ROWD, RegisterOperand ROFS> :
- MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, GPR32Opnd:$n, ROFS:$fs),
+ RegisterOperand ROWD, RegisterOperand ROFS,
+ RegisterOperand ROIdx> :
+ MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, ROIdx:$n, ROFS:$fs),
[(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
- GPR32Opnd:$n))]> {
+ ROIdx:$n))]> {
bit usesCustomInserter = 1;
string Constraints = "$wd = $wd_in";
}
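// (Sketch) The extra ROIdx parameter lets the same pseudo accept either a
// GPR32 or a GPR64 index operand; the new *_VIDX64 descriptors below use the
// 64-bit form for variable insertions under the N64 ABI.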
@@ -2302,13 +2314,13 @@ class INSERT_D_DESC : MSA_INSERT_DESC_BASE<"insert.d", vinsert_v2i64,
MSA128DOpnd, GPR64Opnd>;
class INSERT_B_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v16i8, MSA128BOpnd, GPR32Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v16i8, MSA128BOpnd, GPR32Opnd, GPR32Opnd>;
class INSERT_H_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v8i16, MSA128HOpnd, GPR32Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v8i16, MSA128HOpnd, GPR32Opnd, GPR32Opnd>;
class INSERT_W_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4i32, MSA128WOpnd, GPR32Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4i32, MSA128WOpnd, GPR32Opnd, GPR32Opnd>;
class INSERT_D_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2i64, MSA128DOpnd, GPR64Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2i64, MSA128DOpnd, GPR64Opnd, GPR32Opnd>;
class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v4f32,
MSA128WOpnd, FGR32Opnd>;
@@ -2316,9 +2328,23 @@ class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v2f64,
MSA128DOpnd, FGR64Opnd>;
class INSERT_FW_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4f32, MSA128WOpnd, FGR32Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4f32, MSA128WOpnd, FGR32Opnd, GPR32Opnd>;
class INSERT_FD_VIDX_PSEUDO_DESC :
- MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd>;
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR32Opnd>;
+
+class INSERT_B_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v16i8, MSA128BOpnd, GPR32Opnd, GPR64Opnd>;
+class INSERT_H_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v8i16, MSA128HOpnd, GPR32Opnd, GPR64Opnd>;
+class INSERT_W_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4i32, MSA128WOpnd, GPR32Opnd, GPR64Opnd>;
+class INSERT_D_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2i64, MSA128DOpnd, GPR64Opnd, GPR64Opnd>;
+
+class INSERT_FW_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4f32, MSA128WOpnd, FGR32Opnd, GPR64Opnd>;
+class INSERT_FD_VIDX64_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8,
MSA128BOpnd>;
@@ -3236,6 +3262,13 @@ def INSERT_D_VIDX_PSEUDO : INSERT_D_VIDX_PSEUDO_DESC;
def INSERT_FW_VIDX_PSEUDO : INSERT_FW_VIDX_PSEUDO_DESC;
def INSERT_FD_VIDX_PSEUDO : INSERT_FD_VIDX_PSEUDO_DESC;
+def INSERT_B_VIDX64_PSEUDO : INSERT_B_VIDX64_PSEUDO_DESC;
+def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC;
+def INSERT_W_VIDX64_PSEUDO : INSERT_W_VIDX64_PSEUDO_DESC;
+def INSERT_D_VIDX64_PSEUDO : INSERT_D_VIDX64_PSEUDO_DESC;
+def INSERT_FW_VIDX64_PSEUDO : INSERT_FW_VIDX64_PSEUDO_DESC;
+def INSERT_FD_VIDX64_PSEUDO : INSERT_FD_VIDX64_PSEUDO_DESC;
+
def LD_B: LD_B_ENC, LD_B_DESC;
def LD_H: LD_H_ENC, LD_H_DESC;
def LD_W: LD_W_ENC, LD_W_DESC;
@@ -3805,3 +3838,93 @@ def : MSAPat<(f64 (vector_extract v2f64:$ws, i32:$idx)),
(f64 (EXTRACT_SUBREG (SPLAT_D v2f64:$ws,
i32:$idx),
sub_64))>;
+
+// Vector extraction with variable index (N64 ABI)
+def : MSAPat<
+ (i32 (vextract_sext_i8 v16i8:$ws, i64:$idx)),
+ (SRA (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_B v16i8:$ws,
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32),
+ (i32 24))>;
+def : MSAPat<
+ (i32 (vextract_sext_i16 v8i16:$ws, i64:$idx)),
+ (SRA (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_H v8i16:$ws,
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32),
+ (i32 16))>;
+def : MSAPat<
+ (i32 (vextract_sext_i32 v4i32:$ws, i64:$idx)),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_W v4i32:$ws,
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32)>;
+def : MSAPat<
+ (i64 (vextract_sext_i64 v2i64:$ws, i64:$idx)),
+ (COPY_TO_REGCLASS
+ (i64 (EXTRACT_SUBREG
+ (SPLAT_D v2i64:$ws,
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_64)),
+ GPR64), [HasMSA, IsGP64bit]>;
+
+def : MSAPat<
+ (i32 (vextract_zext_i8 v16i8:$ws, i64:$idx)),
+ (SRL (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_B v16i8:$ws,
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32),
+ (i32 24))>;
+def : MSAPat<
+ (i32 (vextract_zext_i16 v8i16:$ws, i64:$idx)),
+ (SRL (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_H v8i16:$ws,
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32),
+ (i32 16))>;
+def : MSAPat<
+ (i32 (vextract_zext_i32 v4i32:$ws, i64:$idx)),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG
+ (SPLAT_W v4i32:$ws,
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo)),
+ GPR32)>;
+def : MSAPat<
+ (i64 (vextract_zext_i64 v2i64:$ws, i64:$idx)),
+ (COPY_TO_REGCLASS
+ (i64 (EXTRACT_SUBREG
+ (SPLAT_D v2i64:$ws,
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_64)),
+ GPR64),
+ [HasMSA, IsGP64bit]>;
+
+def : MSAPat<
+ (f32 (vector_extract v4f32:$ws, i64:$idx)),
+ (f32 (EXTRACT_SUBREG
+ (SPLAT_W v4f32:$ws,
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_lo))>;
+def : MSAPat<
+ (f64 (vector_extract v2f64:$ws, i64:$idx)),
+ (f64 (EXTRACT_SUBREG
+ (SPLAT_D v2f64:$ws,
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+ sub_64))>;
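+
+// Lowering sketch for the patterns above (illustrative): the variable 64-bit
+// index is truncated to i32 (EXTRACT_SUBREG sub_32), the selected lane is
+// broadcast across the vector with SPLAT_{B,H,W,D}, the scalar is read from
+// the low subregister, and narrow elements get a final SRA/SRL to finish the
+// sign or zero extension.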
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 19babc7..0d1ee04 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMachineFunction.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
@@ -59,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const {
#endif
}
-MipsFunctionInfo::~MipsFunctionInfo() {
- for (StringMap<const MipsCallEntry *>::iterator
- I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E;
- ++I)
- delete I->getValue();
-
- for (const auto &Entry : GlobalCallEntries)
- delete Entry.second;
-}
+MipsFunctionInfo::~MipsFunctionInfo() {}
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
@@ -78,12 +71,19 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
if (GlobalBaseReg)
return GlobalBaseReg;
- const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+ MipsSubtarget const &STI =
+ static_cast<const MipsSubtarget &>(MF.getSubtarget());
const TargetRegisterClass *RC =
- ST.inMips16Mode() ? &Mips::CPU16RegsRegClass
- : ST.isABI_N64() ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ STI.inMips16Mode()
+ ? &Mips::CPU16RegsRegClass
+ : STI.inMicroMipsMode()
+ ? &Mips::GPRMM16RegClass
+ : static_cast<const MipsTargetMachine &>(MF.getTarget())
+ .getABI()
+ .IsN64()
+ ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
}
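// In effect (sketch): Mips16 code uses CPU16Regs, microMIPS uses GPRMM16,
// N64 uses GPR64, and everything else uses GPR32 for the global base
// register, which is created once and then cached.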
@@ -101,9 +101,10 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() {
void MipsFunctionInfo::createEhDataRegsFI() {
for (int I = 0; I < 4; ++I) {
- const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
- const TargetRegisterClass *RC = ST.isABI_N64() ?
- &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const TargetRegisterClass *RC =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64()
+ ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
@@ -116,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
- const MipsCallEntry *&E = ExternalCallEntries[Name];
+ std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
if (!E)
- E = new MipsCallEntry(Name);
+ E = llvm::make_unique<MipsCallEntry>(Name);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
- const MipsCallEntry *&E = GlobalCallEntries[Val];
+ std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
if (!E)
- E = new MipsCallEntry(Val);
+ E = llvm::make_unique<MipsCallEntry>(Val);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
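// Sketch: with unique_ptr ownership the maps free their MipsCallEntry values
// automatically (hence the now-empty destructor), and repeated callPtrInfo
// lookups for the same symbol return MachinePointerInfo backed by the same
// cached entry.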
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
index 217f307..32436ef 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -144,8 +144,9 @@ private:
int MoveF64ViaSpillFI;
/// MipsCallEntry maps.
- StringMap<const MipsCallEntry *> ExternalCallEntries;
- ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
+ StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries;
+ ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>>
+ GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index b011e8f..b18a673 100644
--- a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -8,15 +8,36 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsISelDAGToDAG.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "llvm/Support/Casting.h"
+#include "Mips.h"
+#include "MipsTargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
#define DEBUG_TYPE "mips-isel"
-namespace llvm {
+namespace {
+ class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+ : MachineFunctionPass(ID), TM(TM_) {}
+
+ // Pass Name
+ const char *getPassName() const override {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ protected:
+ MipsTargetMachine &TM;
+ };
+
+ char MipsModuleDAGToDAGISel::ID = 0;
+}
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runOnMachineFunction\n");
@@ -24,13 +45,6 @@ bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-char MipsModuleDAGToDAGISel::ID = 0;
-
-}
-
-
-llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) {
return new MipsModuleDAGToDAGISel(TM);
}
-
-
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h
deleted file mode 100644
index 85bae47..0000000
--- a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pass used to change the subtarget for the
-// Mips Instruction selector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-
-#include "Mips.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-class MipsModuleDAGToDAGISel : public MachineFunctionPass {
-public:
-
- static char ID;
-
- explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
- : MachineFunctionPass(ID), TM(TM_) {}
-
- // Pass Name
- const char *getPassName() const override {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-protected:
- MipsTargetMachine &TM;
-};
-
-/// createMipsISelDag - This pass converts a legalized DAG into a
-/// MIPS-specific DAG, ready for instruction scheduling.
-FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index 22c524e..7c940ee 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -174,7 +174,7 @@ void MBBInfo::postVisit() {
// OptimizePICCall methods.
bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) {
- if (F.getTarget().getSubtarget<MipsSubtarget>().inMips16Mode())
+ if (static_cast<const MipsSubtarget &>(F.getSubtarget()).inMips16Mode())
return false;
// Do a pre-order traversal of the dominator tree.
diff --git a/contrib/llvm/lib/Target/Mips/MipsOptionRecord.h b/contrib/llvm/lib/Target/Mips/MipsOptionRecord.h
index f82544a..746feab 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOptionRecord.h
+++ b/contrib/llvm/lib/Target/Mips/MipsOptionRecord.h
@@ -36,9 +36,8 @@ public:
class MipsRegInfoRecord : public MipsOptionRecord {
public:
- MipsRegInfoRecord(MipsELFStreamer *S, MCContext &Context,
- const MCSubtargetInfo &STI)
- : Streamer(S), Context(Context), STI(STI) {
+ MipsRegInfoRecord(MipsELFStreamer *S, MCContext &Context)
+ : Streamer(S), Context(Context) {
ri_gprmask = 0;
ri_cprmask[0] = ri_cprmask[1] = ri_cprmask[2] = ri_cprmask[3] = 0;
ri_gp_value = 0;
@@ -53,7 +52,7 @@ public:
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
- ~MipsRegInfoRecord() {}
+ ~MipsRegInfoRecord() override {}
void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
@@ -61,7 +60,6 @@ public:
private:
MipsELFStreamer *Streamer;
MCContext &Context;
- const MCSubtargetInfo &STI;
const MCRegisterClass *GPR32RegClass;
const MCRegisterClass *GPR64RegClass;
const MCRegisterClass *FGR32RegClass;
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
index 7aae964..b6cd791 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
@@ -11,14 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsOs16.h"
+#include "llvm/IR/Instructions.h"
+#include "Mips.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "mips-os16"
+using namespace llvm;
+#define DEBUG_TYPE "mips-os16"
static cl::opt<std::string> Mips32FunctionMask(
"mips32-function-mask",
@@ -27,70 +29,83 @@ static cl::opt<std::string> Mips32FunctionMask(
cl::Hidden);
namespace {
+ class MipsOs16 : public ModulePass {
+ public:
+ static char ID;
+
+ MipsOs16() : ModulePass(ID) {}
+
+ const char *getPassName() const override {
+ return "MIPS Os16 Optimization";
+ }
+
+ bool runOnModule(Module &M) override;
+ };
+
+ char MipsOs16::ID = 0;
+}
- // Figure out if we need float point based on the function signature.
- // We need to move variables in and/or out of floating point
- // registers because of the ABI
- //
- bool needsFPFromSig(Function &F) {
- Type* RetType = F.getReturnType();
- switch (RetType->getTypeID()) {
+// Figure out if we need floating point based on the function signature.
+// We need to move variables in and/or out of floating-point registers
+// because of the ABI.
+//
+static bool needsFPFromSig(Function &F) {
+  Type *RetType = F.getReturnType();
+ switch (RetType->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ ;
+ }
+  if (F.arg_size() >= 1) {
+ Argument &Arg = F.getArgumentList().front();
+ switch (Arg.getType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
;
}
- if (F.arg_size() >=1) {
- Argument &Arg = F.getArgumentList().front();
- switch (Arg.getType()->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- ;
- }
- }
- return false;
}
+ return false;
+}
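+// e.g. "float f(float)" needs FP: under the hard-float ABI both its return
+// value and its first argument travel through FP registers.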
- // Figure out if the function will need floating point operations
- //
- bool needsFP(Function &F) {
- if (needsFPFromSig(F))
- return true;
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+// Figure out if the function will need floating point operations
+//
+static bool needsFP(Function &F) {
+ if (needsFPFromSig(F))
+ return true;
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
- const Instruction &Inst = *I;
- switch (Inst.getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FCmp:
+ const Instruction &Inst = *I;
+ switch (Inst.getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ default:
+ ;
+ }
+      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+        DEBUG(dbgs() << "Working on call\n");
+        // Indirect calls have no called Function; guard before dereferencing.
+        if (Function *F_ = CI->getCalledFunction())
+          if (needsFPFromSig(*F_))
+            return true;
- default:
- ;
- }
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- DEBUG(dbgs() << "Working on call" << "\n");
- Function &F_ = *CI->getCalledFunction();
- if (needsFPFromSig(F_))
- return true;
- }
}
- return false;
- }
+ }
+ return false;
}
-namespace llvm {
bool MipsOs16::runOnModule(Module &M) {
@@ -136,12 +151,6 @@ bool MipsOs16::runOnModule(Module &M) {
return modified;
}
-char MipsOs16::ID = 0;
-
-}
-
-ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) {
return new MipsOs16;
}
-
-
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.h b/contrib/llvm/lib/Target/Mips/MipsOs16.h
deleted file mode 100644
index 77183ec..0000000
--- a/contrib/llvm/lib/Target/Mips/MipsOs16.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//===---- MipsOs16.h for Mips Option -Os16 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an optimization phase for the MIPS target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class MipsOs16 : public ModulePass {
-
-public:
- static char ID;
-
- MipsOs16() : ModulePass(ID) {
-
- }
-
- const char *getPassName() const override {
- return "MIPS Os16 Optimization";
- }
-
- bool runOnModule(Module &M) override;
-
-};
-
-ModulePass *createMipsOs16(MipsTargetMachine &TM);
-
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 20ef3f3..f72fb4d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -17,6 +17,7 @@
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -42,15 +43,15 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
- : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
+MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) {}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
const TargetRegisterClass *
MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
- return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ MipsABIInfo ABI = MF.getSubtarget<MipsSubtarget>().getABI();
+ return ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
}
unsigned
@@ -81,6 +82,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// Mips Callee Saved Registers
const MCPhysReg *
MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -99,8 +101,10 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_O32_SaveList;
}
-const uint32_t*
-MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+const uint32_t *
+MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
@@ -134,6 +138,7 @@ getReservedRegs(const MachineFunction &MF) const {
};
BitVector Reserved(getNumRegs());
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
typedef TargetRegisterClass::const_iterator RegIter;
for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
@@ -167,7 +172,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*Reg);
}
// Reserve FP if this function should have a dedicated frame pointer register.
- if (MF.getSubtarget().getFrameLowering()->hasFP(MF)) {
+ if (Subtarget.getFrameLowering()->hasFP(MF)) {
if (Subtarget.inMips16Mode())
Reserved.set(Mips::S0);
else {
@@ -256,8 +261,10 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- bool IsN64 = Subtarget.isABI_N64();
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ bool IsN64 =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
if (Subtarget.inMips16Mode())
return TFI->hasFP(MF) ? Mips::S0 : Mips::SP;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 9ec4a38..76e84bd 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -21,15 +21,9 @@
#include "MipsGenRegisterInfo.inc"
namespace llvm {
-class MipsSubtarget;
-class Type;
-
class MipsRegisterInfo : public MipsGenRegisterInfo {
-protected:
- const MipsSubtarget &Subtarget;
-
public:
- MipsRegisterInfo(const MipsSubtarget &Subtarget);
+ MipsRegisterInfo();
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -47,9 +41,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
static const uint32_t *getMips16RetHelperMask();
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -63,9 +57,6 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = nullptr) const;
-
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index 1eb8d9a..7497a25 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -302,6 +302,16 @@ def GPRMM16Zero : RegisterClass<"Mips", [i32], 32, (add
// Return Values and Arguments
V0, V1, A0, A1, A2, A3)>;
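+// Subset of GPRs encodable by the microMIPS MOVEP instruction.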
+def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ ZERO,
+ // Callee save
+ S1,
+ // Return Values and Arguments
+ V0, V1,
+ // Callee save
+ S0, S2, S3, S4)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -461,6 +471,11 @@ def GPRMM16AsmOperandZero : MipsAsmRegOperand {
let PredicateMethod = "isMM16AsmRegZero";
}
+def GPRMM16AsmOperandMoveP : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMoveP";
+ let PredicateMethod = "isMM16AsmRegMoveP";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -524,6 +539,10 @@ def GPRMM16OpndZero : RegisterOperand<GPRMM16Zero> {
let ParserMatchClass = GPRMM16AsmOperandZero;
}
+def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> {
+ let ParserMatchClass = GPRMM16AsmOperandMoveP;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 97d9edf..19efa59 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -71,11 +71,17 @@ private:
MachineFunction &MF;
MachineRegisterInfo &MRI;
+ const MipsSubtarget &Subtarget;
+ const MipsSEInstrInfo &TII;
+ const MipsRegisterInfo &RegInfo;
};
}
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
- : MF(MF_), MRI(MF.getRegInfo()) {}
+ : MF(MF_), MRI(MF.getRegInfo()),
+ Subtarget(static_cast<const MipsSubtarget &>(MF.getSubtarget())),
+ TII(*static_cast<const MipsSEInstrInfo *>(Subtarget.getInstrInfo())),
+ RegInfo(*Subtarget.getRegisterInfo()) {}
bool ExpandPseudo::expand() {
bool Expanded = false;
@@ -146,11 +152,6 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
@@ -166,11 +167,6 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
@@ -189,11 +185,6 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
@@ -219,11 +210,6 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
@@ -254,11 +240,6 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
// mfhi $vr1, src
// copy dst_hi, $vr1
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize);
@@ -298,16 +279,8 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
// register). Unfortunately, we have to make this decision before register
// allocation so for now we use a spill/reload sequence for all
// double-precision values regardless of being an odd/even register.
-
- const TargetMachine &TM = MF.getTarget();
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
- const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
-
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg();
unsigned HiReg = I->getOperand(2).getReg();
@@ -327,11 +300,11 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
if (!Subtarget.isLittle())
std::swap(LoReg, HiReg);
- TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI,
- 0);
- TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI,
- 4);
- TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, 0);
+ TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC,
+ &RegInfo, 0);
+ TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC,
+ &RegInfo, 4);
+ TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, 0);
return true;
}
@@ -359,15 +332,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// allocation so for now we use a spill/reload sequence for all
// double-precision values regardless of being an odd/even register.
- const TargetMachine &TM = MF.getTarget();
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
- const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
- TM.getSubtargetImpl()->getInstrInfo());
- const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
-
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
@@ -386,9 +352,9 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
- TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI,
- 0);
- TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, Offset);
+ TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC,
+ &RegInfo, 0);
+ TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset);
return true;
}
@@ -398,33 +364,24 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
: MipsFrameLowering(STI, STI.stackAlignment()) {}
-unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
- static const unsigned EhDataReg[] = {
- Mips::A0, Mips::A1, Mips::A2, Mips::A3
- };
- static const unsigned EhDataReg64[] = {
- Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
- };
-
- return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I];
-}
-
-void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
+ const MipsRegisterInfo &RegInfo =
+ *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ MipsABIInfo ABI = STI.getABI();
+ unsigned SP = ABI.GetStackPtr();
+ unsigned FP = ABI.GetFramePtr();
+ unsigned ZERO = ABI.GetNullPtr();
+ unsigned ADDu = ABI.GetPtrAdduOp();
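+  // (SP_64/FP_64/ZERO_64/DADDu under N64; SP/FP/ZERO/ADDu otherwise.)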
// First, compute final stack size.
uint64_t StackSize = MFI->getStackSize();
@@ -507,21 +464,21 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
}
if (MipsFI->callsEhReturn()) {
- const TargetRegisterClass *RC = STI.isABI_N64() ?
- &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const TargetRegisterClass *PtrRC =
+ ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
// Insert instructions that spill eh data registers.
for (int I = 0; I < 4; ++I) {
- if (!MBB.isLiveIn(ehDataReg(I)))
- MBB.addLiveIn(ehDataReg(I));
- TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false,
- MipsFI->getEhDataRegFI(I), RC, &RegInfo);
+ if (!MBB.isLiveIn(ABI.GetEhDataReg(I)))
+ MBB.addLiveIn(ABI.GetEhDataReg(I));
+ TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false,
+ MipsFI->getEhDataRegFI(I), PtrRC, &RegInfo);
}
// Emit .cfi_offset directives for eh data registers.
for (int I = 0; I < 4; ++I) {
int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
- unsigned Reg = MRI->getDwarfRegNum(ehDataReg(I), true);
+ unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -550,15 +507,16 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
+ const MipsRegisterInfo &RegInfo =
+ *static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
DebugLoc dl = MBBI->getDebugLoc();
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ MipsABIInfo ABI = STI.getABI();
+ unsigned SP = ABI.GetStackPtr();
+ unsigned FP = ABI.GetFramePtr();
+ unsigned ZERO = ABI.GetNullPtr();
+ unsigned ADDu = ABI.GetPtrAdduOp();
// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
@@ -573,8 +531,8 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MipsFI->callsEhReturn()) {
- const TargetRegisterClass *RC = STI.isABI_N64() ?
- &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const TargetRegisterClass *RC =
+ ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
// Find first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;
@@ -583,8 +541,8 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
// Insert instructions that restore eh data registers.
for (int J = 0; J < 4; ++J) {
- TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J),
- RC, &RegInfo);
+ TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J),
+ MipsFI->getEhDataRegFI(J), RC, &RegInfo);
}
}
@@ -605,7 +563,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
MachineBasicBlock *EntryBlock = MF->begin();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
@@ -641,32 +599,13 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
!MFI->hasVarSizedObjects();
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void MipsSEFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- TII.adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ MipsABIInfo ABI = STI.getABI();
+ unsigned FP = ABI.GetFramePtr();
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
@@ -695,8 +634,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (isInt<16>(MaxSPOffset))
return;
- const TargetRegisterClass *RC = STI.isABI_N64() ?
- &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const TargetRegisterClass *RC =
+ ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index 0eca1df..2fcd6bb 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -24,13 +24,9 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 8c2620c..990a2f8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
if (Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
@@ -130,17 +130,17 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
-
- RC = (Subtarget->isABI_N64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
+ RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
- if (Subtarget->isABI_N64()) {
+ if (ABI.IsN64()) {
MF.getRegInfo().addLiveIn(Mips::T9_64);
MBB.addLiveIn(Mips::T9_64);
@@ -172,7 +172,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MF.getRegInfo().addLiveIn(Mips::T9);
MBB.addLiveIn(Mips::T9);
- if (Subtarget->isABI_N32()) {
+ if (ABI.IsN32()) {
// lui $v0, %hi(%neg(%gp_rel(fname)))
// addu $v1, $v0, $t9
// addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
@@ -185,7 +185,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
return;
}
- assert(Subtarget->isABI_O32());
+ assert(ABI.IsO32());
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
@@ -253,9 +253,10 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
// that SLTu64 produces an i32. We need to fix this in the long run but for
// now, just make the DAG type-correct by asserting the upper bits are zero.
Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
- CurDAG->getTargetConstant(0, VT),
+ CurDAG->getTargetConstant(0, DL, VT),
SDValue(Carry, 0),
- CurDAG->getTargetConstant(Mips::sub_32, VT));
+ CurDAG->getTargetConstant(Mips::sub_32, DL,
+ VT));
}
// Generate a second addition only if we know that RHS is not a
@@ -276,7 +277,7 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base,
EVT ValTy = Addr.getValueType();
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy);
return true;
}
return false;
@@ -298,7 +299,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base,
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr),
+ ValTy);
return true;
}
}
@@ -372,7 +374,7 @@ bool MipsSEDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base,
bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType());
return true;
}
@@ -382,6 +384,17 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
selectAddrDefault(Addr, Base, Offset);
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9))
+ return true;
+
+ return false;
+}
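+// (9-bit signed offsets are the common denominator used by the 'R' and 'ZC'
+// inline-asm constraints below.)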
+
bool MipsSEDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
if (selectAddrFrameIndex(Addr, Base, Offset))
@@ -405,12 +418,45 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16))
+ return true;
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
return selectAddrRegImm12(Addr, Base, Offset) ||
selectAddrDefault(Addr, Base, Offset);
}
+bool MipsSEDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 7)) {
+ if (isa<FrameIndexSDNode>(Base))
+ return false;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Offset)) {
+ unsigned CnstOff = CN->getZExtValue();
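+      // Offsets are scaled by 4 and must fit a 4-bit field, i.e. be a
+      // multiple of 4 in [0, 60] (mask 0x3c).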
+ return (CnstOff == (CnstOff & 0x3c));
+ }
+
+ return false;
+ }
+
+ // For all other cases where "lw" would be selected, don't select "lw16"
+ // because it would result in additional instructions to prepare operands.
+ if (selectAddrRegImm(Addr, Base, Offset))
+ return false;
+
+ return selectAddrDefault(Addr, Base, Offset);
+}
+
bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
if (selectAddrRegImm10(Addr, Base, Offset))
@@ -427,7 +473,8 @@ bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
// Returns true and sets Imm if:
// * MSA is enabled
// * N is an ISD::BUILD_VECTOR representing a constant splat
-bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
+ unsigned MinSizeInBits) const {
if (!Subtarget->hasMSA())
return false;
@@ -440,9 +487,8 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs, 8,
- !Subtarget->isLittle()))
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ MinSizeInBits, !Subtarget->isLittle()))
return false;
Imm = SplatValue;
@@ -475,11 +521,12 @@ selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat (N.getNode(), ImmValue) &&
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+
if (( Signed && ImmValue.isSignedIntN(ImmBitSize)) ||
(!Signed && ImmValue.isIntN(ImmBitSize))) {
- Imm = CurDAG->getTargetConstant(ImmValue, EltTy);
+ Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy);
return true;
}
}
@@ -550,12 +597,12 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat (N.getNode(), ImmValue) &&
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
int32_t Log2 = ImmValue.exactLogBase2();
if (Log2 != -1) {
- Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
return true;
}
}
@@ -581,14 +628,15 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue) &&
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
// Extract the run of set bits starting with bit zero from the bitwise
// inverse of ImmValue, and test that the inverse of this is the same
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ EltTy);
return true;
}
}
@@ -614,12 +662,13 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue) &&
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ EltTy);
return true;
}
}
@@ -635,12 +684,12 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue) &&
+ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
int32_t Log2 = (~ImmValue).exactLogBase2();
if (Log2 != -1) {
- Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
return true;
}
}
@@ -718,7 +767,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
SDLoc DL(CN);
SDNode *RegOpnd;
SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
+ DL, MVT::i64);
// The first instruction can be a LUi which is different from other
// instructions (ADDiu, ORI and SLL) in that it does not have a register
@@ -733,7 +782,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
// The remaining instructions in the sequence are handled here.
for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), DL,
MVT::i64);
RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
SDValue(RegOpnd, 0), ImmOpnd);
@@ -804,7 +853,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node),
+ CurDAG->getMachineNode(RdhwrOpc, DL,
Node->getValueType(0),
CurDAG->getRegister(Mips::HWR29, MVT::i32));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
@@ -867,10 +916,10 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
if (!SplatValue.isSignedIntN(10))
return std::make_pair(false, nullptr);
- SDValue Imm = CurDAG->getTargetConstant(SplatValue,
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
ViaVecTy.getVectorElementType());
- SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm);
+ SDNode *Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
if (ResVecTy != ViaVecTy) {
// If LdiOp is writing to a different register class to ResVecTy, then
@@ -880,9 +929,9 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
const TargetLowering *TLI = getTargetLowering();
MVT ResVecTySimple = ResVecTy.getSimpleVT();
const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple);
- Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node),
+ Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, DL,
ResVecTy, SDValue(Res, 0),
- CurDAG->getTargetConstant(RC->getID(),
+ CurDAG->getTargetConstant(RC->getID(), DL,
MVT::i32));
}
@@ -894,6 +943,73 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(false, nullptr);
}
+bool MipsSEDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Base, Offset;
+
+  switch (ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ // All memory constraints can at least accept raw pointers.
+ case InlineAsm::Constraint_i:
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
+ case InlineAsm::Constraint_m:
+ if (selectAddrRegImm16(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
+ case InlineAsm::Constraint_R:
+ // The 'R' constraint is supposed to be much more complicated than this.
+ // However, it's becoming less useful due to architectural changes and
+ // ought to be replaced by other constraints such as 'ZC'.
+    // For now, support 9-bit signed offsets, which every subtarget supports
+    // for all instructions.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
+ case InlineAsm::Constraint_ZC:
+ // ZC matches whatever the pref, ll, and sc instructions can handle for the
+ // given subtarget.
+ if (Subtarget->inMicroMipsMode()) {
+ // On microMIPS, they can handle 12-bit offsets.
+ if (selectAddrRegImm12(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (Subtarget->hasMips32r6()) {
+ // On MIPS32r6/MIPS64r6, they can only handle 9-bit offsets.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (selectAddrRegImm16(Op, Base, Offset)) {
+ // Prior to MIPS32r6/MIPS64r6, they can handle 16-bit offsets.
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+    // In all cases, a zero offset is acceptable.
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
+ }
+ return true;
+}
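+// Illustrative C-level uses (illustrative only; names assumed):
+//   int v, *ptr;
+//   asm ("lw %0, %1" : "=r"(v) : "m"(*ptr));  // 'm'  -> base + 16-bit offset
+//   asm ("ll %0, %1" : "=r"(v) : "ZC"(*ptr)); // 'ZC' -> 16/12/9-bit offset,
+//                                             //         depending on subtarget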
+
FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
return new MipsSEDAGToDAGISel(TM);
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2e11fa7..a894034 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -56,20 +56,30 @@ private:
bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
+ bool selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ bool selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
+ bool selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
+
bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
/// \brief Select constant vector splats.
- bool selectVSplat(SDNode *N, APInt &Imm) const override;
+ bool selectVSplat(SDNode *N, APInt &Imm,
+ unsigned MinSizeInBits) const override;
/// \brief Select constant vector splats whose value fits in a given integer.
bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
unsigned ImmBitSize) const;
@@ -108,6 +118,10 @@ private:
// Insert instructions to initialize the global base register in the
// first MBB of the function.
void initGlobalBaseReg(MachineFunction &MF);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
};
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 2c033ce..ae2837a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -99,7 +99,7 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::XOR);
}
- if (!Subtarget.abiUsesSoftFloat()) {
+ if (!Subtarget.useSoftFloat()) {
addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
// When dealing with single precision only, use libcalls
@@ -224,7 +224,7 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
}
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget.getRegisterInfo());
}
const MipsTargetLowering *
@@ -800,7 +800,7 @@ static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT,
// Return 0.
if (C == 0)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
// Return x.
if (C == 1)
@@ -809,7 +809,7 @@ static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT,
// If c is a power of 2, return (shl x, log2(c)).
if (isPowerOf2_64(C))
return DAG.getNode(ISD::SHL, DL, VT, X,
- DAG.getConstant(Log2_64(C), ShiftTy));
+ DAG.getConstant(Log2_64(C), DL, ShiftTy));
unsigned Log2Ceil = Log2_64_Ceil(C);
uint64_t Floor = 1LL << Log2_64(C);
@@ -864,8 +864,9 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
(SplatValue.getZExtValue() >= EltSize))
return SDValue();
- return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0),
- DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
+ DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
@@ -1145,16 +1146,22 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::INSERT_FD_PSEUDO:
return emitINSERT_FD(MI, BB);
case Mips::INSERT_B_VIDX_PSEUDO:
+ case Mips::INSERT_B_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 1, false);
case Mips::INSERT_H_VIDX_PSEUDO:
+ case Mips::INSERT_H_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 2, false);
case Mips::INSERT_W_VIDX_PSEUDO:
+ case Mips::INSERT_W_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 4, false);
case Mips::INSERT_D_VIDX_PSEUDO:
+ case Mips::INSERT_D_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 8, false);
case Mips::INSERT_FW_VIDX_PSEUDO:
+ case Mips::INSERT_FW_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 4, true);
case Mips::INSERT_FD_VIDX_PSEUDO:
+ case Mips::INSERT_FD_VIDX64_PSEUDO:
return emitINSERT_DF_VIDX(MI, BB, 8, true);
case Mips::FILL_FW_PSEUDO:
return emitFILL_FW(MI, BB);
@@ -1212,7 +1219,7 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Nd.getAlignment());
// i32 load from higher address.
- Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
MachinePointerInfo(), Nd.isVolatile(),
Nd.isNonTemporal(), Nd.isInvariant(),
@@ -1237,9 +1244,9 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
EVT PtrVT = Ptr.getValueType();
SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Val, DAG.getConstant(0, MVT::i32));
+ Val, DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Val, DAG.getConstant(1, MVT::i32));
+ Val, DAG.getConstant(1, DL, MVT::i32));
if (!Subtarget.isLittle())
std::swap(Lo, Hi);
@@ -1250,7 +1257,7 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Nd.getAAInfo());
// i32 store to higher address.
- Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(),
std::min(Nd.getAlignment(), 4U), Nd.getAAInfo());
@@ -1283,9 +1290,9 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}
@@ -1381,7 +1388,7 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
SDValue LaneB = Op->getOperand(2);
if (ResVecTy == MVT::v2i64) {
- LaneA = DAG.getConstant(0, MVT::i32);
+ LaneA = DAG.getConstant(0, DL, MVT::i32);
ViaVecTy = MVT::v4i32;
} else
LaneA = LaneB;
@@ -1399,7 +1406,8 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
}
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
- return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
+ return DAG.getConstant(Op->getConstantOperandVal(ImmOp), SDLoc(Op),
+ Op->getValueType(0));
}
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
@@ -1415,7 +1423,7 @@ static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, DL, MVT::i32));
SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
}
@@ -1451,8 +1459,9 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
- SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
- SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
+ SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
+ MVT::i32);
+ SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);
if (BigEndian)
std::swap(BitImmLoOp, BitImmHiOp);
@@ -1474,8 +1483,8 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
- Exp2Imm =
- DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
+ Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
+ Exp2Imm);
}
return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
@@ -1484,7 +1493,7 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
EVT ResTy = Op->getValueType(0);
SDLoc DL(Op);
- SDValue One = DAG.getConstant(1, ResTy);
+ SDValue One = DAG.getConstant(1, DL, ResTy);
SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
@@ -1496,7 +1505,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
EVT ResTy = Op->getValueType(0);
APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
<< cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
- SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
+ SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
@@ -1578,8 +1587,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
Op->getConstantOperandVal(3));
return DAG.getNode(ISD::VSELECT, DL, VecTy,
- DAG.getConstant(Mask, VecTy, true), Op->getOperand(2),
- Op->getOperand(1));
+ DAG.getConstant(Mask, DL, VecTy, true),
+ Op->getOperand(2), Op->getOperand(1));
}
case Intrinsic::mips_binsri_b:
case Intrinsic::mips_binsri_h:
@@ -1591,8 +1600,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
Op->getConstantOperandVal(3));
return DAG.getNode(ISD::VSELECT, DL, VecTy,
- DAG.getConstant(Mask, VecTy, true), Op->getOperand(2),
- Op->getOperand(1));
+ DAG.getConstant(Mask, DL, VecTy, true),
+ Op->getOperand(2), Op->getOperand(1));
}
case Intrinsic::mips_bmnz_v:
return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
@@ -1613,7 +1622,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bneg_w:
case Intrinsic::mips_bneg_d: {
EVT VecTy = Op->getValueType(0);
- SDValue One = DAG.getConstant(1, VecTy);
+ SDValue One = DAG.getConstant(1, DL, VecTy);
return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1649,7 +1658,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bset_w:
case Intrinsic::mips_bset_d: {
EVT VecTy = Op->getValueType(0);
- SDValue One = DAG.getConstant(1, VecTy);
+ SDValue One = DAG.getConstant(1, DL, VecTy);
return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1836,11 +1845,9 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_fill_h:
case Intrinsic::mips_fill_w:
case Intrinsic::mips_fill_d: {
- SmallVector<SDValue, 16> Ops;
EVT ResTy = Op->getValueType(0);
-
- for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
- Ops.push_back(Op->getOperand(1));
+ SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
+ Op->getOperand(1));
// If ResTy is v2i64 then the type legalizer will break this node down into
// an equivalent v4i32.
@@ -1925,7 +1932,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_insve_d:
return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
case Intrinsic::mips_ldi_b:
case Intrinsic::mips_ldi_h:
case Intrinsic::mips_ldi_w:
@@ -2291,9 +2298,9 @@ lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
static bool isConstantOrUndef(const SDValue Op) {
if (Op->getOpcode() == ISD::UNDEF)
return true;
- if (dyn_cast<ConstantSDNode>(Op))
+ if (isa<ConstantSDNode>(Op))
return true;
- if (dyn_cast<ConstantFPSDNode>(Op))
+ if (isa<ConstantFPSDNode>(Op))
return true;
return false;
}
@@ -2365,7 +2372,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
// SelectionDAG::getConstant will promote SplatValue appropriately.
- SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
// Bitcast to the type we originally wanted
if (ViaVecTy != ResTy)
@@ -2387,7 +2394,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
Node->getOperand(i),
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, DL, MVT::i32));
}
return Vector;
}
@@ -2403,7 +2410,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
// It is therefore possible to lower into SHF when the mask takes the form:
// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
-// necessary in order to fit the above form.
+// necessary in order to fit the above forms.
//
// For example:
// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
@@ -2457,181 +2464,331 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
Imm |= Idx & 0x3;
}
- return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
- DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+ SDLoc DL(Op);
+ return DAG.getNode(MipsISD::SHF, DL, ResTy,
+ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+}
+
+/// Determine whether a range fits a regular pattern of values.
+/// This function accounts for the possibility of jumping over the End iterator.
+template <typename ValType>
+static bool
+fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
+ unsigned CheckStride,
+ typename SmallVectorImpl<ValType>::const_iterator End,
+ ValType ExpectedIndex, unsigned ExpectedIndexStride) {
+ auto &I = Begin;
+
+ while (I != End) {
+ if (*I != -1 && *I != ExpectedIndex)
+ return false;
+ ExpectedIndex += ExpectedIndexStride;
+
+ // Incrementing past End is undefined behaviour so we must increment one
+ // step at a time and check for End at each step.
+ for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
+ ; // Empty loop body.
+ }
+ return true;
+}
+
+// Determine whether VECTOR_SHUFFLE is a SPLATI.
+//
+// It is a SPLATI when the mask is:
+// <x, x, x, ...>
+// where x is any valid index.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert((Indices.size() % 2) == 0);
+
+ int SplatIndex = -1;
+ for (const auto &V : Indices) {
+ if (V != -1) {
+ SplatIndex = V;
+ break;
+ }
+ }
+
+ return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
+ 0);
}
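To see how fitsRegularPattern and the SPLATI check behave, here is a minimal standalone sketch that mirrors their logic (plain std::vector instead of SmallVectorImpl; the sample mask is illustrative, not taken from the patch):

#include <cassert>
#include <vector>

// Mirrors the patch's fitsRegularPattern: every CheckStride-th element
// starting at Begin must be -1 (undef) or equal to ExpectedIndex, which
// advances by ExpectedIndexStride after each visited element.
static bool fitsRegularPattern(std::vector<int>::const_iterator Begin,
                               unsigned CheckStride,
                               std::vector<int>::const_iterator End,
                               int ExpectedIndex,
                               unsigned ExpectedIndexStride) {
  auto I = Begin;
  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;
    // Advance one element at a time so we never increment past End.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ;
  }
  return true;
}

int main() {
  // A v4i32 splat of element 2 with one undef lane. A stride of 0 keeps
  // the expected index fixed, which is exactly the SPLATI check above.
  std::vector<int> Splat = {2, -1, 2, 2};
  assert(fitsRegularPattern(Splat.begin(), 1, Splat.end(), 2, 0));
  return 0;
}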
// Lower VECTOR_SHUFFLE into ILVEV (if possible).
//
// ILVEV interleaves the even elements from each vector.
//
-// It is possible to lower into ILVEV when the mask takes the form:
-// <0, n, 2, n+2, 4, n+4, ...>
+// It is possible to lower into ILVEV when the mask consists of two of the
+// following forms interleaved:
+// <0, 2, 4, ...>
+// <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
+// For example:
+// <0, 0, 2, 2, 4, 4, ...>
+// <0, n, 2, n+2, 4, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
+// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- int WsIdx = 0;
- int WtIdx = ResTy.getVectorNumElements();
+ assert((Indices.size() % 2) == 0);
+
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &End = Indices.end();
+
+ // Check even elements are taken from the even elements of one half or the
+ // other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); i += 2) {
- if (Indices[i] != -1 && Indices[i] != WsIdx)
- return SDValue();
- if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
- return SDValue();
- WsIdx += 2;
- WtIdx += 2;
- }
+ // Check odd elements are taken from the even elements of one half or the
+ // other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
}
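Continuing the same sketch (hypothetical helper, illustrative mask), the two stride-2 probes that ILVEV performs can be exercised inside the sketch's main(). For this v8i16 ilvev-style mask the patch would pick Wt = operand 0 and Ws = operand 1:

  // Even mask positions walk operand 0's even elements (0, 2, 4, 6)...
  std::vector<int> Ilvev = {0, 8, 2, 10, 4, 12, 6, 14};
  assert(fitsRegularPattern(Ilvev.begin(), 2, Ilvev.end(), 0, 2));
  // ...and odd positions walk operand 1's even elements, which start at
  // Indices.size() == 8 in the combined two-operand numbering.
  assert(fitsRegularPattern(Ilvev.begin() + 1, 2, Ilvev.end(), 8, 2));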
// Lower VECTOR_SHUFFLE into ILVOD (if possible).
//
// ILVOD interleaves the odd elements from each vector.
//
-// It is possible to lower into ILVOD when the mask takes the form:
-// <1, n+1, 3, n+3, 5, n+5, ...>
+// It is possible to lower into ILVOD when the mask consists of two of the
+// following forms interleaved:
+// <1, 3, 5, ...>
+// <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
+// For example:
+// <1, 1, 3, 3, 5, 5, ...>
+// <1, n+1, 3, n+3, 5, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
+// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- int WsIdx = 1;
- int WtIdx = ResTy.getVectorNumElements() + 1;
+ assert((Indices.size() % 2) == 0);
+
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &End = Indices.end();
+
+ // Check even elements are taken from the odd elements of one half or the
+ // other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); i += 2) {
- if (Indices[i] != -1 && Indices[i] != WsIdx)
- return SDValue();
- if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
- return SDValue();
- WsIdx += 2;
- WtIdx += 2;
- }
+ // Check odd elements are taken from the odd elements of one half or the
+ // other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
}
-// Lower VECTOR_SHUFFLE into ILVL (if possible).
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
//
-// ILVL interleaves consecutive elements from the left half of each vector.
+// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
+// each vector.
//
-// It is possible to lower into ILVL when the mask takes the form:
-// <0, n, 1, n+1, 2, n+2, ...>
+// It is possible to lower into ILVR when the mask consists of two of the
+// following forms interleaved:
+// <0, 1, 2, ...>
+// <n, n+1, n+2, ...>
// where n is the number of elements in the vector.
+// For example:
+// <0, 0, 1, 1, 2, 2, ...>
+// <0, n, 1, n+1, 2, n+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
-static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- int WsIdx = 0;
- int WtIdx = ResTy.getVectorNumElements();
+ assert((Indices.size() % 2) == 0);
+
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &End = Indices.end();
+
+ // Check even elements are taken from the right (lowest-indexed) elements of
+ // one half or the other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); i += 2) {
- if (Indices[i] != -1 && Indices[i] != WsIdx)
- return SDValue();
- if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
- return SDValue();
- WsIdx ++;
- WtIdx ++;
- }
+ // Check odd elements are taken from the right (lowest-indexed) elements of
+ // one half or the other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
}
-// Lower VECTOR_SHUFFLE into ILVR (if possible).
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
//
-// ILVR interleaves consecutive elements from the right half of each vector.
+// ILVL interleaves consecutive elements from the left (highest-indexed) half
+// of each vector.
//
-// It is possible to lower into ILVR when the mask takes the form:
-// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// It is possible to lower into ILVL when the mask consists of two of the
+// following forms interleaved:
+// <x, x+1, x+2, ...>
+// <n+x, n+x+1, n+x+2, ...>
// where n is the number of elements in the vector and x is half n.
+// For example:
+// <x, x, x+1, x+1, x+2, x+2, ...>
+// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
-static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- unsigned NumElts = ResTy.getVectorNumElements();
- int WsIdx = NumElts / 2;
- int WtIdx = NumElts + NumElts / 2;
+ assert((Indices.size() % 2) == 0);
+
+ unsigned HalfSize = Indices.size() / 2;
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &End = Indices.end();
+
+ // Check even elements are taken from the left (highest-indexed) elements of
+ // one half or the other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); i += 2) {
- if (Indices[i] != -1 && Indices[i] != WsIdx)
- return SDValue();
- if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
- return SDValue();
- WsIdx ++;
- WtIdx ++;
- }
+ // Check odd elements are taken from the left (highest-indexed) elements of
+ // one half or the other and pick an operand accordingly.
+ if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
+ 1))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into PCKEV (if possible).
//
// PCKEV copies the even elements of each vector into the result vector.
//
-// It is possible to lower into PCKEV when the mask takes the form:
-// <0, 2, 4, ..., n, n+2, n+4, ...>
+// It is possible to lower into PCKEV when the mask consists of two of the
+// following forms concatenated:
+// <0, 2, 4, ...>
+// <n, n+2, n+4, ...>
// where n is the number of elements in the vector.
+// For example:
+// <0, 2, 4, ..., 0, 2, 4, ...>
+// <0, 2, 4, ..., n, n+2, n+4, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
+// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- int Idx = 0;
+ assert((Indices.size() % 2) == 0);
+
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &Mid = Indices.begin() + Indices.size() / 2;
+ const auto &End = Indices.end();
+
+ if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); ++i) {
- if (Indices[i] != -1 && Indices[i] != Idx)
- return SDValue();
- Idx += 2;
- }
+ if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
}
// Lower VECTOR_SHUFFLE into PCKOD (if possible).
//
// PCKOD copies the odd elements of each vector into the result vector.
//
-// It is possible to lower into PCKOD when the mask takes the form:
-// <1, 3, 5, ..., n+1, n+3, n+5, ...>
+// It is possible to lower into PCKOD when the mask consists of two of the
+// following forms concatenated:
+// <1, 3, 5, ...>
+// <n+1, n+3, n+5, ...>
// where n is the number of elements in the vector.
+// For example:
+// <1, 3, 5, ..., 1, 3, 5, ...>
+// <1, 3, 5, ..., n+1, n+3, n+5, ...>
//
// When undef's appear in the mask they are treated as if they were whatever
-// value is necessary in order to fit the above form.
+// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
SmallVector<int, 16> Indices,
SelectionDAG &DAG) {
- assert ((Indices.size() % 2) == 0);
- int Idx = 1;
+ assert((Indices.size() % 2) == 0);
+
+ SDValue Wt;
+ SDValue Ws;
+ const auto &Begin = Indices.begin();
+ const auto &Mid = Indices.begin() + Indices.size() / 2;
+ const auto &End = Indices.end();
+
+ if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
+ Wt = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
+ Wt = Op->getOperand(1);
+ else
+ return SDValue();
- for (unsigned i = 0; i < Indices.size(); ++i) {
- if (Indices[i] != -1 && Indices[i] != Idx)
- return SDValue();
- Idx += 2;
- }
+ if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
+ Ws = Op->getOperand(0);
+ else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
+ Ws = Op->getOperand(1);
+ else
+ return SDValue();
- return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0),
- Op->getOperand(1));
+ return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
}
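The PCKEV/PCKOD matchers differ from the interleave matchers only in splitting the mask at Mid and checking the two halves as contiguous runs. In the same sketch (illustrative mask, hypothetical helper):

  // v8i16 pckev-style mask: the low half of the result takes operand 0's
  // even elements...
  std::vector<int> Pckev = {0, 2, 4, 6, 8, 10, 12, 14};
  auto Mid = Pckev.begin() + Pckev.size() / 2;
  assert(fitsRegularPattern(Pckev.begin(), 1, Mid, 0, 2));
  // ...and the high half takes operand 1's even elements (8, 10, 12, 14).
  assert(fitsRegularPattern(Mid, 1, Pckev.end(), 8, 2));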
// Lower VECTOR_SHUFFLE into VSHF.
@@ -2667,7 +2824,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
++I)
- Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
+ Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));
SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops);
@@ -2707,10 +2864,11 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
for (int i = 0; i < ResTyNumElts; ++i)
Indices.push_back(Node->getMaskElt(i));
- SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
- if (Result.getNode())
- return Result;
- Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
+ // splati.[bhwd] is preferable to the others but is matched from
+ // MipsISD::VSHF.
+ if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
+ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+ SDValue Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
if (Result.getNode())
return Result;
Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
@@ -2728,6 +2886,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
if (Result.getNode())
return Result;
+ Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
@@ -2747,8 +2908,7 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
// $vr0 = phi($vr2, $fbb, $vr1, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2813,8 +2973,7 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// $rd = phi($rd1, $fbb, $rd2, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2875,8 +3034,7 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Fd = MI->getOperand(0).getReg();
@@ -2921,8 +3079,7 @@ MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
unsigned Fd = MI->getOperand(0).getReg();
unsigned Ws = MI->getOperand(1).getReg();
@@ -2951,8 +3108,7 @@ emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -2988,8 +3144,7 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3037,8 +3192,7 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned EltSizeInBytes,
bool IsFP) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3048,7 +3202,7 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
const TargetRegisterClass *VecRC = nullptr;
const TargetRegisterClass *GPRRC =
- Subtarget.isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
unsigned EltLog2Size;
unsigned InsertOp = 0;
unsigned InsveOp = 0;
@@ -3126,8 +3280,9 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
// sld.df interprets $rt modulo the number of columns so we only need to negate
// the lane index to do this.
unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
- BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2)
- .addReg(Mips::ZERO)
+ BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
+ LaneTmp2)
+ .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
.addReg(LaneReg);
BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
.addReg(WdTmp2)
@@ -3148,8 +3303,7 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3180,8 +3334,7 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3209,8 +3362,7 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
@@ -3239,8 +3391,7 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 180b043..786307b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
: MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? Mips::B
: Mips::J),
- RI(STI) {}
+ RI() {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
@@ -359,10 +359,10 @@ unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const MipsSubtarget &STI = Subtarget;
+ MipsABIInfo ABI = Subtarget.getABI();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned ADDiu = ABI.GetPtrAddiuOp();
if (Amount == 0)
return;
@@ -610,7 +610,8 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
// This pseudo instruction is generated as part of the lowering of
// ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and
// indirect jump to TargetReg
- unsigned ADDU = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ MipsABIInfo ABI = Subtarget.getABI();
+ unsigned ADDU = ABI.GetPtrAdduOp();
unsigned SP = Subtarget.isGP64bit() ? Mips::SP_64 : Mips::SP;
unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA;
unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9;
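Given the selections replaced here and in adjustStackPtr above (DADDu/DADDiu on N64, ADDu/ADDiu otherwise), the new MipsABIInfo accessors presumably reduce to a pointer-width dispatch along these lines (a sketch of the assumed behaviour, not the actual MipsABIInfo implementation):

// Assumed implementations; the real definitions live in MipsABIInfo.
unsigned MipsABIInfo::GetPtrAdduOp() const {
  // N64 uses 64-bit pointers, so pointer arithmetic needs the
  // doubleword forms.
  return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu;
}

unsigned MipsABIInfo::GetPtrAddiuOp() const {
  return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu;
}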
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index d16fab2..bebbabf 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -68,7 +68,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate. If NewImm is a
/// non-NULL parameter, the last instruction is not emitted, but instead
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index 55c6638..8c74a98 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -41,8 +42,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {}
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
@@ -110,6 +110,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MipsABIInfo ABI =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -132,7 +134,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned FrameReg;
if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ FrameReg = ABI.GetStackPtr();
else
FrameReg = getFrameRegister(MF);
@@ -165,15 +167,16 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
- const TargetRegisterClass *RC =
- Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ const TargetRegisterClass *PtrRC =
+ ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
- unsigned Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Reg = RegInfo.createVirtualRegister(PtrRC);
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
MBB.getParent()->getSubtarget().getInstrInfo());
- BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
+ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiuOp()), Reg)
+ .addReg(FrameReg)
+ .addImm(Offset);
FrameReg = Reg;
Offset = 0;
@@ -183,14 +186,13 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// instructions.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned NewImm = 0;
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
MBB.getParent()->getSubtarget().getInstrInfo());
unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
OffsetBitSize == 16 ? &NewImm : nullptr);
- BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAdduOp()), Reg).addReg(FrameReg)
.addReg(Reg, RegState::Kill);
FrameReg = Reg;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h
index 6b70d07..ebae190 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h
@@ -22,7 +22,7 @@ class MipsSEInstrInfo;
class MipsSERegisterInfo : public MipsRegisterInfo {
public:
- MipsSERegisterInfo(const MipsSubtarget &Subtarget);
+ MipsSERegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index ea98199..54b5d28 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -65,7 +65,9 @@ def II_DSRL32 : InstrItinClass;
def II_DSRLV : InstrItinClass;
def II_DSUBU : InstrItinClass;
def II_DSUB : InstrItinClass;
+def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
+def II_INS : InstrItinClass; // Any INS instruction
def II_LB : InstrItinClass;
def II_LBU : InstrItinClass;
def II_LD : InstrItinClass;
@@ -198,6 +200,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DSUB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTRV , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_INS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVN , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index d035ebe..7ea10eb 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -33,93 +33,46 @@ using namespace llvm;
// FIXME: Maybe this should be on by default when Mips16 is specified
//
-static cl::opt<bool> Mixed16_32(
- "mips-mixed-16-32",
- cl::init(false),
- cl::desc("Allow for a mixture of Mips16 "
- "and Mips32 code in a single source file"),
- cl::Hidden);
-
-static cl::opt<bool> Mips_Os16(
- "mips-os16",
- cl::init(false),
- cl::desc("Compile all functions that don' use "
- "floating point as Mips 16"),
- cl::Hidden);
-
static cl::opt<bool>
-Mips16HardFloat("mips16-hard-float", cl::NotHidden,
- cl::desc("MIPS: mips16 hard float enable."),
- cl::init(false));
+ Mixed16_32("mips-mixed-16-32", cl::init(false),
+ cl::desc("Allow for a mixture of Mips16 "
+ "and Mips32 code in a single output file"),
+ cl::Hidden);
+
+static cl::opt<bool> Mips_Os16("mips-os16", cl::init(false),
+ cl::desc("Compile all functions that don't use "
+ "floating point as Mips 16"),
+ cl::Hidden);
+
+static cl::opt<bool> Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("Enable mips16 hard float."),
+ cl::init(false));
static cl::opt<bool>
-Mips16ConstantIslands(
- "mips16-constant-islands", cl::NotHidden,
- cl::desc("MIPS: mips16 constant islands enable."),
- cl::init(true));
+ Mips16ConstantIslands("mips16-constant-islands", cl::NotHidden,
+ cl::desc("Enable mips16 constant islands."),
+ cl::init(true));
static cl::opt<bool>
-GPOpt("mgpopt", cl::Hidden,
- cl::desc("MIPS: Enable gp-relative addressing of small data items"));
-
-/// Select the Mips CPU for the given triple and cpu name.
-/// FIXME: Merge with the copy in MipsMCTargetDesc.cpp
-static StringRef selectMipsCPU(Triple TT, StringRef CPU) {
- if (CPU.empty() || CPU == "generic") {
- if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel)
- CPU = "mips32";
- else
- CPU = "mips64";
- }
- return CPU;
-}
+ GPOpt("mgpopt", cl::Hidden,
+ cl::desc("Enable gp-relative addressing of mips small data items"));
void MipsSubtarget::anchor() { }
-static std::string computeDataLayout(const MipsSubtarget &ST) {
- std::string Ret = "";
-
- // There are both little and big endian mips.
- if (ST.isLittle())
- Ret += "e";
- else
- Ret += "E";
-
- Ret += "-m:m";
-
- // Pointers are 32 bit on some ABIs.
- if (!ST.isABI_N64())
- Ret += "-p:32:32";
-
- // 8 and 16 bit integers only need to have natural alignment, but try to
- // align them to 32 bits. 64 bit integers have natural alignment.
- Ret += "-i8:8:32-i16:16:32-i64:64";
-
- // 32 bit registers are always available and the stack is at least 64 bit
- // aligned. On N64 64 bit registers are also available and the stack is
- // 128 bit aligned.
- if (ST.isABI_N64() || ST.isABI_N32())
- Ret += "-n32:64-S128";
- else
- Ret += "-n32-S64";
-
- return Ret;
-}
-
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
const MipsTargetMachine &TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
- ABI(MipsABIInfo::Unknown()), IsLittle(little), IsSingleFloat(false),
- IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+ IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
+ NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), TM(TM), TargetTriple(TT),
- DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
- TSInfo(DL), InstrInfo(MipsInstrInfo::create(*this)),
+ HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(*TM.getDataLayout()),
+ InstrInfo(
+ MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
TLInfo(MipsTargetLowering::create(TM, *this)) {
@@ -135,13 +88,6 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
if (MipsArchVersion == Mips5)
report_fatal_error("Code generation for MIPS-V is not implemented", false);
- // Assert exactly one ABI was chosen.
- assert(ABI.IsKnown());
- assert((((getFeatureBits() & Mips::FeatureO32) != 0) +
- ((getFeatureBits() & Mips::FeatureEABI) != 0) +
- ((getFeatureBits() & Mips::FeatureN32) != 0) +
- ((getFeatureBits() & Mips::FeatureN64) != 0)) == 1);
-
// Check if Architecture and ABI are compatible.
assert(((!isGP64bit() && (isABI_O32() || isABI_EABI())) ||
(isGP64bit() && (isABI_N32() || isABI_N64()))) &&
@@ -195,23 +141,19 @@ CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
MipsSubtarget &
MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
const TargetMachine &TM) {
- std::string CPUName = selectMipsCPU(TargetTriple, CPU);
-
+ std::string CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU);
+
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- if (InMips16Mode && !TM.Options.UseSoftFloat)
+ if (InMips16Mode && !IsSoftFloat)
InMips16HardFloat = true;
return *this;
}
-bool MipsSubtarget::abiUsesSoftFloat() const {
- return TM.Options.UseSoftFloat && !InMips16HardFloat;
-}
-
bool MipsSubtarget::useConstantIslands() {
DEBUG(dbgs() << "use constant islands " << Mips16ConstantIslands << "\n");
return Mips16ConstantIslands;
@@ -220,3 +162,9 @@ bool MipsSubtarget::useConstantIslands() {
Reloc::Model MipsSubtarget::getRelocationModel() const {
return TM.getRelocationModel();
}
+
+bool MipsSubtarget::isABI_EABI() const { return getABI().IsEABI(); }
+bool MipsSubtarget::isABI_N64() const { return getABI().IsN64(); }
+bool MipsSubtarget::isABI_N32() const { return getABI().IsN32(); }
+bool MipsSubtarget::isABI_O32() const { return getABI().IsO32(); }
+const MipsABIInfo &MipsSubtarget::getABI() const { return TM.getABI(); }
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index aca69a6..0bfafc8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -38,19 +38,19 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
enum MipsArchEnum {
MipsDefault,
- Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
- Mips64r2, Mips64r6
+ Mips1, Mips2, Mips32, Mips32r2, Mips32r3, Mips32r5, Mips32r6, Mips32Max,
+ Mips3, Mips4, Mips5, Mips64, Mips64r2, Mips64r3, Mips64r5, Mips64r6
};
// Mips architecture version
MipsArchEnum MipsArchVersion;
- // Selected ABI
- MipsABIInfo ABI;
-
// IsLittle - The target is Little Endian
bool IsLittle;
+ // IsSoftFloat - The target does not support any floating point instructions.
+ bool IsSoftFloat;
+
// IsSingleFloat - The target only supports single precision float
// point operations. This enable the target to use all 32 32-bit
// floating point registers instead of only using even ones.
@@ -140,7 +140,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
Triple TargetTriple;
- const DataLayout DL; // Calculates type size & alignment
const MipsSelectionDAGInfo TSInfo;
std::unique_ptr<const MipsInstrInfo> InstrInfo;
std::unique_ptr<const MipsFrameLowering> FrameLowering;
@@ -153,12 +152,12 @@ public:
CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
/// Only O32 and EABI supported right now.
- bool isABI_EABI() const { return ABI.IsEABI(); }
- bool isABI_N64() const { return ABI.IsN64(); }
- bool isABI_N32() const { return ABI.IsN32(); }
- bool isABI_O32() const { return ABI.IsO32(); }
+ bool isABI_EABI() const;
+ bool isABI_N64() const;
+ bool isABI_N32() const;
+ bool isABI_O32() const;
+ const MipsABIInfo &getABI() const;
bool isABI_FPXX() const { return isABI_O32() && IsFPXX; }
- const MipsABIInfo &getABI() const { return ABI; }
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -178,21 +177,30 @@ public:
bool hasMips4_32() const { return HasMips4_32; }
bool hasMips4_32r2() const { return HasMips4_32r2; }
bool hasMips32() const {
- return MipsArchVersion >= Mips32 && MipsArchVersion != Mips3 &&
- MipsArchVersion != Mips4 && MipsArchVersion != Mips5;
+ return (MipsArchVersion >= Mips32 && MipsArchVersion < Mips32Max) ||
+ hasMips64();
}
bool hasMips32r2() const {
- return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips32r6 ||
- MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
+ return (MipsArchVersion >= Mips32r2 && MipsArchVersion < Mips32Max) ||
+ hasMips64r2();
+ }
+ bool hasMips32r3() const {
+ return (MipsArchVersion >= Mips32r3 && MipsArchVersion < Mips32Max) ||
+ hasMips64r2();
+ }
+ bool hasMips32r5() const {
+ return (MipsArchVersion >= Mips32r5 && MipsArchVersion < Mips32Max) ||
+ hasMips64r2();
}
bool hasMips32r6() const {
- return MipsArchVersion == Mips32r6 || MipsArchVersion == Mips64r6;
+ return (MipsArchVersion >= Mips32r6 && MipsArchVersion < Mips32Max) ||
+ hasMips64r6();
}
bool hasMips64() const { return MipsArchVersion >= Mips64; }
- bool hasMips64r2() const {
- return MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
- }
- bool hasMips64r6() const { return MipsArchVersion == Mips64r6; }
+ bool hasMips64r2() const { return MipsArchVersion >= Mips64r2; }
+ bool hasMips64r3() const { return MipsArchVersion >= Mips64r3; }
+ bool hasMips64r5() const { return MipsArchVersion >= Mips64r5; }
+ bool hasMips64r6() const { return MipsArchVersion >= Mips64r6; }
bool hasCnMips() const { return HasCnMips; }
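With the reordered enum, these predicates compose by range, with Mips32Max acting as a sentinel separating the 32-bit revisions from the 64-bit ones. Tracing two cases by hand against the definitions above:

// MipsArchVersion == Mips64r2:
//   hasMips64r2() -> true   (Mips64r2 >= Mips64r2)
//   hasMips32r2() -> true   (the 32-bit range check fails, but
//                            hasMips64r2() holds)
//   hasMips32r6() -> false  (Mips64r2 is not below Mips32Max, and
//                            hasMips64r6() is false)
// MipsArchVersion == Mips32r5:
//   hasMips32r3() -> true   (Mips32r3 <= Mips32r5 < Mips32Max)
//   hasMips32r6() -> false  (Mips32r5 < Mips32r6)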
@@ -220,6 +228,7 @@ public:
return inMips16Mode() && InMips16HardFloat;
}
bool inMicroMipsMode() const { return InMicroMipsMode; }
+ bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasMSA() const { return HasMSA; }
@@ -227,7 +236,7 @@ public:
bool hasStandardEncoding() const { return !inMips16Mode(); }
- bool abiUsesSoftFloat() const;
+ bool useSoftFloat() const { return IsSoftFloat; }
bool enableLongBranchPass() const {
return hasStandardEncoding() || allowMixed16_32();
@@ -238,9 +247,9 @@ public:
bool hasMTHC1() const { return hasMips32r2(); }
bool allowMixed16_32() const { return inMips16ModeDefault() |
- AllowMixed16_32;}
+ AllowMixed16_32; }
- bool os16() const { return Os16;};
+ bool os16() const { return Os16; }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
@@ -270,7 +279,6 @@ public:
const MipsSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const DataLayout *getDataLayout() const override { return &DL; }
const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); }
const TargetFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 426b71d..b279184 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -14,14 +14,11 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
#include "Mips16FrameLowering.h"
-#include "Mips16HardFloat.h"
#include "Mips16ISelDAGToDAG.h"
#include "Mips16ISelLowering.h"
#include "Mips16InstrInfo.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "MipsOs16.h"
#include "MipsSEFrameLowering.h"
#include "MipsSEISelDAGToDAG.h"
#include "MipsSEISelLowering.h"
@@ -29,11 +26,12 @@
#include "MipsTargetObjectFile.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+
using namespace llvm;
#define DEBUG_TYPE "mips"
@@ -46,6 +44,40 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
+ bool isLittle) {
+ std::string Ret = "";
+ MipsABIInfo ABI =
+ MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions);
+
+ // There are both little and big endian mips.
+ if (isLittle)
+ Ret += "e";
+ else
+ Ret += "E";
+
+ Ret += "-m:m";
+
+ // Pointers are 32 bit on some ABIs.
+ if (!ABI.IsN64())
+ Ret += "-p:32:32";
+
+ // 8 and 16 bit integers only need to have natural alignment, but try to
+ // align them to 32 bits. 64 bit integers have natural alignment.
+ Ret += "-i8:8:32-i16:16:32-i64:64";
+
+ // 32 bit registers are always available and the stack is at least 64 bit
+ // aligned. On N64 64 bit registers are also available and the stack is
+ // 128 bit aligned.
+ if (ABI.IsN64() || ABI.IsN32())
+ Ret += "-n32:64-S128";
+ else
+ Ret += "-n32-S64";
+
+ return Ret;
+}
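Tracing the branches above by hand, the strings this produces for two common configurations are:

// mipsel, O32:  e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64
// mips64, N64:  E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128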
+
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer, using StackGrowsUp enables
@@ -56,11 +88,11 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- isLittle(isLittle),
- TLOF(make_unique<MipsTargetObjectFile>()),
- Subtarget(nullptr),
- DefaultSubtarget(TT, CPU, FS, isLittle, *this),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
+ isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
+ ABI(MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions)),
+ Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
isLittle, *this),
Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
@@ -91,11 +123,8 @@ MipselTargetMachine(const Target &T, StringRef TT,
const MipsSubtarget *
MipsTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -104,30 +133,25 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
? FSAttr.getValueAsString().str()
: TargetFS;
bool hasMips16Attr =
- !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "mips16")
- .hasAttribute(Attribute::None);
+ !F.getFnAttribute("mips16").hasAttribute(Attribute::None);
bool hasNoMips16Attr =
- !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "nomips16")
- .hasAttribute(Attribute::None);
+ !F.getFnAttribute("nomips16").hasAttribute(Attribute::None);
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
- // function before we can generate a subtarget. We also need to use
- // it as a key for the subtarget since that can be the only difference
- // between two functions.
- Attribute SFAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
- bool softFloat = !SFAttr.hasAttribute(Attribute::None)
- ? SFAttr.getValueAsString() == "true"
- : Options.UseSoftFloat;
+ // function, so we can enable it as a subtarget feature.
+ bool softFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
if (hasMips16Attr)
FS += FS.empty() ? "+mips16" : ",+mips16";
else if (hasNoMips16Attr)
FS += FS.empty() ? "-mips16" : ",-mips16";
+ if (softFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
- auto &I = SubtargetMap[CPU + FS + (softFloat ? "use-soft-float=true"
- : "use-soft-float=false")];
+ auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
@@ -185,14 +209,14 @@ void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createAtomicExpandPass(&getMipsTargetMachine()));
if (getMipsSubtarget().os16())
- addPass(createMipsOs16(getMipsTargetMachine()));
+ addPass(createMipsOs16Pass(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
- addPass(createMips16HardFloat(getMipsTargetMachine()));
+ addPass(createMips16HardFloatPass(getMipsTargetMachine()));
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+ addPass(createMipsModuleISelDagPass(getMipsTargetMachine()));
addPass(createMips16ISelDag(getMipsTargetMachine()));
addPass(createMipsSEISelDag(getMipsTargetMachine()));
return false;
@@ -208,17 +232,17 @@ void MipsPassConfig::addPreRegAlloc() {
addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
}
-void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- if (Subtarget->allowMixed16_32()) {
- DEBUG(errs() << "No ");
- //FIXME: The Basic Target Transform Info
- // pass needs to become a function pass instead of
- // being an immutable pass and then this method as it exists now
- // would be unnecessary.
- PM.add(createNoTargetTransformInfoPass());
- } else
- LLVMTargetMachine::addAnalysisPasses(PM);
- DEBUG(errs() << "Target Transform Info Pass Added\n");
+TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ if (Subtarget->allowMixed16_32()) {
+ DEBUG(errs() << "No Target Transform Info Pass Added\n");
+ // FIXME: This is no longer necessary as the TTI returned is per-function.
+ return TargetTransformInfo(getDataLayout());
+ }
+
+ DEBUG(errs() << "Target Transform Info Pass Added\n");
+ return TargetTransformInfo(BasicTTIImpl(this, F));
+ });
}
// Implemented by targets that want to run passes immediately before
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
index 4b097a2..5427d6a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -14,7 +14,9 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
#define LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -27,6 +29,8 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
bool isLittle;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ // Selected ABI
+ MipsABIInfo ABI;
MipsSubtarget *Subtarget;
MipsSubtarget DefaultSubtarget;
MipsSubtarget NoMips16Subtarget;
@@ -40,9 +44,9 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
~MipsTargetMachine() override;
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
- const MipsSubtarget *getSubtargetImpl() const override {
+ const MipsSubtarget *getSubtargetImpl() const {
if (Subtarget)
return Subtarget;
return &DefaultSubtarget;
@@ -61,6 +65,7 @@ public:
}
bool isLittleEndian() const { return isLittle; }
+ const MipsABIInfo &getABI() const { return ABI; }
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index b56c39b..0f2db60 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,6 +9,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -39,16 +40,12 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
- SmallDataSection =
- getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
+ SmallDataSection = getContext().getELFSection(
+ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- SmallBSSSection =
- getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getBSS());
- this->TM = &TM;
+ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ this->TM = &static_cast<const MipsTargetMachine &>(TM);
}
// An address must be loaded from a small section if its size is less than the
@@ -88,7 +85,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
bool MipsTargetObjectFile::
IsGlobalInSmallSectionImpl(const GlobalValue *GV,
const TargetMachine &TM) const {
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ const MipsSubtarget &Subtarget =
+ *static_cast<const MipsTargetMachine &>(TM).getSubtargetImpl();
// Return if small section is not available.
if (!Subtarget.useSmallSection())
@@ -109,13 +107,13 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV,
return false;
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
}
-const MCSection *MipsTargetObjectFile::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
+MCSection *
+MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
// TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
// sections?
@@ -132,14 +130,16 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
/// Return true if this constant should be placed into small data section.
bool MipsTargetObjectFile::
IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
- return (TM.getSubtarget<MipsSubtarget>().useSmallSection() &&
- LocalSData &&
- IsInSmallSection(TM.getSubtargetImpl()->getDataLayout()
- ->getTypeAllocSize(CN->getType())));
+ return (static_cast<const MipsTargetMachine &>(TM)
+ .getSubtargetImpl()
+ ->useSmallSection() &&
+ LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(
+ CN->getType())));
}
-const MCSection *MipsTargetObjectFile::
-getSectionForConstant(SectionKind Kind, const Constant *C) const {
+MCSection *
+MipsTargetObjectFile::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
if (IsConstantInSmallSection(C, *TM))
return SmallDataSection;
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index 3a2b298..725f2ff 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -13,11 +13,11 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-
+class MipsTargetMachine;
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
- const MCSection *SmallDataSection;
- const MCSection *SmallBSSSection;
- const TargetMachine *TM;
+ MCSection *SmallDataSection;
+ MCSection *SmallBSSSection;
+ const MipsTargetMachine *TM;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
@@ -31,16 +31,16 @@ namespace llvm {
bool IsGlobalInSmallSectionImpl(const GlobalValue *GV,
const TargetMachine &TM) const;
- const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
/// Return true if this constant should be placed into small data section.
bool IsConstantInSmallSection(const Constant *CN,
const TargetMachine &TM) const;
- const MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override;
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 6dcd15a..22b0c6c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_MIPS_MIPSTARGETSTREAMER_H
#include "MCTargetDesc/MipsABIFlagsSection.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -34,6 +35,7 @@ public:
virtual void emitDirectiveSetMsa();
virtual void emitDirectiveSetNoMsa();
virtual void emitDirectiveSetAt();
+ virtual void emitDirectiveSetAtWithArg(unsigned RegNo);
virtual void emitDirectiveSetNoAt();
virtual void emitDirectiveEnd(StringRef Name);
@@ -43,6 +45,7 @@ public:
virtual void emitDirectiveNaNLegacy();
virtual void emitDirectiveOptionPic0();
virtual void emitDirectiveOptionPic2();
+ virtual void emitDirectiveInsn();
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg);
virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
@@ -57,9 +60,13 @@ public:
virtual void emitDirectiveSetMips5();
virtual void emitDirectiveSetMips32();
virtual void emitDirectiveSetMips32R2();
+ virtual void emitDirectiveSetMips32R3();
+ virtual void emitDirectiveSetMips32R5();
virtual void emitDirectiveSetMips32R6();
virtual void emitDirectiveSetMips64();
virtual void emitDirectiveSetMips64R2();
+ virtual void emitDirectiveSetMips64R3();
+ virtual void emitDirectiveSetMips64R5();
virtual void emitDirectiveSetMips64R6();
virtual void emitDirectiveSetDsp();
virtual void emitDirectiveSetNoDsp();
@@ -86,21 +93,27 @@ public:
}
virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
- virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
- virtual void emitMipsAbiFlags(){};
+ virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+ void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
// This method enables template classes to set internal abi flags
// structure values.
template <class PredicateLibrary>
void updateABIInfo(const PredicateLibrary &P) {
+ ABI = &P.getABI();
ABIFlagsSection.setAllFromPredicates(P);
}
MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
+ const MipsABIInfo &getABI() const {
+ assert(ABI && "ABI hasn't been set!");
+ return *ABI;
+ }
protected:
+ const MipsABIInfo *ABI;
MipsABIFlagsSection ABIFlagsSection;
bool GPRInfoSet;
@@ -138,6 +151,7 @@ public:
void emitDirectiveSetMsa() override;
void emitDirectiveSetNoMsa() override;
void emitDirectiveSetAt() override;
+ void emitDirectiveSetAtWithArg(unsigned RegNo) override;
void emitDirectiveSetNoAt() override;
void emitDirectiveEnd(StringRef Name) override;
@@ -147,6 +161,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
@@ -161,9 +176,13 @@ public:
void emitDirectiveSetMips5() override;
void emitDirectiveSetMips32() override;
void emitDirectiveSetMips32R2() override;
+ void emitDirectiveSetMips32R3() override;
+ void emitDirectiveSetMips32R5() override;
void emitDirectiveSetMips32R6() override;
void emitDirectiveSetMips64() override;
void emitDirectiveSetMips64R2() override;
+ void emitDirectiveSetMips64R3() override;
+ void emitDirectiveSetMips64R5() override;
void emitDirectiveSetMips64R6() override;
void emitDirectiveSetDsp() override;
void emitDirectiveSetNoDsp() override;
@@ -180,7 +199,6 @@ public:
bool Is32BitABI) override;
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
- void emitMipsAbiFlags() override;
};
// This part is for ELF object output
@@ -211,6 +229,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
@@ -223,12 +242,7 @@ public:
// ABI Flags
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
- void emitMipsAbiFlags() override;
-
-protected:
- bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
- bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
- bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
+ void emitMipsAbiFlags();
};
}
#endif
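
The updateABIInfo template added above only requires its argument to expose getABI(); any "predicate library" (in-tree, the asm parser or the target machine) can seed the streamer's cached ABI pointer, and getABI() asserts it was set. A minimal compile-and-run sketch of that pattern, with ABIInfo, PredicateLibrary, and Streamer as stand-ins for the LLVM classes:

#include <cassert>

struct ABIInfo { bool IsO32 = true; };

struct PredicateLibrary {   // plays the role of MipsAsmParser / MipsTargetMachine
  ABIInfo Info;
  const ABIInfo &getABI() const { return Info; }
};

struct Streamer {           // plays the role of MipsTargetStreamer
  const ABIInfo *ABI = nullptr;
  template <class Pred> void updateABIInfo(const Pred &P) { ABI = &P.getABI(); }
  const ABIInfo &getABI() const {
    assert(ABI && "ABI hasn't been set!");
    return *ABI;
  }
};

int main() {
  PredicateLibrary P;
  Streamer S;
  S.updateABIInfo(P);
  return S.getABI().IsO32 ? 0 : 1;
}
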
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 80b2f62..ac92df9 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -28,13 +28,9 @@ using namespace llvm;
#include "NVPTXGenAsmWriter.inc"
-
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// Decode the virtual register
@@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 0496964..02c5a21 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -25,10 +25,11 @@ class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
InlineAsmEnd = " inline asm";
SupportsDebugInformation = CompileForDebugging;
+ // PTX does not allow .align on functions.
+ HasFunctionAlignment = false;
HasDotTypeDotSizeDirective = false;
Data8bitsDirective = " .b8 ";
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..d500105 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,52 +54,39 @@ createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
+static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new NVPTXInstPrinter(MAI, MII, MRI, STI);
+ return new NVPTXInstPrinter(MAI, MII, MRI);
return nullptr;
}
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
- createNVPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
- createNVPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
- createNVPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
- createNVPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
- createNVPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
- createNVPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
- createNVPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
- createNVPTXMCInstPrinter);
+ for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+ }
}
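
The rewritten initializer above folds five pairs of duplicated Register* calls into one loop over both target descriptors. The enabling idiom is simply a ranged-for over a braced list of pointers; a self-contained illustration, where Target and registerMCPieces are mocks rather than the TargetRegistry API:

#include <cstdio>

struct Target { const char *Name; };
Target TheTarget32{"nvptx"}, TheTarget64{"nvptx64"};

void registerMCPieces(Target &T) { std::printf("registered %s\n", T.Name); }

int main() {
  for (Target *T : {&TheTarget32, &TheTarget64})
    registerMCPieces(*T);
}
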
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#include <stdint.h>
+
namespace llvm {
class Target;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index a3382eb..382525d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -59,7 +59,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
-ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 93fabf6..96abfa8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
"Target SM 2.1">;
def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
+def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
+ "Target SM 3.2">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
+ "Target SM 3.7">;
def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
"Target SM 5.0">;
+def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
+ "Target SM 5.2">;
+def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
+ "Target SM 5.3">;
// PTX Versions
-def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
- "Use PTX version 3.0">;
-def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
- "Use PTX version 3.1">;
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
"Use PTX version 3.2">;
def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
"Use PTX version 4.0">;
+def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
+ "Use PTX version 4.1">;
+def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
+ "Use PTX version 4.2">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_32", [SM32, PTX40]>;
def : Proc<"sm_35", [SM35]>;
-def : Proc<"sm_50", [SM50]>;
+def : Proc<"sm_37", [SM37, PTX41]>;
+def : Proc<"sm_50", [SM50, PTX40]>;
+def : Proc<"sm_52", [SM52, PTX41]>;
+def : Proc<"sm_53", [SM53, PTX42]>;
def NVPTXInstrInfo : InstrInfo {
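
Reading the processor definitions above, each newly added SM level also pins a minimum PTX ISA: sm_32 and sm_50 imply PTX 4.0, sm_37 and sm_52 imply 4.1, and sm_53 implies 4.2, while sm_20 through sm_35 keep the default. A quick C++ rendering of that pairing as a cross-check; the table contents come straight from this hunk, everything else is scaffolding:

#include <cstdio>
#include <map>
#include <string>

int main() {
  // SM -> minimum PTX version, encoded as major*10+minor as in the .td file.
  std::map<std::string, int> MinPTX = {
      {"sm_32", 40}, {"sm_37", 41}, {"sm_50", 40},
      {"sm_52", 41}, {"sm_53", 42}};
  for (const auto &KV : MinPTX)
    std::printf("%s requires PTX %d.%d\n", KV.first.c_str(),
                KV.second / 10, KV.second % 10);
}
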
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+using namespace llvm;
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ const char *getPassName() const override {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ bool runOnFunction(Function &function) override;
+};
+} // namespace
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
return functionModified;
}
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+ NVPTXAllocaHoisting, "alloca-hoisting",
+ "Hoisting alloca instructions in non-entry blocks to the entry block",
+ false, false)
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
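
The pass above (its loop body sits in unshown context) moves every alloca found outside the entry block up into the entry block. A toy, IR-free analogue of that transformation, where a function is just a vector of blocks and instructions are strings; purely illustrative, not the in-tree logic:

#include <string>
#include <vector>

using Block = std::vector<std::string>;

// Move every "alloca" in a non-entry block to the entry block, preserving
// discovery order; returns whether anything changed.
bool hoistAllocas(std::vector<Block> &Fn) {
  bool Changed = false;
  for (size_t I = 1; I < Fn.size(); ++I) {
    for (auto It = Fn[I].begin(); It != Fn[I].end();) {
      if (*It == "alloca") {
        Fn[0].push_back(*It);
        It = Fn[I].erase(It);
        Changed = true;
      } else {
        ++It;
      }
    }
  }
  return Changed;
}
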
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
-
class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
- static char ID; // Pass ID
- NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- const char *getPassName() const override {
- return "NVPTX specific alloca hoisting";
- }
-
- bool runOnFunction(Function &function) override;
-};
extern FunctionPass *createAllocaHoisting();
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index beec9b2..3bbea40 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugInfo.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
@@ -45,6 +47,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
using namespace llvm;
@@ -116,7 +119,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
DebugLoc curLoc = MI.getDebugLoc();
- if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ if (!prevDebugLoc && !curLoc)
return;
if (prevDebugLoc == curLoc)
@@ -124,50 +127,43 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
prevDebugLoc = curLoc;
- if (curLoc.isUnknown())
+ if (!curLoc)
return;
- const MachineFunction *MF = MI.getParent()->getParent();
- //const TargetMachine &TM = MF->getTarget();
-
- const LLVMContext &ctx = MF->getFunction()->getContext();
- DIScope Scope(curLoc.getScope(ctx));
-
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
+ auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
if (!Scope)
return;
- StringRef fileName(Scope.getFilename());
- StringRef dirName(Scope.getDirectory());
+ StringRef fileName(Scope->getFilename());
+ StringRef dirName(Scope->getDirectory());
SmallString<128> FullPathName = dirName;
if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
sys::path::append(FullPathName, fileName);
- fileName = FullPathName.str();
+ fileName = FullPathName;
}
- if (filenameMap.find(fileName.str()) == filenameMap.end())
+ if (filenameMap.find(fileName) == filenameMap.end())
return;
// Emit the line from the source file.
if (InterleaveSrc)
- this->emitSrcInText(fileName.str(), curLoc.getLine());
+ this->emitSrcInText(fileName, curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
<< " " << curLoc.getCol();
- OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+ OutStreamer->EmitRawText(temp.str());
}
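
The tail of the hunk above shows the exact directive being assembled: a tab, ".loc", the file's index in filenameMap, then line and column. A standalone reproduction of just that formatting step; makeLoc is an invented name, and the real printer streams the result via EmitRawText:

#include <sstream>
#include <string>

std::string makeLoc(unsigned FileIndex, unsigned Line, unsigned Col) {
  std::stringstream Temp;
  Temp << "\t.loc " << FileIndex << " " << Line << " " << Col;
  return Temp.str();
}

int main() { return makeLoc(1, 42, 7) == "\t.loc 1 42 7" ? 0 : 1; }
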
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
emitLineNumberAsDotLoc(*MI);
MCInst Inst;
lowerToMCInst(MI, Inst);
- EmitToStreamer(OutStreamer, Inst);
+ EmitToStreamer(*OutStreamer, Inst);
}
// Handle symbol backtracking for targets that do not support image handles
@@ -229,19 +225,17 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
const char *Sym = MFI->getImageHandleSymbol(Index);
std::string *SymNamePtr =
nvTM.getManagedStrPool()->getManagedString(Sym);
- MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
+ MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
StringRef(SymNamePtr->c_str())));
}
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
- const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
-
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
OutMI.addOperand(GetSymbolRef(
- OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
+ OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
return;
}
@@ -249,7 +243,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!ST.hasImageHandles()) {
+ if (!nvptxSubtarget->hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -266,13 +260,13 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
- MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
+ MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
@@ -288,11 +282,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (Cnt->getType()->getTypeID()) {
default: report_fatal_error("Unsupported FP type"); break;
case Type::FloatTyID:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
break;
case Type::DoubleTyID:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
break;
}
@@ -342,16 +336,16 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const DataLayout *TD = TM.getDataLayout();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -418,6 +412,42 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
printReturnValStr(F, O);
}
+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+ const MachineBasicBlock &MBB) const {
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ // TODO: isLoopHeader() should take "const MachineBasicBlock *".
+ // We insert .pragma "nounroll" only to the loop header.
+ if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+ return false;
+
+ // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+ // we iterate through each back edge of the loop with header MBB, and check
+ // whether its metadata contains llvm.loop.unroll.disable.
+ for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+ const MachineBasicBlock *PMBB = *I;
+ if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+ // Edges from other loops to MBB are not back edges.
+ continue;
+ }
+ if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+ if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
+ if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ AsmPrinter::EmitBasicBlockStart(MBB);
+ if (isLoopHeaderOfNoUnroll(MBB))
+ OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
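
isLoopHeaderOfNoUnroll above rests on a small invariant worth restating: llvm.loop.unroll.disable is attached to a loop's back edges, so the printer inspects each predecessor of the header that belongs to the same loop. A dependency-free model of that scan; Block, LoopID, and the bool flag stand in for MachineBasicBlock, MachineLoopInfo, and the !llvm.loop metadata:

#include <vector>

struct Block {
  std::vector<const Block *> Preds;
  int LoopID = 0;             // stand-in for MachineLoopInfo::getLoopFor
  bool DisableUnroll = false; // stand-in for llvm.loop.unroll.disable
};

bool isNoUnrollHeader(const Block &Header, bool IsLoopHeader) {
  if (!IsLoopHeader)
    return false;
  for (const Block *P : Header.Preds) {
    if (P->LoopID != Header.LoopID)
      continue; // an edge arriving from another loop is not a back edge
    if (P->DisableUnroll)
      return true;
  }
  return false;
}
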
@@ -445,39 +475,37 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
if (llvm::isKernelFunction(*F))
emitKernelFunctionDirectives(*F, O);
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
prevDebugLoc = DebugLoc();
}
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
VRegMapping.clear();
- OutStreamer.EmitRawText(StringRef("{\n"));
+ OutStreamer->EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
SmallString<128> Str;
raw_svector_ostream O(Str);
emitDemotedVars(MF->getFunction(), O);
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
}
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText(StringRef("}\n"));
+ OutStreamer->EmitRawText(StringRef("}\n"));
VRegMapping.clear();
}
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- if (TRI->isVirtualRegister(RegNo)) {
- OutStreamer.AddComment(Twine("implicit-def: ") +
- getVirtualRegisterName(RegNo));
+ if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
+ OutStreamer->AddComment(Twine("implicit-def: ") +
+ getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(
- Twine("implicit-def: ") +
- TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+ OutStreamer->AddComment(Twine("implicit-def: ") +
+ nvptxSubtarget->getRegisterInfo()->getName(RegNo));
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -487,15 +515,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false)
+ if (!llvm::getReqNTIDx(F, reqntidx))
reqntidx = 1;
else
specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false)
+ if (!llvm::getReqNTIDy(F, reqntidy))
reqntidy = 1;
else
specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false)
+ if (!llvm::getReqNTIDz(F, reqntidz))
reqntidz = 1;
else
specified = true;
@@ -509,15 +537,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ if (!llvm::getMaxNTIDx(F, maxntidx))
maxntidx = 1;
else
specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ if (!llvm::getMaxNTIDy(F, maxntidy))
maxntidy = 1;
else
specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ if (!llvm::getMaxNTIDz(F, maxntidz))
maxntidz = 1;
else
specified = true;
@@ -607,7 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName().str() == "llvm.used")
+ if (GV->getName() == "llvm.used")
return false;
return true;
}
@@ -622,7 +650,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName().str() == "llvm.used")
+ if (othergv->getName() == "llvm.used")
return true;
}
@@ -638,7 +666,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
}
for (const User *UU : U->users())
- if (usedInOneFunc(UU, oneFunc) == false)
+ if (!usedInOneFunc(UU, oneFunc))
return false;
return true;
@@ -652,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (gv->hasInternalLinkage() == false)
+ if (!gv->hasInternalLinkage())
return false;
const PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -661,7 +689,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
- if (flag == false)
+ if (!flag)
return false;
if (!oneFunc)
return false;
@@ -746,37 +774,45 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DbgFinder.processModule(M);
unsigned i = 1;
- for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
- StringRef Filename(DIUnit.getFilename());
- StringRef Dirname(DIUnit.getDirectory());
+ for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
+ StringRef Filename = DIUnit->getFilename();
+ StringRef Dirname = DIUnit->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
- OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ filenameMap[Filename] = i;
+ OutStreamer->EmitDwarfFileDirective(i, "", Filename);
++i;
}
- for (DISubprogram SP : DbgFinder.subprograms()) {
- StringRef Filename(SP.getFilename());
- StringRef Dirname(SP.getDirectory());
+ for (DISubprogram *SP : DbgFinder.subprograms()) {
+ StringRef Filename = SP->getFilename();
+ StringRef Dirname = SP->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
+ filenameMap[Filename] = i;
++i;
}
}
bool NVPTXAsmPrinter::doInitialization(Module &M) {
+ // Construct a default subtarget off of the TargetMachine defaults. The
+ // rest of NVPTX isn't friendly to change subtargets per function and
+ // so the default TargetMachine will have all of the options.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget STI(TT, CPU, FS, NTM);
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -791,26 +827,27 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+ Mang = new Mangler(TM.getDataLayout());
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1);
- OutStreamer.EmitRawText(OS1.str());
+ emitHeader(M, OS1, STI);
+ OutStreamer->EmitRawText(OS1.str());
// Already commented out
//bool Result = AsmPrinter::doInitialization(M);
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- OutStreamer.AddComment("Start of file scope inline assembly");
- OutStreamer.AddBlankLine();
- OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
- OutStreamer.AddBlankLine();
- OutStreamer.AddComment("End of file scope inline assembly");
- OutStreamer.AddBlankLine();
+ OutStreamer->AddComment("Start of file scope inline assembly");
+ OutStreamer->AddBlankLine();
+ OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
+ OutStreamer->AddBlankLine();
+ OutStreamer->AddComment("End of file scope inline assembly");
+ OutStreamer->AddBlankLine();
}
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ // If we're not NVCL we're CUDA, go ahead and emit filenames.
+ if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
recordAndEmitFilenames(M);
GlobalsEmitted = false;
@@ -848,25 +885,27 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OS2 << '\n';
- OutStreamer.EmitRawText(OS2.str());
+ OutStreamer->EmitRawText(OS2.str());
}
-void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
+ const NVPTXSubtarget &STI) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
O << "\n";
- unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+ unsigned PTXVersion = STI.getPTXVersion();
O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
O << ".target ";
- O << nvptxSubtarget.getTargetName();
+ O << STI.getTargetName();
- if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
- if (!nvptxSubtarget.hasDouble())
+ else {
+ if (!STI.hasDouble())
O << ", map_f64_to_f32";
}
@@ -876,7 +915,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "\n";
O << ".address_size ";
- if (nvptxSubtarget.is64Bit())
+ if (NTM.is64Bit())
O << "64";
else
O << "32";
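
Two hunks up, the header writer derives the ".version" directive from a single integer: the subtarget packs major and minor as major*10+minor, so 42 prints as "4.2". A one-function check of that arithmetic; printVersion is an invented name:

#include <cstdio>

void printVersion(unsigned PTXVersion) { // e.g. 42 -> ".version 4.2"
  std::printf(".version %u.%u\n", PTXVersion / 10, PTXVersion % 10);
}

int main() { printVersion(42); }
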
@@ -886,7 +925,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
-
// If we did not emit any functions, then the global declarations have not
// yet been emitted.
if (!GlobalsEmitted) {
@@ -948,7 +986,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
raw_ostream &O) {
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
const GlobalVariable *GVar = cast<GlobalVariable>(V);
@@ -967,7 +1005,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName().str());
+ msg.append(V->getName());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -992,7 +1030,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1103,7 +1141,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
- O << "// " << GVar->getName().str() << " has been demoted\n";
+ O << "// " << GVar->getName() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
@@ -1142,7 +1180,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
const Constant *Initializer = GVar->getInitializer();
- // 'undef' is treated as there is no value spefied.
+ // 'undef' is treated as there is no value specified.
if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
O << " = ";
printScalarConstant(Initializer, O);
@@ -1151,9 +1189,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg = "initial value of '" + GVar->getName().str() +
- "' is not allowed in addrspace(" +
- llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ std::string warnMsg =
+ ("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
report_fatal_error(warnMsg.c_str());
}
}
@@ -1180,7 +1219,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
O << " .u64 " << *getSymbol(GVar) << "[";
O << ElementSize / 8;
} else {
@@ -1278,7 +1317,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
case Type::DoubleTyID:
return "f64";
case Type::PointerTyID:
- if (nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
if (useB4PTR)
return "b64";
else
@@ -1295,7 +1334,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1369,50 +1408,22 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
- else {
- std::string argName = I->getName();
- const char *p = argName.c_str();
- while (*p) {
- if (*p == '.')
- O << "_";
- else
- O << *p;
- p++;
- }
- }
+ O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- Function::const_arg_iterator I, E;
- int i = 0;
-
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
- O << *CurrentFnSym << "_param_" << paramIndex;
- return;
- }
-
- for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i == paramIndex) {
- printParamName(I, paramIndex, O);
- return;
- }
- }
- llvm_unreachable("paramIndex out of bound");
+ O << *CurrentFnSym << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
MVT thePointerTy = TLI->getPointerTy();
O << "(\n";
@@ -1431,21 +1442,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
@@ -1455,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1478,7 +1489,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Special handling for pointer arguments to kernel
O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
- if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
+ NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
@@ -1607,7 +1619,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
<< getFunctionNumber() << "[" << NumBytes << "];\n";
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
} else {
@@ -1655,7 +1667,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
}
}
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
}
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
@@ -1738,7 +1750,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -1754,12 +1766,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c =
- (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
@@ -1770,7 +1781,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1778,7 +1789,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
if (Cexpr->getOpcode() == Instruction::PtrToInt) {
Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr->getOperand(0));
aggBuffer->addZeros(4);
break;
}
@@ -1792,7 +1803,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1800,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
if (Cexpr->getOpcode() == Instruction::PtrToInt) {
Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr->getOperand(0));
aggBuffer->addZeros(8);
break;
}
@@ -1829,10 +1840,10 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
case Type::PointerTyID: {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- aggBuffer->addSymbol(GVar);
+ aggBuffer->addSymbol(GVar, GVar);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
aggBuffer->addZeros(s);
@@ -1862,7 +1873,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
int Bytes;
// Old constants
@@ -1973,6 +1984,212 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
return false;
}
+/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
+/// a copy from AsmPrinter::lowerConstant, except customized to only handle
+/// expressions that are representable in PTX and create
+/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
+const MCExpr *
+NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
+ MCContext &Ctx = OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ const MCSymbolRefExpr *Expr =
+ MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+ if (ProcessingGeneric) {
+ return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx);
+ } else {
+ return Expr;
+ }
+ }
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (!CE) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (C != CE)
+ return lowerConstantForGV(C, ProcessingGeneric);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+
+ case Instruction::AddrSpaceCast: {
+ // Strip the addrspacecast and pass along the operand
+ PointerType *DstTy = cast<PointerType>(CE->getType());
+ if (DstTy->getAddressSpace() == 0) {
+ return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
+ }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/ false,
+ !MF ? 0 : MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+
+ case Instruction::GetElementPtr: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
+
+ const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
+ ProcessingGeneric);
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+
+ case Instruction::IntToPtr: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ false/*ZExt*/);
+ return lowerConstantForGV(Op, ProcessingGeneric);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add: {
+ const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+ const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
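
Inside lowerConstantForGV's PtrToInt case, the mask ~0ULL >> (64 - InBits) keeps only the low InBits of the operand, so a narrower pointer folded into a wider integer slot truncates correctly. A short sanity check of that expression in plain C++, no LLVM types involved:

#include <cassert>
#include <cstdint>

int main() {
  unsigned InBits = 32;                  // e.g. a 32-bit pointer; the case
  uint64_t Mask = ~0ULL >> (64 - InBits); // only runs when InBits < 64
  assert(Mask == 0xffffffffULL);          // low 32 bits survive
}
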
+// Copy of MCExpr::print customized for NVPTX
+void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
+ switch (Expr.getKind()) {
+ case MCExpr::Target:
+ return cast<MCTargetExpr>(&Expr)->PrintImpl(OS);
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(Expr).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
+ const MCSymbol &Sym = SRE.getSymbol();
+ OS << Sym;
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
+ switch (UE.getOpcode()) {
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ printMCExpr(*UE.getSubExpr(), OS);
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
+ isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
+ printMCExpr(*BE.getLHS(), OS);
+ } else {
+ OS << '(';
+ printMCExpr(*BE.getLHS(), OS);
+ OS<< ')';
+ }
+
+ switch (BE.getOpcode()) {
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ default: llvm_unreachable("Unhandled binary operator");
+ }
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ printMCExpr(*BE.getRHS(), OS);
+ } else {
+ OS << '(';
+ printMCExpr(*BE.getRHS(), OS);
+ OS << ')';
+ }
+ return;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
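
printMCExpr's Add case above prints "X-42" rather than "X+-42" by letting a negative right-hand constant supply its own sign. The same decision in isolation; printSymPlus is illustrative only:

#include <cstdio>

void printSymPlus(const char *Sym, long long RHS) {
  if (RHS < 0)
    std::printf("%s%lld\n", Sym, RHS); // the negative constant brings the '-'
  else
    std::printf("%s+%lld\n", Sym, RHS);
}

int main() { printSymPlus("X", -42); printSymPlus("X", 42); }
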
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -2067,16 +2284,9 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader *reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename);
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2084,7 +2294,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
temp << " ";
temp << reader->readLine(line);
temp << "\n";
- this->OutStreamer.EmitRawText(Twine(temp.str()));
+ this->OutStreamer->EmitRawText(temp.str());
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index c11b579..301c686 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -40,6 +40,7 @@
// (subclass of MCStreamer).
namespace llvm {
+ class MCOperand;
class LineReader {
private:
@@ -86,14 +87,22 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
std::vector<unsigned char> buffer; // the buffer
SmallVector<unsigned, 4> symbolPosInBuffer;
SmallVector<const Value *, 4> Symbols;
+ // SymbolsBeforeStripping[i] is the original form of Symbols[i] before
+ // stripping pointer casts, i.e.,
+ // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts().
+ //
+ // We need to keep these values because AggBuffer::print decides whether to
+ // emit a "generic()" cast for Symbols[i] depending on the address space of
+ // SymbolsBeforeStripping[i].
+ SmallVector<const Value *, 4> SymbolsBeforeStripping;
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
bool EmitGeneric;
public:
- AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : size(_size), buffer(_size), O(_O), AP(_AP) {
+ AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), O(O), AP(AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
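
The comment block above pins the invariant Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts(); the unstripped form is kept only so print() can inspect its address space when deciding on a generic() wrapper. A minimal model of keeping the pair in lockstep, with Val standing in for llvm::Value:

#include <vector>

struct Val { int AddrSpace = 0; };

struct AggBuf {
  std::vector<const Val *> Symbols, SymbolsBeforeStripping;
  void addSymbol(const Val *Stripped, const Val *Original) {
    Symbols.push_back(Stripped);                // what gets a symbol emitted
    SymbolsBeforeStripping.push_back(Original); // decides generic() wrapping
  }
};

int main() {
  Val Orig{3}, Stripped{0};
  AggBuf B;
  B.addSymbol(&Stripped, &Orig);
  return B.SymbolsBeforeStripping[0]->AddrSpace; // 3: the pre-strip space
}
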
@@ -119,9 +128,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
- void addSymbol(const Value *GVar) {
+ void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
+ SymbolsBeforeStripping.push_back(GVarBeforeStripping);
numSymbols++;
}
void print() {
@@ -138,17 +148,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nSym = 0;
unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
unsigned int nBytes = 4;
- if (AP.nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
nBytes = 8;
for (pos = 0; pos < size; pos += nBytes) {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
const Value *v = Symbols[nSym];
+ const Value *v0 = SymbolsBeforeStripping[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.getSymbol(GVar);
- PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
- bool IsNonGenericPointer = false;
+ PointerType *PTy = dyn_cast<PointerType>(v0->getType());
+ bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
if (PTy && PTy->getAddressSpace() != 0) {
IsNonGenericPointer = true;
}
@@ -159,8 +170,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
} else {
O << *Name;
}
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
- O << *AP.lowerConstant(Cexpr);
+ } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
+ const MCExpr *Expr =
+ AP.lowerConstantForGV(cast<Constant>(CExpr), false);
+ AP.printMCExpr(*Expr, O);
} else
llvm_unreachable("symbol type unknown");
nSym++;
@@ -187,6 +200,7 @@ private:
const Function *F;
std::string CurrentFnName;
+ void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
@@ -211,7 +225,7 @@ private:
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
- void emitHeader(Module &M, raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionExternParamList(const MachineFunction &MF);
@@ -230,6 +244,10 @@ private:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &) override;
+
+ const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
+ void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
+
protected:
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
@@ -247,8 +265,10 @@ private:
typedef DenseMap<unsigned, unsigned> VRegMap;
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // cache the subtarget here.
- const NVPTXSubtarget &nvptxSubtarget;
+
+ // Cache the subtarget here.
+ const NVPTXSubtarget *nvptxSubtarget;
+
// Build the map between type name and ID based on module's type
// symbol table.
std::map<const Type *, std::string> TypeNameMap;
@@ -281,6 +301,8 @@ private:
MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
+ bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
LineReader *reader;
LineReader *getReader(std::string);
@@ -298,12 +320,12 @@ private:
bool EmitGeneric;
public:
- NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)),
+ EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
CurrentBankselLabelInBasicBlock = "";
reader = nullptr;
- EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
}
~NVPTXAsmPrinter() {
@@ -311,6 +333,15 @@ public:
delete reader;
}
+ bool runOnMachineFunction(MachineFunction &F) override {
+ nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
+ return AsmPrinter::runOnMachineFunction(F);
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
bool ignoreLoc(const MachineInstr &);
std::string getVirtualRegisterName(unsigned) const;
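
The pattern above recurs throughout this merge: passes stop caching a subtarget
reference at construction time, because after r238337 one TargetMachine may serve
functions built for different subtargets. A minimal sketch of the idiom, with a
hypothetical ExamplePrinter standing in for any MachineFunction-level pass:

    class ExamplePrinter : public AsmPrinter {
      // Only valid for the function currently being emitted.
      const NVPTXSubtarget *STI = nullptr;

    public:
      bool runOnMachineFunction(MachineFunction &MF) override {
        STI = &MF.getSubtarget<NVPTXSubtarget>(); // refresh per function
        return AsmPrinter::runOnMachineFunction(MF);
      }
    };

Output that is not tied to any one function (the module header, global
variables) instead queries the NVPTXTargetMachine directly, as the is64Bit()
calls above do.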
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 962b123..7d4be8e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -19,8 +19,8 @@
#include "NVPTX.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
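
The include swap is another recurring change: llvm/PassManager.h is gone, and
the old pass manager now lives in llvm/IR/LegacyPassManager.h under the legacy
namespace. A minimal sketch of a caller adapting to the rename
(createExamplePass is hypothetical):

    #include "llvm/IR/LegacyPassManager.h" // was "llvm/PassManager.h"

    void runExample(llvm::Module &M) {
      llvm::legacy::PassManager PM;      // was llvm::PassManager
      PM.add(llvm::createExamplePass()); // hypothetical pass factory
      PM.run(M);
    }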
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..ae63cae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,19 +123,17 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
// =>
// %0 = gep X, indices
// %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
- Indices,
- GEP->getName(),
- GEPI);
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+ GEP->getName(), GEPI);
NewGEPI->setIsInBounds(GEP->isInBounds());
GEP->replaceAllUsesWith(
new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
} else {
// GEP is a constant expression.
Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
- cast<Constant>(Cast->getOperand(0)),
- Indices,
- GEP->isInBounds());
+ GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
+ Indices, GEP->isInBounds());
GEP->replaceAllUsesWith(
ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
}
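
Both call sites pick up the new GEP API: GetElementPtrInst::Create and
ConstantExpr::getGetElementPtr now take the source element type explicitly
rather than deriving it from the pointer operand, which is what lets the
hoisted GEP keep its element type while the operand's address space changes
underneath it. A minimal sketch of the new shape (ElemTy, Ptr, Indices, and
InsertBefore are placeholders):

    // The pointee type is passed in instead of being read off Ptr->getType().
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        ElemTy, Ptr, Indices, "idx", InsertBefore);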
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 314df38..5503494 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,15 +26,15 @@
using namespace llvm;
-NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- is64bit(STI.is64Bit()) {}
+NVPTXFrameLowering::NVPTXFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
-void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
if (MF.getFrameInfo()->hasStackObjects()) {
- MachineBasicBlock &MBB = MF.front();
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
// Insert "mov.u32 %SP, %Depot"
MachineBasicBlock::iterator MBBI = MBB.begin();
     // This instruction really occurs before the first instruction
@@ -45,7 +45,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
- if (is64bit) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0846b78..14f8bb7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -19,18 +19,16 @@
namespace llvm {
class NVPTXSubtarget;
class NVPTXFrameLowering : public TargetFrameLowering {
- bool is64bit;
-
public:
- explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
+ explicit NVPTXFrameLowering();
bool hasFP(const MachineFunction &MF) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
};
} // End llvm namespace
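
emitPrologue now receives the target block explicitly so that shrink-wrapped
targets can emit the prologue outside the entry block; targets that have not
opted in, NVPTX among them, simply assert that the two coincide. A sketch of
the adapted override (ExampleFrameLowering is hypothetical):

    void ExampleFrameLowering::emitPrologue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
      // No shrink-wrapping support yet: the prologue block is the entry block.
      assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
      MachineBasicBlock::iterator MBBI = MBB.begin(); // insert at top of MBB
      // ... emit the prologue into MBB ...
    }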
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 5f06d9a..6fd09c4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -22,10 +22,10 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
-#include "llvm/PassManager.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -343,9 +343,11 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// GetElementPtrConstantExpr
return cast<GEPOperator>(C)->isInBounds()
? Builder.CreateGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1));
case Instruction::Select:
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd0422d..fa38a68 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -50,11 +50,15 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {
doMulWide = (OptLevel > 0);
}
+bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
int NVPTXDAGToDAGISel::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -74,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
@@ -89,16 +90,14 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
const Function *F = MF->getFunction();
// Otherwise, check for an nvptx-f32ftz attribute on the function
if (F->hasFnAttribute("nvptx-f32ftz"))
- return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
- "nvptx-f32ftz")
- .getValueAsString() == "true");
+ return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
else
return false;
}
}
bool NVPTXDAGToDAGISel::allowFMA() const {
- const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
+ const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
return TL->allowFMA(*MF, OptLevel);
}
@@ -525,8 +524,7 @@ SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
}
}
-static unsigned int getCodeAddrSpace(MemSDNode *N,
- const NVPTXSubtarget &Subtarget) {
+static unsigned int getCodeAddrSpace(MemSDNode *N) {
const Value *Src = N->getMemOperand()->getValue();
if (!Src)
@@ -579,20 +577,16 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (SrcAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
- : NVPTX::cvta_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
- : NVPTX::cvta_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
- : NVPTX::cvta_const_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
- : NVPTX::cvta_local_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -604,20 +598,20 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (DstAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
- : NVPTX::cvta_to_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
+ : NVPTX::cvta_to_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
- : NVPTX::cvta_to_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+ : NVPTX::cvta_to_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
- : NVPTX::cvta_to_const_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
- : NVPTX::cvta_to_local_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -638,7 +632,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD);
// Volatile Setting
   // - .volatile is only available for .global and .shared
@@ -709,13 +703,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
- : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_asi;
@@ -738,14 +731,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_ari_64;
@@ -792,12 +784,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_areg_64;
@@ -844,9 +836,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), N1, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), N1, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -874,7 +866,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
// Volatile Setting
   // - .volatile is only available for .global and .shared
@@ -970,13 +962,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1024,14 +1015,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1127,13 +1117,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1229,9 +1219,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Op1, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
@@ -1425,10 +1415,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1710,7 +1699,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2013,7 +2002,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST);
// Volatile Setting
   // - .volatile is only available for .global and .shared
@@ -2079,13 +2068,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Addr, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_asi;
@@ -2108,14 +2097,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_ari_64;
@@ -2162,12 +2151,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_areg_64;
@@ -2214,9 +2204,10 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), N2, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
@@ -2241,7 +2232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
EVT StoreVT = MemSD->getMemoryVT();
// Address Space Setting
- unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
report_fatal_error("Cannot store to pointer that points to constant "
@@ -2290,11 +2281,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
return nullptr;
}
- StOps.push_back(getI32Imm(IsVolatile));
- StOps.push_back(getI32Imm(CodeAddrSpace));
- StOps.push_back(getI32Imm(VecType));
- StOps.push_back(getI32Imm(ToType));
- StOps.push_back(getI32Imm(ToTypeWidth));
+ StOps.push_back(getI32Imm(IsVolatile, DL));
+ StOps.push_back(getI32Imm(CodeAddrSpace, DL));
+ StOps.push_back(getI32Imm(VecType, DL));
+ StOps.push_back(getI32Imm(ToType, DL));
+ StOps.push_back(getI32Imm(ToTypeWidth, DL));
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
@@ -2344,9 +2335,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
break;
}
StOps.push_back(Addr);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2395,10 +2385,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
}
StOps.push_back(Base);
StOps.push_back(Offset);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2496,7 +2485,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
StOps.push_back(Base);
StOps.push_back(Offset);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2725,13 +2714,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
SmallVector<SDValue, 2> Ops;
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, DL, VTs, Ops);
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
}
SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
@@ -2761,7 +2748,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
SmallVector<SDValue, 6> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 2));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
// Determine target opcode
@@ -2889,8 +2876,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 3));
- Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
@@ -2985,7 +2972,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// the selected StoreParam node.
case NVPTXISD::StoreParamU32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
MVT::i32, Ops[0], CvtNone);
@@ -2994,7 +2981,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
}
case NVPTXISD::StoreParamS32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
MVT::i32, Ops[0], CvtNone);
@@ -4742,6 +4729,7 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
+ SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Len;
@@ -4772,8 +4760,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
// How many bits are in our mask?
- uint64_t NumBits = CountTrailingOnes_64(MaskVal);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ uint64_t NumBits = countTrailingOnes(MaskVal);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
// We have a 'srl/and' pair, extract the effective start bit and length
@@ -4791,7 +4779,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// emitting the srl/and pair.
return NULL;
}
- Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
+ Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
} else {
// Do not handle the case where the shift amount (can be zero if no srl
// was found) is not constant. We could handle this case, but it would
@@ -4836,10 +4824,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
NumZeros = 0;
// The number of bits in the result bitfield will be the number of
// trailing ones (the AND) minus the number of bits we shift off
- NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+ NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
} else if (isShiftedMask_64(MaskVal)) {
NumZeros = countTrailingZeros(MaskVal);
- unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+ unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
// The number of bits in the result bitfield will be the number of
// trailing zeros plus the number of set bits in the mask minus the
// number of bits we shift off
@@ -4856,8 +4844,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
Val = AndLHS;
- Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
} else if (LHS->getOpcode() == ISD::SHL) {
// Here, we have a pattern like:
//
@@ -4897,10 +4885,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
Start =
- CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
+ CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
Len =
CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
- OuterShiftAmt, MVT::i32);
+ OuterShiftAmt, DL, MVT::i32);
if (N->getOpcode() == ISD::SRA) {
// If we have a arithmetic right shift, we need to use the signed bfe
@@ -4941,10 +4929,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
Val, Start, Len
};
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
-
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
}
// SelectDirectAddr - Match a direct address for DAG.
@@ -4976,7 +4961,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
@@ -5001,7 +4987,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
- Offset = CurDAG->getTargetConstant(0, mvt);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
@@ -5019,7 +5005,8 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
@@ -5056,15 +5043,15 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default:
return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
- OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
}
if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 69afcd7..fe20580 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -26,6 +26,7 @@ using namespace llvm;
namespace {
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+ const NVPTXTargetMachine &TM;
// If true, generate mul.wide from sext and mul
bool doMulWide;
@@ -43,11 +44,11 @@ public:
const char *getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
-
- const NVPTXSubtarget &Subtarget;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
@@ -70,8 +71,8 @@ private:
SDNode *SelectSurfaceIntrinsic(SDNode *N);
SDNode *SelectBFE(SDNode *N);
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
// Match direct address complex pattern.
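
Most of the churn in the selector is mechanical: the SelectionDAG constant
factories (getTargetConstant, getConstant, getIntPtrConstant) grew an SDLoc
parameter in r238337, so every immediate is now tagged with the debug location
of the node being selected, as the updated getI32Imm helper above shows. The
pattern, in a minimal sketch (N is the node under selection):

    SDLoc DL(N); // debug location carried over from the node being replaced
    SDValue Imm = CurDAG->getTargetConstant(4, DL, MVT::i32);
    // Before the merge: CurDAG->getTargetConstant(4, MVT::i32);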
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 093ba1a..805847a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -106,9 +106,9 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
- : TargetLowering(TM), nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI)
+ : TargetLowering(TM), nvTM(&TM), STI(STI) {
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
@@ -167,14 +167,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
- if (nvptxSubtarget.hasROT64()) {
+ if (STI.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
} else {
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
- if (nvptxSubtarget.hasROT32()) {
+ if (STI.hasROT32()) {
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTR, MVT::i32, Legal);
} else {
@@ -259,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // PTX does not directly support SELP of i1, so promote to i32 first
+ setOperationAction(ISD::SELECT, MVT::i1, Custom);
+
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AND);
@@ -268,17 +271,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
// Now deduce the information based on the above mentioned
// actions
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- return nullptr;
+ switch ((NVPTXISD::NodeType)Opcode) {
+ case NVPTXISD::FIRST_NUMBER:
+ break;
case NVPTXISD::CALL:
return "NVPTXISD::CALL";
case NVPTXISD::RET_FLAG:
return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::LOAD_PARAM:
+ return "NVPTXISD::LOAD_PARAM";
case NVPTXISD::Wrapper:
return "NVPTXISD::Wrapper";
case NVPTXISD::DeclareParam:
@@ -287,10 +292,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::DeclareScalarParam";
case NVPTXISD::DeclareRet:
return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareScalarRet:
+ return "NVPTXISD::DeclareScalarRet";
case NVPTXISD::DeclareRetParam:
return "NVPTXISD::DeclareRetParam";
case NVPTXISD::PrintCall:
return "NVPTXISD::PrintCall";
+ case NVPTXISD::PrintCallUni:
+ return "NVPTXISD::PrintCallUni";
case NVPTXISD::LoadParam:
return "NVPTXISD::LoadParam";
case NVPTXISD::LoadParamV2:
@@ -363,6 +372,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
+ case NVPTXISD::Dummy:
+ return "NVPTXISD::Dummy";
case NVPTXISD::MUL_WIDE_SIGNED:
return "NVPTXISD::MUL_WIDE_SIGNED";
case NVPTXISD::MUL_WIDE_UNSIGNED:
@@ -852,6 +863,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
}
+ return nullptr;
}
TargetLoweringBase::LegalizeTypeAction
@@ -876,7 +888,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
unsigned retAlignment,
const ImmutableCallSite *CS) const {
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return "";
@@ -927,7 +939,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
}
first = false;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
@@ -1041,7 +1053,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *retTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -1050,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const Function *F = MF.getFunction();
SDValue tempChain = Chain;
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, dl, true),
+ dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1072,7 +1084,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
@@ -1083,9 +1095,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = TD->getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32), InFlag };
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
+ MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1100,8 +1114,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(Offsets[j], MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(Offsets[j], dl, MVT::i32),
StVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
CopyParamVTs, CopyParamOps,
@@ -1121,9 +1135,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = TD->getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32), InFlag };
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(align, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1144,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), Elt,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), Elt,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
CopyParamVTs, CopyParamOps,
@@ -1161,9 +1177,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), Elt0, Elt1,
- InFlag };
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), Elt0,
+ Elt1, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
@@ -1193,8 +1209,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue StoreVal;
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
- Ops.push_back(DAG.getConstant(curOffset, MVT::i32));
+ Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
+ Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
unsigned Opc = NVPTXISD::StoreParamV2;
@@ -1261,9 +1277,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1276,8 +1292,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutV, InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), OutV,
+ InFlag };
unsigned opcode = NVPTXISD::StoreParam;
if (Outs[OIdx].Flags.isZExt())
@@ -1306,9 +1324,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
- InFlag
+ Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag
};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
@@ -1319,7 +1337,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
SDValue srcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, getPointerTy()));
+ DAG.getConstant(curOffset, dl, getPointerTy()));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
MachinePointerInfo(), false, false, false,
PartAlign);
@@ -1327,9 +1345,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32), theVal,
- InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(curOffset, dl, MVT::i32),
+ theVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, elemtype,
MachinePointerInfo());
@@ -1361,9 +1380,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (resultsz < 32)
resultsz = 32;
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(resultsz, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(resultsz, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1371,9 +1390,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, MVT::i32),
- DAG.getConstant(resultsz / 8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ DAG.getConstant(retAlignment, dl, MVT::i32),
+ DAG.getConstant(resultsz / 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1401,7 +1420,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrintCallOps[] = {
- Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
+ Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
};
Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
dl, PrintCallVTs, PrintCallOps);
@@ -1427,20 +1446,22 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(i, MVT::i32), InFlag };
+ SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(i, dl, MVT::i32), InFlag };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ SDValue CallArgEndOps[] = { Chain,
+ DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
+ SDValue PrototypeOps[] = { Chain,
+ DAG.getConstant(uniqueCallSite, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
@@ -1452,11 +1473,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
- F->getContext(), ObjectVT) == NumElts &&
+ assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
+ ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (NumElts == 1) {
// Just a simple load
@@ -1471,11 +1492,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(EltVT);
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1501,11 +1519,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParamV2, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1547,11 +1562,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
Opc, dl, DAG.getVTList(LoadRetVTs),
LoadRetOps, EltVT, MachinePointerInfo());
@@ -1583,7 +1595,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1605,11 +1617,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Offsets[i], dl, MVT::i32),
+ InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps,
@@ -1624,8 +1634,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
- DAG.getIntPtrConstant(uniqueCallSite + 1, true),
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, dl, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
+ true),
InFlag, dl);
uniqueCallSite++;
@@ -1651,7 +1663,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
unsigned NumSubElem = VVT.getVectorNumElements();
for (unsigned j = 0; j < NumSubElem; ++j) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
- DAG.getIntPtrConstant(j)));
+ DAG.getIntPtrConstant(j, dl)));
}
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
@@ -1675,7 +1687,7 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShAmt = Op.getOperand(2);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} >> Amt
@@ -1700,16 +1712,18 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
// dHi = aHi >> Amt
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
- DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ISD::SETGE);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
@@ -1735,7 +1749,7 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} << Amt
@@ -1760,16 +1774,18 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
// dHi = (aHi << Amt) | (aLo >> (size-Amt))
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
- DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ISD::SETGE);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
@@ -1803,11 +1819,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
+ case ISD::SELECT:
+ return LowerSelect(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Op0 = Op->getOperand(0);
+ SDValue Op1 = Op->getOperand(1);
+ SDValue Op2 = Op->getOperand(2);
+ SDLoc DL(Op.getNode());
+
+ assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+ return Trunc;
+}
+
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
@@ -1924,16 +1958,14 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// Then the split values
for (unsigned i = 0; i < NumElts; ++i) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
- DAG.getIntPtrConstant(i));
+ DAG.getIntPtrConstant(i, DL));
if (NeedExt)
ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
Ops.push_back(ExtVal);
}
// Then any remaining arguments
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.append(N->op_begin() + 2, N->op_end());
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
@@ -2029,13 +2061,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
+ const TargetLowering *TLI = STI.getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
bool isKernel = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -2070,7 +2102,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
: nullptr))) {
assert(isKernel && "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
+ InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
continue;
}
@@ -2109,7 +2141,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
@@ -2132,7 +2164,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
llvm::ADDRESS_SPACE_PARAM));
SDValue srcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(offsets[parti], getPointerTy()));
+ DAG.getConstant(offsets[parti], dl, getPointerTy()));
unsigned partAlign =
aggregateIsPacked ? 1
: TD->getABITypeAlignment(
@@ -2197,9 +2229,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
P.getNode()->setIROrder(idx + 1);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
@@ -2232,7 +2264,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
llvm::ADDRESS_SPACE_PARAM));
SDValue SrcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
+ DAG.getConstant(Ofst, dl, getPointerTy()));
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
false, true,
@@ -2244,7 +2276,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (i + j >= NumElts)
break;
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(j));
+ DAG.getIntPtrConstant(j, dl));
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
@@ -2302,7 +2334,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
else {
SDValue p2 = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
InVals.push_back(p2);
}
}
@@ -2333,7 +2365,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Type *RetTy = F->getReturnType();
const DataLayout *TD = getDataLayout();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -2356,7 +2388,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// We only have one element, so just directly store it
if (NeedExtend)
StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
+ SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
@@ -2371,7 +2403,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
}
- SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
+ SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
StoreVal1 };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
DAG.getVTList(MVT::Other), Ops,
@@ -2403,7 +2435,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue StoreVal;
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(Offset, MVT::i32));
+ Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
unsigned Opc = NVPTXISD::StoreRetvalV2;
EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
@@ -2468,7 +2500,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (TheValType.isVector())
TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
TheValType.getVectorElementType(), TmpVal,
- DAG.getIntPtrConstant(j));
+ DAG.getIntPtrConstant(j, dl));
EVT TheStoreType = ValVTs[i];
if (RetTy->isIntegerTy() &&
TD->getTypeAllocSizeInBits(RetTy) < 32) {
@@ -2482,7 +2514,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = {
Chain,
- DAG.getConstant(Offsets[i], MVT::i32),
+ DAG.getConstant(Offsets[i], dl, MVT::i32),
TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
@@ -3753,7 +3785,8 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
}
std::pair<unsigned, const TargetRegisterClass *>
-NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3774,7 +3807,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -3885,7 +3918,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const SDNode *left = N0.getOperand(0).getNode();
const SDNode *right = N0.getOperand(1).getNode();
- if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
+ if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
opIsLive = true;
if (!opIsLive)
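The isa<>/dyn_cast<> change above is the standard LLVM casting idiom: when the cast result is only tested against null and never used, isa<> states the intent directly and produces no unused pointer. A standalone sketch with plain C++ RTTI standing in for LLVM's casting machinery (class names are made up for illustration):

#include <iostream>

struct Node { virtual ~Node() = default; };
struct ConstantNode : Node {};

// dyn_cast-style check: a pointer is produced but only tested for null.
bool isConstantViaCast(const Node *N) {
  return dynamic_cast<const ConstantNode *>(N) != nullptr;
}

// isa-style check: same behavior, but the pure type test is visible
// at the call site.
template <typename T> bool isa(const Node *N) {
  return dynamic_cast<const T *>(N) != nullptr;
}

int main() {
  ConstantNode C;
  Node N;
  std::cout << isConstantViaCast(&C) << isa<ConstantNode>(&N) << "\n"; // 10
  return 0;
}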
@@ -4103,6 +4136,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
return SDValue();
}
+ SDLoc DL(N);
unsigned OptSize = MulType.getSizeInBits() >> 1;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -4125,7 +4159,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
unsigned BitWidth = MulType.getSizeInBits();
if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
- RHS = DCI.DAG.getConstant(MulVal, MulType);
+ RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
} else {
return SDValue();
}
@@ -4147,9 +4181,9 @@ static SDValue TryMULWIDECombine(SDNode *N,
// Truncate the operands to the correct size. Note that these are just for
// type consistency and will (likely) be eliminated in later phases.
SDValue TruncLHS =
- DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
+ DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
SDValue TruncRHS =
- DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
+ DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
unsigned Opc;
if (Signed) {
@@ -4158,7 +4192,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
}
- return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
+ return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
}
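The shift-handling hunk above rewrites (x << c) as a multiply by (1 << c), so the whole node can then become a single widening multiply of half-width operands. A standalone sketch of the arithmetic identity being exploited (plain integers stand in for SDValues):

#include <cassert>
#include <cstdint>

// mul.wide semantics: multiply two 32-bit values, produce a 64-bit result.
uint64_t mulWideUnsigned(uint32_t a, uint32_t b) {
  return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
}

int main() {
  uint32_t x = 0x12345678;
  unsigned shift = 5;

  // The combine treats (x << 5) as x * (1 << 5), which matches a
  // single widening multiply when x fits in the demoted type.
  uint64_t viaShift = static_cast<uint64_t>(x) << shift;
  uint64_t viaMulWide = mulWideUnsigned(x, 1u << shift);

  assert(viaShift == viaMulWide);
  return 0;
}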
/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
@@ -4196,7 +4230,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD:
case ISD::FADD:
- return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
+ return PerformADDCombine(N, DCI, STI, OptLevel);
case ISD::MUL:
return PerformMULCombine(N, DCI, OptLevel);
case ISD::SHL:
@@ -4281,15 +4315,12 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
}
}
- SmallVector<SDValue, 8> OtherOps;
-
// Copy regular operands
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
- OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
LD->getMemoryVT(),
@@ -4398,8 +4429,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
OtherOps.push_back(Chain); // Chain
// Skip operand 1 (intrinsic ID)
// Others
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ OtherOps.append(N->op_begin() + 2, N->op_end());
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
@@ -4430,9 +4460,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
"Custom handling of non-i8 ldu/ldg?");
// Just copy all operands as-is
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
+ SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
// Force output to i16
SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
@@ -4490,10 +4518,9 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfLocSection;
delete DwarfARangesSection;
delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
}
-const MCSection *
+MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index b3fea3f..5142ae3 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -21,7 +21,7 @@
namespace llvm {
namespace NVPTXISD {
-enum NodeType {
+enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
Wrapper,
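Giving NodeType an explicit unsigned underlying type pins down its representation: target node numbering starts past ISD::BUILTIN_OP_END, and the typed enum keeps storage and comparisons well-defined. A standalone sketch (the values are illustrative):

#include <type_traits>

enum NodeType : unsigned {
  FIRST_NUMBER = 0x1000, // stand-in for ISD::BUILTIN_OP_END
  Wrapper,
  CallArgBegin
};

static_assert(std::is_same<std::underlying_type<NodeType>::type,
                           unsigned>::value,
              "underlying type is pinned to unsigned");

int main() {
  NodeType N = Wrapper;
  return N == FIRST_NUMBER + 1 ? 0 : 1; // numbering continues past the base
}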
@@ -436,7 +436,8 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -469,7 +470,8 @@ public:
ConstraintType
getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
SDValue LowerFormalArguments(
@@ -495,6 +497,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
@@ -510,7 +518,7 @@ public:
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
EVT = MVT::i32) const;
@@ -529,6 +537,8 @@ private:
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 740ca03..dabc3be 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -28,9 +28,7 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
-// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
- : NVPTXGenInstrInfo(), RegInfo(STI) {}
+NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 6de7536..9b5d491 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -27,7 +27,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
const NVPTXRegisterInfo RegInfo;
virtual void anchor();
public:
- explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
+ explicit NVPTXInstrInfo();
const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 2c571c4..6fdd60f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -117,24 +117,24 @@ def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
//===----------------------------------------------------------------------===//
-def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
-def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
-def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
def useAtomRedG32forGen32 :
- Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
-def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
-def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
-def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
-def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+ Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
- Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
-def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
-def hasVote : Predicate<"Subtarget.hasVote()">;
-def hasDouble : Predicate<"Subtarget.hasDouble()">;
-def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
-def hasLDG : Predicate<"Subtarget.hasLDG()">;
-def hasLDU : Predicate<"Subtarget.hasLDU()">;
-def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+ Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget->hasVote()">;
+def hasDouble : Predicate<"Subtarget->hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
+def hasLDG : Predicate<"Subtarget->hasLDG()">;
+def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -150,12 +150,12 @@ def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
-def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
-def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
+def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def true : Predicate<"1">;
-def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
//===----------------------------------------------------------------------===//
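The predicate strings switch from Subtarget. to Subtarget-> because, after this merge, the instruction selector apparently holds the subtarget as a pointer that is re-established per machine function rather than a reference bound at construction. A standalone sketch of that ownership pattern (the class and member names are stand-ins, not the patch's):

#include <iostream>

struct Subtarget {
  bool hasLDG;
};

class Selector {
  const Subtarget *ST = nullptr; // pointer: rebound on every function
public:
  void beginFunction(const Subtarget &S) { ST = &S; }
  bool canUseLDG() const { return ST->hasLDG; } // "Subtarget->hasLDG()"
};

int main() {
  Subtarget sm20{false}, sm35{true};
  Selector Sel;
  Sel.beginFunction(sm20);
  std::cout << Sel.canUseLDG();
  Sel.beginFunction(sm35); // same selector object, different subtarget
  std::cout << Sel.canUseLDG() << "\n"; // prints 01
  return 0;
}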
@@ -452,13 +452,13 @@ def Int4Const : PatLeaf<(imm), [{
def SHL2MUL32 : SDNodeXForm<imm, [{
const APInt &v = N->getAPIntValue();
APInt temp(32, 1);
- return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
+ return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
}]>;
def SHL2MUL16 : SDNodeXForm<imm, [{
const APInt &v = N->getAPIntValue();
APInt temp(16, 1);
- return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
+ return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;
def MULWIDES64
@@ -1138,7 +1138,7 @@ def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
[]>;
def SUB_FRM_32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
@@ -1189,7 +1189,7 @@ def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
[]>;
def SUB_FRM_64 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
@@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
-// Special select for predicate operands
-def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
- (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
- (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-
//
// Funnel shift in clamp mode
//
@@ -1659,12 +1654,12 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
-defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
+defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
+defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
+defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
+defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
+defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
+defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
@@ -1673,6 +1668,13 @@ defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
+defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
+defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
+defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
+defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
+defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
+defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
+
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
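The renaming above separates three float-comparison families that were previously conflated: ordered (setoXX, false if either operand is NaN), unordered (setuXX, true if either operand is NaN), and the don't-care forms (setXX). A standalone sketch of the semantic difference (the isnan checks are written out to make the ordered semantics explicit, even though a > b is already false for NaN in C++):

#include <cmath>
#include <iostream>

// Ordered greater-than: false whenever a NaN is involved.
bool cmpOGT(float a, float b) {
  return !std::isnan(a) && !std::isnan(b) && a > b;
}

// Unordered greater-than: true whenever a NaN is involved.
bool cmpUGT(float a, float b) {
  return std::isnan(a) || std::isnan(b) || a > b;
}

int main() {
  float nan = std::nanf("");
  std::cout << cmpOGT(1.0f, nan)  // 0: ordered fails on NaN
            << cmpUGT(1.0f, nan)  // 1: unordered succeeds on NaN
            << cmpOGT(2.0f, 1.0f) // 1
            << cmpUGT(1.0f, 2.0f) // 0
            << "\n";
  return 0;
}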
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f0c3663..6ab0fad 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -22,10 +24,33 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "nvptx"
using namespace llvm;
-namespace llvm { FunctionPass *createLowerAggrCopies(); }
+namespace {
+// The actual analysis class, which is a FunctionPass.
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ const char *getPassName() const override {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+} // namespace
char NVPTXLowerAggrCopies::ID = 0;
@@ -45,8 +70,8 @@ static void convertTransferToLoop(
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -59,9 +84,11 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
- Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
+ srcVolatile);
// store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+ loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
+ dstVolatile);
// The value for ind coming from backedge is (ind + 1)
Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
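convertTransferToLoop rewrites an aggregate copy into an explicit byte-copy loop; the hunk also migrates CreateGEP/CreateLoad to the explicitly-typed forms, where the element type is passed in rather than derived from the pointer. The IR loop it emits computes what this standalone sketch does:

#include <cassert>
#include <cstddef>
#include <cstring>

// For ind = 0..len-1: load from src + ind, store to dst + ind.
// 'ind' corresponds to the PHI node, with the backedge carrying ind + 1.
void transferLoop(const char *src, char *dst, std::size_t len) {
  for (std::size_t ind = 0; ind != len; ++ind)
    dst[ind] = src[ind];
}

int main() {
  const char src[] = "aggregate copy lowered to a loop";
  char dst[sizeof(src)] = {};
  transferLoop(src, dst, sizeof(src));
  assert(std::memcmp(src, dst, sizeof(src)) == 0);
  return 0;
}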
@@ -81,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
dstAddr =
@@ -91,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
PHINode *ind = loop.CreatePHI(len->getType(), 0);
ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+ loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
ind->addIncoming(newind, loopBB);
@@ -104,7 +131,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<MemTransferInst *, 4> aggrMemcpys;
SmallVector<MemSetInst *, 4> aggrMemsets;
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
//
@@ -117,10 +144,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false)
+ if (!load->hasOneUse())
continue;
- if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
continue;
User *use = load->user_back();
@@ -166,7 +193,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL->getTypeStoreSize(load->getType());
+ unsigned numLoads = DL.getTypeStoreSize(load->getType());
Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index da301d5..3c39f53 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
+class FunctionPass;
-// actual analysis class, which is a functionpass
-struct NVPTXLowerAggrCopies : public FunctionPass {
- static char ID;
-
- NVPTXLowerAggrCopies() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- bool runOnFunction(Function &F) override;
-
- static const unsigned MaxAggrCopySize = 128;
-
- const char *getPassName() const override {
- return "Lower aggregate copies/intrinsics into loops";
- }
-};
-
-extern FunctionPass *createLowerAggrCopies();
+FunctionPass *createLowerAggrCopies();
}
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
index 3149399..68dfbb7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -35,7 +35,8 @@ namespace llvm {
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
-class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+namespace {
+class NVPTXLowerStructArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
void handleStructPtrArgs(Function &);
@@ -48,6 +49,7 @@ public:
return "Copy structure (byval *) arguments to stack";
}
};
+} // namespace
char NVPTXLowerStructArgs::ID = 1;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 137248b..779b65e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -45,3 +45,13 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
OS << std::string(NumHex - HexStr.length(), '0');
OS << utohexstr(API.getZExtValue());
}
+
+const NVPTXGenericMCSymbolRefExpr*
+NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr,
+ MCContext &Ctx) {
+ return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr);
+}
+
+void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ OS << "generic(" << *SymExpr << ")";
+}
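The new expression class exists purely so the assembly printer emits the symbol wrapped in generic(...), the PTX spelling for taking the generic-address-space view of a symbol. A standalone sketch of the wrapper-print pattern, with std::ostream standing in for raw_ostream and made-up type names:

#include <iostream>
#include <string>

struct SymbolRef { std::string Name; };

std::ostream &operator<<(std::ostream &OS, const SymbolRef &S) {
  return OS << S.Name;
}

// Wrapper that changes nothing but how the wrapped symbol prints.
struct GenericSymbolRef {
  const SymbolRef *Sym;
};

std::ostream &operator<<(std::ostream &OS, const GenericSymbolRef &G) {
  return OS << "generic(" << *G.Sym << ")"; // mirrors PrintImpl above
}

int main() {
  SymbolRef S{"my_global"};
  std::cout << GenericSymbolRef{&S} << "\n"; // generic(my_global)
  return 0;
}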
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index d39a394..8c6b219 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -29,8 +29,8 @@ private:
const VariantKind Kind;
const APFloat Flt;
- explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt)
- : Kind(_Kind), Flt(_Flt) {}
+ explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt)
+ : Kind(Kind), Flt(Flt) {}
public:
/// @name Construction
@@ -68,9 +68,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- const MCSection *FindAssociatedSection() const override {
- return nullptr;
- }
+ MCSection *FindAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -79,6 +77,48 @@ public:
return E->getKind() == MCExpr::Target;
}
};
+
+/// A wrapper for MCSymbolRefExpr that tells the assembly printer that the
+/// symbol should be enclosed by generic().
+class NVPTXGenericMCSymbolRefExpr : public MCTargetExpr {
+private:
+ const MCSymbolRefExpr *SymExpr;
+
+ explicit NVPTXGenericMCSymbolRefExpr(const MCSymbolRefExpr *_SymExpr)
+ : SymExpr(_SymExpr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const NVPTXGenericMCSymbolRefExpr
+ *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ const MCSymbolRefExpr *getSymbolExpr() const { return SymExpr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const override;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
+ return false;
+ }
+ void visitUsedExpr(MCStreamer &Streamer) const override {};
+ MCSection *FindAssociatedSection() const override { return nullptr; }
+
+ // There are no TLS NVPTXMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+ };
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a1e1b9e..5fd69a6 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -48,9 +48,9 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *STI.getFrameLowering();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
@@ -68,7 +68,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
}
// Add function prolog/epilog
- TFI.emitPrologue(MF);
+ TFI.emitPrologue(MF, MF.front());
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 358ccce..6e97f9e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -71,15 +71,14 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
}
-NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
+NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
const MCPhysReg *
-NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
index d2e6733..c310a9c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -22,27 +22,20 @@
#include "NVPTXGenRegisterInfo.inc"
namespace llvm {
-
-// Forward Declarations.
-class TargetInstrInfo;
-class NVPTXSubtarget;
-
class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
- bool Is64Bit;
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const NVPTXSubtarget &st);
+ NVPTXRegisterInfo();
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
//------------------------------------------------------
// NVPTX callee saved registers
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index b7f53c7..e83f735 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -142,8 +143,9 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
- const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
- if (ST.getDrvInterface() == NVPTX::CUDA) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ if (TM.getDrvInterface() == NVPTX::CUDA) {
// For CUDA, we preserve the param loads coming from function arguments
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index f1d3cb4..0d2627d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -26,7 +26,7 @@ namespace llvm {
class NVPTXSection : public MCSection {
virtual void anchor();
public:
- NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
@@ -36,11 +36,8 @@ public:
const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- bool isBaseAddressKnownZero() const override { return true; }
bool UseCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
- std::string getLabelBeginName() const override { return ""; }
- std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 3d52532..069d6e1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
using namespace llvm;
@@ -25,17 +26,6 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
-static std::string computeDataLayout(bool is64Bit) {
- std::string Ret = "e";
-
- if (!is64Bit)
- Ret += "-p:32:32";
-
- Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
- return Ret;
-}
-
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
@@ -54,18 +44,18 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool is64Bit)
- : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
- SmVersion(20), DL(computeDataLayout(is64Bit)),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
- FrameLowering(*this) {
-
- Triple T(TT);
-
- if (T.getOS() == Triple::NVCL)
- drvInterface = NVPTX::NVCL;
- else
- drvInterface = NVPTX::CUDA;
+ const std::string &FS,
+ const NVPTXTargetMachine &TM)
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
+ InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
+ TSInfo(TM.getDataLayout()), FrameLowering() {}
+
+bool NVPTXSubtarget::hasImageHandles() const {
+ // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+ // textures
+ if (TM.getDrvInterface() == NVPTX::CUDA)
+ return (SmVersion >= 30);
+
+ // Disabled, otherwise
+ return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index fb2d404..e9833e5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -32,8 +32,6 @@ namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
virtual void anchor();
std::string TargetName;
- NVPTX::DrvInterface drvInterface;
- bool Is64Bit;
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;
@@ -41,7 +39,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned int SmVersion;
- const DataLayout DL; // Calculates type size & alignment
+ const NVPTXTargetMachine &TM;
NVPTXInstrInfo InstrInfo;
NVPTXTargetLowering TLInfo;
TargetSelectionDAGInfo TSInfo;
@@ -55,13 +53,12 @@ public:
/// of the specified module.
///
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM, bool is64Bit);
+ const std::string &FS, const NVPTXTargetMachine &TM);
const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -95,20 +92,9 @@ public:
}
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
- bool hasImageHandles() const {
- // Enable handles for Kepler+, where CUDA supports indirect surfaces and
- // textures
- if (getDrvInterface() == NVPTX::CUDA)
- return (SmVersion >= 30);
-
- // Disabled, otherwise
- return false;
- }
- bool is64Bit() const { return Is64Bit; }
+ bool hasImageHandles() const;
unsigned int getSmVersion() const { return SmVersion; }
- NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
std::string getTargetName() const { return TargetName; }
unsigned getPTXVersion() const { return PTXVersion; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index c7f9507..ac27c30 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
+#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -24,12 +25,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
@@ -49,6 +50,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
@@ -63,20 +65,37 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
+static std::string computeDataLayout(bool is64Bit) {
+ std::string Ret = "e";
+
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+ return Ret;
+}
+
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
+ if (Triple(TT).getOS() == Triple::NVCL)
+ drvInterface = NVPTX::NVCL;
+ else
+ drvInterface = NVPTX::CUDA;
initAsmInfo();
}
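computeDataLayout now feeds its string to the TargetMachine constructor, making the layout a property of the target machine rather than the subtarget. Reading the string itself: e means little-endian, p:32:32 means 32-bit pointers with 32-bit ABI alignment (emitted only for the 32-bit variant), i64:64 gives i64 64-bit alignment, and n16:32:64 lists the native integer widths. A standalone sketch mirroring the moved function:

#include <cassert>
#include <string>

// Mirrors the relocated computeDataLayout(): 64-bit targets omit the
// pointer override and keep the default 64-bit pointers.
std::string computeDataLayout(bool is64Bit) {
  std::string Ret = "e";            // little-endian
  if (!is64Bit)
    Ret += "-p:32:32";              // 32-bit pointers, 32-bit aligned
  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
  return Ret;
}

int main() {
  assert(computeDataLayout(false) ==
         "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64");
  assert(computeDataLayout(true) == "e-i64:64-v16:16-v32:32-n16:32:64");
  return 0;
}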
@@ -124,12 +143,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
-void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
- // allows the NVPTX pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createNVPTXTargetTransformInfoPass(this));
+TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
}
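addAnalysisPasses is replaced by getTargetIRAnalysis, which returns a callback that builds a fresh TTI object per queried function instead of registering an ImmutablePass up front. A standalone sketch of that callback-based analysis pattern (the type names below are stand-ins, not LLVM's):

#include <functional>
#include <iostream>
#include <string>

struct Function { std::string Name; };
struct TTIResult { unsigned MulI64Cost; };

// Analogous to TargetIRAnalysis: holds a callback that produces the
// per-function result on demand.
class IRAnalysis {
  std::function<TTIResult(const Function &)> Callback;
public:
  explicit IRAnalysis(std::function<TTIResult(const Function &)> C)
      : Callback(std::move(C)) {}
  TTIResult run(const Function &F) const { return Callback(F); }
};

int main() {
  unsigned TargetMulCost = 2; // stand-in for target-specific state
  IRAnalysis A([TargetMulCost](const Function &) {
    return TTIResult{TargetMulCost}; // fresh result for each query
  });
  std::cout << A.run(Function{"kernel"}).MulI64Cost << "\n"; // 2
  return 0;
}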
void NVPTXPassConfig::addIRPasses() {
@@ -148,29 +164,27 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts and leaves
+ // them unused. We could remove dead code in an ad-hoc manner, but that
+ // requires manual work and might be error-prone.
+ addPass(createDeadCodeEliminationPass());
addPass(createSeparateConstOffsetFromGEPPass());
- // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
- // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
- // significantly better code than EarlyCSE for some of our benchmarks.
+  // ReassociateGEPs exposes more opportunities for SLSR. See
+ // the example in reassociate-geps-and-slsr.ll.
+ addPass(createStraightLineStrengthReducePass());
+  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
+ // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
+ // for some of our benchmarks.
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
else
addPass(createEarlyCSEPass());
- // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
- // some dead code. We could remove dead code in an ad-hoc manner, but that
- // requires manual work and might be error-prone.
- //
- // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
- // and leave them unused.
- //
- // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
- // old index and some of its intermediate results may become unused.
- addPass(createDeadCodeEliminationPass());
+ // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+ addPass(createNaryReassociatePass());
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index fa97ec8..2cd10e8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -25,7 +25,9 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -37,9 +39,12 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
-
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
+ const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ bool is64Bit() const { return is64bit; }
+ NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
@@ -47,7 +52,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through MCJIT is not supported.
- bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &,
bool = true) override {
return true;
}
@@ -55,8 +60,7 @@ public:
return TLOF.get();
}
- /// \brief Register NVPTX analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
}; // NVPTXTargetMachine.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 00ceca5..5ecdc87 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,7 +41,6 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
- DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -83,24 +82,22 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- const MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override {
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override {
return ReadOnlySection;
}
- const MCSection *getExplicitSectionGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override {
+ MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override {
return DataSection;
}
- const MCSection *
- SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b09d0d4..dc81802 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,19 +6,13 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// \file
-// This file implements a TargetTransformInfo analysis pass specific to the
-// NVPTX target machine. It uses the target's detailed information to provide
-// more precise answers to certain TTI queries, while letting the target
-// independent and default TTI implementations handle the rest.
-//
-//===----------------------------------------------------------------------===//
-#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetTransformInfo.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,69 +20,79 @@ using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeNVPTXTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
- const NVPTXTargetLowering *TLI;
-public:
- NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- NVPTXTTI(const NVPTXTargetMachine *TM)
- : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
+// Whether the given intrinsic reads threadIdx.x/y/z.
+static bool readsThreadIndex(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ return true;
}
+}
- void initializePass() override { pushTTIStack(this); }
+static bool readsLaneId(const IntrinsicInst *II) {
+ return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ return true;
}
+}
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+ // Without inter-procedural analysis, we conservatively assume that arguments
+ // to __device__ functions are divergent.
+ if (const Argument *Arg = dyn_cast<Argument>(V))
+ return !isKernelFunction(*Arg->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Without pointer analysis, we conservatively assume values loaded from
+ // generic or local address space are divergent.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+ }
+ // Atomic instructions may cause divergence. Atomic instructions are
+ // executed sequentially across all threads in a warp. Therefore, an earlier
+ // executed thread may see different memory inputs than a later executed
+ // thread. For example, suppose *a = 0 initially.
+ //
+ // atom.global.add.s32 d, [a], 1
+ //
+ // returns 0 for the first thread that enters the critical region, and 1 for
+ // the second thread.
+ if (I->isAtomic())
+ return true;
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Instructions that read threadIdx are obviously divergent.
+ if (readsThreadIndex(II) || readsLaneId(II))
+ return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+ // atomic IR instruction.
+ if (isNVVMAtomic(II))
+ return true;
+ }
+ // Conservatively consider the return value of function calls as divergent.
+ // We could analyze callees with bodies more precisely using
+ // inter-procedural analysis.
+ if (isa<CallInst>(I))
+ return true;
}
- bool hasBranchDivergence() const override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
- "NVPTX Target Transform Info", true, true, false)
-char NVPTXTTI::ID = 0;
-
-ImmutablePass *
-llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
- return new NVPTXTTI(TM);
+ return false;
}
-bool NVPTXTTI::hasBranchDivergence() const { return true; }
-
-unsigned NVPTXTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
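The isSourceOfDivergence logic above encodes a key GPU fact: an atomic read-modify-write returns a different value to each thread in a warp because the operations serialize, so its result is divergent even when every input is uniform. A standalone sketch of why, with std::atomic playing the role of the hardware atomic:

#include <atomic>
#include <iostream>

int main() {
  std::atomic<int> a{0};

  // Model a warp of 4 threads all executing: d = atom.add(a, 1).
  // The updates serialize, so each "thread" observes a distinct old
  // value; this is the source of divergence described in the comment.
  for (int tid = 0; tid < 4; ++tid) {
    int d = a.fetch_add(1);
    std::cout << "thread " << tid << " got " << d << "\n"; // 0, 1, 2, 3
  }
  return 0;
}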
@@ -96,8 +100,8 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -109,7 +113,7 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
if (LT.second.SimpleTy == MVT::i64)
return 2 * LT.first;
// Delegate other cases to the basic TTI.
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
}
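The i64 case charging 2 * LT.first reflects that a 64-bit integer op legalizes to roughly two 32-bit machine ops on this target. The classic instance is 64-bit addition built from two 32-bit adds plus a carry, sketched standalone:

#include <cassert>
#include <cstdint>

// A 64-bit add expressed as two 32-bit operations (add, then
// add-with-carry), which is why the cost model charges it as double
// the 32-bit cost.
uint64_t add64via32(uint64_t a, uint64_t b) {
  uint32_t lo = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);
  uint32_t carry = lo < static_cast<uint32_t>(a) ? 1u : 0u;
  uint32_t hi = static_cast<uint32_t>(a >> 32) +
                static_cast<uint32_t>(b >> 32) + carry;
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  assert(add64via32(0xFFFFFFFFull, 1) == 0x100000000ull);
  assert(add64via32(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) ==
         0x123456789ABCDEFull + 0xFEDCBA9876543210ull);
  return 0;
}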
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
new file mode 100644
index 0000000..4280888
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -0,0 +1,76 @@
+//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific to the
+/// NVPTX target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
+ typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const NVPTXSubtarget *ST;
+ const NVPTXTargetLowering *TLI;
+
+ const NVPTXSubtarget *getST() const { return ST; };
+ const NVPTXTargetLowering *getTLI() const { return TLI; };
+
+public:
+ explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ bool hasBranchDivergence() { return true; }
+
+ bool isSourceOfDivergence(const Value *V);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index cf1feac..1f178af 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
bool retval = llvm::findOneNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
- if (retval == false) {
+ if (!retval) {
// There is no NVVM metadata, check the calling convention
- if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
- return true;
- else
- return false;
+ return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
}
return (x == 1);
}
@@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
- if (retval == false)
+ if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index 85aa34e..a237247 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -735,19 +735,19 @@ def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32);
}]>;
// The spurious call is here to silence a compiler warning about N being
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
deleted file mode 100644
index 5f074b3..0000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTXutil.h"
-#include "NVPTX.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-bool isParamLoad(const MachineInstr *MI) {
- if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
- (MI->getOpcode() != NVPTX::LD_i64_avar))
- return false;
- if (MI->getOperand(2).isImm() == false)
- return false;
- if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
- return false;
- return true;
-}
-
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
-
-static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
- char *a;
- char *end = space + splen;
-
- a = space;
- do {
- unsigned char uc;
-
- if (a >= end)
- return 1;
- uc = val & DATA_MASK;
- val >>= DIGIT_WIDTH;
- if (val != 0)
- uc |= MORE_BYTES;
- *a = uc;
- a++;
- } while (val);
- *nbytes = a - space;
- return 0;
-}
-
-#undef DATA_MASK
-#undef DIGIT_WIDTH
-#undef MORE_BYTES
-
-uint64_t encode_leb128(const char *str) {
- union {
- uint64_t x;
- char a[8];
- } temp64;
-
- temp64.x = 0;
-
- for (unsigned i = 0, e = strlen(str); i != e; ++i)
- temp64.a[i] = str[e - 1 - i];
-
- char encoded[16];
- int nbytes;
-
- int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
-
- (void) retval;
- assert(retval == 0 && "Encoding to leb128 failed");
-
- assert(nbytes <= 8 &&
- "Cannot support register names with leb128 encoding > 8 bytes");
-
- temp64.x = 0;
- for (int i = 0; i < nbytes; ++i)
- temp64.a[i] = encoded[i];
-
- return temp64.x;
-}
-
-} // end namespace llvm
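
The deleted helper above implemented the standard unsigned LEB128 scheme: seven payload bits per byte, with the high bit set on every byte except the last. A self-contained sketch of the same encoding (the name encodeULEB128 is hypothetical, not the deleted API):

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128(uint64_t Val) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Val & 0x7f; // low seven payload bits
        Val >>= 7;
        if (Val != 0)
          Byte |= 0x80;            // continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Val != 0);
      return Out;
    }
    // encodeULEB128(300) yields {0xAC, 0x02}, since 300 = 0b10'0101100.
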
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
deleted file mode 100644
index 1915dac..0000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
-bool isParamLoad(const MachineInstr *);
-uint64_t encode_leb128(const char *str);
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index a8d6b95..5e375b7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <map>
#include <sstream>
@@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.
+
+ // IR for __nvvm_reflect calls differs between CUDA versions:
+ // CUDA 6.5 and earlier uses this sequence:
+ // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
+ // (i8 addrspace(4)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ //
+ // The value returned by Sym->getOperand(0) is a Constant with a
+ // ConstantDataSequential operand that can be converted to a string and
+ // used for the lookup.
+ //
+ // CUDA 7.0 does it slightly differently:
+ // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
+ // (i8 addrspace(1)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ //
+ // In this case, we get a Constant with a GlobalVariable operand and we need
+ // to dig deeper to find its initializer with the string we'll use for lookup.
+
for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);
@@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
- const Constant *SymStr = cast<Constant>(Sym);
+ const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
+ // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's
+ // initializer.
+ assert(GV->hasInitializer() &&
+ "Format of _reflect function not recognized");
+ const Constant *Initializer = GV->getInitializer();
+ Operand = Initializer;
+ }
- assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ assert(isa<ConstantDataSequential>(Operand) &&
"Format of _reflect function not recognized");
-
- assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");
std::string ReflectArg =
- cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+ cast<ConstantDataSequential>(Operand)->getAsString();
ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
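
Condensed, the lookup above is: take the constant feeding __nvvm_reflect, peel a GlobalVariable down to its initializer for the CUDA 7.0 form, then read the C string and drop its trailing NUL. A rough sketch against the 3.7-era LLVM API (error handling and the surrounding GEP stripping omitted; getReflectArg is a hypothetical helper, not the pass's code):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include <string>

    static std::string getReflectArg(const llvm::Constant *Sym) {
      const llvm::Value *Operand = Sym->getOperand(0);
      if (auto *GV = llvm::dyn_cast<llvm::GlobalVariable>(Operand))
        Operand = GV->getInitializer(); // CUDA 7.0 style: dig into the global
      auto *CDS = llvm::cast<llvm::ConstantDataSequential>(Operand);
      std::string S = CDS->getAsString().str(); // includes the trailing NUL
      return S.substr(0, S.size() - 1);
    }
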
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index cd36e58..83de4d9 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -132,6 +132,35 @@ static const MCPhysReg VSFRegs[64] = {
PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
+static const MCPhysReg VSSRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+static const MCPhysReg QFRegs[32] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
static const MCPhysReg CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -261,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(_MII) {
+ PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -415,7 +444,9 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
@@ -429,6 +460,9 @@ public:
bool isU8ImmX8() const { return Kind == Immediate &&
isUInt<8>(getImm()) &&
(getImm() & 7) == 0; }
+
+ bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
+ bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
bool isU16Imm() const {
switch (Kind) {
case Expression:
@@ -507,22 +541,22 @@ public:
void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
}
void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[getReg()]));
}
void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(XRegs[getReg()]));
}
void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+ Inst.addOperand(MCOperand::createReg(XRegsNoX0[getReg()]));
}
void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
@@ -541,63 +575,83 @@ public:
void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+ Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
}
void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ Inst.addOperand(MCOperand::createReg(VSRegs[getVSReg()]));
}
void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ Inst.addOperand(MCOperand::createReg(VSFRegs[getVSReg()]));
+ }
+
+ void addRegVSSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
+ }
+
+ void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
+ Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()]));
}
void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCCReg()]));
}
void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCRBitMask()]));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
else
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
}
void addS16ImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
switch (Kind) {
case Immediate:
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
break;
case ContextImmediate:
- Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+ Inst.addOperand(MCOperand::createImm(getImmS16Context()));
break;
default:
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
break;
}
}
@@ -606,13 +660,13 @@ public:
assert(N == 1 && "Invalid number of operands!");
switch (Kind) {
case Immediate:
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::createImm(getImm()));
break;
case ContextImmediate:
- Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+ Inst.addOperand(MCOperand::createImm(getImmU16Context()));
break;
default:
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
break;
}
}
@@ -620,14 +674,14 @@ public:
void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
+ Inst.addOperand(MCOperand::createImm(getImm() / 4));
else
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
}
void addTLSRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getTLSReg()));
+ Inst.addOperand(MCOperand::createExpr(getTLSReg()));
}
StringRef getToken() const {
@@ -738,10 +792,10 @@ void PPCOperand::print(raw_ostream &OS) const {
OS << getImm();
break;
case Expression:
- getExpr()->print(OS);
+ OS << *getExpr();
break;
case TLSRegister:
- getTLSReg()->print(OS);
+ OS << *getTLSReg();
break;
}
}
@@ -749,30 +803,64 @@ void PPCOperand::print(raw_ostream &OS) const {
static void
addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
if (Op.isImm()) {
- Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+ Inst.addOperand(MCOperand::createImm(-Op.getImm()));
return;
}
const MCExpr *Expr = Op.getExpr();
if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
- Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+ Inst.addOperand(MCOperand::createExpr(UnExpr->getSubExpr()));
return;
}
} else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
BinExpr->getLHS(), Ctx);
- Inst.addOperand(MCOperand::CreateExpr(NE));
+ Inst.addOperand(MCOperand::createExpr(NE));
return;
}
}
- Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+ Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
}
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
+ case PPC::DCBTx:
+ case PPC::DCBTT:
+ case PPC::DCBTSTx:
+ case PPC::DCBTSTT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode((Opcode == PPC::DCBTx || Opcode == PPC::DCBTT) ?
+ PPC::DCBT : PPC::DCBTST);
+ TmpInst.addOperand(MCOperand::createImm(
+ (Opcode == PPC::DCBTx || Opcode == PPC::DCBTSTx) ? 0 : 16));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTCT:
+ case PPC::DCBTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBT);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTSTCT:
+ case PPC::DCBTSTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBTST);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
case PPC::LAx: {
MCInst TmpInst;
TmpInst.setOpcode(PPC::LA);
@@ -826,9 +914,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
Inst = TmpInst;
break;
}
@@ -840,9 +928,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B + N));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -855,9 +943,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - B));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ TmpInst.addOperand(MCOperand::createImm(32 - B));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
Inst = TmpInst;
break;
}
@@ -870,9 +958,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - (B + N)));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ TmpInst.addOperand(MCOperand::createImm(32 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
Inst = TmpInst;
break;
}
@@ -883,9 +971,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -896,9 +984,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -909,9 +997,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(32 - N));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(31));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(31));
Inst = TmpInst;
break;
}
@@ -922,9 +1010,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -936,9 +1024,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(B - N));
- TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
Inst = TmpInst;
break;
}
@@ -950,8 +1038,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B));
- TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
Inst = TmpInst;
break;
}
@@ -963,8 +1051,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(B + N));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
Inst = TmpInst;
break;
}
@@ -977,8 +1065,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - (B + N)));
- TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::createImm(64 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
Inst = TmpInst;
break;
}
@@ -989,8 +1077,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
Inst = TmpInst;
break;
}
@@ -1001,8 +1089,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
Inst = TmpInst;
break;
}
@@ -1013,8 +1101,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(64 - N));
- TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
Inst = TmpInst;
break;
}
@@ -1025,8 +1113,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(0));
- TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
Inst = TmpInst;
break;
}
@@ -1038,8 +1126,60 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(N));
- TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
Inst = TmpInst;
break;
}
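
The three bm cases above all hinge on isRunOfOnes, which succeeds only when the 32-bit mask is one contiguous run of 1 bits and reports that run as the big-endian PPC bit indices MB..ME that rlwinm expects. A simplified sketch of the non-wrapping case (the real helper also accepts masks that wrap around bit 31; GCC/Clang builtins assumed):

    #include <cstdint>

    static bool isContiguousRun(uint32_t Mask, unsigned &MB, unsigned &ME) {
      if (Mask == 0)
        return false;
      unsigned Lead = __builtin_clz(Mask);  // zero bits above the run
      unsigned Trail = __builtin_ctz(Mask); // zero bits below the run
      // Contiguous iff leading zeros + ones + trailing zeros fill 32 bits.
      if (Lead + __builtin_popcount(Mask) + Trail != 32)
        return false;
      MB = Lead;       // first 1 bit, counted from the MSB (PPC numbering)
      ME = 31 - Trail; // last 1 bit
      return true;
    }
    // 0x00FF0000 -> MB = 8, ME = 15; 0x00FF00F0 fails the contiguity check.
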
@@ -1105,10 +1245,18 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
return false;
+ } else if (Name.startswith_lower("vs") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
+ RegNo = VSRegs[IntVal];
+ return false;
} else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
return false;
+ } else if (Name.startswith_lower("q") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = QFRegs[IntVal];
+ return false;
} else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
@@ -1526,6 +1674,21 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return true;
}
+ // We'll now deal with an unfortunate special case: the syntax for the dcbt
+ // and dcbtst instructions differs for server vs. embedded cores.
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same. We take the
+ // server form to be the default, so swap the operands if we're parsing for
+ // an embedded core (they'll be swapped again upon printing).
+ if (STI.getFeatureBits()[PPC::FeatureBookE] &&
+ Operands.size() == 4 &&
+ (Name == "dcbt" || Name == "dcbtst")) {
+ std::swap(Operands[1], Operands[3]);
+ std::swap(Operands[2], Operands[1]);
+ }
+
return false;
}
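
The two swaps above rotate the parsed operand list from the embedded order (th, ra, rb) into the server order (ra, rb, th) without touching the mnemonic at index 0. A small demonstration with strings standing in for the parsed operands:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Index 0 is the mnemonic, as in the parser's Operands vector.
      std::vector<std::string> Ops = {"dcbt", "th", "ra", "rb"};
      std::swap(Ops[1], Ops[3]); // {"dcbt", "rb", "ra", "th"}
      std::swap(Ops[2], Ops[1]); // {"dcbt", "ra", "rb", "th"}
      for (const std::string &O : Ops)
        std::cout << O << ' ';   // prints: dcbt ra rb th
      std::cout << '\n';
    }
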
@@ -1700,7 +1863,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
Error(L, "expected identifier in directive");
return false;
}
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
if (getLexer().isNot(AsmToken::Comma)) {
Error(L, "unexpected token in directive");
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 5251b60..4799ea2 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler {
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~PPCDisassembler() {}
+ ~PPCDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -131,6 +131,26 @@ static const unsigned VSFRegs[] = {
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
+static const unsigned VSSRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -164,11 +184,22 @@ static const unsigned G8Regs[] = {
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned QFRegs[] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
- Inst.addOperand(MCOperand::CreateReg(Regs[RegNo]));
+ Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
}
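
The decodeRegisterClass template above takes the table as a reference-to-array so the compiler deduces its size N, letting one function bounds-check every register table without a separate length argument. A minimal sketch of the same trick (names hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    template <std::size_t N>
    static unsigned pickRegister(uint64_t RegNo, const unsigned (&Regs)[N]) {
      assert(RegNo < N && "Invalid register number");
      return Regs[RegNo];
    }

    int main() {
      static const unsigned GPRs[4] = {100, 101, 102, 103};
      std::cout << pickRegister(2, GPRs) << "\n"; // prints 102; N deduced as 4
    }
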
@@ -178,6 +209,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -214,6 +251,12 @@ static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VSFRegs);
}
+static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSSRegs);
+}
+
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -235,11 +278,20 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, QFRegs);
+}
+
+#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
+#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -247,7 +299,7 @@ template<unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
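
decodeSImmOperand widens an N-bit immediate field to a signed 64-bit value. A sketch mirroring llvm::SignExtend64<N>: shift the field's sign bit up to bit 63, then arithmetic-shift back down (pre-C++20, right-shifting a negative value is technically implementation-defined, though arithmetic on mainstream compilers):

    #include <cstdint>
    #include <iostream>

    template <unsigned N> static int64_t signExtend64(uint64_t X) {
      return int64_t(X << (64 - N)) >> (64 - N);
    }

    int main() {
      // A 16-bit displacement field holding 0xFFFC decodes as -4.
      std::cout << signExtend64<16>(0xFFFC) << "\n"; // prints -4
    }
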
@@ -270,19 +322,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
break;
}
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
return MCDisassembler::Success;
}
@@ -298,12 +350,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
return MCDisassembler::Success;
}
@@ -314,7 +366,7 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
unsigned Zeros = countTrailingZeros(Imm);
assert(Zeros < 8 && "Invalid CR bit value");
- Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros]));
+ Inst.addOperand(MCOperand::createReg(CRRegs[7 - Zeros]));
return MCDisassembler::Success;
}
@@ -335,6 +387,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
+ if (STI.getFeatureBits()[PPC::FeatureQPX]) {
+ DecodeStatus result =
+ decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+ if (result != MCDisassembler::Fail)
+ return result;
+
+ MI.clear();
+ }
+
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 670c40a..1a130e8 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,14 +33,28 @@ static cl::opt<bool>
FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
cl::desc("Use full register names when printing assembly"));
+#define PRINT_ALIAS_INSTR
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ const char *RegName = getRegisterName(RegNo);
+ if (RegName[0] == 'q' /* QPX */) {
+ // The system toolchain on the BG/Q does not understand QPX register names
+ // in .cfi_* directives, so print the name of the floating-point
+ // subregister instead.
+ std::string RN(RegName);
+
+ RN[0] = 'f';
+ OS << RN;
+
+ return;
+ }
+
+ OS << RegName;
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -87,6 +103,38 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
}
+
+ // dcbt[st] is printed manually here because:
+ // 1. The assembly syntax is different between embedded and server targets
+ // 2. We must print the short mnemonics for TH == 0 because the
+ // embedded/server syntax default will not be stable across assemblers
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same.
+ if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+ unsigned char TH = MI->getOperand(0).getImm();
+ O << "\tdcbt";
+ if (MI->getOpcode() == PPC::DCBTST)
+ O << "st";
+ if (TH == 16)
+ O << "t";
+ O << " ";
+
+ bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
+ if (IsBookE && TH != 0 && TH != 16)
+ O << (unsigned int) TH << ", ";
+
+ printOperand(MI, 1, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+
+ if (!IsBookE && TH != 0 && TH != 16)
+ O << ", " << (unsigned int) TH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
// For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
// used when converting a 32-bit float to a 64-bit float as part of
@@ -98,8 +146,9 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// precision). FIXME: Is there a better solution?
if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
return;
-
- printInstruction(MI, O);
+
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
}
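
Stripped of the MC plumbing, the dcbt printing rules above are: fold TH == 16 into the t-suffixed mnemonic, omit TH == 0 entirely, and otherwise place TH before the registers on Book E (embedded) targets and after them on server targets. A sketch for the dcbt case (dcbtst is analogous; printDCBT is a hypothetical helper):

    #include <iostream>
    #include <string>

    static std::string printDCBT(unsigned TH, bool IsBookE,
                                 const std::string &RA, const std::string &RB) {
      std::string S = "dcbt";
      if (TH == 16)
        S += "t";                       // short mnemonic folds TH in
      S += " ";
      if (IsBookE && TH != 0 && TH != 16)
        S += std::to_string(TH) + ", "; // embedded: TH leads
      S += RA + ", " + RB;
      if (!IsBookE && TH != 0 && TH != 16)
        S += ", " + std::to_string(TH); // server: TH trails
      return S;
    }

    int main() {
      std::cout << printDCBT(0, false, "r3", "r4") << "\n";  // dcbt r3, r4
      std::cout << printDCBT(8, true, "r3", "r4") << "\n";   // dcbt 8, r3, r4
      std::cout << printDCBT(16, false, "r3", "r4") << "\n"; // dcbtt r3, r4
    }
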
@@ -201,6 +250,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -208,6 +264,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 7 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -236,6 +299,20 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1023 && "Invalid u10imm argument!");
+ O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 4095 && "Invalid u12imm argument!");
+ O << (unsigned short)Value;
+}
+
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
@@ -338,6 +415,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index b21aa22..8e18783 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,22 +32,31 @@ public:
}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ raw_ostream &OS);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bea88a2..86885e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -178,12 +178,7 @@ public:
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
- switch (Count % 4) {
- default: break; // No leftover bytes to write
- case 1: OW->Write8(0); break;
- case 2: OW->Write16(0); break;
- case 3: OW->Write16(0); OW->Write8(0); break;
- }
+ OW->WriteZeros(Count % 4);
return true;
}
@@ -208,7 +203,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -224,8 +219,7 @@ namespace {
ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index b817394..3e3489f 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
uint8_t OSABI) {
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2b4f2d8..d8fab5b 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+ // v2 ABI.
+ NeedsLocalForSize = true;
+
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 786b7fe..17f4cd4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -14,11 +14,13 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,19 +33,19 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
class PPCMCCodeEmitter : public MCCodeEmitter {
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
- : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
- }
-
- ~PPCMCCodeEmitter() {}
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
+ ~PPCMCCodeEmitter() override {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -96,7 +98,7 @@ public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
// For fast-isel, a float COPY_TO_REGCLASS can survive this long.
@@ -158,14 +160,11 @@ public:
};
} // end anonymous namespace
-
+
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- Triple TT(STI.getTargetTriple());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+ return new PPCMCCodeEmitter(MCII, Ctx);
}
unsigned PPCMCCodeEmitter::
@@ -176,7 +175,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
return 0;
}
@@ -188,7 +187,7 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_brcond14));
return 0;
}
@@ -201,7 +200,7 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24abs));
return 0;
}
@@ -214,7 +213,7 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_brcond14abs));
return 0;
}
@@ -226,7 +225,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return 0;
}
@@ -244,7 +243,7 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return RegBits;
}
@@ -263,7 +262,7 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
return RegBits;
}
@@ -326,7 +325,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
Triple TT(STI.getTargetTriple());
bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
@@ -340,7 +339,7 @@ unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
// (__tls_get_addr), which we create via getDirectBrEncoding as usual,
// and one for the TLSGD or TLSLD symbol, which is emitted here.
const MCOperand &MO = MI.getOperand(OpNo+1);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
return getDirectBrEncoding(MI, OpNo, Fixups, STI);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index f0a6bb9..ca72ccf 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -36,9 +36,8 @@ private:
int64_t EvaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
- bool _IsDarwin)
- : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
public:
/// @name Construction
@@ -83,7 +82,7 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override {
+ MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f2da389..8474376 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -108,7 +108,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
(T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -145,6 +145,7 @@ public:
}
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -222,99 +223,60 @@ public:
};
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- if (Triple(TT).isOSDarwin()) {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetMachOStreamer(*S);
- return S;
- }
-
- MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new PPCTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
- return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
- createPPCMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
- createPPCMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
- createPPCMCSubtargetInfo);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
- createPPCMCCodeEmitter);
-
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
- createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
}
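
Two structural changes land in this file. First, LLVMInitializePowerPCTargetMC
now registers every MC component inside a single loop over the three PowerPC
Target objects instead of repeating each Register* call per target. Second,
the monolithic createMCStreamer/createMCAsmStreamer pair is replaced by
target-streamer factories: the asm factory always wraps the streamer in a
PPCTargetAsmStreamer, while the object factory picks ELF or Mach-O from the
triple's object format rather than from isOSDarwin(). A hedged sketch (not
part of the patch) of what the reworked registration gives a client; the
calls are standard TargetRegistry/Target APIs of this LLVM revision, and the
triple string is only an example:

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include <memory>
    using namespace llvm;

    extern "C" void LLVMInitializePowerPCTargetInfo();
    extern "C" void LLVMInitializePowerPCTargetMC();

    int main() {
      LLVMInitializePowerPCTargetInfo();
      LLVMInitializePowerPCTargetMC();
      const std::string TT = "powerpc64le-unknown-linux-gnu";
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget(TT, Err);
      if (!T)
        return 1;
      std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT));
      std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT));
      std::unique_ptr<MCInstrInfo> MII(T->createMCInstrInfo());
      // Note the new factory signature: a Triple, no MCSubtargetInfo.
      std::unique_ptr<MCInstPrinter> IP(T->createMCInstPrinter(
          Triple(TT), /*SyntaxVariant=*/0, *MAI, *MII, *MRI));
      return IP ? 0 : 1;
    }
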
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 68f7f7a..5f2117c 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -18,6 +18,7 @@
#undef PPC
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
class MCAsmBackend;
@@ -29,29 +30,56 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
extern Target ThePPC64LETarget;
-
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number of
+/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
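
isRunOfOnes reports the run in PowerPC mask-begin/mask-end numbering, where
bit 0 is the most significant bit, matching how rlwinm-style instructions
encode MB/ME. A small usage sketch (the values follow directly from the
implementation above; the asserts are illustrative, not committed tests):

    #include "PPCMCTargetDesc.h" // declares isRunOfOnes
    #include <cassert>
    using namespace llvm;

    void runOfOnesExamples() {
      unsigned MB, ME;
      // Plain run: bits 16..27 (MSB-numbered) are set.
      assert(isRunOfOnes(0x0000FFF0u, MB, ME) && MB == 16 && ME == 27);
      // Wrapped run: ones at both ends, zeros in the middle, so the run
      // begins at bit 24 and wraps around to end at bit 7.
      assert(isRunOfOnes(0xFF0000FFu, MB, ME) && MB == 24 && ME == 7);
      // Two separate runs: rejected.
      assert(!isRunOfOnes(0x0F0F0000u, MB, ME));
    }
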
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index df2f14a..3c906d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -41,7 +41,7 @@ public:
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
- void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override {
@@ -212,7 +212,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr =
Writer->getSectionAddress(A_SD->getFragment()->getParent());
FixedValue += SecAddr;
@@ -226,7 +226,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
"' can not be undefined in a subtraction expression");
// FIXME: is Type correct? see include/llvm/Support/MachO.h
- Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// FIXME: does FixedValue get used??
@@ -244,7 +244,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") +
Buffer + ") into 24 bits of scattered "
@@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
Log2Size, IsPCRel, Value2);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
}
MachO::any_relocation_info MRE;
makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
return true;
}
@@ -324,16 +324,16 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// this doesn't seem right for RIT_PPC_BR24
// Get the symbol data, if any.
- const MCSymbolData *SD = nullptr;
+ const MCSymbol *A = nullptr;
if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ A = &Target.getSymA()->getSymbol();
// See <reloc.h>.
const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = RelocType;
+ const MCSymbol *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
//
@@ -344,9 +344,9 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// the above line stolen from ARM, not sure
} else {
// Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
+ if (A->isVariable()) {
int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ if (A->getVariableValue()->EvaluateAsAbsolute(
Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
@@ -354,20 +354,18 @@ void PPCMachObjectWriter::RecordPPCRelocation(
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ if (Writer->doesSymbolRequireExternRelocation(*A)) {
+ RelSymbol = A;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->getSymbol().isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
} else {
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD =
- Asm.getSectionData(SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&SymSD);
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
}
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
@@ -375,13 +373,12 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
- makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
- Type);
- Writer->addRelocation(Fragment->getParent(), MRE);
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(
new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
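
The writer-side churn in this file follows the MC-layer move away from
MCSymbolData to plain MCSymbol: relocations are now recorded against an
optional MCSymbol* instead of an IsExtern flag plus a raw symbol index. A
condensed restatement of the decision the hunks above now make (a sketch
extracted from the patched code, not a complete writer):

    // External: relocate against the symbol itself; internal: against the
    // 1-based ordinal of the symbol's section.
    const MCSymbol *RelSymbol = nullptr;
    unsigned Index = 0;
    if (Writer->doesSymbolRequireExternRelocation(*A)) {
      RelSymbol = A;
      if (!A->isUndefined())
        FixedValue -= Layout.getSymbolOffset(*A); // cancel the addend
    } else {
      const MCSection &Sec = A->getSection();
      Index = Sec.getOrdinal() + 1;
      FixedValue += Writer->getSectionAddress(&Sec);
    }
    // Passing RelSymbol (possibly null) replaces the old IsExtern bit.
    Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
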
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index 8fb33df..ae8d8b4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -34,18 +34,19 @@ namespace llvm {
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
+ FunctionPass *createPPCLoopDataPrefetchPass();
+ FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM);
+ FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCVSXCopyPass();
- FunctionPass *createPPCVSXCopyCleanupPass();
FunctionPass *createPPCVSXFMAMutatePass();
+ FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCTLSDynamicCallPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
- /// \brief Creates an PPC-specific Target Transformation Info pass.
- ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
-
void initializePPCVSXFMAMutatePass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
@@ -93,12 +94,7 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4,
-
- // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
- // call sequences.
- MO_TLSLD = 9 << 4,
- MO_TLSGD = 10 << 4
+ MO_TLS = 8 << 4
};
} // end namespace PPCII
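
PPC.h gains constructors for several new machine passes (loop data
prefetching, pre-increment load/store preparation, TOC register dependencies,
TLS dynamic-call expansion, VSX swap removal) and drops the old
TargetTransformInfo pass hook, which the new TTI infrastructure replaces. A
hedged sketch of how such constructors are typically consumed; the real
wiring and the exact pipeline positions live in PPCTargetMachine.cpp, outside
this diff, so the subclass name and placements here are assumptions:

    #include "PPC.h"
    #include "PPCTargetMachine.h"
    #include "llvm/CodeGen/Passes.h"
    using namespace llvm;

    // Illustrative pass config (the real one is in PPCTargetMachine.cpp).
    class PPCPassConfigSketch : public TargetPassConfig {
    public:
      PPCPassConfigSketch(PPCTargetMachine *TM, PassManagerBase &PM)
          : TargetPassConfig(TM, PM) {}
      PPCTargetMachine &getPPCTargetMachine() const {
        return getTM<PPCTargetMachine>();
      }
      bool addPreISel() override {
        // Placement shown is illustrative, not the committed ordering.
        addPass(createPPCLoopDataPrefetchPass());
        addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
        return false;
      }
    };
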
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index a7fd62c..1a02bcc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -18,7 +18,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//
-
+
//===----------------------------------------------------------------------===//
// CPU Directives //
//===----------------------------------------------------------------------===//
@@ -86,12 +86,19 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
"Enable the cmpb instruction">;
+def FeatureICBT : SubtargetFeature<"icbt","HasICBT", "true",
+ "Enable icbt instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
- "Enable Book E instructions">;
+ "Enable Book E instructions",
+ [FeatureICBT]>;
def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
"Has only the msync instruction instead of sync",
[FeatureBookE]>;
@@ -106,15 +113,66 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
+ "Enable POWER8 Altivec instructions",
+ [FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
- [FeatureVSX, FeatureAltivec]>;
+ [FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
+def FeatureInvariantFunctionDescriptors :
+ SubtargetFeature<"invariant-function-descriptors",
+ "HasInvariantFunctionDescriptors", "true",
+ "Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+/* Since new processors generally contain a superset of features of those that
+ came before them, the idea is to make implementations of new processors
+ less error prone and easier to read.
+ Namely:
+ list<SubtargetFeature> Power8FeatureList = ...
+ list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+ [ features that Power8 does not support ]
+ list<SubtargetFeature> FutureProcessorFeatureList =
+ !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+   This makes it explicit and obvious what is new in FutureProcessor vs.
+   Power8, and it provides a single point of definition if the feature set
+   is used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ DeprecatedMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
@@ -122,16 +180,6 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// POPCNTB p5 through p7 popcntb and related instructions
//===----------------------------------------------------------------------===//
-// ABI Selection //
-//===----------------------------------------------------------------------===//
-
-def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
- "Use the ELFv1 ABI">;
-
-def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
- "Use the ELFv2 ABI">;
-
-//===----------------------------------------------------------------------===//
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
// RecFormRel - Filter class used to relate non-record-form instructions with
@@ -202,12 +250,12 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, FeatureMSYNC,
- DeprecatedMFTB]>;
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, DeprecatedMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, FeatureMSYNC,
- DeprecatedMFTB]>;
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -234,6 +282,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
+
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
@@ -247,14 +296,14 @@ def : ProcessorModel<"g5", G5Model,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
- FeatureSTFIWX, FeatureBookE, FeatureISEL,
- DeprecatedMFTB]>;
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, DeprecatedMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureBookE, FeatureISEL,
- DeprecatedMFTB]>;
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, DeprecatedMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
- [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
@@ -262,7 +311,7 @@ def : ProcessorModel<"a2", PPCA2Model,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
- [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
@@ -303,35 +352,15 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr8", P8Model,
- [DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureP8Vector,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
- DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
+def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : ProcessorModel<"ppc64le", G5Model,
- [Directive64, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
- FeatureFRSQRTE, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
//===----------------------------------------------------------------------===//
// Calling Conventions
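
Beyond the new POWER8-era features (bpermd, extended divide, crypto, direct
moves, partword atomics, HTM), the notable change here is structural: pwr7,
pwr8, and now ppc64le share feature lists built with !listconcat, and ppc64le
switches from the generic G5Model to P8Model with the full Power8 set. On the
C++ side each SubtargetFeature becomes a bool on PPCSubtarget with a matching
accessor; a small sketch of how a backend query might use one of the new bits
(the helper is hypothetical, the accessor follows the HasPartwordAtomics
field declared above):

    #include "PPCSubtarget.h"
    using namespace llvm;

    // With FeaturePartwordAtomic, l[bh]arx/st[bh]cx. handle 8- and 16-bit
    // atomics natively; otherwise they must be widened to 32 bits.
    static bool needsPartwordAtomicWidening(const PPCSubtarget &ST,
                                            unsigned BitWidth) {
      return BitWidth < 32 && !ST.hasPartwordAtomics();
    }
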
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 49fd2f9..4f1c3c7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -68,13 +68,12 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
- const PPCSubtarget &Subtarget;
- uint64_t TOCLabelID;
+ const PPCSubtarget *Subtarget;
StackMaps SM;
public:
- explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0), SM(*this) {}
+ explicit PPCAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -99,13 +98,19 @@ namespace {
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
class PPCLinuxAsmPrinter : public PPCAsmPrinter {
public:
- explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : PPCAsmPrinter(TM, Streamer) {}
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "Linux PPC Assembly Printer";
@@ -124,8 +129,9 @@ namespace {
/// OS X
class PPCDarwinAsmPrinter : public PPCAsmPrinter {
public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : PPCAsmPrinter(TM, Streamer) {}
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "Darwin PPC Assembly Printer";
@@ -144,6 +150,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
@@ -156,7 +163,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -164,7 +171,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
- if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,7 +287,8 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
case 'y': // A memory reference for an X-form instruction
{
const char *RegName = "r0";
- if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -311,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
-
- // To avoid name clash check if the name already exists.
- while (!TOCEntry) {
- if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == nullptr) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
-
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
return TOCEntry;
}
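
The rewrite above works because createTempSymbol already hands out
context-unique names, so the manual TOCLabelID counter and the LookupSymbol
clash-avoidance loop were dead weight. The surviving idiom, shown in
isolation (a restatement of the hunk, not new code):

    MCSymbol *&TOCEntry = TOC[Sym]; // inserts {Sym, nullptr} on first lookup
    if (!TOCEntry)
      TOCEntry = createTempSymbol("C"); // uniqued by the MCContext
    return TOCEntry;
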
@@ -400,12 +401,45 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}
+/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
+/// call to __tls_get_addr to the current output stream.
+void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
+ MCSymbolRefExpr::VariantKind VK) {
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+
+ assert(MI->getOperand(0).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must define GPR3");
+ assert(MI->getOperand(1).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must read GPR3");
+
+ if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Kind = MCSymbolRefExpr::VK_PLT;
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ?
+ PPC::BL8_NOP_TLS : PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+}
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- bool isPPC64 = Subtarget.isPPC64();
+ bool isPPC64 = Subtarget->isPPC64();
bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
const Module *M = MF->getFunction()->getParent();
PICLevel::Level PL = M->getPICLevel();
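
EmitTlsCall centralizes what were four near-identical expansions:
GETtlsADDR/GETtlsADDR32 and GETtlsldADDR/GETtlsldADDR32 (handled further down
in this file) all reduce to one branch-and-link to __tls_get_addr, differing
only in the tlsgd/tlsld relocation on the symbol operand and in needing a
PLT-relative callee under 32-bit ELF PIC. A condensed restatement of the
helper's tail (names as in the hunk above):

    unsigned Opcode = Subtarget->isPPC64() ? PPC::BL8_NOP_TLS : PPC::BL_TLS;
    EmitToStreamer(*OutStreamer,
                   MCInstBuilder(Opcode).addExpr(TlsRef)   // __tls_get_addr
                                        .addExpr(SymVar)); // sym@tlsgd/@tlsld
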
@@ -416,9 +450,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI);
+ return LowerSTACKMAP(*OutStreamer, SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(*OutStreamer, SM, *MI);
case PPC::MoveGOTtoLR: {
// Transform %LR = MoveGOTtoLR
@@ -428,7 +462,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// blrl
// This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
MCSymbol *GOTSymbol =
- OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *OffsExpr =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
MCSymbolRefExpr::VK_PPC_LOCAL,
@@ -437,7 +471,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext);
// Emit the 'bl'.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
return;
}
case PPC::MovePCtoLR:
@@ -449,13 +483,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
- OutStreamer.EmitLabel(PICBase);
+ OutStreamer->EmitLabel(PICBase);
return;
}
case PPC::UpdateGBR: {
@@ -478,16 +512,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
TmpInst.getOperand(1) =
- MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ MCOperand::createExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
TmpInst.getOperand(0) = TR;
TmpInst.getOperand(2) = PICR;
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
TmpInst.setOpcode(PPC::ADD4);
TmpInst.getOperand(0) = PICR;
TmpInst.getOperand(1) = TR;
TmpInst.getOperand(2) = PICR;
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LWZtoc: {
@@ -515,7 +549,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
} else {
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -523,12 +557,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
OutContext);
const MCExpr *PB =
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
OutContext);
Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
}
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LDtocJTI:
@@ -560,8 +594,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -607,8 +641,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
- TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::LDtocL: {
@@ -649,8 +683,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
@@ -683,24 +717,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
- TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDISgotTprelHA: {
// Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
    // Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTprel));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTprel));
return;
}
case PPC::LDgotTprelL:
@@ -716,17 +750,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
- TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- EmitToStreamer(OutStreamer, TmpInst);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::PPC32PICGOT: {
- MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
- MCSymbol *GOTRef = OutContext.CreateTempSymbol();
- MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+ MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.createTempSymbol();
+ MCSymbol *NextInstr = OutContext.createTempSymbol();
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
@@ -734,52 +768,52 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
MCSymbolRefExpr::Create(GOTRef, OutContext),
OutContext);
- OutStreamer.EmitLabel(GOTRef);
- OutStreamer.EmitValue(OffsExpr, 4);
- OutStreamer.EmitLabel(NextInstr);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
- .addReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
- .addReg(MI->getOperand(1).getReg())
- .addImm(0)
- .addReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(MI->getOperand(0).getReg()));
+ OutStreamer->EmitLabel(GOTRef);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(NextInstr);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case PPC::PPC32GOT: {
- MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
OutContext);
const MCExpr *SymGotTlsHA =
MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI)
- .addReg(MI->getOperand(0).getReg())
- .addExpr(SymGotTlsL));
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(0).getReg())
- .addExpr(SymGotTlsHA));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
return;
}
case PPC::ADDIStlsgdHA: {
// Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTlsGD));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
return;
}
case PPC::ADDItlsgdL:
@@ -791,32 +825,40 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
- MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
- MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
- OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
return;
}
+ case PPC::GETtlsADDR:
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDR32: {
+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
+ return;
+ }
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X2)
- .addExpr(SymGotTlsLD));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
return;
}
case PPC::ADDItlsldL:
@@ -828,16 +870,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
- MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
- MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
- OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsLD));
+ const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR:
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDR32: {
+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
return;
}
case PPC::ADDISdtprelHA:
@@ -852,11 +902,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
- .addReg(MI->getOperand(0).getReg())
- .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
- .addExpr(SymDtprel));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
return;
}
case PPC::ADDIdtprelL:
@@ -871,41 +922,41 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- EmitToStreamer(OutStreamer,
- MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymDtprel));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
return;
}
case PPC::MFOCRF:
case PPC::MFOCRF8:
- if (!Subtarget.hasMFOCRF()) {
+ if (!Subtarget->hasMFOCRF()) {
// Transform: %R3 = MFOCRF %CR7
// Into: %R3 = MFCR ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
- OutStreamer.AddComment(PPCInstPrinter::
- getRegisterName(MI->getOperand(1).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
.addReg(MI->getOperand(0).getReg()));
return;
}
break;
case PPC::MTOCRF:
case PPC::MTOCRF8:
- if (!Subtarget.hasMFOCRF()) {
+ if (!Subtarget->hasMFOCRF()) {
// Transform: %CR7 = MTOCRF %R3
// Into: MTCRF mask, %R3 ;; cr7
unsigned NewOpcode =
MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
->getEncodingValue(MI->getOperand(0).getReg());
- OutStreamer.AddComment(PPCInstPrinter::
- getRegisterName(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
- .addImm(Mask)
- .addReg(MI->getOperand(1).getReg()));
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
+ .addImm(Mask)
+ .addReg(MI->getOperand(1).getReg()));
return;
}
break;
@@ -919,7 +970,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!Subtarget.isDarwin()) {
+ if (!Subtarget->isDarwin()) {
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
@@ -931,32 +982,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget.isELFv2ABI()) {
+ if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
- static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitAbiVersion(2);
}
- if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ if (static_cast<const PPCTargetMachine &>(TM).isPPC64() ||
+ TM.getRelocationModel() != Reloc::PIC_)
return AsmPrinter::EmitStartOfAsmFile(M);
if (M.getPICLevel() == PICLevel::Small)
return AsmPrinter::EmitStartOfAsmFile(M);
- OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly()));
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
- MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
- MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+ MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
+ MCSymbol *CurrentPos = OutContext.createTempSymbol();
- OutStreamer.EmitLabel(CurrentPos);
+ OutStreamer->EmitLabel(CurrentPos);
// The GOT pointer points to the middle of the GOT, in order to reference the
// entire 64kB range. 0x8000 is the midpoint.
@@ -965,121 +1016,93 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCConstantExpr::Create(0x8000, OutContext),
OutContext);
- OutStreamer.EmitAssignment(TOCSym, tocExpr);
+ OutStreamer->EmitAssignment(TOCSym, tocExpr);
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
- if (!Subtarget.isPPC64() &&
+ if (!Subtarget->isPPC64() &&
(TM.getRelocationModel() != Reloc::PIC_ ||
MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
- if (!Subtarget.isPPC64()) {
+ if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
if (PPCFI->usesPICBase()) {
MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
MCSymbol *PICBase = MF->getPICBaseSymbol();
- OutStreamer.EmitLabel(RelocSymbol);
+ OutStreamer->EmitLabel(RelocSymbol);
const MCExpr *OffsExpr =
MCBinaryExpr::CreateSub(
- MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
OutContext),
MCSymbolRefExpr::Create(PICBase, OutContext),
OutContext);
- OutStreamer.EmitValue(OffsExpr, 4);
- OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(CurrentFnSym);
return;
} else
return AsmPrinter::EmitFunctionEntryLabel();
}
// ELFv2 ABI - Normal entry label.
- if (Subtarget.isELFv2ABI())
+ if (Subtarget->isELFv2ABI())
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
- MCSectionSubPair Current = OutStreamer.getCurrentSection();
- const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(Section);
- OutStreamer.EmitLabel(CurrentFnSym);
- OutStreamer.EmitValueToAlignment(8);
- MCSymbol *Symbol1 =
- OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSectionELF *Section = OutStreamer->getContext().getELFSection(
+ ".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->EmitValueToAlignment(8);
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8 /*size*/);
- MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ OutStreamer->EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ 8 /*size*/);
+ MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
- MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
- 8/*size*/);
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */);
- OutStreamer.SwitchSection(Current.first, Current.second);
-
- MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
- ".L." + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(RealFnSym);
- CurrentFnSymForSize = RealFnSym;
+ OutStreamer->EmitIntValue(0, 8 /* size */);
+ OutStreamer->SwitchSection(Current.first, Current.second);
}
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
if (!TOC.empty()) {
- const MCSectionELF *Section;
-
+ MCSectionELF *Section;
+
if (isPPC64)
- Section = OutStreamer.getContext().getELFSection(".toc",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- else
- Section = OutStreamer.getContext().getELFSection(".got2",
- ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(Section);
+ Section = OutStreamer->getContext().getELFSection(
+ ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ else
+ Section = OutStreamer->getContext().getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
- OutStreamer.EmitLabel(I->second);
+ OutStreamer->EmitLabel(I->second);
MCSymbol *S = I->first;
if (isPPC64)
TS.emitTCEntry(*S);
else
- OutStreamer.EmitSymbolValue(S, 4);
- }
- }
-
- MachineModuleInfoELF &MMIELF =
- MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitSymbolValue(S, 4);
}
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
}
return AsmPrinter::doFinalization(M);
@@ -1103,36 +1126,36 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
- if (Subtarget.isELFv2ABI()
+ if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
- MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(GlobalEntryLabel);
+ MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
- MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
const MCExpr *TOCDeltaExpr =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
const MCExpr *TOCDeltaHi =
PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PPC::X2)
- .addReg(PPC::X12)
- .addExpr(TOCDeltaHi));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
const MCExpr *TOCDeltaLo =
PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PPC::X2)
- .addReg(PPC::X2)
- .addExpr(TOCDeltaLo));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
- MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(LocalEntryLabel);
+ MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
@@ -1140,7 +1163,7 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
GlobalEntryLabelExp, OutContext);
PPCTargetStreamer *TS =
- static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)
TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
@@ -1158,9 +1181,9 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
// FIXME: We should fill in the eight-byte mandatory fields as described in
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
// currently make use of these fields).
- if (Subtarget.isPPC64()) {
- OutStreamer.EmitIntValue(0, 4/*size*/);
- OutStreamer.EmitIntValue(0, 8/*size*/);
+ if (Subtarget->isPPC64()) {
+ OutStreamer->EmitIntValue(0, 4/*size*/);
+ OutStreamer->EmitIntValue(0, 8/*size*/);
}
}
@@ -1189,74 +1212,89 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc64le"
};
- unsigned Directive = Subtarget.getDarwinDirective();
- if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
- Directive = PPC::DIR_970;
- if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
- Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
- Directive = PPC::DIR_64;
+ // Get the numerically largest directive.
+ // FIXME: How should we merge darwin directives?
+ unsigned Directive = PPC::DIR_NONE;
+ for (const Function &F : M) {
+ const PPCSubtarget &STI = TM.getSubtarget<PPCSubtarget>(F);
+ unsigned FDir = STI.getDarwinDirective();
+    Directive = Directive > FDir ? Directive : FDir;
+ if (STI.hasMFOCRF() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (STI.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (STI.isPPC64() && Directive < PPC::DIR_64)
+ Directive = PPC::DIR_64;
+ }
+
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
assert(Directive < array_lengthof(CPUDirectives) &&
"CPUDirectives[] might not be up-to-date!");
PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ *static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
TStreamer.emitMachine(CPUDirectives[Directive]);
  // Prime text sections so they are adjacent. This reduces the likelihood
  // that a large data or debug section causes a branch to exceed the 16M
  // limit.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
- OutStreamer.SwitchSection(
+ OutStreamer->SwitchSection(
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
MachO::S_SYMBOL_STUBS |
MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
- OutStreamer.SwitchSection(
+ OutStreamer->SwitchSection(
OutContext.getMachOSection("__TEXT","__symbol_stub1",
MachO::S_SYMBOL_STUBS |
MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
}
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
- return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
+ return Ctx.getOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
+ return Ctx.getOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 =
- TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
- bool isDarwin = Subtarget.isDarwin();
-
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+
+ // Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
+ // This is because the MachineFunction won't exist (or has already been
+ // freed), and since we're at the global level we can use the default
+ // constructed subtarget.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+ auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) {
+ S.EmitInstruction(Inst, *STI);
+ };
+
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
- const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
-
+ MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
// Output stubs for dynamically-linked functions
if (TM.getRelocationModel() == Reloc::PIC_) {
- const MCSection *StubSection =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 32, SectionKind::getText());
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 32,
+ SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.SwitchSection(StubSection);
+ OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
MCSymbol *Stub = Stubs[i].first;
@@ -1264,8 +1302,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
- OutStreamer.EmitLabel(Stub);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
@@ -1273,110 +1311,108 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// bcl 20, 31, AnonSymbol
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
- OutStreamer.EmitLabel(AnonSymbol);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ OutStreamer->EmitLabel(AnonSymbol);
// mflr r11
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
- const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
.addExpr(SubHa16));
// mtlr r0
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
- const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
- OutStreamer.SwitchSection(LSPSection);
- OutStreamer.EmitLabel(LazyPtr);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
- OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
return;
}
-
- const MCSection *StubSection =
- OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 16, SectionKind::getText());
+
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__symbol_stub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 16,
+ SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
- OutStreamer.SwitchSection(StubSection);
+ OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
- OutStreamer.EmitLabel(Stub);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
- PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS)
+ PPCMCExpr::CreateHa(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
// ldu r12, lo16(LazyPtr)(r11)
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
- PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ PPCMCExpr::CreateLo(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
.addReg(PPC::R11));
// mtctr r12
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
- OutStreamer.SwitchSection(LSPSection);
- OutStreamer.EmitLabel(LazyPtr);
- OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
MCSymbol *DyldStubBindingHelper =
- OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
if (isPPC64) {
// .quad dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
} else {
// .long dyld_stub_binding_helper
- OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 =
- TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -1409,19 +1445,19 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// Output macho stubs for external and common global variables.
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer->EmitLabel(Stubs[i].first);
// .indirect_symbol _foo
MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+ OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -1429,32 +1465,32 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// need to be indirect and pc-rel. We accomplish this by using NLPs.
// However, sometimes the types are local to the file. So we need to
// fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
+ OutStreamer->EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(isPPC64 ? 3 : 2);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer->EmitLabel(Stubs[i].first);
// .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
+ OutStreamer->EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -1462,7 +1498,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never generates
// code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
return AsmPrinter::doFinalization(M);
}
@@ -1471,13 +1507,12 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
///
-static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
- MCStreamer &Streamer) {
- const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
-
- if (Subtarget->isDarwin())
- return new PPCDarwinAsmPrinter(tm, Streamer);
- return new PPCLinuxAsmPrinter(tm, Streamer);
+static AsmPrinter *
+createPPCAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ if (Triple(tm.getTargetTriple()).isMacOSX())
+ return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+ return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
// Force static initialization.
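Aside on the stub bodies above: the addis/lwzu pair splits LazyPtr - AnonSymbol into ha16/lo16 halves because D-form loads sign-extend their 16-bit displacement, so the "high adjusted" half must carry an extra 1 whenever bit 15 of the offset is set. A minimal standalone check of that arithmetic (plain C++, independent of LLVM's PPCMCExpr::CreateHa/CreateLo, which wrap the same computation symbolically):

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

// lo16 is the raw low half; ha16 is the high half adjusted upward by one
// whenever lo16 will be sign-extended to a negative displacement.
static uint16_t lo16(uint32_t X) { return static_cast<uint16_t>(X); }
static uint16_t ha16(uint32_t X) {
  return static_cast<uint16_t>((X >> 16) + ((X & 0x8000) ? 1 : 0));
}

int main() {
  for (uint32_t X : {0x00000000u, 0x00007FFFu, 0x00008000u,
                     0x12348000u, 0xFFFF8000u}) {
    // addis materializes ha16(X) << 16; the following load adds the
    // sign-extended lo16(X), reconstructing X exactly.
    uint32_t Recombined =
        (static_cast<uint32_t>(ha16(X)) << 16) +
        static_cast<uint32_t>(static_cast<int16_t>(lo16(X)));
    assert(Recombined == X);
  }
  return 0;
}
```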
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 167f335..69afd68 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -94,8 +94,8 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
@@ -146,7 +146,7 @@ namespace {
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
@@ -168,12 +168,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
#endif // NDEBUG
bool PPCCTRLoops::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
bool MadeChange = false;
@@ -229,7 +229,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
@@ -399,7 +400,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
@@ -530,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
@@ -551,7 +553,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
IRBuilder<> CondBuilder(CountedExitBranch);
Value *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
- Value *NewCond = CondBuilder.CreateCall(DecFunc);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
Value *OldCond = CountedExitBranch->getCondition();
CountedExitBranch->setCondition(NewCond);
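The analysis changes in this hunk track the split of LoopInfo and TargetLibraryInfo from their legacy-pass-manager shims: passes now depend on the wrapper pass and unwrap the analysis result from it. A sketch of the resulting idiom for any legacy function pass of this era (ExamplePass is a made-up name, and registration boilerplate is omitted):

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct ExamplePass : public FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LoopInfoWrapperPass>();  // was: AU.addRequired<LoopInfo>()
    AU.addPreserved<LoopInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    // The wrapper owns the analysis; callers unwrap it explicitly.
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    (void)LI;
    (void)TLI;
    return false; // read-only example: the IR is left unchanged
  }
};
char ExamplePass::ID = 0;
}
```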
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 56176f1..874a6fc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -55,13 +55,18 @@ def RetCC_PPC : CallingConv<[
// only the ELFv2 ABI fully utilizes all these registers.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
-
+
+ // QPX vectors are returned in QF1 and QF2.
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
// No explicit register is specified for the AnyReg calling convention. The
@@ -108,10 +113,13 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -144,6 +152,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+ // QPX vectors that are stored in double precision need 32-byte alignment.
+ CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
@@ -158,12 +169,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
def CC_PPC32_SVR4 : CallingConv<[
+ // QPX vectors mirror the scalar FP convention.
+ CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
// The first 12 Vector arguments are passed in AltiVec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCIfType<[v2f64, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+ V8, V9, V10, V11, V12, V13]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
- VSH10, VSH11, VSH12, VSH13]>>,
+ VSH10, VSH11, VSH12, VSH13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
@@ -224,9 +240,12 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-
def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+
+def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+
def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
new file mode 100644
index 0000000..fc89753
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -0,0 +1,202 @@
+//===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that forms early (predicated) returns. If-conversion handles some of
+// this, but this pass picks up some remaining cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR, "Number of early returns");
+
+namespace llvm {
+ void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+ // PPCEarlyReturn pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
+ struct PPCEarlyReturn : public MachineFunctionPass {
+ static char ID;
+ PPCEarlyReturn() : MachineFunctionPass(ID) {
+ initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &ReturnMBB) {
+ bool Changed = false;
+
+ MachineBasicBlock::iterator I = ReturnMBB.begin();
+ I = ReturnMBB.SkipPHIsAndLabels(I);
+
+ // The block must be essentially empty except for the blr.
+ if (I == ReturnMBB.end() ||
+ (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
+ I != ReturnMBB.getLastNonDebugInstr())
+ return Changed;
+
+ SmallVector<MachineBasicBlock*, 8> PredToRemove;
+ for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+ PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ bool OtherReference = false, BlockChanged = false;
+ for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+ MachineInstrBuilder MIB;
+ if (J->getOpcode() == PPC::B) {
+ if (J->getOperand(0).getMBB() == &ReturnMBB) {
+ // This is an unconditional branch to the return. Replace the
+ // branch with a blr.
+ MIB =
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BCC) {
+ if (J->getOperand(2).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg());
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ MIB = BuildMI(**PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ?
+ PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg());
+ MIB.copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->isBranch()) {
+ if (J->isIndirectBranch()) {
+ if (ReturnMBB.hasAddressTaken())
+ OtherReference = true;
+ } else
+ for (unsigned i = 0; i < J->getNumOperands(); ++i)
+ if (J->getOperand(i).isMBB() &&
+ J->getOperand(i).getMBB() == &ReturnMBB)
+ OtherReference = true;
+ } else if (!J->isTerminator() && !J->isDebugValue())
+ break;
+
+ if (J == (*PI)->begin())
+ break;
+
+ --J;
+ }
+
+ if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ OtherReference = true;
+
+ // Predecessors are stored in a vector and can't be removed here.
+ if (!OtherReference && BlockChanged) {
+ PredToRemove.push_back(*PI);
+ }
+
+ if (BlockChanged)
+ Changed = true;
+ }
+
+ for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+ PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+ if (Changed && !ReturnMBB.hasAddressTaken()) {
+ // We now might be able to merge this blr-only block into its
+ // by-layout predecessor.
+ if (ReturnMBB.pred_size() == 1) {
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+ if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
+ // Move the blr into the preceding block.
+ PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+ PrevMBB.removeSuccessor(&ReturnMBB);
+ }
+ }
+
+ if (ReturnMBB.pred_empty())
+ ReturnMBB.eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ // If the function does not have at least two blocks, then there is
+ // nothing to do.
+ if (MF.size() < 2)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+ "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
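The heart of the new pass is simpler than the MI plumbing suggests: a block whose only real content is a blr gets folded into every predecessor that branches straight to it. A toy CFG model of just that rewrite (the real pass also converts conditional branches to bclr, copies implicit operands, and maintains successor lists):

```cpp
#include <cstddef>
#include <vector>

// Toy CFG: each block carries a single terminator.
enum class Op { Branch, CondBranch, Return };
struct Block {
  Op Term = Op::Return;
  Block *Target = nullptr; // branch destination, if any
};

// Fold branch-to-return into a direct return, the shape of the
// B -> BLR rewrite above; the conditional BCC -> BCCLR form is omitted.
static std::size_t foldEarlyReturns(std::vector<Block> &CFG) {
  std::size_t NumFolded = 0;
  for (Block &B : CFG) {
    if (B.Term == Op::Branch && B.Target && B.Target->Term == Op::Return) {
      B.Term = Op::Return;
      B.Target = nullptr;
      ++NumFolded;
    }
  }
  return NumFolded;
}
```

Once no predecessor references the return block any longer, the real pass erases it outright, which is what the pred_empty() check above does.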
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 13bd0c7..0b8e23c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Optional.h"
@@ -85,18 +86,20 @@ typedef struct Address {
class PPCFastISel final : public FastISel {
const TargetMachine &TM;
+ const PPCSubtarget *PPCSubTarget;
+ PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const PPCSubtarget *PPCSubTarget;
LLVMContext *Context;
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
- TII(*TM.getSubtargetImpl()->getInstrInfo()),
- TLI(*TM.getSubtargetImpl()->getTargetLowering()),
- PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
+ PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
+ TII(*PPCSubTarget->getInstrInfo()),
+ TLI(*PPCSubTarget->getTargetLowering()),
Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
@@ -141,6 +144,7 @@ class PPCFastISel final : public FastISel {
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool isValueAvailable(const Value *V) const;
bool isVSFRCRegister(unsigned Register) const {
return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
}
@@ -280,6 +284,17 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
return false;
}
+bool PPCFastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ return true;
+
+ return false;
+}
+
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
@@ -672,8 +687,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined we use it in the store
+ // instruction; otherwise we use X0 as the base, which makes the vector
+ // instructions use zero in the computation of the effective address
+ // regardless of the content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -718,30 +743,31 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
- if (!OptPPCPred)
- return false;
-
- PPC::Predicate PPCPred = OptPPCPred.getValue();
+ if (isValueAvailable(CI)) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
- // Take advantage of fall-through opportunities.
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- PPCPred = PPC::InvertPredicate(PPCPred);
- }
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
- unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
- if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
- CondReg))
- return false;
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
- BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
- .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
- return true;
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ fastEmitBranch(FBB, DbgLoc);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
} else if (const ConstantInt *CI =
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
@@ -945,6 +971,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
@@ -1052,6 +1080,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
}
// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
@@ -1234,9 +1264,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1275,7 +1303,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
- // register.
+ // register (unless we're using the fast calling convention).
unsigned NextGPR = PPC::X3;
unsigned NextFPR = PPC::F1;
@@ -1325,7 +1353,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
unsigned ArgReg;
if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
ArgReg = NextFPR++;
- ++NextGPR;
+ if (CC != CallingConv::Fast)
+ ++NextGPR;
} else
ArgReg = NextGPR++;
@@ -1432,6 +1461,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
@@ -1523,13 +1555,14 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
- // Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (PPCSubTarget->isELFv2ABI())
- MIB.addReg(PPC::X2, RegState::Implicit);
+ // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ PPCFuncInfo->setUsesTOCBasePtr();
+ MIB.addReg(PPC::X2, RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -1863,6 +1896,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
@@ -1912,6 +1946,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
if (GV->isThreadLocal())
return 0;
+ PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
@@ -2297,13 +2332,10 @@ namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
- const TargetMachine &TM = FuncInfo.MF->getTarget();
-
// Only available on 64-bit ELF for now.
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
- if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
+ const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
-
return nullptr;
}
}
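A theme running through this file (and most of the patch) is that subtarget queries move from the TargetMachine to the MachineFunction, because features may now vary per function via attributes. A sketch of the new query shape; PPCSubtarget.h is the in-tree private header, so this compiles only inside the PowerPC target directory:

```cpp
#include "PPCSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Mirrors the check at the bottom of PPCFastISel.cpp: fast-isel is only
// offered on 64-bit SVR4, and the decision is now made per function.
static bool wantsFastISel(const MachineFunction &MF) {
  const PPCSubtarget &ST = MF.getSubtarget<PPCSubtarget>();
  return ST.isPPC64() && ST.isSVR4ABI();
}
```

The old TM.getSubtarget<PPCSubtarget>() form answered for the whole module, which is why it had to go once "target-features" attributes could differ between functions.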
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1b85047..b4008e4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -36,10 +37,58 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? 16 : 8;
+ // SVR4 ABI:
+ return STI.isPPC64() ? 16 : 4;
+}
+
+static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
+ return STI.isELFv2ABI() ? 24 : 40;
+}
+
+static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64() ? -8U : -4U;
+}
+
+static unsigned computeLinkageSize(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI() || STI.isPPC64())
+ return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+}
+
+static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64()
+ ? -16U
+ : (STI.getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ ? -12U
+ : -8U;
+}
+
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
- Subtarget(STI) {}
+ STI.getPlatformStackAlignment(), 0),
+ Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
+ TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
+ FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
+ LinkageSize(computeLinkageSize(Subtarget)),
+ BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
@@ -355,6 +404,20 @@ static bool hasNonRISpills(const MachineFunction &MF) {
return FuncInfo->hasNonRISpills();
}
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
@@ -372,15 +435,15 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
// The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
// stackless code if all local vars are reg-allocated.
- bool DisableRedZone = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ unsigned LR = RegInfo->getRARegister();
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
@@ -388,6 +451,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
@@ -399,9 +463,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// Maximum call frame needs to be at least big enough for linkage area.
- unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI(),
- Subtarget.isELFv2ABI());
+ unsigned minCallFrameSize = getLinkageSize();
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -444,8 +506,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -461,7 +522,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
@@ -494,29 +555,28 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
}
}
-void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+void PPCFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
bool needsCFI = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
- bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((isDarwinABI || isSVR4ABI) &&
+ assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
@@ -582,7 +642,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = getReturnSaveOffset();
int FPOffset = 0;
if (HasFP) {
@@ -592,8 +652,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset =
- PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset = getFramePointerSaveOffset();
}
}
@@ -605,10 +664,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
- BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
- isDarwinABI,
- isPIC);
+ BPOffset = getBasePointerSaveOffset();
}
}
@@ -864,9 +920,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -890,9 +946,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
- bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -920,7 +974,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = getReturnSaveOffset();
int FPOffset = 0;
if (HasFP) {
@@ -930,8 +984,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset =
- PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset = getFramePointerSaveOffset();
}
}
@@ -943,10 +996,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
- BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
- isDarwinABI,
- isPIC);
+ BPOffset = getBasePointerSaveOffset();
}
}
@@ -1108,25 +1158,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-/// MustSaveLR - Return true if this function requires that we save the LR
-/// register onto the stack in the prolog and restore it in the epilog of the
-/// function.
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
- const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
-
- // We need a save/restore of LR if there is any def of LR (which is
- // defined by calls, including the PIC setup sequence), or if there is
- // some use of the LR stack slot (e.g. for builtin_return_address).
- // (LR comes in 32 and 64 bit versions.)
- MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
- return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
-}
-
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1139,13 +1175,12 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
- bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
if (!FPSI && needsFP(MF)) {
// Find out what the fix offset of the frame pointer save area.
- int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int FPOffset = getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
@@ -1154,7 +1189,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int BPSI = FI->getBasePointerSaveIndex();
if (!BPSI && RegInfo->hasBasePointer(MF)) {
- int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
+ int BPOffset = getBasePointerSaveOffset();
// Allocate the frame index for the base pointer save area.
BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
@@ -1266,7 +1301,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
int64_t LowerBound = 0;
@@ -1310,7 +1345,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1458,7 +1493,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -1519,8 +1554,7 @@ restoreCRs(bool isPPC64, bool is31,
const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DebugLoc DL;
unsigned RestoreOp, MoveReg;
@@ -1552,8 +1586,7 @@ restoreCRs(bool isPPC64, bool is31,
void PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
@@ -1603,7 +1636,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
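One behavioral change hides in determineFrameLayout above: !MustSaveLR(MF, LR) joins the red-zone eligibility test, so a leaf function that defines LR can no longer skip frame setup. Restating the whole predicate over plain booleans makes the condition easier to audit (a model of the diff's logic, not LLVM code):

```cpp
// Facts determineFrameLayout gathers before deciding on a stackless body.
struct FrameFacts {
  unsigned FrameSize;
  bool NoRedZoneAttr;      // function carries Attribute::NoRedZone
  bool IsPPC64, IsSVR4ABI; // 32-bit SVR4 has no red zone at all
  bool HasVarSizedObjects; // dynamic alloca
  bool AdjustsStack;       // makes calls
  bool MustSaveLR;         // new in this patch: LR defined or slot used
  bool HasBasePointer;     // needs special stack alignment
};

// True when the function may run stackless: either a red zone exists
// (PPC64 or non-SVR4) or no stack is used at all, and nothing below
// forces a frame.
static bool fitsInRedZone(const FrameFacts &F) {
  return !F.NoRedZoneAttr &&
         (F.IsPPC64 || !F.IsSVR4ABI || F.FrameSize == 0) &&
         F.FrameSize <= 224 && !F.HasVarSizedObjects && !F.AdjustsStack &&
         !F.MustSaveLR && !F.HasBasePointer;
}
```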
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index c482588..28d074e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -23,6 +23,11 @@ class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
+ const unsigned ReturnSaveOffset;
+ const unsigned TOCSaveOffset;
+ const unsigned FramePointerSaveOffset;
+ const unsigned LinkageSize;
+ const unsigned BasePointerSaveOffset;
public:
PPCFrameLowering(const PPCSubtarget &STI);
@@ -33,7 +38,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
@@ -67,56 +72,23 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
- if (isDarwinABI)
- return isPPC64 ? 16 : 8;
- // SVR4 ABI:
- return isPPC64 ? 16 : 4;
- }
+ unsigned getReturnSaveOffset() const { return ReturnSaveOffset; }
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- static unsigned getTOCSaveOffset(bool isELFv2ABI) {
- return isELFv2ABI ? 24 : 40;
- }
+ unsigned getTOCSaveOffset() const { return TOCSaveOffset; }
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI:
- // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
- // for saving the frame pointer (if needed.) While the published ABI has
- // not used this slot since at least MacOSX 10.2, there is older code
- // around that does use it, and that needs to continue to work.
- if (isDarwinABI)
- return isPPC64 ? -8U : -4U;
-
- // SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -8U : -4U;
- }
+ unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; }
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- static unsigned getBasePointerSaveOffset(bool isPPC64,
- bool isDarwinABI,
- bool isPIC) {
- if (isDarwinABI)
- return isPPC64 ? -16U : -8U;
-
- // SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -16U : isPIC ? -12U : -8U;
- }
+ unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; }
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
- bool isELFv2ABI) {
- if (isDarwinABI || isPPC64)
- return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
-
- // SVR4 ABI:
- return 8;
- }
+ unsigned getLinkageSize() const { return LinkageSize; }
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
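The header change condenses five static, flag-parameterized helpers into const members computed once from the subtarget at construction time. A standalone model of the pattern, showing three of the five cached offsets with the same values the patch installs (SubtargetFacts stands in for PPCSubtarget):

```cpp
// ABI facts that fully determine the cached frame offsets.
struct SubtargetFacts {
  bool IsPPC64, IsDarwinABI, IsELFv2ABI;
};

struct FrameLoweringOffsets {
  explicit FrameLoweringOffsets(const SubtargetFacts &STI)
      : ReturnSaveOffset(computeReturnSaveOffset(STI)),
        TOCSaveOffset(STI.IsELFv2ABI ? 24 : 40),
        LinkageSize(computeLinkageSize(STI)) {}

  // Computed once; the old code recomputed them from bool arguments
  // at every call site.
  const unsigned ReturnSaveOffset;
  const unsigned TOCSaveOffset;
  const unsigned LinkageSize;

private:
  static unsigned computeReturnSaveOffset(const SubtargetFacts &STI) {
    if (STI.IsDarwinABI)
      return STI.IsPPC64 ? 16 : 8;
    return STI.IsPPC64 ? 16 : 4; // SVR4
  }
  static unsigned computeLinkageSize(const SubtargetFacts &STI) {
    if (STI.IsDarwinABI || STI.IsPPC64)
      return (STI.IsELFv2ABI ? 4 : 6) * (STI.IsPPC64 ? 8 : 4);
    return 8; // 32-bit SVR4
  }
};
```

Callers then read getLinkageSize()-style accessors instead of threading isPPC64/isDarwinABI/isELFv2ABI flags through every query, which is exactly what the .cpp hunks above do.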
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index d9b242c..7234e30 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -160,7 +160,7 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
// new group.
if (isLoadAfterStore(SU) && CurSlots < 6) {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If we're using a special group-terminating nop, then we need only one.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
Directive == PPC::DIR_PWR8 )
@@ -220,7 +220,7 @@ void PPCDispatchGroupSBHazardRecognizer::Reset() {
void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 75ab343..afc1f36 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -42,11 +42,15 @@ using namespace llvm;
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
-cl::opt<bool> UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
- cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden);
-cl::opt<bool> BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates",
- cl::desc("stress rotate selection in aggressive ppc isel for "
- "bit permutations"), cl::Hidden);
+static cl::opt<bool>
+ UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
+ cl::desc("use aggressive ppc isel for bit permutations"),
+ cl::Hidden);
+static cl::opt<bool> BPermRewriterNoMasking(
+ "ppc-bit-perm-rewriter-stress-rotates",
+ cl::desc("stress rotate selection in aggressive ppc isel for "
+ "bit permutations"),
+ cl::Hidden);
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
@@ -59,22 +63,20 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCTargetLowering *PPCLowering;
const PPCSubtarget *PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
- PPCSubTarget = TM.getSubtargetImpl();
+ PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = PPCSubTarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
if (!PPCSubTarget->isSVR4ABI())
@@ -88,28 +90,21 @@ namespace {
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
/// getI64Imm - Return a target constant with the specified value, of type
/// i64.
- inline SDValue getI64Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
+ inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
}
/// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
+ inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy());
}
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
- /// with any number of 0s on either side. The 1s are allowed to wrap from
- /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
- /// 0x0F0F0000 is not, since all 1s are not contiguous.
- static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
-
-
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
@@ -184,20 +179,35 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
- // We need to make sure that this one operand does not end up in r0
- // (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(Op), Op.getValueType(),
- Op, RC), 0);
-
- OutOps.push_back(NewOp);
- return false;
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDLoc dl(Op);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
}
void InsertVRSaveCode(MachineFunction &MF);
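The r0 restriction in the comment above exists because, in PPC D-form addressing, a base-register field of zero selects the literal value 0 rather than the contents of r0, so an operand lowered as "0(%op)" must never be allocated to r0. A minimal model of that addressing rule (editor's sketch; dFormEA is a hypothetical helper, not a patch API):

    #include <cassert>
    #include <cstdint>
    // Hypothetical model of D-form effective-address computation.
    static uint64_t dFormEA(unsigned BaseField, const uint64_t GPR[32],
                            int16_t Disp) {
      uint64_t Base = (BaseField == 0) ? 0 : GPR[BaseField]; // r0 reads as 0
      return Base + (uint64_t)(int64_t)Disp;
    }
    int main() {
      uint64_t GPR[32] = {};
      GPR[0] = 0x1000;                      // contents of r0 are ignored below
      GPR[3] = 0x2000;
      assert(dFormEA(0, GPR, 8) == 8);      // "8(0)": base is literal zero
      assert(dFormEA(3, GPR, 8) == 0x2008); // "8(r3)": base is r3's contents
    }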
@@ -221,6 +231,8 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+
+ SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
}
@@ -258,7 +270,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -294,7 +306,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -313,7 +325,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
@@ -395,33 +407,9 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
if (SN->hasOneUse())
return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
- getSmallIPtrImm(Offset));
+ getSmallIPtrImm(Offset, dl));
return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
- getSmallIPtrImm(Offset));
-}
-
-bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
- if (!Val)
- return false;
-
- if (isShiftedMask_32(Val)) {
- // look for the first non-zero bit
- MB = countLeadingZeros(Val);
- // look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
- return true;
- } else {
- Val = ~Val; // invert mask
- if (isShiftedMask_32(Val)) {
- // effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
- // effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
- return true;
- }
- }
- // no run present
- return false;
+ getSmallIPtrImm(Offset, dl));
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
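The isRunOfOnes member deleted above is not gone from the backend: call sites later in this file still use a helper of that name, presumably now shared from a common header. For readers of the deleted comment, a portable restatement with its examples worked out (editor's sketch; clz32 and isShiftedMask32 stand in for the MathExtras helpers):

    #include <cassert>
    #include <cstdint>
    static unsigned clz32(uint32_t V) {           // count leading zeros
      unsigned N = 0;
      for (uint32_t B = 0x80000000u; B && !(V & B); B >>= 1) ++N;
      return N;
    }
    static bool isShiftedMask32(uint32_t V) {     // one non-wrapping run of 1s
      return V && ((((V - 1) | V) + 1) & ((V - 1) | V)) == 0;
    }
    // MB/ME are big-endian bit indices, as rlwinm consumes them (bit 0 = MSB).
    static bool runOfOnes(uint32_t Val, unsigned &MB, unsigned &ME) {
      if (!Val) return false;
      if (isShiftedMask32(Val)) {
        MB = clz32(Val);                          // first 1 from the MSB
        ME = clz32((Val - 1) ^ Val);              // last 1 of the run
        return true;
      }
      Val = ~Val;                                 // maybe a wrapping run
      if (isShiftedMask32(Val)) {
        ME = clz32(Val) - 1;                      // 1 just above the hole
        MB = clz32((Val - 1) ^ Val) + 1;          // 1 just below the hole
        return true;
      }
      return false;
    }
    int main() {
      unsigned MB, ME;
      assert(runOfOnes(0x0000FFF0u, MB, ME) && MB == 16 && ME == 27);
      assert(runOfOnes(0xFF0000FFu, MB, ME) && MB == 24 && ME == 7); // wraps
      assert(!runOfOnes(0x0F0F0000u, MB, ME));    // 1s not contiguous
    }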
@@ -536,8 +524,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
}
SH &= 31;
- SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
- getI32Imm(ME) };
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
@@ -665,8 +653,8 @@ static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
- auto getI32Imm = [CurDAG](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
// Simple value.
@@ -756,8 +744,8 @@ static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
if (!RMin)
return SelectInt64Direct(CurDAG, dl, Imm);
- auto getI32Imm = [CurDAG](unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
@@ -1207,8 +1195,8 @@ class BitPermutationSelector {
}
}
- SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
uint64_t getZerosMask() {
@@ -1280,7 +1268,8 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
@@ -1290,10 +1279,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- VRot, getI32Imm(ANDIMask)), 0);
+ VRot, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- VRot, getI32Imm(ANDISMask)), 0);
+ VRot, getI32Imm(ANDISMask, dl)), 0);
SDValue TotalVal;
if (!ANDIVal)
@@ -1339,8 +1328,10 @@ class BitPermutationSelector {
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt), getI32Imm(0), getI32Imm(31) };
- Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
+ 0);
} else {
Res = VRI.V;
}
@@ -1360,13 +1351,15 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
- getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ { BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt), getI32Imm(Bits.size() - BG.EndIdx - 1),
- getI32Imm(Bits.size() - BG.StartIdx - 1) };
+ { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
}
}
@@ -1385,10 +1378,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
- Res, getI32Imm(ANDIMask)), 0);
+ Res, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
- Res, getI32Imm(ANDISMask)), 0);
+ Res, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -1439,27 +1432,27 @@ class BitPermutationSelector {
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
- getI32Imm(InstMaskEnd - 32) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
Ops), 0);
}
if (InstMaskEnd == 63) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
}
if (InstMaskStart == 0) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskEnd) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
}
if (InstMaskEnd == 63 - RLAmt) {
SDValue Ops[] =
- { V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
}
@@ -1500,15 +1493,15 @@ class BitPermutationSelector {
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
SDValue Ops[] =
- { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart - 32),
- getI32Imm(InstMaskEnd - 32) };
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
Ops), 0);
}
if (InstMaskEnd == 63 - RLAmt) {
SDValue Ops[] =
- { Base, V, getI32Imm(RLAmt), getI32Imm(InstMaskStart) };
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
}
@@ -1655,10 +1648,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- VRot, getI32Imm(ANDIMask)), 0);
+ VRot, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- VRot, getI32Imm(ANDISMask)), 0);
+ VRot, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
TotalVal = ANDISVal;
@@ -1805,10 +1798,10 @@ class BitPermutationSelector {
SDValue ANDIVal, ANDISVal;
if (ANDIMask != 0)
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
- Res, getI32Imm(ANDIMask)), 0);
+ Res, getI32Imm(ANDIMask, dl)), 0);
if (ANDISMask != 0)
ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
- Res, getI32Imm(ANDISMask)), 0);
+ Res, getI32Imm(ANDISMask, dl)), 0);
if (!ANDIVal)
Res = ANDISVal;
@@ -1953,11 +1946,13 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -1969,21 +1964,22 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// cmplwi cr0,r0,0x5678
// beq cr0,L6
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
- getI32Imm(Imm >> 16)), 0);
+ getI32Imm(Imm >> 16, dl)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLW;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
- getI32Imm((int)SImm & 0xFFFF)),
+ getI32Imm((int)SImm & 0xFFFF,
+ dl)),
0);
Opc = PPC::CMPW;
}
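The xoris/cmplwi sequence in the comment above is an exact equivalence, not an approximation: XOR with the high halfword of the immediate is a bijection, so the intermediate equals the low halfword precisely when the operand equals the full 32-bit immediate. The 64-bit XORIS8/CMPLDI fold below works the same way for isUInt<32> immediates. A quick compile-time check (editor's sketch, not part of the patch):

    #include <cstdint>
    // xoris flips the high halfword; cmplwi compares against the low one.
    constexpr bool foldedEq(uint32_t X, uint32_t Imm) {
      return (X ^ (Imm & 0xFFFF0000u)) == (Imm & 0x0000FFFFu);
    }
    static_assert(foldedEq(0x12345678u, 0x12345678u), "matches on equality");
    static_assert(!foldedEq(0x12345679u, 0x12345678u), "rejects a near miss");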
@@ -1994,11 +1990,13 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// If this is a 16-bit signed immediate, fold it.
if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI32Imm(Imm & 0xFFFF)), 0);
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
@@ -2011,22 +2009,23 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// beq cr0,L6
if (isUInt<32>(Imm)) {
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
- getI64Imm(Imm >> 16)), 0);
+ getI64Imm(Imm >> 16, dl)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
- getI64Imm(Imm & 0xFFFF)), 0);
+ getI64Imm(Imm & 0xFFFF, dl)),
+ 0);
}
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
- getI64Imm(Imm & 0xFFFF)), 0);
+ getI64Imm(Imm & 0xFFFF, dl)), 0);
Opc = PPC::CMPLD;
} else {
short SImm;
if (isIntS16Immediate(RHS, SImm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
- getI64Imm(SImm & 0xFFFF)),
+ getI64Imm(SImm & 0xFFFF, dl)),
0);
Opc = PPC::CMPD;
}
@@ -2101,7 +2100,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -2180,6 +2179,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -2188,6 +2189,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -2196,6 +2199,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
break;
default:
break;
@@ -2222,26 +2227,29 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
default: break;
case ISD::SETEQ: {
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
- SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETNE: {
if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(~0U)), 0);
+ Op, getI32Imm(~0U, dl)), 0);
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
AD.getValue(1));
}
case ISD::SETLT: {
- SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
- SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
}
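All three zero-comparison selections above are branch-free bit tricks: cntlzw returns 32 only for zero, so bit 5 of the count (extracted by the rlwinm 27,5,31) is the SETEQ result; the sign bit (rlwinm 1,31,31) is SETLT; and (-x) & ~x has its sign bit set exactly when x is positive. A sketch verifying all three (editor's illustration):

    #include <cassert>
    #include <cstdint>
    static unsigned clz(uint32_t V) {              // count leading zeros
      unsigned N = 0;
      for (uint32_t B = 0x80000000u; B && !(V & B); B >>= 1) ++N;
      return N;                                    // 32 when V == 0
    }
    int main() {
      for (int64_t X = -70000; X <= 70000; X += 7) {
        uint32_t U = (uint32_t)X;
        assert(((clz(U) >> 5) != 0) == (X == 0));  // cntlzw ; rlwinm 27,5,31
        assert(((U >> 31) != 0) == (X < 0));       // rlwinm 1,31,31
        uint32_t T = (0u - U) & ~U;                // neg ; andc
        assert(((T >> 31) != 0) == (X > 0));
      }
    }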
@@ -2252,34 +2260,35 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETEQ:
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(1)), 0);
+ Op, getI32Imm(1, dl)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(CurDAG->getMachineNode(PPC::LI, dl,
MVT::i32,
- getI32Imm(0)), 0),
- Op.getValue(1));
+ getI32Imm(0, dl)),
+ 0), Op.getValue(1));
case ISD::SETNE: {
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- Op, getI32Imm(~0U));
+ Op, getI32Imm(~0U, dl));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
Op, SDValue(AD, 1));
}
case ISD::SETLT: {
SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
- getI32Imm(1)), 0);
+ getI32Imm(1, dl)), 0);
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
- SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
- SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
- 0);
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
- getI32Imm(1));
+ getI32Imm(1, dl));
}
}
}
@@ -2291,6 +2300,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
+ if (PPCSubTarget->hasQPX())
+ return nullptr;
+
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
@@ -2326,15 +2338,23 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
- SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
- getI32Imm(31), getI32Imm(31) };
+ SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl) };
if (!Inv)
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
// Get the specified bit.
SDValue Tmp =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
- return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
+}
+
+SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ return Result;
}
@@ -2394,7 +2414,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue ShiftAmt =
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
- getConstantIntValue(), N->getValueType(0));
+ getConstantIntValue(), dl,
+ N->getValueType(0));
if (N->getValueType(0) == MVT::i64) {
SDNode *Op =
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
@@ -2455,9 +2476,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -2466,6 +2488,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@@ -2490,9 +2514,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
}
}
@@ -2505,7 +2530,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isInt32Immediate(N->getOperand(1), Imm) &&
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is just a masked value where the input is not handled above, and
@@ -2514,14 +2540,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRunOfOnes(Imm, MB, ME) &&
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is a 64-bit zero-extension mask, emit rldicl.
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
- MB = 64 - CountTrailingOnes_64(Imm64);
+ MB = 64 - countTrailingOnes(Imm64);
SH = 0;
// If the operand is a logical right shift, we can fold it into this
@@ -2536,7 +2563,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SH = 64 - Imm;
}
- SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
}
// AND X, 0 -> 0, not "rlwinm 32".
@@ -2554,7 +2581,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isRunOfOnes(Imm, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
N->getOperand(0).getOperand(1),
- getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
@@ -2595,7 +2623,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
@@ -2607,7 +2636,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
- getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
@@ -2627,11 +2657,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
N->getOperand(0),
- CurDAG->getTargetConstant(1, InVT)), 0);
+ CurDAG->getTargetConstant(1, dl, InVT)),
+ 0);
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
SDValue SRIdxVal =
CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
- PPC::sub_eq : PPC::sub_gt, MVT::i32);
+ PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
CR0Reg, SRIdxVal,
@@ -2658,7 +2689,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- N->getOperand(0), getI32Imm(~0U));
+ N->getOperand(0), getI32Imm(~0U, dl));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
SDValue(Tmp, 0), N->getOperand(0),
SDValue(Tmp, 1));
@@ -2703,12 +2734,21 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32)
- SelectCCOp = PPC::SELECT_CC_F4;
+ if (PPCSubTarget->hasP8Vector())
+ SelectCCOp = PPC::SELECT_CC_VSSRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
if (PPCSubTarget->hasVSX())
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@@ -2716,7 +2756,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_VRRC;
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
- getI32Imm(BROpc) };
+ getI32Imm(BROpc, dl) };
return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
}
case ISD::VSELECT:
@@ -2750,7 +2790,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
DM[1] = 1 - tmp;
}
- SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
+ MVT::i32);
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -2789,7 +2830,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
SDValue Pred =
- getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
@@ -2819,7 +2860,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
- SDValue Ops[] = { getI32Imm(PCC), CondCode,
+ SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
@@ -2838,8 +2879,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
"Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
- return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
+ MVT::i32, GA, N->getOperand(1)));
}
// For medium and large code model, we generate two instructions as
@@ -2859,12 +2900,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+ TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
CModel == CodeModel::Large)
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
@@ -2872,8 +2913,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
(GValue->isDeclaration() || GValue->isWeakForLinker())) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
@@ -2922,7 +2963,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp = VSPLTIS[BHW] elt
// VADDU[BHW]M tmp, tmp
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
- SDValue EltVal = getI32Imm(Elt >> 1);
+ SDValue EltVal = getI32Imm(Elt >> 1, dl);
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
SDValue TmpVal = SDValue(Tmp, 0);
return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
@@ -2934,9 +2975,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp1 = VSPLTIS[BHW] elt-16
// tmp2 = VSPLTIS[BHW] -16
// VSUBU[BHW]M tmp1, tmp2
- SDValue EltVal = getI32Imm(Elt - 16);
+ SDValue EltVal = getI32Imm(Elt - 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
- EltVal = getI32Imm(-16);
+ EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
@@ -2948,9 +2989,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Into: tmp1 = VSPLTIS[BHW] elt+16
// tmp2 = VSPLTIS[BHW] -16
// VADDU[BHW]M tmp1, tmp2
- SDValue EltVal = getI32Imm(Elt + 16);
+ SDValue EltVal = getI32Imm(Elt + 16, dl);
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
- EltVal = getI32Imm(-16);
+ EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0));
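The three BUILD_VECTOR tricks above all work around VSPLTIS* taking only a sign-extended 5-bit immediate, i.e. direct splats of values in [-16,15]; anything else handled here is synthesized with one extra vector add or subtract. The arithmetic behind each case (editor's sketch):

    #include <cassert>
    int main() {
      for (int Elt = -32; Elt <= 30; Elt += 2)  // even values: double a half
        assert((Elt >> 1) >= -16 && (Elt >> 1) <= 15 &&   // fits the imm
               (Elt >> 1) + (Elt >> 1) == Elt);  // vspltis ; vaddu[bhw]m x,x
      for (int Elt = 16; Elt <= 31; ++Elt)      // just above the splat range
        assert((Elt - 16) - (-16) == Elt);       // vsubu[bhw]m tmp1, tmp2
      for (int Elt = -31; Elt <= -17; ++Elt)    // just below the splat range
        assert((Elt + 16) + (-16) == Elt);       // vaddu[bhw]m tmp1, tmp2
    }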
@@ -3159,7 +3200,8 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
if (NonTrivialMask && !Alt) {
// Res = Mask & CMPB
- Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, VT));
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+ CurDAG->getConstant(Mask, dl, VT));
} else if (Alt) {
// Res = (CMPB & Mask) | (~CMPB & Alt)
// Which, as suggested here:
@@ -3168,8 +3210,9 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// Res = Alt ^ ((Alt ^ Mask) & CMPB)
// useful because the (Alt ^ Mask) can be pre-computed.
Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
- CurDAG->getConstant(Mask ^ Alt, VT));
- Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, VT));
+ CurDAG->getConstant(Mask ^ Alt, dl, VT));
+ Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
+ CurDAG->getConstant(Alt, dl, VT));
}
return Res;
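The comment above cites the classic masked-merge rewrite: bit by bit, Alt ^ ((Alt ^ Mask) & CMPB) yields Mask where CMPB is 1 and Alt where it is 0, exactly like (CMPB & Mask) | (~CMPB & Alt); the payoff is that (Alt ^ Mask) folds to a single constant. A brute-force spot check (editor's sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint32_t Vals[] = {0u, 0xFFFFFFFFu, 0x00FF00FFu,
                               0x12345678u, 0xDEADBEEFu};
      for (uint32_t C : Vals)        // CMPB result (any bit pattern works)
        for (uint32_t M : Vals)      // Mask
          for (uint32_t A : Vals)    // Alt
            assert(((C & M) | (~C & A)) == (A ^ ((A ^ M) & C)));
    }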
@@ -3201,20 +3244,20 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
EVT VT = N->getValueType(0);
SDValue Cond = N->getOperand(0);
SDValue ConstTrue =
- CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
- SDValue ConstFalse = CurDAG->getConstant(0, VT);
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
do {
SDNode *User = *N->use_begin();
if (User->getNumOperands() != 2)
break;
- auto TryFold = [this, N, User](SDValue Val) {
+ auto TryFold = [this, N, User, dl](SDValue Val) {
SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
- return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
User->getValueType(0),
O0.getNode(), O1.getNode());
};
@@ -3404,8 +3447,12 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
case PPC::SELECT_VSRC: {
SDValue Op = MachineNode->getOperand(0);
if (Op.isMachineOpcode()) {
@@ -3711,8 +3758,12 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
case PPC::SELECT_VSRC:
if (Op1Set)
ResNode = MachineNode->getOperand(1).getNode();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e580c81..bb9315e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+
using namespace llvm;
// FIXME: Remove this once soft-float is supported.
@@ -57,9 +58,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
- : TargetLowering(TM),
- Subtarget(*TM.getSubtargetImpl()) {
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
@@ -88,11 +89,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -172,13 +177,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
+ Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
+ Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
@@ -400,6 +405,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
+
+ // Vector instructions introduced in P8
+ if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ }
+ else {
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
@@ -455,8 +470,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -504,7 +517,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -557,20 +575,32 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector())
+ addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
+
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -593,6 +623,167 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
+
+ if (Subtarget.hasP8Altivec()) {
+ addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
+ }
+ }
+
+ if (Subtarget.hasQPX()) {
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+ setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+ setOperationAction(ISD::AND , MVT::v4i1, Legal);
+ setOperationAction(ISD::OR , MVT::v4i1, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+ setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
+ setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+ addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+ // These need to set FE_INEXACT, and so cannot be vectorized here.
+ setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ }
}
if (Subtarget.has64BitSupport())
@@ -606,8 +797,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
}
setBooleanContents(ZeroOrOneBooleanContent);
- // Altivec instructions set fields to all zeros or all ones.
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget.hasAltivec()) {
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ }
if (!isPPC64) {
// These libcalls are not available in 32-bit.
@@ -672,8 +866,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget.useCRBits())
+ if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
+ setJumpIsExpensive();
+ }
setMinFunctionAlignment(2);
if (Subtarget.isDarwin())
@@ -704,7 +900,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
@@ -716,6 +912,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+ // friends. The overhead of a function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -763,8 +966,8 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((PPCISD::NodeType)Opcode) {
+ case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
@@ -784,17 +987,14 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::LOAD: return "PPCISD::LOAD";
- case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
- case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
- case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@@ -803,14 +1003,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
+ case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
+ case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
+ case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -819,27 +1024,44 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
- case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
- case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
- case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
+ case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
+ case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
+ case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
+ case PPCISD::RFEBB: return "PPCISD::RFEBB";
+ case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
+ case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
+ case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
+ case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
+ case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
+ case PPCISD::QBFLT: return "PPCISD::QBFLT";
+ case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
}
+ return nullptr;
}
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+ if (Subtarget.hasQPX())
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
return VT.changeVectorElementTypeToInteger();
}
@@ -875,11 +1097,11 @@ static bool isConstantOrUndef(int Op, int Val) {
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
-/// inputs (1), and little-endian operantion with two different inputs (2).
+/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -906,11 +1128,11 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
-/// inputs (1), and little-endian operantion with two different inputs (2).
+/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
@@ -937,6 +1159,56 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
return true;
}
+/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
+/// current subtarget.
+///
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ const PPCSubtarget& Subtarget =
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (!Subtarget.hasP8Vector())
+ return false;
+
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 4;
+ for (unsigned i = 0; i != 8; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
+ return false;
+ }
+ return true;
+}
+
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
@@ -965,7 +1237,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
@@ -990,7 +1262,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
@@ -1034,8 +1306,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
- isLittleEndian();
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
@@ -1086,29 +1357,13 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-/// isAllNegativeZeroVector - Returns true if all elements of build_vector
-/// are -0.0.
-bool PPC::isAllNegativeZeroVector(SDNode *N) {
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
-
- APInt APVal, APUndef;
- unsigned BitSize;
- bool HasAnyUndefs;
-
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- return CFP->getValueAPF().isNegZero();
-
- return false;
-}
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1161,17 +1416,17 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Finally, check the least significant entry.
if (LeadingZero) {
if (!UniquedVals[Multiple-1].getNode())
- return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
- if (Val < 16)
- return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ if (Val < 16) // 0,0,0,4 -> vspltisw(4)
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
if (LeadingOnes) {
if (!UniquedVals[Multiple-1].getNode())
- return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
- return DAG.getTargetConstant(Val, MVT::i32);
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
return SDValue();
@@ -1202,17 +1457,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
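+  // This still handles, e.g., a 0x01010101 element as a ByteSize == 1 splat
+  // of 0x01.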
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
@@ -1222,10 +1470,40 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Finally, if this value fits in a 5 bit sext field, return it
if (SignExtend32<5>(MaskVal) == MaskVal)
- return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
return SDValue();
}
+/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isQVALIGNIShuffleMask(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
+ return -1;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 4) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
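+  // For example, a mask of <1,2,3,4> starts at element 1 and is consecutive,
+  // giving ShiftAmt == 1.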
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 4; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+
+ return ShiftAmt;
+}
+
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
@@ -1351,7 +1629,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
- Disp = DAG.getTargetConstant(imm, N.getValueType());
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -1391,7 +1669,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else {
Base = N.getOperand(0);
}
- Disp = DAG.getTargetConstant(imm, N.getValueType());
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
return true;
}
}
@@ -1402,7 +1680,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// this as "d, 0"
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
- Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
@@ -1415,16 +1693,17 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
- Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+ Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
- Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
+ MVT::i32);
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
- Disp = DAG.getTargetConstant(0, getPointerTy());
+ Disp = DAG.getTargetConstant(0, dl, getPointerTy());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
@@ -1485,9 +1764,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
- // PowerPC doesn't have preinc load/store instructions for vectors.
- if (VT.isVector())
- return false;
+ // PowerPC doesn't have preinc load/store instructions for vectors (except
+ // for QPX, which does have preinc r+r forms).
+ if (VT.isVector()) {
+ if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+ return false;
+ } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
+ }
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
@@ -1544,8 +1830,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
-static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags,
+static bool GetLabelAccessInfo(const TargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ unsigned &HiOpFlags, unsigned &LoOpFlags,
const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
@@ -1560,7 +1847,7 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
// If this is a reference to a global value that requires a non-lazy-ptr, make
// sure that instruction lowering adds it.
- if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ if (GV && Subtarget.hasLazyResolverStub(GV)) {
HiOpFlags |= PPCII::MO_NLP_FLAG;
LoOpFlags |= PPCII::MO_NLP_FLAG;
@@ -1575,9 +1862,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
- EVT PtrVT = HiPart.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
SDLoc DL(HiPart);
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1592,6 +1879,28 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
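+// Record in PPCFunctionInfo that this function uses the TOC base pointer
+// (X2), so later passes know it must be set up and kept live.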
+static void setUsesTOCBasePtr(MachineFunction &MF) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setUsesTOCBasePtr();
+}
+
+static void setUsesTOCBasePtr(SelectionDAG &DAG) {
+ setUsesTOCBasePtr(DAG.getMachineFunction());
+}
+
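+// Build a TOC_ENTRY load of GA through the TOC: the TOC base is X2 on 64-bit
+// targets and the GlobalBaseReg pseudo in 32-bit PIC code.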
+static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
+ SDValue GA) {
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
+ DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
+
+ SDValue Ops[] = { GA, Reg };
+ return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
+ DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(), 0, false, true,
+ false, 0);
+}
+
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@@ -1601,20 +1910,19 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(CP), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
- SDLoc DL(CP);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(CP), false, GA);
}
SDValue CPIHi =
@@ -1631,20 +1939,19 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(JT), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
- SDLoc DL(GA);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(GA), false, GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1661,39 +1968,19 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
-// Generate a call to __tls_get_addr for the given GOT entry Op.
-std::pair<SDValue,SDValue>
-PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const {
-
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, IntPtrTy,
- DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
- std::move(Args), 0);
-
- return LowerCallTo(CLI);
-}
-
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@@ -1728,6 +2015,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
PPCII::MO_TLS);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
@@ -1739,10 +2027,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSGD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
@@ -1752,17 +2040,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- return CallResult.first;
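+    // ADDI_TLSGD_L_ADDR is a combined pseudo covering the addi plus the call
+    // to __tls_get_addr; it is expanded later, replacing the explicit call
+    // lowering removed above.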
+ return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+ GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSLD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
@@ -1772,13 +2058,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- SDValue TLSAddr = CallResult.first;
- SDValue Chain = CallResult.second;
- SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, TLSAddr, TGA);
+ SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+ PtrVT, GOTPtr, TGA, TGA);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+ PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@@ -1795,20 +2078,20 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, DL, true, GA);
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ return getTOCEntry(DAG, DL, false, GA);
}
SDValue GAHi =
@@ -1865,7 +2148,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, MVT::i32));
+ DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
// Leave comparisons against 0 and -1 alone for now, since they're usually
@@ -1885,7 +2168,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
Op.getOperand(1));
- return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
}
return SDValue();
}
@@ -1911,11 +2194,11 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::i64) {
// Check if GprIndex is even
SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
- DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// Align GprIndex to be even if it isn't
GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
GprIndex);
@@ -1923,7 +2206,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// fpr index is 1 byte after gpr
SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
@@ -1932,10 +2215,10 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(8, dl, MVT::i32));
SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, dl, MVT::i32));
// areas
SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
@@ -1950,12 +2233,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// select overflow_area if index > 8
SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(8, MVT::i32), ISD::SETLT);
+ DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
// adjustment constant gpr_index * 4/8
SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(VT.isInteger() ? 4 : 8,
+ DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
MVT::i32));
// OurReg = RegSaveArea + RegConstant
@@ -1965,12 +2248,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// Floating types are 32 bytes into RegSaveArea
if (VT.isFloatingPoint())
OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
// increase {f,g}pr_index by 1 (or 2 if VT is i64)
SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
- DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
MVT::i32));
InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
@@ -1984,7 +2267,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// increase overflow_area by 4/8 if gpr/fpr > 8
SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
DAG.getConstant(VT.isInteger() ? 4 : 8,
- MVT::i32));
+ dl, MVT::i32));
OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
OverflowAreaPlusN);
@@ -2006,8 +2289,8 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
  // 2*sizeof(char) + 2 bytes alignment padding + 2*sizeof(char*) = 12 bytes
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, MVT::i32), 8, false, true,
- MachinePointerInfo(), MachinePointerInfo());
+ DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
+ false, MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -2036,7 +2319,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Trmp; Args.push_back(Entry);
// TrampSize == (isPPC64 ? 48 : 40);
- Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
isPPC64 ? MVT::i64 : MVT::i32);
Args.push_back(Entry);
@@ -2097,8 +2380,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// } va_list[1];
- SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
- SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -2109,13 +2392,13 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
PtrVT);
uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
- SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
- SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
uint64_t FPROffset = 1;
- SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -2177,7 +2460,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
- unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
// Skip one register if the first unallocated register has an even register
// number and there are still argument registers available which have not been
@@ -2205,7 +2488,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
const unsigned NumArgRegs = array_lengthof(ArgRegs);
- unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
// If there is only one Floating-point register left we need to put both f64
// values of a split ppc_fp128 value on the stack.
@@ -2220,16 +2503,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2257,8 +2540,13 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
// Altivec parameters are padded to a 16 byte boundary.
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
Align = 16;
+ // QPX vector types stored in double-precision are padded to a 32 byte
+ // boundary.
+ else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+ Align = 32;
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
@@ -2297,7 +2585,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
unsigned ParamAreaSize,
unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs) {
+ unsigned &AvailableVRs, bool HasQPX) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
@@ -2321,14 +2609,19 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+ // QPX registers overlap with the scalar FP registers.
+ (HasQPX && (ArgVT == MVT::v4f32 ||
+ ArgVT == MVT::v4f64 ||
+ ArgVT == MVT::v4i1)))
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
}
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
- ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
if (AvailableVRs > 0) {
--AvailableVRs;
return false;
@@ -2340,10 +2633,9 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
-static unsigned EnsureStackAlignment(const TargetMachine &Target,
+static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
- unsigned TargetAlign =
- Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = Lowering->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
@@ -2424,7 +2716,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2445,7 +2737,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
- RC = &PPC::F4RCRegClass;
+ if (Subtarget.hasP8Vector())
+ RC = &PPC::VSSRCRegClass;
+ else
+ RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
if (Subtarget.hasVSX())
@@ -2456,13 +2751,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
- case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v4f32:
+ RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VSHRCRegClass;
break;
+ case MVT::v4f64:
+ RC = &PPC::QFRCRegClass;
+ break;
+ case MVT::v4i1:
+ RC = &PPC::QBRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
@@ -2510,7 +2813,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
@@ -2532,10 +2836,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
if (DisablePPCFloatInVariadic)
NumFPArgRegs = 0;
- FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
- NumGPArgRegs));
- FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
- NumFPArgRegs));
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
@@ -2562,7 +2864,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2581,7 +2883,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2625,22 +2927,20 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2653,6 +2953,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
+ const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
@@ -2668,7 +2969,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
for (unsigned i = 0, e = Ins.size(); i != e; ++i)
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs))
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
@@ -2676,7 +2978,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// although the first ones are often in registers.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
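+  // QPX registers overlap the scalar FPRs, so QPX arguments consume FPR_idx.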
+ unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -2692,21 +2995,33 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
}
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- unsigned CurArgOffset = ArgOffset;
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ unsigned CurArgOffset, Align;
+ auto ComputeArgOffset = [&]() {
+ /* Respect alignment of argument on the stack. */
+ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ CurArgOffset = ArgOffset;
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputeArgOffset();
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ }
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2744,13 +3059,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// address of the enclosing doubleword on big-endian systems.
SDValue Arg = FIN;
if (!isLittleEndian) {
- SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
}
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store;
@@ -2790,7 +3105,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Addr = FIN;
if (j) {
- SDValue Off = DAG.getConstant(j, PtrVT);
+ SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
@@ -2812,7 +3127,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -2820,10 +3135,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 8;
break;
case MVT::f32:
@@ -2835,40 +3154,51 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned VReg;
if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx],
+ Subtarget.hasP8Vector()
+ ? &PPC::VSSRCRegClass
+ : &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
- &PPC::VSFRCRegClass :
- &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
+ ? &PPC::VSFRCRegClass
+ : &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
- } else if (GPR_idx != Num_GPR_Regs) {
+ } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::f32) {
if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
- ArgOffset += ArgSize;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || needsLoad) {
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
case MVT::v4f32:
case MVT::v4i32:
@@ -2876,6 +3206,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
@@ -2886,9 +3218,43 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
+ break;
+ } // not QPX
+
+ assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+ /* fall through */
+
+ case MVT::v4f64:
+ case MVT::v4i1:
+ // QPX vectors are treated like their scalar floating-point subregisters
+ // (except that they're larger).
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
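+      // A v4f32 QPX argument occupies 16 bytes in the argument area; v4f64
+      // and v4i1 occupy 32.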
+ if (QFPR_idx != Num_QFPR_Regs) {
+ const TargetRegisterClass *RC;
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+ case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+ default: RC = &PPC::QBRCRegClass; break;
+ }
+
+ unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++QFPR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
needsLoad = true;
}
- ArgOffset += 16;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += Sz;
break;
}
@@ -2917,7 +3283,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
@@ -2940,7 +3307,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
@@ -2971,9 +3338,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2986,9 +3351,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -3281,7 +3643,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
@@ -3310,7 +3673,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
@@ -3388,7 +3751,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
SignExtend32<26>(Addr) != Addr)
return nullptr; // Top 6 bits have to be sext of immediate.
- return DAG.getConstant((int)C->getZExtValue() >> 2,
+ return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}
@@ -3436,8 +3799,9 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
- int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
- isDarwinABI);
+ const PPCFrameLowering *FL =
+ MF.getSubtarget<PPCSubtarget>().getFrameLowering();
+ int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
@@ -3449,8 +3813,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
- int NewFPLoc =
- SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
@@ -3520,9 +3883,9 @@ static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(),
+ false, false, false, MachinePointerInfo(),
MachinePointerInfo());
}
@@ -3544,7 +3907,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
else
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
+ DAG.getConstant(ArgOffset, dl, PtrVT));
}
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0));
@@ -3575,8 +3938,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
isPPC64, isDarwinABI, dl);
// Emit callseq_end just before tailcall node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
@@ -3596,11 +3959,11 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
- SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
- bool IsPatchPoint,
+ SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
+ bool isTailCall, bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &Subtarget) {
+ ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
@@ -3701,50 +4064,51 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// 6. On return of the callee, the TOC of the caller needs to be
// restored (this is done in FinishCall()).
//
- // All those operations are flagged together to ensure that no other
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
// operations can be scheduled in between. E.g. without flagging the
- // operations together, a TOC access in the caller could be scheduled
- // between the load of the callee TOC and the branch to the callee, which
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which
// results in the TOC access going through the TOC of the callee instead
// of going through the TOC of the caller, which leads to incorrect code.
// Load the address of the function entry point from the function
// descriptor.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- Chain = LoadFuncPtr.getValue(1);
- InFlag = LoadFuncPtr.getValue(2);
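+  // Hang the descriptor loads off the chain coming out of callseq_start (its
+  // last non-glue result) so they can be scheduled at the start of the call
+  // sequence rather than being glued to the register copies below.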
+ SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
+ if (LDChain.getValueType() == MVT::Glue)
+ LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
- // Load environment pointer into r11.
- // Offset of the environment pointer within the function descriptor.
- SDValue PtrOff = DAG.getIntPtrConstant(16);
+ bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
+ MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
+ SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
+ false, false, LoadsInv, 8);
+
+ // Load environment pointer into r11.
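+  // The environment pointer lives at offset 16 within the function
+  // descriptor.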
+ SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
- InFlag);
- Chain = LoadEnvPtr.getValue(1);
- InFlag = LoadEnvPtr.getValue(2);
+ SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
+ MPI.getWithOffset(16), false, false,
+ LoadsInv, 8);
+
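+  // The callee's TOC pointer is at offset 8 in the function descriptor; load
+  // it here and copy it into X2 below.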
+ SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+ SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
+ MPI.getWithOffset(8), false, false,
+ LoadsInv, 8);
+
+ setUsesTOCBasePtr(DAG);
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
+ InFlag);
+ Chain = TOCVal.getValue(0);
+ InFlag = TOCVal.getValue(1);
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
InFlag);
+
Chain = EnvVal.getValue(0);
InFlag = EnvVal.getValue(1);
- // Load TOC of the callee into r2. We are using a target-specific load
- // with r2 hard coded, because the result of a target-independent load
- // would never go directly into r2, since r2 is a reserved register (which
- // prevents the register allocator from allocating it), resulting in an
- // additional register being allocated and an unnecessary move instruction
- // being generated.
- VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue TOCOff = DAG.getIntPtrConstant(8);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- AddTOC, InFlag);
- Chain = LoadTOCPtr.getValue(0);
- InFlag = LoadTOCPtr.getValue(1);
-
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
MTCTROps[2] = InFlag;
@@ -3772,27 +4136,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
-
- // If this is a call to __tls_get_addr, find the symbol whose address
- // is to be taken and add it to the list. This will be used to
- // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
- // We find the symbol by walking the chain to the CopyFromReg, walking
- // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
- // pulling the symbol from that node.
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
- assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
- SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
- SDValue TGTAddr = AddI->getOperand(1);
- assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
- "Didn't find target global TLS address where we expected one");
- Ops.push_back(TGTAddr);
- CallOpc = PPCISD::CALL_TLS;
- }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
- Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
@@ -3800,9 +4147,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (Callee.getNode() && isELFv2ABI && !IsPatchPoint)
+ // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
+ setUsesTOCBasePtr(DAG);
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ }
return CallOpc;
}
@@ -3869,17 +4219,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
- SDValue &Callee,
+ SDValue CallSeqStart, SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
- bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
- isTailCall, IsPatchPoint, RegsToPass, Ops,
- NodeTys, Subtarget);
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
+ SPDiff, isTailCall, IsPatchPoint, RegsToPass,
+ Ops, NodeTys, CS, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
@@ -3893,9 +4243,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3911,6 +4261,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
+ DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
@@ -3939,8 +4290,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
// The address needs to go after the chain input but before the flag (or
@@ -3948,19 +4299,16 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_))
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- } else if (CallOpc == PPCISD::CALL_TLS)
- // For 64-bit SVR4, TLS calls are always non-local.
- CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(BytesCalleePops, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(BytesCalleePops, dl, true),
InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -3983,12 +4331,13 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool IsPatchPoint = CLI.IsPatchPoint;
+ ImmutableCallSite *CS = CLI.CS;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ if (!isTailCall && CS && CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -3996,16 +4345,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ dl, DAG, InVals, CS);
}
SDValue
@@ -4016,7 +4365,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
@@ -4046,7 +4396,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
PtrByteSize);
if (isVarArg) {
@@ -4102,7 +4452,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4142,7 +4492,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Memory reserved in the local variable space of the caller's stack frame.
unsigned LocMemOffset = ByValVA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
@@ -4179,7 +4529,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
unsigned LocMemOffset = VA.getLocMemOffset();
if (!isTailCall) {
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
@@ -4222,8 +4572,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
false, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
// Copy an argument into memory, being careful to do this outside the
@@ -4254,7 +4604,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -4274,13 +4625,39 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
// area is 32 bytes reserved space for [SP][CR][LR][TOC].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+ const unsigned NumQFPRs = NumFPRs;
+
+ // When using the fast calling convention, we don't provide backing for
+ // arguments that will be in registers.
+ unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
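The linkage-area sizes quoted in the comment above (48 bytes for ELFv1, 32 for ELFv2) fall directly out of the slot lists. A minimal standalone sketch of that arithmetic; the helper name is illustrative, not LLVM API:

#include <cassert>

// Illustrative only: 64-bit SVR4 linkage-area size, 8 bytes per slot.
// ELFv1 reserves [SP][CR][LR][2 x unused][TOC] = 6 slots; ELFv2 drops the
// two unused slots and keeps [SP][CR][LR][TOC] = 4 slots.
unsigned linkageSize64(bool IsELFv2) {
  return (IsELFv2 ? 4 : 6) * 8;
}

int main() {
  assert(linkageSize64(false) == 48 && linkageSize64(true) == 32);
}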
@@ -4288,6 +4665,48 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (CallConv == CallingConv::Fast) {
+ if (Flags.isByVal())
+ NumGPRsUsed += (Flags.getByValSize()+7)/8;
+ else
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (++NumGPRsUsed <= NumGPRs)
+ continue;
+ break;
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ break;
+ case MVT::v4f32:
+ // When using QPX, this is handled like a FP register, otherwise, it
+ // is an Altivec register.
+ if (Subtarget.hasQPX()) {
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ } else {
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f64: // QPX
+ case MVT::v4i1: // QPX
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
+ }
+ }
+
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
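Under the fast calling convention, the counting above reserves parameter-area space only for arguments that overflow the register files. A stripped-down model of that counting, assuming the register counts from the arrays above (8 GPRs, 13 FPRs, 12 VRs) and reducing the MVT switch to an enum for illustration:

#include <cassert>

enum class ArgClass { GPR, FPR, VR }; // simplified stand-in for the MVT switch

// Mirrors the "++NumUsed <= NumAvailable" pattern above: returns true while
// the argument still fits in a register, bumping the per-class counter.
bool fitsInRegister(ArgClass C, unsigned &G, unsigned &F, unsigned &V) {
  switch (C) {
  case ArgClass::GPR: return ++G <= 8;  // X3..X10
  case ArgClass::FPR: return ++F <= 13; // F1..F13
  case ArgClass::VR:  return ++V <= 12; // V2..V13
  }
  return false;
}

int main() {
  unsigned G = 0, F = 0, V = 0;
  for (int i = 0; i < 8; ++i)
    assert(fitsInRegister(ArgClass::GPR, G, F, V)); // X3..X10 all fit
  assert(!fitsInRegister(ArgClass::GPR, G, F, V));  // a ninth i64 arg spills
}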
@@ -4311,7 +4730,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4324,7 +4743,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4344,26 +4763,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR[] = {
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg *FPR = GetFPR();
-
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
-
- const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4375,22 +4774,31 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, NumGPRs);
-
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention: there we must make sure to re-align only when
+ // the argument will actually use a stack slot.
+ auto ComputePtrOff = [&]() {
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputePtrOff();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+ }
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
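The ComputePtrOff lambda above combines two idioms: rounding the running offset up to the argument's stack-slot alignment, and recovering the GPR index implied by an offset into the parameter save area. Both in isolation, as a sketch with illustrative names:

#include <algorithm>
#include <cassert>

// Round Offset up to the next multiple of Align.
unsigned alignUp(unsigned Offset, unsigned Align) {
  return ((Offset + Align - 1) / Align) * Align;
}

// Map a parameter-area offset back to a GPR index, clamped to "all GPRs used".
unsigned gprIndexForOffset(unsigned ArgOffset, unsigned LinkageSize,
                           unsigned PtrByteSize, unsigned NumGPRs) {
  return std::min((ArgOffset - LinkageSize) / PtrByteSize, NumGPRs);
}

int main() {
  assert(alignUp(49, 16) == 64);
  assert(gprIndexForOffset(64, 48, 8, 8) == 2); // third slot past the linkage area
}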
@@ -4415,6 +4823,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4423,7 +4834,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4433,7 +4844,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs && Size < 8) {
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
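On big-endian targets a sub-doubleword aggregate is stored right-justified in its 8-byte slot, which the code above gets by biasing the store address by PtrByteSize - Size. The offset rule in isolation (the helper name is illustrative):

#include <cassert>

// Byte offset within a PtrByteSize-wide parameter slot at which a small
// aggregate of Size bytes is stored: right-justified on big-endian,
// left-justified on little-endian.
unsigned justifyOffset(unsigned PtrByteSize, unsigned Size, bool IsLittleEndian) {
  return IsLittleEndian ? 0 : PtrByteSize - Size;
}

int main() {
  assert(justifyOffset(8, 3, false) == 5); // a 3-byte struct lands in bytes 5..7
  assert(justifyOffset(8, 3, true) == 0);
}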
@@ -4473,7 +4884,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// parameter save area instead of a new local variable.
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
@@ -4485,7 +4896,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4495,7 +4906,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// For aggregates larger than PtrByteSize, copy the pieces of the
// object that fit into registers from the parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
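Aggregates larger than a pointer are walked in PtrByteSize strides, and each piece is loaded into a GPR while any remain. A sketch of just the stride arithmetic from the loop above, with the DAG plumbing elided:

#include <vector>

// Offsets of the pieces of a Size-byte aggregate that fit into the remaining
// GPRs, mirroring the "for (j = 0; j < Size; j += PtrByteSize)" loop above.
std::vector<unsigned> pieceOffsets(unsigned Size, unsigned PtrByteSize,
                                   unsigned GPRIdx, unsigned NumGPRs) {
  std::vector<unsigned> Offsets;
  for (unsigned J = 0; J < Size && GPRIdx != NumGPRs; J += PtrByteSize, ++GPRIdx)
    Offsets.push_back(J);
  return Offsets; // e.g. Size=24, PtrByteSize=8 -> {0, 8, 16} if 3+ GPRs remain
}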
@@ -4521,13 +4932,19 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += PtrByteSize;
}
- ArgOffset += PtrByteSize;
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64: {
@@ -4541,6 +4958,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeededLoad = false;
// First load the argument into the next available FPR.
if (FPR_idx != NumFPRs)
@@ -4549,7 +4967,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs) {
+ else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// In the non-vararg case, this can only ever happen in the
// presence of f32 array types, since otherwise we never run
// out of FPRs before running out of GPRs.
@@ -4580,7 +5001,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
if (!isLittleEndian)
ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, dl, MVT::i32));
// Non-final even elements are skipped; they will be handled
// together with the subsequent argument on the next go-around.
@@ -4588,27 +5009,34 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgVal = SDValue();
if (ArgVal.getNode())
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32 &&
!isLittleEndian && !Flags.isInConsecutiveRegs()) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+
+ NeededLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgOffset += (Arg.getValueType() == MVT::f32 &&
- Flags.isInConsecutiveRegs()) ? 4 : 8;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || NeededLoad) {
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
}
case MVT::v4f32:
@@ -4617,6 +5045,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
@@ -4649,7 +5079,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, PtrVT));
+ DAG.getConstant(i, dl, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
@@ -4667,12 +5097,73 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += 16;
}
- ArgOffset += 16;
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += 16;
break;
+ } // not QPX
+
+ assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+
+ /* fall through */
+ case MVT::v4f64:
+ case MVT::v4i1: {
+ bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (QFPR_idx != NumQFPRs) {
+ SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
+ Store, PtrOff, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+ }
+ ArgOffset += (IsF32 ? 16 : 32);
+ for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs QPX params go into registers or on the stack.
+ if (QFPR_idx != NumQFPRs) {
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ }
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ break;
+ }
}
}
@@ -4689,12 +5180,14 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
- SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(TOCSaveOffset),
false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
@@ -4717,8 +5210,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
SDValue
@@ -4729,7 +5222,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
unsigned NumOps = Outs.size();
@@ -4751,8 +5245,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
@@ -4797,7 +5290,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4810,7 +5303,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
dl);
SDValue CallSeqStart = Chain;
@@ -4844,8 +5337,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4868,7 +5359,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -4897,7 +5388,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
} else {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
@@ -4918,7 +5409,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// copy the pieces of the object that fit into registers from the
// parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
- SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -4971,7 +5462,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
@@ -5016,7 +5507,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
+ DAG.getConstant(ArgOffset, dl, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -5032,7 +5523,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
- DAG.getConstant(i, PtrVT));
+ DAG.getConstant(i, dl, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
@@ -5110,8 +5601,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
FPOp, true, TailCallArguments);
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
bool
@@ -5210,7 +5701,6 @@ SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5221,7 +5711,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
// Find out the fixed offset of the return address save area.
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for frame pointer save area.
RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
@@ -5234,7 +5724,6 @@ SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5245,9 +5734,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out the fixed offset of the frame pointer save area.
- int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
- isDarwinABI);
-
+ int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
@@ -5268,7 +5755,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
- DAG.getConstant(0, PtrVT), Size);
+ DAG.getConstant(0, dl, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
// Build a DYNALLOC node.
@@ -5293,6 +5780,9 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorLoad(Op, DAG);
+
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 loads");
@@ -5314,6 +5804,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(1).getValueType().isVector())
+ return LowerVectorStore(Op, DAG);
+
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
"Custom lowering only for i1 stores");
@@ -5453,10 +5946,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
- PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
break;
case MVT::i64:
assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
@@ -5491,7 +5985,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
// add in a bias.
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
- DAG.getConstant(4, FIPtr.getValueType()));
+ DAG.getConstant(4, dl, FIPtr.getValueType()));
MPI = MPI.getWithOffset(4);
}
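FCTIWZ leaves its 32-bit result in the low word of an f64-sized slot, so on a big-endian layout the i32 sits 4 bytes past the start of the store, hence the +4 bias above. The rule in isolation (illustrative helper):

#include <cassert>

// Byte offset of the low 32 bits within a big-endian slot of SlotSize bytes.
unsigned lowWordOffsetBE(unsigned SlotSize) { return SlotSize - 4; }

int main() { assert(lowWordOffsetBE(8) == 4); } // matches the +4 bias above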
@@ -5500,8 +5994,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
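LowerFP_TO_INTDirectMove above picks the convert opcode from signedness, result width, and FPCVT availability before appending the MFVSR move. The selection logic alone, sketched with an illustrative enum rather than the PPCISD opcodes:

enum class FpToIntOp { FCTIWZ, FCTIWUZ, FCTIDZ, FCTIDUZ };

// Opcode choice mirroring the i32/i64 switch above. The real code asserts
// that unsigned i64 conversion is only requested when FPCVT is available.
FpToIntOp selectFpToInt(bool IsSigned, bool IsI64, bool HasFPCVT) {
  if (IsI64)
    return IsSigned ? FpToIntOp::FCTIDZ : FpToIntOp::FCTIDUZ;
  if (IsSigned)
    return FpToIntOp::FCTIWZ;
  return HasFPCVT ? FpToIntOp::FCTIWUZ : FpToIntOp::FCTIDZ;
}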
@@ -5579,30 +6111,92 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+
+ return FP;
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+
+ if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+ if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+ return SDValue();
+
+ SDValue Value = Op.getOperand(0);
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ if (Op.getValueType() != MVT::v4f64)
+ Value = DAG.getNode(ISD::FP_ROUND, dl,
+ Op.getValueType(), Value,
+ DAG.getIntPtrConstant(1, dl));
+ return Value;
+ }
+
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i1)
return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
- DAG.getConstantFP(1.0, Op.getValueType()),
- DAG.getConstantFP(0.0, Op.getValueType()));
+ DAG.getConstantFP(1.0, dl, Op.getValueType()),
+ DAG.getConstantFP(0.0, dl, Op.getValueType()));
+
+ // If we have direct moves, we can do all the conversions and skip the
+ // store/load; however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDUS : PPCISD::FCFIDS) :
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- MVT::f32 : MVT::f64;
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
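The QPX path above leans on the fact that its booleans materialize as -1.0/+1.0: a single fma against the 0.5 splat maps them onto 0.0/1.0. The identity, checked numerically:

#include <cassert>

// QPX booleans are -1.0 (false) / +1.0 (true); one fma with the 0.5 constant
// maps them onto 0.0 / 1.0, since (V + 1.0) * 0.5 == 0.5*V + 0.5 exactly.
double qpxBoolToFP(double V) { return V * 0.5 + 0.5; }

int main() {
  assert(qpxBoolToFP(-1.0) == 0.0 && qpxBoolToFP(1.0) == 1.0);
}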
@@ -5627,12 +6221,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// bit 12 (value 2048) is set instead, so that the final rounding
// to single-precision gets the correct result.
SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
- SINT, DAG.getConstant(2047, MVT::i64));
+ SINT, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
- Round, DAG.getConstant(2047, MVT::i64));
+ Round, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
Round = DAG.getNode(ISD::AND, dl, MVT::i64,
- Round, DAG.getConstant(-2048, MVT::i64));
+ Round, DAG.getConstant(-2048, dl, MVT::i64));
// However, we cannot use that value unconditionally: if the magnitude
// of the input value is small, the bit-twiddling we did above might
@@ -5643,11 +6237,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// bits are all sign-bit copies, and use the rounded value computed
// above otherwise.
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
- SINT, DAG.getConstant(53, MVT::i32));
+ SINT, DAG.getConstant(53, dl, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
- Cond, DAG.getConstant(1, MVT::i64));
+ Cond, DAG.getConstant(1, dl, MVT::i64));
Cond = DAG.getSetCC(dl, MVT::i32,
- Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+ Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
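The bit-twiddling above is the classic sticky-bit defense against double rounding on the i64 -> f32 path: fold any of bits 0-10 into bit 11, then clear them, so rounding first to f64 and then to f32 matches a single rounding of the original value. A checked sketch of just the integer transform:

#include <cassert>
#include <cstdint>

// Force bit 11 on whenever any of bits 0..10 are set, then clear bits 0..10.
int64_t stickyRound(int64_t SInt) {
  int64_t Round = SInt & 2047;   // low 11 bits
  Round += 2047;                 // carries into bit 11 iff any low bit was set
  Round |= SInt;
  return Round & ~2047;          // drop the low 11 bits again
}

int main() {
  assert(stickyRound(0x1000) == 0x1000); // low bits clear: value unchanged
  assert(stickyRound(0x1001) == 0x1800); // low bits set: bit 11 forced on
}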
@@ -5720,7 +6314,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0));
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
return FP;
}
@@ -5790,7 +6384,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
- FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
return FP;
}
@@ -5834,7 +6429,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
StackSlot, MachinePointerInfo(), false, false,0);
// Load FP Control Word from low 32 bits of stack slot.
- SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
false, false, false, 0);
@@ -5842,14 +6437,14 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::AND, dl, MVT::i32,
- CWD, DAG.getConstant(3, MVT::i32));
+ CWD, DAG.getConstant(3, dl, MVT::i32));
SDValue CWD2 =
DAG.getNode(ISD::SRL, dl, MVT::i32,
DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::XOR, dl, MVT::i32,
- CWD, DAG.getConstant(3, MVT::i32)),
- DAG.getConstant(3, MVT::i32)),
- DAG.getConstant(1, MVT::i32));
+ CWD, DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(1, dl, MVT::i32));
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
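The AND/XOR/SRL sequence above translates the FPSCR RN field into the FLT_ROUNDS encoding by swapping the first two values and passing the rest through. The mapping, verified for all four modes:

#include <cassert>

// PPC FPSCR RN field: 0 = nearest, 1 = toward zero, 2 = +inf, 3 = -inf.
// FLT_ROUNDS wants:   1 = nearest, 0 = toward zero, 2 = +inf, 3 = -inf.
unsigned fltRounds(unsigned CWD) {
  unsigned CWD1 = CWD & 3;
  unsigned CWD2 = ((CWD ^ 3) & 3) >> 1;
  return CWD1 ^ CWD2;
}

int main() {
  assert(fltRounds(0) == 1 && fltRounds(1) == 0 &&
         fltRounds(2) == 2 && fltRounds(3) == 3);
}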
@@ -5874,12 +6469,12 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
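The Tmp1..Tmp6 dataflow above is the standard two-word funnel shift; it works for any 0 < Amt < 128 because PPCISD::SHL/SRL produce zero once the amount reaches the bit width, so exactly one of the two Lo contributions survives. A sketch that models that semantic explicitly (plain C++ shifts would be undefined for amounts >= 64):

#include <cassert>
#include <cstdint>

// PPCISD::SHL/SRL yield 0 when the amount is >= the bit width.
uint64_t shl(uint64_t X, unsigned N) { return N >= 64 ? 0 : X << N; }
uint64_t srl(uint64_t X, unsigned N) { return N >= 64 ? 0 : X >> N; }

// 128-bit left shift of (Hi,Lo) by Amt, 0 < Amt < 128, following the
// Tmp1..Tmp6 dataflow above.
void shl128(uint64_t &Hi, uint64_t &Lo, unsigned Amt) {
  Hi = shl(Hi, Amt) | srl(Lo, 64 - Amt) // Amt >= 64 makes 64-Amt wrap; srl guards it
       | shl(Lo, Amt - 64);             // Amt < 64 makes Amt-64 wrap; shl guards it
  Lo = shl(Lo, Amt);
}

int main() {
  uint64_t Hi = 0x1, Lo = 0x8000000000000000ULL;
  shl128(Hi, Lo, 1);
  assert(Hi == 0x3 && Lo == 0); // Lo's top bit funnels into Hi
}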
@@ -5903,12 +6498,12 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
@@ -5931,15 +6526,15 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
- DAG.getConstant(BitWidth, AmtVT), Amt);
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
- DAG.getConstant(-BitWidth, AmtVT));
+ DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
- SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, dl);
@@ -5955,7 +6550,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -5968,7 +6563,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
EVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
- SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
@@ -5982,7 +6577,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), Op);
+ DAG.getConstant(IID, dl, MVT::i32), Op);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
@@ -5992,7 +6587,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), LHS, RHS);
+ DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
@@ -6002,7 +6597,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
- DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+ DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
@@ -6032,12 +6627,134 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+ // We first build an i32 vector, load it into a QPX register,
+ // then convert it to a floating-point vector and compare it
+ // to a zero vector to get the boolean result.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ assert(BVN->getNumOperands() == 4 &&
+ "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+ bool IsConst = true;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+ IsConst = false;
+ break;
+ }
+ }
+
+ if (IsConst) {
+ Constant *One =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+ Constant *NegOne =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+ SmallVector<Constant*, 4> CV(4, NegOne);
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+ else if (cast<ConstantSDNode>(BVN->getOperand(i))->
+ getConstantIntValue()->isZero())
+ continue;
+ else
+ CV[i] = One;
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
+ 16 /* alignment */);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4i1);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
+ dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool());
+ }
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+ if (StoreSize > 4) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ BVN->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ MVT::i32, false, false, 0));
+ } else {
+ SDValue StoreValue = BVN->getOperand(i);
+ if (StoreSize < 4)
+ StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ StoreValue, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty())
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Now load from v4i32 into the QPX register; this will extend it to
+ // v4i64 but not yet convert it to floating point. Nevertheless, this
+ // is typed as v4f64 because the QPX register integer states are not
+ // explicitly represented.
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4f64);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+ LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
+ LoadedVect);
+
+ SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
+ FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPZeros, FPZeros, FPZeros, FPZeros);
+
+ return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+ }
+
+ // All other QPX vectors are handled by generic code.
+ if (Subtarget.hasQPX())
+ return SDValue();
+
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -6050,7 +6767,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (SplatBits == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
- SDValue Z = DAG.getConstant(0, MVT::i32);
+ SDValue Z = DAG.getConstant(0, dl, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
}
@@ -6077,10 +6794,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// To avoid having these optimizations undone by constant folding,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
- SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
EVT VT = (SplatSize == 1 ? MVT::v16i8 :
(SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
- SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
if (VT == Op.getValueType())
return RetVal;
@@ -6104,22 +6821,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // The remaining cases assume either big endian element order or
- // a splat-size that equates to the element size of the vector
- // to be built. An example that doesn't work for little endian is
- // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
- // and a vector element size of 16 bits. The code below will
- // produce the vector in big endian element order, which for little
- // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
-
- // For now, just avoid these optimizations in that case.
- // FIXME: Develop correct optimizations for LE with mismatched
- // splat and element sizes.
-
- if (Subtarget.isLittleEndian() &&
- SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
- return SDValue();
-
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -6290,6 +6991,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
+ if (Subtarget.hasQPX()) {
+ if (VT.getVectorNumElements() != 4)
+ return SDValue();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+ if (AlignIdx != -1) {
+ return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+ DAG.getConstant(AlignIdx, dl, MVT::i32));
+ } else if (SVOp->isSplat()) {
+ int SplatIdx = SVOp->getSplatIndex();
+ if (SplatIdx >= 4) {
+ std::swap(V1, V2);
+ SplatIdx -= 4;
+ }
+
+ // FIXME: If SplatIdx == 0 and the input came from a load, then there is
+ // nothing to do.
+
+ return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+ DAG.getConstant(SplatIdx, dl, MVT::i32));
+ }
+
+ // Lower this into a qvgpci/qvfperm pair.
+
+ // Compute the qvgpci literal
+ unsigned idx = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ int m = SVOp->getMaskElt(i);
+ unsigned mm = m >= 0 ? (unsigned) m : i;
+ idx |= mm << (3-i)*3;
+ }
+
+ SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+ DAG.getConstant(idx, dl, MVT::i32));
+ return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+ }
+
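The qvgpci immediate built above packs four 3-bit lane selectors (values 0..7, since the inputs are two 4-element vectors), element 0 in the most significant position, with undef lanes defaulting to the identity. A sketch of the packing; the function name is illustrative:

#include <cassert>

unsigned qvgpciLiteral(const int Mask[4]) {
  unsigned Idx = 0;
  for (unsigned I = 0; I < 4; ++I) {
    unsigned MM = Mask[I] >= 0 ? (unsigned)Mask[I] : I; // undef lane: identity
    Idx |= MM << (3 - I) * 3;
  }
  return Idx;
}

int main() {
  const int Identity[4] = {0, 1, 2, 3};
  assert(qvgpciLiteral(Identity) == (0u << 9 | 1u << 6 | 2u << 3 | 3u));
}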
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
@@ -6299,6 +7039,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
@@ -6316,6 +7057,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
@@ -6401,10 +7143,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
- ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
- MVT::i32));
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
+ dl, MVT::i32));
else
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
MVT::i32));
}
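The VPERM control bytes above are mirrored through the concatenated 32-byte input on little-endian targets, since the hardware permute numbers bytes from the big-endian end. The index rule in isolation:

#include <cassert>

// Byte index written into the VPERM control vector for byte J of element
// SrcElt, where elements are BytesPerElement wide.
unsigned vpermByte(unsigned SrcElt, unsigned BytesPerElement, unsigned J,
                   bool IsLittleEndian) {
  unsigned BE = SrcElt * BytesPerElement + J;
  return IsLittleEndian ? 31 - BE : BE;
}

int main() {
  assert(vpermByte(0, 4, 0, false) == 0);
  assert(vpermByte(0, 4, 0, true) == 31); // LE mirrors through the 32-byte pair
}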
@@ -6422,7 +7164,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -6435,29 +7177,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 1;
+ }
+ else
+ return false;
+ break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
}
return true;
}
@@ -6471,14 +7267,14 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
if (!isDot) {
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(CompareOpc, MVT::i32));
+ DAG.getConstant(CompareOpc, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
}
@@ -6486,7 +7282,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Ops[] = {
Op.getOperand(2), // LHS
Op.getOperand(3), // RHS
- DAG.getConstant(CompareOpc, MVT::i32)
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
@@ -6518,15 +7314,15 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
- DAG.getConstant(8-(3-BitNo), MVT::i32));
+ DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
// Isolate the bit.
Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
// If we are supposed to, toggle the bit.
if (InvertBit)
Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
return Flags;
}
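The SRL/AND/XOR sequence above pulls a single CR6 predicate bit out of the moved condition-register value. The same three steps as a self-contained function (the shift amount 8 - (3 - BitNo) is transcribed from the code above, not re-derived; the driver values are illustrative):

    #include <cassert>

    // Standalone model of the SRL/AND/XOR sequence above: move bit 'BitNo'
    // of the CR6 field into the low position, mask it, and optionally
    // invert it when the predicate is negated.
    static unsigned extractCRBit(unsigned Flags, unsigned BitNo, bool InvertBit) {
      Flags >>= 8 - (3 - BitNo); // shift the bit into the low position
      Flags &= 1;                // isolate the bit
      if (InvertBit)
        Flags ^= 1;              // toggle the bit if requested
      return Flags;
    }

    int main() {
      // With BitNo == 0 the interesting bit sits at position 8-(3-0) == 5.
      assert(extractCRBit(1u << 5, 0, false) == 1);
      assert(extractCRBit(1u << 5, 0, true) == 0);
      assert(extractCRBit(0, 0, false) == 0);
    }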
@@ -6572,6 +7368,304 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDNode *N = Op.getNode();
+
+ assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
+ "Unknown extract_vector_elt type");
+
+ SDValue Value = N->getOperand(0);
+
+ // The first part of this is like the store lowering except that we don't
+ // need to track the chain.
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue StoreChain = DAG.getEntryNode();
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Extract the value requested.
+ unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0);
+
+ if (!Subtarget.useCRBits())
+ return IntVal;
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+}
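The (V+1.0)*0.5 = 0.5*V+0.5 identity in the comment above is exactly one fused multiply-add per lane. A two-line check of the mapping it claims, assuming the -1.0 (false) / 1.0 (true) boolean encoding described there:

    #include <cassert>
    #include <cmath>

    // fma(V, 0.5, 0.5) maps the {-1.0, 1.0} boolean encoding onto {0.0, 1.0},
    // which is what the single FMA with a splatted 0.5 computes per lane.
    int main() {
      assert(std::fma(-1.0, 0.5, 0.5) == 0.0); // false -> 0
      assert(std::fma(1.0, 0.5, 0.5) == 1.0);  // true  -> 1
    }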
+
+/// Lowering for QPX v4i1 loads, and for under-aligned QPX v4f64/v4f32 loads
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+
+ if (Op.getValueType() == MVT::v4f64 ||
+ Op.getValueType() == MVT::v4f32) {
+ EVT MemVT = LN->getMemoryVT();
+ unsigned Alignment = LN->getAlignment();
+
+ // If this load is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Op.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Vals, LoadChains;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Load;
+ if (ScalarVT != ScalarMemVT)
+ Load =
+ DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
+ BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+ else
+ Load =
+ DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+
+ if (Idx == 0 && LN->isIndexed()) {
+ assert(LN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector load");
+ Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+ LN->getAddressingMode());
+ }
+
+ Vals.push_back(Load);
+ LoadChains.push_back(Load.getValue(1));
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getValueType(), Vals);
+
+ if (LN->isIndexed()) {
+ SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ SDValue RetOps[] = { Value, TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
+ assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+ // To lower v4i1 from a byte array, we load the byte elements of the
+ // vector and then reuse the BUILD_VECTOR logic.
+
+ SmallVector<SDValue, 4> VectElmts, VectElmtChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
+ dl, MVT::i32, LoadChain, Idx,
+ LN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */,
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(),
+ 1 /* alignment */, LN->getAAInfo()));
+ VectElmtChains.push_back(VectElmts[i].getValue(1));
+ }
+
+ LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
+
+ SDValue RVals[] = { Value, LoadChain };
+ return DAG.getMergeValues(RVals, dl);
+}
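When the v4f64/v4f32 load is under-aligned, the code above splits it into four scalar loads at BasePtr + Idx*Stride, each annotated with MinAlign(Alignment, Idx*Stride), and rebuilds the vector with BUILD_VECTOR. A plain-memory model of that decomposition (MinAlign is transcribed from llvm/Support/MathExtras.h; the buffer layout and values are invented):

    #include <cstdio>
    #include <cstring>

    // MinAlign as in llvm/Support/MathExtras.h: the largest power of two
    // dividing both A and B (the lowest set bit of A | B).
    static unsigned MinAlign(unsigned A, unsigned B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      // A deliberately under-aligned source window for a "v4f64" load:
      // 8-byte aligned, not the 32 bytes the vector type would want.
      alignas(32) unsigned char Buf[40] = {};
      double Src[4] = {1.0, 2.0, 3.0, 4.0};
      std::memcpy(Buf + 8, Src, sizeof(Src));

      const unsigned Stride = sizeof(double), Alignment = 8;
      double Vals[4];
      for (unsigned Idx = 0; Idx < 4; ++Idx) {
        // One scalar load per element, as in the lowering above.
        std::memcpy(&Vals[Idx], Buf + 8 + Idx * Stride, Stride);
        std::printf("elt %u: %g (align %u)\n", Idx, Vals[Idx],
                    MinAlign(Alignment, Idx * Stride));
      }
    }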
+
+/// Lowering for QPX v4i1 stores, and for under-aligned QPX v4f64/v4f32 stores
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SDValue Value = SN->getValue();
+
+ if (Value.getValueType() == MVT::v4f64 ||
+ Value.getValueType() == MVT::v4f32) {
+ EVT MemVT = SN->getMemoryVT();
+ unsigned Alignment = SN->getAlignment();
+
+ // If this store is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Value.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Ex =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getConstant(Idx, dl, getVectorIdxTy()));
+ SDValue Store;
+ if (ScalarVT != ScalarMemVT)
+ Store =
+ DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+ else
+ Store =
+ DAG.getStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+
+ if (Idx == 0 && SN->isIndexed()) {
+ assert(SN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector store");
+ Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+ SN->getAddressingMode());
+ }
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ Stores.push_back(Store);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ if (SN->isIndexed()) {
+ SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ return TF;
+ }
+
+ assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+ assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Move data into the byte array.
+ SmallVector<SDValue, 4> Loads, LoadChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0));
+ LoadChains.push_back(Loads[i].getValue(1));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
+ SN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */,
+ SN->isNonTemporal(), SN->isVolatile(),
+ 1 /* alignment */, SN->getAAInfo()));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ return StoreChain;
+}
+
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
@@ -6694,6 +7788,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
@@ -6708,7 +7803,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
- const TargetMachine &TM = getTargetMachine();
SDLoc dl(N);
switch (N->getOpcode()) {
default:
@@ -6739,8 +7833,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case ISD::VAARG: {
- if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
EVT VT = N->getValueType(0);
@@ -6758,10 +7851,10 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getOperand(0).getValueType() == MVT::ppcf128);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
// Add the two halves of the long double in round-to-zero mode.
SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
@@ -6773,6 +7866,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
@@ -6789,7 +7883,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Id);
- return Builder.CreateCall(Func);
+ return Builder.CreateCall(Func, {});
}
// The mappings for emitLeading/TrailingFence is taken from
@@ -6799,10 +7893,9 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
bool IsLoad) const {
if (Ord == SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
- else if (isAtLeastRelease(Ord))
+ if (isAtLeastRelease(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
- else
- return nullptr;
+ return nullptr;
}
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
@@ -6814,16 +7907,40 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
- else
- return nullptr;
+ return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -6846,7 +7963,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -6861,11 +7978,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
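The block built above is PPC's load-reserve/store-conditional idiom: l[bhwd]arx, apply BinOpcode, st[bhwd]cx., and branch back on a failed store-conditional. As a semantic analogy only (not what the backend emits), the same retry shape in portable C++ is a compare-exchange loop:

    #include <atomic>
    #include <cstdio>

    // Semantic model of the larx/binop/stcx. retry loop above: reload and
    // retry on contention, just as a failed stcx. branches back to loopMBB.
    template <typename BinOp>
    int atomicRMW(std::atomic<int> &Mem, int Incr, BinOp Op) {
      int Dest = Mem.load();
      int Tmp;
      do {
        Tmp = Op(Dest, Incr);                             // the BinOpcode step
      } while (!Mem.compare_exchange_weak(Dest, Tmp));    // "bne- loopMBB"
      return Dest; // old value, like 'dest' in the code above
    }

    int main() {
      std::atomic<int> V{5};
      int Old = atomicRMW(V, 3, [](int A, int B) { return A + B; });
      std::printf("old=%d new=%d\n", Old, V.load()); // old=5 new=8
    }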
@@ -6883,9 +8000,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+  // If we support part-word atomic mnemonics, just use them.
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -7012,8 +8132,7 @@ llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7086,6 +8205,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BufReg = MI->getOperand(1).getReg();
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -7096,23 +8216,21 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Naked functions never have a base pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned BaseReg;
- if (MF->getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
- .addReg(BaseReg)
- .addImm(BPOffset)
- .addReg(BufReg);
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- const PPCRegisterInfo *TRI =
- getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -7126,8 +8244,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
- MIB = BuildMI(mainMBB, DL,
- TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ MIB =
+ BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
if (Subtarget.isPPC64()) {
@@ -7161,8 +8280,7 @@ MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7181,10 +8299,13 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
- (Subtarget.isSVR4ABI() &&
- MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
- PPC::R29 : PPC::R30);
+ unsigned BP =
+ (PVT == MVT::i64)
+ ? PPC::X30
+ : (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_
+ ? PPC::R29
+ : PPC::R30);
MachineInstrBuilder MIB;
@@ -7247,6 +8368,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -7267,8 +8389,20 @@ MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
if (MI->getOpcode() == TargetOpcode::STACKMAP ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT)
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Call lowering should have added an r2 operand to indicate a dependence
+      // on the TOC base pointer value. It can't, however, because there is no
+ // way to mark the dependence as implicit there, and so the stackmap code
+ // will confuse it with a regular operand. Instead, add the dependence
+ // here.
+ setUsesTOCBasePtr(*BB->getParent());
+ MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
+ }
+
return emitPatchPoint(MI, BB);
+ }
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
@@ -7278,8 +8412,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
@@ -7290,9 +8423,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_I4 ||
- MI->getOpcode() == PPC::SELECT_I8)) {
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)
@@ -7302,8 +8435,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
@@ -7311,15 +8442,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QBRC ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
@@ -7351,8 +8490,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
@@ -7429,68 +8572,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -7516,18 +8687,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
    //   bne- loop1MBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -7537,7 +8708,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -7546,7 +8717,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -7724,7 +8895,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
// Restore FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
} else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
@@ -7746,6 +8917,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -7764,9 +8941,11 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7785,9 +8964,11 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
unsigned &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
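Per the comment above, the estimate starts at 2^-5 relative accuracy and each Newton-Raphson step doubles the correct bits, so f32 (24 significand bits) needs 3 refinement steps and f64 (53 bits) needs 4. The arithmetic, spelled out (the helper is illustrative, not the backend's):

    #include <cstdio>

    // Bits correct after each Newton-Raphson step double, starting from the
    // estimate's 5 architected bits: 5 -> 10 -> 20 -> 40 -> 80 ...
    static unsigned refinementSteps(unsigned TargetBits, unsigned EstimateBits) {
      unsigned Steps = 0;
      for (unsigned Bits = EstimateBits; Bits < TargetBits; Bits *= 2)
        ++Steps;
      return Steps;
    }

    int main() {
      std::printf("f32: %u steps\n", refinementSteps(24, 5)); // 3
      std::printf("f64: %u steps\n", refinementSteps(53, 5)); // 4
    }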
@@ -7873,6 +9054,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
@@ -7899,6 +9098,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
@@ -7997,8 +9214,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(Subtarget.useCRBits() &&
- "Expecting to be tracking CR bits");
+ assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
// or
@@ -8294,10 +9510,8 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
N->getValueType(0) != MVT::i64)
return SDValue();
- if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- Subtarget.useCRBits()) ||
- (N->getOperand(0).getValueType() == MVT::i32 &&
- Subtarget.isPPC64())))
+ if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -8539,13 +9753,13 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
DAG.getConstant(APInt::getLowBitsSet(
N->getValueSizeInBits(0), PromBits),
- N->getValueType(0)));
+ dl, N->getValueType(0)));
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
"Invalid extension type");
EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
SDValue ShiftCst =
- DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
DAG.getNode(ISD::SHL, dl, N->getValueType(0),
N->getOperand(0), ShiftCst), ShiftCst);
@@ -8582,13 +9796,14 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDUS : PPCISD::FCFIDS) :
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- MVT::f32 : MVT::f64;
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
// If we're converting from a float, to an int, and back to a float again,
// then we don't need the store/load pair at all.
@@ -8610,7 +9825,7 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
FP = DAG.getNode(ISD::FP_ROUND, dl,
- MVT::f32, FP, DAG.getIntPtrConstant(0));
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
DCI.AddToWorklist(FP.getNode());
}
@@ -8721,7 +9936,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
switch (N->getOpcode()) {
@@ -8758,8 +9972,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
- if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
- !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
N->getOperand(1).getValueType() == MVT::i32 &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
@@ -8790,8 +10003,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
@@ -8812,8 +10024,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
@@ -8827,23 +10038,26 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// For little endian, VSX loads require generating lxvd2x/xxswapd.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
}
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ EVT MemVT = LD->getMemoryVT();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
- if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
- TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- // P8 and later hardware should just use LOAD.
- !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+ unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+ if (LD->isUnindexed() && VT.isVector() &&
+ ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+ // P8 and later hardware should just use LOAD.
+ !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+ LD->getAlignment() >= ScalarABIAlignment)) &&
LD->getAlignment() < ABIAlignment) {
- // This is a type-legal unaligned Altivec load.
+ // This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -8872,10 +10086,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr, IntrLD, IntrPerm;
+ MVT PermCntlTy, PermTy, LDTy;
+ if (Subtarget.hasAltivec()) {
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
+ } else {
+ Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+ Intrinsic::ppc_qpx_qvlpcls;
+ IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+ Intrinsic::ppc_qpx_qvlfs;
+ IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+ PermCntlTy = MVT::v4f64;
+ PermTy = MVT::v4f64;
+ LDTy = MemVT.getSimpleVT();
+ }
+
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
// Create the new MMO for the new base load. It is like the original MMO,
// but represents an area in memory almost twice the vector size centered
@@ -8884,18 +10116,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
- MF.getMachineMemOperand(LD->getMemOperand(),
- -LD->getMemoryVT().getStoreSize()+1,
- 2*LD->getMemoryVT().getStoreSize()-1);
+ MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+ 2*MemVT.getStoreSize()-1);
// Create the new base load.
- SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
- getPointerTy());
+ SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy());
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- BaseLoadOps, MVT::v4i32, BaseMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ BaseLoadOps, LDTy, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
@@ -8914,17 +10144,17 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
- SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+ SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
MachineMemOperand *ExtraMMO =
MF.getMachineMemOperand(LD->getMemOperand(),
- 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ 1, 2*MemVT.getStoreSize()-1);
SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- ExtraLoadOps, MVT::v4i32, ExtraMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ ExtraLoadOps, LDTy, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
@@ -8936,14 +10166,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// and ExtraLoad here.
SDValue Perm;
if (isLittleEndian)
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
ExtraLoad, BaseLoad, PermCntl, DAG, dl);
else
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
- if (VT != MVT::v4i32)
- Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+ if (VT != PermTy)
+ Perm = Subtarget.hasAltivec() ?
+ DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+ DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+ DAG.getTargetConstant(1, dl, MVT::i64));
+      // The second argument is 1 because this rounding is always exact.
// The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
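The combine above is the standard unaligned-vector-load idiom: issue the two naturally aligned loads that straddle the address, then select the wanted bytes with a permute whose control comes from lvsl/lvsr (or qvlpcld/qvlpcls for QPX). A byte-level model of base load + extra load + permute (addresses and data invented for illustration):

    #include <cstdio>
    #include <cstring>

    int main() {
      unsigned char Mem[32];
      for (int i = 0; i < 32; ++i) Mem[i] = static_cast<unsigned char>(i);

      const unsigned VecBytes = 16;
      unsigned Addr = 5;                      // unaligned start
      unsigned Base = Addr & ~(VecBytes - 1); // aligned "BaseLoad" address

      // Two aligned loads covering the unaligned 16-byte window.
      unsigned char BaseLoad[16], ExtraLoad[16], Perm[16];
      std::memcpy(BaseLoad, Mem + Base, 16);
      std::memcpy(ExtraLoad, Mem + Base + 16, 16);

      // The selection an lvsl-style control encodes: take bytes
      // Addr%16 .. Addr%16+15 from the concatenation of the two loads.
      unsigned Shift = Addr % VecBytes;
      for (unsigned i = 0; i < VecBytes; ++i)
        Perm[i] = (Shift + i < 16) ? BaseLoad[Shift + i]
                                   : ExtraLoad[Shift + i - 16];

      for (unsigned i = 0; i < VecBytes; ++i)
        std::printf("%u ", Perm[i]); // 5 6 ... 20
      std::printf("\n");
    }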
@@ -8952,40 +10187,67 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN: {
- bool isLittleEndian = Subtarget.isLittleEndian();
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl);
+ if ((IID == Intr ||
+ IID == Intrinsic::ppc_qpx_qvlpcld ||
+ IID == Intrinsic::ppc_qpx_qvlpcls) &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
- SDValue Add = N->getOperand(1);
-
- if (DAG.MaskedValueIsZero(Add->getOperand(1),
- APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
- Add.getValueType().getScalarType().getSizeInBits()))) {
- SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intr) {
- // We've found another LVSL/LVSR, and this address is an aligned
- // multiple of that one. The results will be the same, so use the
- // one we've just found instead.
-
- return SDValue(*UI, 0);
+ SDValue Add = N->getOperand(1);
+
+ int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
+ if (DAG.MaskedValueIsZero(
+ Add->getOperand(1),
+ APInt::getAllOnesValue(Bits /* alignment */)
+ .zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+
+ if (isa<ConstantSDNode>(Add->getOperand(1))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(UI->getOperand(1)) &&
+ (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+ cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+ (1ULL << Bits) == 0) {
+ SDNode *OtherAdd = *UI;
+ for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+ VE = OtherAdd->use_end(); VI != VE; ++VI) {
+ if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+ return SDValue(*VI, 0);
+ }
+ }
+ }
}
}
}
}
- }
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
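The MaskedValueIsZero test in the INTRINSIC_WO_CHAIN combine above proves the added offset is a multiple of the permute-control alignment: all of the low Bits bits (4 for Altivec's 16-byte alignment, 5 for QPX's 32-byte alignment) must be known zero. The scalar form of that predicate:

    #include <cassert>
    #include <cstdint>

    // Scalar form of the MaskedValueIsZero check above: an offset is an
    // aligned multiple when its low 'Bits' bits are all zero.
    static bool isAlignedMultiple(uint64_t Offset, unsigned Bits) {
      uint64_t LowMask = (uint64_t(1) << Bits) - 1;
      return (Offset & LowMask) == 0;
    }

    int main() {
      assert(isAlignedMultiple(32, 4));  // 16-byte case: 32 is a multiple
      assert(!isAlignedMultiple(24, 4)); // 24 is not
      assert(isAlignedMultiple(64, 5));  // 32-byte case
      assert(!isAlignedMultiple(48, 5));
    }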
@@ -8998,8 +10260,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -9015,8 +10276,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -9165,7 +10425,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -9185,7 +10445,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
- DAG.getConstant(CompareOpc, MVT::i32)
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
@@ -9209,7 +10469,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
- DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getConstant(CompOpc, dl, MVT::i32),
DAG.getRegister(PPC::CR6, MVT::i32),
N->getOperand(4), CompNode.getValue(1));
}
@@ -9237,14 +10497,14 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
bool IsNegPow2 = (-Divisor).isPowerOf2();
unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
- SDValue ShiftAmt = DAG.getConstant(Lg2, VT);
+ SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
if (Created)
Created->push_back(Op.getNode());
if (IsNegPow2) {
- Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
+ Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
if (Created)
Created->push_back(Op.getNode());
}
@@ -9278,14 +10538,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -9307,9 +10570,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (!ML)
break;
- const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
- getInstrInfo());
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// For small loops (between 5 and 8 instructions), align to a 32-byte
// boundary so that the entire loop fits in one instruction-cache line.
@@ -9414,8 +10675,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-std::pair<unsigned, const TargetRegisterClass*>
-PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
@@ -9433,8 +10695,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
break;
case 'v':
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -9445,11 +10715,14 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Constraint == "wf") {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws") {
- return std::make_pair(0U, &PPC::VSFRCRegClass);
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &PPC::VSSRCRegClass);
+ else
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
- std::pair<unsigned, const TargetRegisterClass*> R =
- TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
// (which we call X[0-9]+). If a 64-bit value has been requested, and a
@@ -9458,13 +10731,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
- PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ PPC::GPRCRegClass.contains(R.first))
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
- }
// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
@@ -9500,6 +10770,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'P': {
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
if (!CST) return; // Must be an immediate to match.
+ SDLoc dl(Op);
int64_t Value = CST->getSExtValue();
EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
// numbers are printed as such.
@@ -9507,35 +10778,35 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
if (isInt<16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
if (isShiftedUInt<16, 16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
if (isShiftedInt<16, 16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
if (isUInt<16>(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'M': // "M" is a constant that is greater than 31.
if (Value > 31)
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'N': // "N" is a positive constant that is an exact power of two.
if (Value > 0 && isPowerOf2_64(Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'O': // "O" is the constant zero.
if (Value == 0)
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
if (isInt<16>(-Value))
- Result = DAG.getTargetConstant(Value, TCVT);
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
}
break;
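Each constraint letter above is a precise range test on the immediate. Standalone equivalents of the isInt<16>/isUInt<16>/isShiftedUInt<16,16> predicates used there (behavior mirrored without the MathExtras templates; these are illustrative rewrites, not the actual LLVM headers):

    #include <cassert>
    #include <cstdint>

    // The immediate predicates behind the 'I', 'K', and 'J' constraints above.
    static bool isInt16(int64_t V) {            // 'I': signed 16-bit
      return V >= -32768 && V <= 32767;
    }
    static bool isUInt16(int64_t V) {           // 'K': only low 16 bits set
      return (V & ~int64_t(0xFFFF)) == 0;
    }
    static bool isShiftedUInt16_16(int64_t V) { // 'J': only high-order 16 bits set
      return (V & 0xFFFF) == 0 && isUInt16(int64_t(uint64_t(V) >> 16));
    }

    int main() {
      assert(isInt16(-1) && !isInt16(40000));
      assert(isUInt16(0xFFFF) && !isUInt16(0x10000));
      assert(isShiftedUInt16_16(0x10000) && !isShiftedUInt16_16(0x18001));
    }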
@@ -9555,7 +10826,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
- // FIXME: PPC does not allow r+i addressing modes for vectors!
+ // PPC does not allow r+i addressing modes for vectors!
+ if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ return false;
// PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
@@ -9604,14 +10877,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
-
- DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
- isPPC64? MVT::i64 : MVT::i32);
+ DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
+ isPPC64 ? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
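In plain C terms, the Depth > 0 path above amounts to reading the ABI's link-register save slot relative to the frame address; a sketch under the assumption of a standard PPC stack layout (illustrative only, not the DAG lowering itself):

    // Sketch: the saved return address lives at a fixed ABI offset
    // (Subtarget.getFrameLowering()->getReturnSaveOffset()) from the frame.
    void *savedReturnAddress(char *FrameAddr, long ReturnSaveOffset) {
      return *(void **)(FrameAddr + ReturnSaveOffset);
    }
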
@@ -9639,8 +10910,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned FrameReg;
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked))
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
else
FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
@@ -9668,7 +10938,7 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
bool is64Bit = isPPC64 && VT == MVT::i64;
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
.Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
(is64Bit ? PPC::X13 : PPC::R13))
.Default(0);
@@ -9689,6 +10959,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
@@ -9710,6 +10986,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::ppc_vsx_lxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
@@ -9726,6 +11014,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.writeMem = false;
return true;
}
+ case Intrinsic::ppc_qpx_qvlfda:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ case Intrinsic::ppc_qpx_qvlfiwaa:
+ case Intrinsic::ppc_qpx_qvlfiwza: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
@@ -9747,6 +11076,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
@@ -9763,6 +11104,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.writeMem = true;
return true;
}
+ case Intrinsic::ppc_qpx_qvstfda:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ case Intrinsic::ppc_qpx_qvstfiwa: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
default:
break;
}
@@ -9786,11 +11162,29 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
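The memset threshold above is simple arithmetic: a QPX register is 32 bytes, so Size >= 64 guarantees at least two vector stores, which is the break-even point once the splat value must itself be loaded from the constant pool. A sketch of the accounting (numbers only, not the lowering):

    // Sketch: QPX v4f64 stores move 32 bytes each; a QPX memset only pays
    // off once the constant-pool load is amortized over >= 2 stores.
    constexpr unsigned QPXStoreBytes = 32; // v4f64
    static_assert(64 / QPXStoreBytes >= 2, "memset break-even: two stores");
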
/// \brief Returns true if it is beneficial to convert a load of a constant
@@ -9913,7 +11307,7 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
// as implicit-defs for stackmaps and patchpoints. The same reasoning applies
// to CTR, which is used by any indirect call.
static const MCPhysReg ScratchRegs[] = {
- PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
+ PPC::X12, PPC::LR8, PPC::CTR8, 0
};
return ScratchRegs;
@@ -9925,6 +11319,11 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
if (VT == MVT::v2i64)
return false;
+ if (Subtarget.hasQPX()) {
+ if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+ return true;
+ }
+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1b585d8..c93de43 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -24,7 +24,7 @@
namespace llvm {
namespace PPCISD {
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -71,18 +71,9 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
- TOC_ENTRY,
-
/// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Like a regular LOAD but additionally taking/producing a flag.
- LOAD,
-
- /// Like LOAD (taking/producing a flag), but using r2 as hard-coded
- /// destination.
- LOAD_TOC,
-
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
@@ -108,10 +99,6 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
- /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
- /// to access TLS variables.
- CALL_TLS, CALL_NOP_TLS,
-
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -132,6 +119,15 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -179,14 +175,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
-    /// LARX = This corresponds to PPC l{w|d}arx instruction: load and
- /// reserve indexed. This is used to implement atomic operations.
- LARX,
-
-    /// STCX = This corresponds to PPC stcx. instruction: store conditional
- /// indexed. This is used to implement atomic operations.
- STCX,
-
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -203,7 +191,7 @@ namespace llvm {
PPC32_GOT,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS on PPC32.
+ /// local dynamic TLS on PPC32.
PPC32_PICGOT,
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
@@ -230,26 +218,46 @@ namespace llvm {
/// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
- /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsgd\@l.
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
ADDI_TLSGD_L,
+ /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
- /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym\@got\@tlsld\@l.
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
ADDI_TLSLD_L,
- /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
- /// local-dynamic TLS model, produces an ADDIS8 instruction
- /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
- /// to tie this in place following a copy to %X3 from the result
- /// of a GET_TLSLD_ADDR.
+ /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
@@ -267,6 +275,16 @@ namespace llvm {
/// operand identifies the operating system entry point.
SC,
+ /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+ CLRBHRB,
+
+ /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+ /// history rolling buffer entry.
+ MFBHRBE,
+
+ /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+ RFEBB,
+
/// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
/// endian. Maps to an xxswapd instruction that corrects an lxvd2x
/// or stxvd2x instruction. The chain is necessary because the
@@ -274,6 +292,22 @@ namespace llvm {
/// of outputs.
XXSWAPD,
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -300,21 +334,6 @@ namespace llvm {
/// destination 64-bit register.
LFIWZX,
- /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
- /// produces an ADDIS8 instruction that adds the TOC base register to
- /// sym\@toc\@ha.
- ADDIS_TOC_HA,
-
- /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
- /// produces a LD instruction with base register G8RReg and offset
- /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- LD_TOC_L,
-
- /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
- /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
- /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
- ADDI_TOC_L,
-
/// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.
@@ -323,7 +342,16 @@ namespace llvm {
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
- STXVD2X
+ STXVD2X,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
};
}
@@ -339,6 +367,11 @@ namespace llvm {
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
+ /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUDUM instruction.
+ bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
@@ -359,10 +392,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
- /// isAllNegativeZeroVector - Returns true if all elements of build_vector
- /// are -0.0.
- bool isAllNegativeZeroVector(SDNode *N);
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
@@ -372,14 +401,18 @@ namespace llvm {
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+ /// If this is a qvaligni shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isQVALIGNIShuffleMask(SDNode *N);
}
- class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
public:
- explicit PPCTargetLowering(const PPCTargetMachine &TM);
+ explicit PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
@@ -468,7 +501,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB, bool is64Bit,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -488,9 +522,10 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
@@ -504,6 +539,21 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
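The if/else chain above could equally be written with llvm::StringSwitch, matching the style used elsewhere in this backend; a sketch of the same mapping (purely a stylistic alternative, not a functional change):

    unsigned getInlineAsmMemConstraint(
        const std::string &ConstraintCode) const override {
      return StringSwitch<unsigned>(ConstraintCode)
          .Case("es", InlineAsm::Constraint_es)
          .Case("o",  InlineAsm::Constraint_o)
          .Case("Q",  InlineAsm::Constraint_Q)
          .Case("Z",  InlineAsm::Constraint_Z)
          .Case("Zy", InlineAsm::Constraint_Zy)
          .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
    }
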
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
@@ -619,6 +669,10 @@ namespace llvm {
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -642,8 +696,6 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -672,11 +724,15 @@ namespace llvm {
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -687,11 +743,12 @@ namespace llvm {
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
- SDValue InFlag, SDValue Chain,
+ SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerFormalArguments(SDValue Chain,
@@ -753,7 +810,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv,
@@ -762,7 +820,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool isTailCall, bool IsPatchPoint,
@@ -770,7 +829,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 5c6675d..d628330 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -56,22 +56,23 @@ def tlscall : Operand<i64> {
def SHL64 : SDNodeXForm<imm, [{
// Transformation function: 63 - imm
- return getI32Imm(63 - N->getZExtValue());
+ return getI32Imm(63 - N->getZExtValue(), SDLoc(N));
}]>;
def SRL64 : SDNodeXForm<imm, [{
// Transformation function: 64 - imm
- return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
}]>;
def HI32_48 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+  return getI32Imm((unsigned short)(N->getZExtValue() >> 32), SDLoc(N));
}]>;
def HI48_64 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+  return getI32Imm((unsigned short)(N->getZExtValue() >> 48), SDLoc(N));
}]>;
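HI32_48 and HI48_64 simply slice 16-bit fields out of the 64-bit immediate (note the SDLoc argument belongs to getI32Imm, not to the cast). A standalone check of the arithmetic (the value is arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Imm = 0x123456789ABCDEF0ULL;
      assert((unsigned short)(Imm >> 32) == 0x5678); // HI32_48: bits 32-47
      assert((unsigned short)(Imm >> 48) == 0x1234); // HI48_64: bits 48-63
      return 0;
    }
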
@@ -202,9 +203,6 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -238,15 +236,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX,
- [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
-let Defs = [CR0] in
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX,
- [(PPCstcx i64:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -328,6 +330,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
Requires<[In64BitMode]>;
}
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -596,6 +604,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
@@ -609,14 +621,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
@@ -686,7 +714,7 @@ defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
(outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
[]>;
} // Interpretation64Bit
} // hasSideEffects = 0
@@ -699,7 +727,7 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
@@ -755,7 +783,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
@@ -813,7 +841,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
@@ -837,11 +865,6 @@ def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
-def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
- "ld 2, $src", IIC_LdStLD,
- [(PPCload_toc ixaddr:$src)]>, isPPC64;
-
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
@@ -870,25 +893,16 @@ def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
}
}
-def : Pat<(PPCload ixaddr:$src),
- (LD ixaddr:$src)>;
-def : Pat<(PPCload xaddr:$src),
- (LDX xaddr:$src)>;
-
// Support for medium and large code model.
+let hasSideEffects = 0 in {
def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDIStocHA",
- [(set i64:$rD,
- (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
- isPPC64;
+ "#ADDIStocHA", []>, isPPC64;
+let mayLoad = 1 in
def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
- "#LDtocL",
- [(set i64:$rD,
- (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+ "#LDtocL", []>, isPPC64;
def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDItocL",
- [(set i64:$rD,
- (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+ "#ADDItocL", []>, isPPC64;
+}
// Support for thread-local storage.
def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
@@ -914,6 +928,28 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsgdLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsgdLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@@ -924,6 +960,28 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsldL and GETtlsldADDR, late expanded. X3 and LR8
+// are true defines, while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsldLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsldLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 4ef08eb..e27bf7f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -11,6 +11,21 @@
//
//===----------------------------------------------------------------------===//
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
+
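To make the note concrete: a "multiply even lanes" operation like VMULESB picks different register bytes depending on whether lane 0 is the leftmost byte (big-endian numbering) or the rightmost (little-endian numbering). A scalar sketch with the two numberings spelled out:

    #include <cstdio>

    int main() {
      signed char A[16], B[16];
      for (int i = 0; i < 16; ++i) { A[i] = (signed char)i; B[i] = 2; }
      short EvenBE[8], EvenLE[8];
      for (int i = 0; i < 8; ++i) {
        // "Even" lanes counted from the left: indices 0, 2, ..., 14.
        EvenBE[i] = (short)(A[2 * i] * B[2 * i]);
        // "Even" lanes counted from the right: indices 15, 13, ..., 1.
        EvenLE[i] = (short)(A[15 - 2 * i] * B[15 - 2 * i]);
      }
      // Different inputs were multiplied, so the operation is
      // lane-sensitive; a VADDFP-style elementwise op would not be.
      std::printf("BE even[0]=%d LE even[0]=%d\n", EvenBE[0], EvenLE[0]);
      return 0;
    }
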
//===----------------------------------------------------------------------===//
// Altivec transformation functions and pattern fragments.
//
@@ -28,6 +43,10 @@ def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
+def vpkudum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
@@ -36,6 +55,10 @@ def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
+def vpkudum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
// These fragments are provided for little-endian, where the inputs must be
// swapped for correct semantics.
@@ -47,6 +70,10 @@ def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
}]>;
+def vpkudum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
@@ -129,7 +156,7 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -140,7 +167,7 @@ def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG), SDLoc(N));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -151,7 +178,7 @@ def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
/// VSLDOI_swapped* - These fragments are provided for little-endian, where
/// the inputs must be swapped for correct semantics.
def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -161,21 +188,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -269,6 +296,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,7 +379,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -750,7 +787,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-
+
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@@ -791,18 +828,38 @@ def : Pat<(store v4i32:$rS, xoaddr:$dst),
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
// Shuffles.
@@ -929,3 +986,178 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
+def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+let Predicates = [HasP8Altivec] in {
+
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>;
+} // isCommutable
+
+// Vector Quadword Add
+def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>;
+def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>;
+def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>;
+
+// Vector Doubleword Subtract
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Quadword Subtract
+def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>;
+def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>;
+def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>;
+def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>;
+
+// Count Leading Zeros
+def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctlz v16i8:$vB))]>;
+def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzh $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctlz v8i16:$vB))]>;
+def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctlz v4i32:$vB))]>;
+def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctlz v2i64:$vB))]>;
+
+// Population Count
+def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctpop v16i8:$vB))]>;
+def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcnth $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctpop v8i16:$vB))]>;
+def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctpop v4i32:$vB))]>;
+def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctpop v2i64:$vB))]>;
+
+let isCommutable = 1 in {
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// VSX equivalents. We need to fix this up at some point. Two possible
+// solutions for this problem:
+// 1. Disable Altivec patterns that compete with VSX patterns using the
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
+// instructions than VMX instructions)
+// 2. Employ a more disciplined use of AddedComplexity, which would provide
+// more fine-grained control than option 1. This would be beneficial
+// if we find situations where Altivec is really preferred over VSX.
+def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "veqv $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
+def VNAND : VXForm_1<1412, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vnand $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (and v4i32:$vA, v4i32:$vB)))]>;
+} // isCommutable
+
+def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vorc $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (or v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
+// Vector doubleword integer pack and unpack.
+def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+ v4i32, v2i64>;
+def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+ v4i32, v2i64>;
+def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkudum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+ v4i32, v2i64>;
+def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
+ v2i64, v4i32>;
+def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
+ v2i64, v4i32>;
+
+// Shuffle patterns for unary and swapped (LE) vector pack modulo.
+def : Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
+          (VPKUDUM $vA, $vA)>;
+def : Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+          (VPKUDUM $vB, $vA)>;
+
+
+} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 186d5dc..4e03ed2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -562,6 +562,47 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+// Used for QPX
+class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+}
+
+class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<4> tttt;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-24} = tttt;
+ let Inst{25-30} = xo;
+ let Inst{31} = 0;
+}
+
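These instruction-format classes use IBM's bit numbering, where Inst{0} is the most significant bit of the 32-bit word. A standalone sketch of the XForm_18 layout above as an encoder (the function is illustrative; field names follow the class):

    #include <cstdint>

    // Place Val so that it ends at PPC bit Lo (bit 0 = MSB).
    static uint32_t field(uint32_t Val, unsigned Lo) {
      return Val << (31 - Lo);
    }

    uint32_t encodeXForm18(uint32_t Opcode, uint32_t FRT, uint32_t FRA,
                           uint32_t FRB, uint32_t XO) {
      return field(Opcode, 5) | // Inst{0-5}
             field(FRT, 10)   | // Inst{6-10}
             field(FRA, 15)   | // Inst{11-15}
             field(FRB, 20)   | // Inst{16-20}
             field(XO, 30);     // Inst{21-30}; Inst{31} = 0
    }
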
class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -652,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// XX*-Form (VSX)
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -669,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -835,6 +936,21 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class DCB_Form_hint<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> TH;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = TH;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
// DSS_Form - Form X instruction, used for altivec dss* instructions.
class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
@@ -945,6 +1061,38 @@ class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XLForm_4<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bit W;
+ bits<4> U;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-14} = 0;
+ let Inst{15} = W;
+ let Inst{16-19} = U;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XLForm_S<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<1> S;
+
+ let Pattern = pattern;
+
+ let Inst{6-19} = 0;
+ let Inst{20} = S;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
bits<6> opcode2, bits<2> xo2,
dag OOL, dag IOL, string asmstr,
@@ -1023,6 +1171,19 @@ class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XFXForm_3p<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> Entry;
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = Entry;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -1081,6 +1242,25 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+ bits<8> FLM;
+ bit W;
+ bits<5> FRB;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = L;
+ let Inst{7-14} = FLM;
+ let Inst{15} = W;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// 1.7.10 XS-Form - SRADI.
class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1177,6 +1357,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+// Used for QPX
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+ let FRC = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1383,6 +1571,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox"
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1401,6 +1622,49 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{22-31} = xo;
}
+// Z23-Form (used by QPX)
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<12> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..6c4e212
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+ return getI32Imm(N->getZExtValue(), SDLoc(N));
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API, which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
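
A minimal sketch of the bit arithmetic this pattern performs, assuming CR0 is viewed as the usual 4-bit {LT, GT, EQ, SO} field with LT as the most significant bit (the helper is invented for illustration):

    // After tbegin., EQ is 0 if the transaction started and 1 if it failed;
    // the builtin reports success as 1, hence the final XOR with 1.
    int tbeginResult(unsigned cr0Field) {
      unsigned eq = (cr0Field >> 1) & 1; // extract EQ from {LT, GT, EQ, SO}
      return eq ^ 1;                     // flip so 1 means success
    }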
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2492229..b4bb50c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,9 +53,6 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
-static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
-cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
-
static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
cl::Hidden);
@@ -64,7 +62,7 @@ void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(STI), RI(STI) {}
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -85,11 +83,11 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
- const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
unsigned Directive =
- DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
@@ -116,9 +114,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
bool IsRegCR;
- if (TRI->isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI->getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -184,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
+ case PPC::QVLFDX:
+ case PPC::QVLFSXs:
+ case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -210,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
+ case PPC::QVSTFDX:
+ case PPC::QVSTFSXs:
+ case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -694,6 +697,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(Cond[1].getReg(), 0, SubIdx);
}
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
+
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -702,7 +732,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// legalization. Promote them here.
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (PPC::F8RCRegClass.contains(DestReg) &&
- PPC::VSLRCRegClass.contains(SrcReg)) {
+ PPC::VSRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
@@ -711,7 +741,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(DestReg) &&
- PPC::VSHRCRegClass.contains(SrcReg)) {
+ PPC::VSRCRegClass.contains(SrcReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
@@ -720,7 +750,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
- PPC::VSLRCRegClass.contains(DestReg)) {
+ PPC::VSRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
@@ -729,7 +759,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
} else if (PPC::VRRCRegClass.contains(SrcReg) &&
- PPC::VSHRCRegClass.contains(DestReg)) {
+ PPC::VSRCRegClass.contains(DestReg)) {
unsigned SuperReg =
TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
@@ -739,6 +769,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
}
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(CRReg, getKillRegState(KillSrc));
+ // Rotate the CR bit in the CR field to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
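
The rotate amount passed to RLWINM above can be restated in isolation; a hedged sketch of that arithmetic (function name invented):

    // After MFOCRF, the 4-bit field for the CR register with encoding Enc
    // sits at PPC bits 4*Enc .. 4*Enc+3 (bit 0 = MSB). Rotating left by
    // Enc*4 + (4 - bitValue) brings the selected bit to bit 31, which the
    // RLWINM mask MB = ME = 31 then isolates.
    unsigned crBitRotateAmount(unsigned Enc, unsigned bitValue) {
      return Enc * 4 + (4 - bitValue); // bitValue: LT=3, GT=2, EQ=1, UN=0
    }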
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -760,8 +816,15 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// copies are generated, they are close enough to some use that the
// lower-latency form is preferable.
Opc = PPC::XXLOR;
- else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
+ PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::XXLORf;
+ else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMR;
+ else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRs;
+ else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -839,6 +902,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -847,6 +916,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -934,6 +1021,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -942,6 +1033,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
DestReg),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -1608,672 +1711,3 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-fma-mutate"
-
-namespace {
- // PPCVSXFMAMutate pass - When the addend of an A-type VSX FMA instruction is
- // fed by a register copy and one of the product operands is killed at the
- // FMA, switch to the M-type form so that the copy can be removed.
- struct PPCVSXFMAMutate : public MachineFunctionPass {
- static char ID;
- PPCVSXFMAMutate() : MachineFunctionPass(ID) {
- initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
- }
-
- LiveIntervals *LIS;
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
-
- // The default (A-type) VSX FMA form kills the addend (it is taken from
- // the target register, which is then updated to reflect the result of
- // the FMA). If the instruction, however, kills one of the registers
- // used for the product, then we can use the M-form instruction (which
- // will take that value from the to-be-defined register).
-
- int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
- if (AltOpc == -1)
- continue;
-
- // This pass is run after register coalescing, and so we're looking for
- // a situation like this:
- // ...
- // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
- // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
- // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
- // ...
- // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
- // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
- // ...
- // Where we can eliminate the copy by changing from the A-type to the
- // M-type instruction. Specifically, for this example, this means:
- // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
- // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
- // is replaced by:
- // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
- // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
- // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
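
For reference, the two FMA encodings this comment contrasts, with their ISA semantics written out (pseudo-code, not taken from the patch):

    // A-type: xsmaddadp T, A, B   =>   T = A * B + T   (addend lives in T)
    // M-type: xsmaddmdp T, A, B   =>   T = A * T + B   (a multiplicand lives in T)
    //
    // When a multiplicand is killed at the FMA, the M-type form can reuse
    // that register as the destination, making the preceding addend COPY dead.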
-
- SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
-
- VNInfo *AddendValNo =
- LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
- MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
-
- // The addend and this instruction must be in the same block.
-
- if (!AddendMI || AddendMI->getParent() != MI->getParent())
- continue;
-
- // The addend must be a full copy within the same register class.
-
- if (!AddendMI->isFullCopy())
- continue;
-
- unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
- if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
- MRI.getRegClass(AddendSrcReg))
- continue;
- } else {
- // If AddendSrcReg is a physical register, make sure the destination
- // register class contains it.
- if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
- ->contains(AddendSrcReg))
- continue;
- }
-
- // In theory, there could be other uses of the addend copy before this
- // fma. We could deal with this, but that would require additional
- // logic below and I suspect it will not occur in any relevant
- // situations. Additionally, check whether the copy source is killed
- // prior to the fma. In order to replace the addend here with the
- // source of the copy, it must still be live here. We can't use
- // interval testing for a physical register, so as long as we're
- // walking the MIs we may as well test liveness here.
- bool OtherUsers = false, KillsAddendSrc = false;
- for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
- J != JE; --J) {
- if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
- OtherUsers = true;
- break;
- }
- if (J->modifiesRegister(AddendSrcReg, TRI) ||
- J->killsRegister(AddendSrcReg, TRI)) {
- KillsAddendSrc = true;
- break;
- }
- }
-
- if (OtherUsers || KillsAddendSrc)
- continue;
-
- // Find one of the product operands that is killed by this instruction.
-
- unsigned KilledProdOp = 0, OtherProdOp = 0;
- if (LIS->getInterval(MI->getOperand(2).getReg())
- .Query(FMAIdx).isKill()) {
- KilledProdOp = 2;
- OtherProdOp = 3;
- } else if (LIS->getInterval(MI->getOperand(3).getReg())
- .Query(FMAIdx).isKill()) {
- KilledProdOp = 3;
- OtherProdOp = 2;
- }
-
- // If there are no killed product operands, then this transformation is
- // likely not profitable.
- if (!KilledProdOp)
- continue;
-
- // For virtual registers, verify that the addend source register
- // is live here (as should have been assured above).
- assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
- LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
- "Addend source register is not live!");
-
- // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
-
- unsigned AddReg = AddendMI->getOperand(1).getReg();
- unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
- unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
-
- unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
- unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
- unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
-
- bool AddRegKill = AddendMI->getOperand(1).isKill();
- bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
- bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
-
- bool AddRegUndef = AddendMI->getOperand(1).isUndef();
- bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
- bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
-
- unsigned OldFMAReg = MI->getOperand(0).getReg();
-
- // The transformation doesn't work well with things like:
- // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
- // so leave such things alone.
- if (OldFMAReg == KilledProdReg)
- continue;
-
- assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
- "Addend copy not tied to old FMA output!");
-
- DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
-
- MI->getOperand(0).setReg(KilledProdReg);
- MI->getOperand(1).setReg(KilledProdReg);
- MI->getOperand(3).setReg(AddReg);
- MI->getOperand(2).setReg(OtherProdReg);
-
- MI->getOperand(0).setSubReg(KilledProdSubReg);
- MI->getOperand(1).setSubReg(KilledProdSubReg);
- MI->getOperand(3).setSubReg(AddSubReg);
- MI->getOperand(2).setSubReg(OtherProdSubReg);
-
- MI->getOperand(1).setIsKill(KilledProdRegKill);
- MI->getOperand(3).setIsKill(AddRegKill);
- MI->getOperand(2).setIsKill(OtherProdRegKill);
-
- MI->getOperand(1).setIsUndef(KilledProdRegUndef);
- MI->getOperand(3).setIsUndef(AddRegUndef);
- MI->getOperand(2).setIsUndef(OtherProdRegUndef);
-
- MI->setDesc(TII->get(AltOpc));
-
- DEBUG(dbgs() << " -> " << *MI);
-
- // The killed product operand was killed here, so we can reuse it now
- // for the result of the fma.
-
- LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
- VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
- for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
- UI != UE;) {
- MachineOperand &UseMO = *UI;
- MachineInstr *UseMI = UseMO.getParent();
- ++UI;
-
- // Don't replace the result register of the copy we're about to erase.
- if (UseMI == AddendMI)
- continue;
-
- UseMO.setReg(KilledProdReg);
- UseMO.setSubReg(KilledProdSubReg);
- }
-
- // Extend the live intervals of the killed product operand to hold the
- // fma result.
-
- LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
- for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
- AI != AE; ++AI) {
- // Don't add the segment that corresponds to the original copy.
- if (AI->valno == AddendValNo)
- continue;
-
- VNInfo *NewFMAValNo =
- NewFMAInt.getNextValue(AI->start,
- LIS->getVNInfoAllocator());
-
- NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
- NewFMAValNo));
- }
- DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
-
- FMAInt.removeValNo(FMAValNo);
- DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
-
- // Remove the (now unused) copy.
-
- DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
- LIS->RemoveMachineInstrFromMaps(AddendMI);
- AddendMI->eraseFromParent();
-
- Changed = true;
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX then go ahead and return without doing
- // anything.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
-
- LIS = &getAnalysis<LiveIntervals>();
-
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- if (DisableVSXFMAMutate)
- return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
- "PowerPC VSX FMA Mutation", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
- "PowerPC VSX FMA Mutation", false, false)
-
-char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
-
-char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-copy"
-
-namespace llvm {
- void initializePPCVSXCopyPass(PassRegistry&);
-}
-
-namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
- // (Altivec and scalar floating-point registers), we need to transform the
- // copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
- static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {
- initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
- bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
- MachineRegisterInfo &MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- return RC->hasSubClassEq(MRI.getRegClass(Reg));
- } else if (RC->contains(Reg)) {
- return true;
- }
-
- return false;
- }
-
- bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
- }
-
- bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
- }
-
- bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
- }
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
- if (!MI->isFullCopy())
- continue;
-
- MachineOperand &DstMO = MI->getOperand(0);
- MachineOperand &SrcMO = MI->getOperand(1);
-
- if ( IsVSReg(DstMO.getReg(), MRI) &&
- !IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *to* a VSX register from a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *SrcRC =
- IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
- assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI)) &&
- "Unknown source for a VSX copy");
-
- unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
- .addImm(1) // add 1, not 0, because there is no implicit clearing
- // of the high bits.
- .addOperand(SrcMO)
- .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
-
- // The source of the original copy is now the new virtual register.
- SrcMO.setReg(NewVReg);
- } else if (!IsVSReg(DstMO.getReg(), MRI) &&
- IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *from* a VSX register to a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *DstRC =
- IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
- assert((IsF8Reg(DstMO.getReg(), MRI) ||
- IsVRReg(DstMO.getReg(), MRI)) &&
- "Unknown destination for a VSX copy");
-
- // Copy the VSX value into a new VSX register of the correct subclass.
- unsigned NewVReg = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVReg)
- .addOperand(SrcMO);
-
- // Transform the original copy into a subregister extraction copy.
- SrcMO.setReg(NewVReg);
- SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
- }
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX on the subtarget, don't do anything.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
- "PowerPC VSX Copy Legalization", false, false)
-
-char PPCVSXCopy::ID = 0;
-FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
-
-namespace llvm {
- void initializePPCVSXCopyCleanupPass(PassRegistry&);
-}
-
-namespace {
- // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
- // registers (mostly because the ABI code still places all values into the
- // "traditional" floating-point and vector registers). Remove them here.
- struct PPCVSXCopyCleanup : public MachineFunctionPass {
- static char ID;
- PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
- initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- SmallVector<MachineInstr *, 4> ToDelete;
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
- if (MI->getOpcode() == PPC::XXLOR &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
- MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
- ToDelete.push_back(MI);
- }
-
- if (!ToDelete.empty())
- Changed = true;
-
- for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
- DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
- ToDelete[i]->eraseFromParent();
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- // If we don't have VSX don't bother doing anything here.
- if (!TM->getSubtargetImpl()->hasVSX())
- return false;
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
- "PowerPC VSX Copy Cleanup", false, false)
-
-char PPCVSXCopyCleanup::ID = 0;
-FunctionPass*
-llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
-
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "ppc-early-ret"
-STATISTIC(NumBCLR, "Number of early conditional returns");
-STATISTIC(NumBLR, "Number of early returns");
-
-namespace llvm {
- void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
-namespace {
- // PPCEarlyReturn pass - For simple functions without epilogue code, move
- // returns up, and create conditional returns, to avoid unnecessary
- // branch-to-blr sequences.
- struct PPCEarlyReturn : public MachineFunctionPass {
- static char ID;
- PPCEarlyReturn() : MachineFunctionPass(ID) {
- initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
- }
-
- const PPCTargetMachine *TM;
- const PPCInstrInfo *TII;
-
-protected:
- bool processBlock(MachineBasicBlock &ReturnMBB) {
- bool Changed = false;
-
- MachineBasicBlock::iterator I = ReturnMBB.begin();
- I = ReturnMBB.SkipPHIsAndLabels(I);
-
- // The block must be essentially empty except for the blr.
- if (I == ReturnMBB.end() ||
- (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
- I != ReturnMBB.getLastNonDebugInstr())
- return Changed;
-
- SmallVector<MachineBasicBlock*, 8> PredToRemove;
- for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
- PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
- bool OtherReference = false, BlockChanged = false;
- for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- if (J->getOpcode() == PPC::B) {
- if (J->getOperand(0).getMBB() == &ReturnMBB) {
- // This is an unconditional branch to the return. Replace the
- // branch with a blr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBLR;
- continue;
- }
- } else if (J->getOpcode() == PPC::BCC) {
- if (J->getOperand(2).getMBB() == &ReturnMBB) {
- // This is a conditional branch to the return. Replace the branch
- // with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg());
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBCLR;
- continue;
- }
- } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
- if (J->getOperand(1).getMBB() == &ReturnMBB) {
- // This is a conditional branch to the return. Replace the branch
- // with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ?
- PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg());
- MachineBasicBlock::iterator K = J--;
- K->eraseFromParent();
- BlockChanged = true;
- ++NumBCLR;
- continue;
- }
- } else if (J->isBranch()) {
- if (J->isIndirectBranch()) {
- if (ReturnMBB.hasAddressTaken())
- OtherReference = true;
- } else
- for (unsigned i = 0; i < J->getNumOperands(); ++i)
- if (J->getOperand(i).isMBB() &&
- J->getOperand(i).getMBB() == &ReturnMBB)
- OtherReference = true;
- } else if (!J->isTerminator() && !J->isDebugValue())
- break;
-
- if (J == (*PI)->begin())
- break;
-
- --J;
- }
-
- if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
- OtherReference = true;
-
- // Predecessors are stored in a vector and can't be removed here.
- if (!OtherReference && BlockChanged) {
- PredToRemove.push_back(*PI);
- }
-
- if (BlockChanged)
- Changed = true;
- }
-
- for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
- PredToRemove[i]->removeSuccessor(&ReturnMBB);
-
- if (Changed && !ReturnMBB.hasAddressTaken()) {
- // We now might be able to merge this blr-only block into its
- // by-layout predecessor.
- if (ReturnMBB.pred_size() == 1 &&
- (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
- // Move the blr into the preceding block.
- MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
- PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
- PrevMBB.removeSuccessor(&ReturnMBB);
- }
-
- if (ReturnMBB.pred_empty())
- ReturnMBB.eraseFromParent();
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- TII = TM->getSubtargetImpl()->getInstrInfo();
-
- bool Changed = false;
-
- // If the function does not have at least two blocks, then there is
- // nothing to do.
- if (MF.size() < 2)
- return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
- }
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
- "PowerPC Early-Return Creation", false, false)
-
-char PPCEarlyReturn::ID = 0;
-FunctionPass*
-llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
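
The effect of the PPCEarlyReturn pass being removed from this file can be pictured with extended mnemonics; an illustrative before/after, not copied from any test:

    //   before:                        after:
    //     bne cr0, .LBB0_ret             bnelr cr0    ; early conditional return
    //     ...                            ...
    //   .LBB0_ret:                     ; blr-only block merged away or deleted
    //     blr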
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 4add6f9..7fd076a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -63,7 +63,7 @@ enum PPC970_Unit {
};
} // end namespace PPCII
-
+class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index ad3bc1d..c5a044c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClarx : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-def SDT_PPCstcx : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -61,6 +54,27 @@ def tocentry32 : Operand<iPTR> {
let MIOperandInfo = (ops i32imm:$imm);
}
+def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
+]>;
+def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisInt<1>
+]>;
+def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
+]>;
+def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+def SDT_PPCqbflt : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
+
+def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -98,7 +112,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
-def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
+ [SDNPMayLoad, SDNPMemOperand]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
@@ -110,14 +125,33 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
- [SDNPHasChain]>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
+ [SDNPHasChain, SDNPMayLoad]>;
+
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -136,20 +170,9 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -178,6 +201,12 @@ def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
[SDNPHasChain, SDNPSideEffect]>;
+def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>;
+def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -195,18 +224,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Instructions to support atomic operations
-def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
- [SDNPHasChain, SDNPMayLoad]>;
-def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
- [SDNPHasChain, SDNPMayStore]>;
-
-// Instructions to support medium and large code model
-def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
-def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
-def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
-
-
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -217,41 +234,42 @@ def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm
- return getI32Imm(31 - N->getZExtValue());
+ return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
}]>;
def SRL32 : SDNodeXForm<imm, [{
// Transformation function: 32 - imm
- return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
}]>;
def LO16 : SDNodeXForm<imm, [{
// Transformation function: get the low 16 bits.
- return getI32Imm((unsigned short)N->getZExtValue());
+ return getI32Imm((unsigned short)N->getZExtValue(), SDLoc(N));
}]>;
def HI16 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getI32Imm((unsigned)N->getZExtValue() >> 16, SDLoc(N));
}]>;
def HA16 : SDNodeXForm<imm, [{
// Transformation function: get the high 16 bits, adjusted for the signed low half.
signed int Val = N->getZExtValue();
- return getI32Imm((Val - (signed short)Val) >> 16);
+ return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N));
}]>;
def MB : SDNodeXForm<imm, [{
// Transformation function: get the start bit of a mask
unsigned mb = 0, me;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
- return getI32Imm(mb);
+ return getI32Imm(mb, SDLoc(N));
}]>;
def ME : SDNodeXForm<imm, [{
// Transformation function: get the end bit of a mask
unsigned mb, me = 0;
(void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
- return getI32Imm(me);
+ return getI32Imm(me, SDLoc(N));
}]>;
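
The HA16/LO16 split above only recombines correctly because the low half is later added as a signed 16-bit quantity (by addi and friends); a self-contained C++ check of that arithmetic, mirroring the transforms (not LLVM code):

    #include <cassert>
    #include <cstdint>

    static int16_t lo16(uint32_t v) { return (int16_t)(v & 0xFFFF); }
    static int16_t ha16(uint32_t v) {
      // Fold the sign of the low half back into the high half ("high-adjusted").
      return (int16_t)((v - (uint32_t)(int16_t)v) >> 16);
    }

    int main() {
      for (uint32_t v : {0u, 0x12348000u, 0x7FFFFFFFu, 0xFFFF8000u}) {
        uint32_t hi = (uint32_t)(uint16_t)ha16(v) << 16;
        assert(hi + (uint32_t)(int32_t)lo16(v) == v); // wraps mod 2^32
      }
      return 0;
    }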
def maskimm32 : PatLeaf<(imm), [{
// maskImm predicate - True if immediate is a run of ones.
@@ -421,6 +439,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -431,6 +461,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
def PPCU4ImmAsmOperand : AsmOperandClass {
let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
let RenderMethod = "addImmOperands";
@@ -466,6 +505,24 @@ def u6imm : Operand<i32> {
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
}
+def PPCU10ImmAsmOperand : AsmOperandClass {
+ let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u10imm : Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+ let ParserMatchClass = PPCU10ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<10>";
+}
+def PPCU12ImmAsmOperand : AsmOperandClass {
+ let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+ let ParserMatchClass = PPCU12ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<12>";
+}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addS16ImmOperands";
@@ -681,6 +738,12 @@ def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
+def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
+def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -757,6 +820,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+// Multiclass for instructions whose non-record form is not cracked and
+// whose record form is cracked (e.g. divw, mullw, etc.)
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
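
Roughly what this multiclass expands to for a user such as DIVW further down (sketched from the NAME/"o" convention used throughout this file; unchanged operands elided):

    // def DIVW  : XOForm_1<31, 491, 0, ..., "divw $rT, $rA, $rB", ...>,
    //             RecFormRel;                        // non-record form: not cracked
    // let Defs = [CR0] in
    // def DIVWo : XOForm_1<31, 491, 0, ..., "divw. $rT, $rA, $rB", ...>,
    //             isDOT, RecFormRel, PPC970_DGroup_First,
    //             PPC970_DGroup_Cracked;             // record form: cracked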
+
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1292,6 +1372,24 @@ let PPC970_Unit = 7 in {
"sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
}
+// Branch history rolling buffer.
+def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB,
+ [(PPCclrbhrb)]>,
+ PPC970_DGroup_Single;
+// The $dmy argument used for MFBHRBE is not needed; however, including
+// it avoids automatic generation of PPCFastISel::fastEmit_i(), which
+// interferes with necessary special handling (see PPCFastISel.cpp).
+def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD),
+ (ins u10imm:$imm, u10imm:$dmy),
+ "mfbhrbe $rD, $imm", IIC_BrB,
+ [(set i32:$rD,
+ (PPCmfbhrbe imm:$imm, imm:$dmy))]>,
+ PPC970_DGroup_First;
+
+def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
+ IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
+ PPC970_DGroup_Single;
+
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1305,12 +1403,6 @@ def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst",
- IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
- PPC970_DGroup_Single;
-def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst",
- IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
- PPC970_DGroup_Single;
def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
PPC970_DGroup_Single;
@@ -1318,15 +1410,29 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
+def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbt $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbtst $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+} // hasSideEffects = 0
+
def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
- "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+
+def : Pat<(int_ppc_dcbt xoaddr:$dst),
+ (DCBT 0, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbtst xoaddr:$dst),
+ (DCBTST 0, xoaddr:$dst)>;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
- (DCBT xoaddr:$dst)>; // data prefetch for loads
+ (DCBT 0, xoaddr:$dst)>; // data prefetch for loads
def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
- (DCBTST xoaddr:$dst)>; // data prefetch for stores
+ (DCBTST 0, xoaddr:$dst)>; // data prefetch for stores
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
- (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
+ (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
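
A plausible source-level route to these selections; the rw operand of the generic prefetch node distinguishes the two data forms (the builtin mapping is inferred, not stated in the patch):

    // __builtin_prefetch lowers to the prefetch node matched above:
    // rw = 0 selects dcbt (data read), rw = 1 selects dcbtst (data write).
    void warm_cache(const char *p) {
      __builtin_prefetch(p, /*rw=*/0, /*locality=*/3); // -> dcbt   0, <addr>
      __builtin_prefetch(p, /*rw=*/1, /*locality=*/3); // -> dcbtst 0, <addr>
    }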
// Atomic operations
let usesCustomInserter = 1 in {
@@ -1409,15 +1515,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX,
- [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
-let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX,
- [(PPCstcx i32:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
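
These load-reserve/store-conditional definitions exist to implement LL/SC retry loops; a sketch of the classic compare-and-swap idiom with GCC-style inline assembly (illustrative only, not generated by this code):

    // Returns the value observed at *p; the store only succeeds if the
    // reservation taken by lwarx is still held at the stwcx.
    static inline int atomic_cas32(int *p, int expected, int desired) {
      int old;
      __asm__ __volatile__(
          "1: lwarx   %0,0,%2\n"  // load word and reserve
          "   cmpw    %0,%3\n"
          "   bne-    2f\n"       // mismatch: give up
          "   stwcx.  %1,0,%2\n"  // store iff reservation still held
          "   bne-    1b\n"       // reservation lost: retry
          "2:"
          : "=&r"(old)
          : "r"(desired), "r"(p), "r"(expected)
          : "cr0", "memory");
      return old;
    }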
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
@@ -1436,7 +1571,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
//
// Unindexed (r+i) Loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
@@ -1533,7 +1668,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -2193,15 +2328,20 @@ let Uses = [RM], Defs = [RM] in {
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
"mtfsb1 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
- "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
+ let isCodeGenOnly = 1 in
+ def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+ let Defs = [CR1] in
+ def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs. $rT", IIC_IntMFFS, []>, isDOT;
}
@@ -2221,14 +2361,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
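
A likely source-level use of the new extended-divide instructions, via the PowerPC builtins (builtin names assumed from the usual GCC/Clang convention for int_ppc_divwe/int_ppc_divweu):

    // divwe/divweu divide (dividend << 32) by the divisor, useful for
    // fixed-point and extended-precision division.
    int      ediv (int a, int b)           { return __builtin_divwe(a, b);  }
    unsigned edivu(unsigned a, unsigned b) { return __builtin_divweu(a, b); }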
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -2377,7 +2533,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
(outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
[]>;
}
@@ -2459,9 +2615,6 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
-def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
- (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
-
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2516,10 +2669,49 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsgdLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg,
+ tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsldLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsldLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
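+// Note: "late expanded" here plausibly means a post-isel pass splits each
+// combined form back into its addi and GETtls* halves; defining them as one
+// op keeps the pair adjacent through register allocation, preserving the
+// addi/bl sequence the TLS general- and local-dynamic models require.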
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
@@ -2604,6 +2796,8 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
@@ -3188,6 +3382,28 @@ def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsrd $RS, $L", IIC_SprMTMSRD>;
+def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
+ "mcrfs $BF, $BFA", IIC_BrMCR>;
+
+def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi $BF, $U, $W", IIC_IntMFFS>;
+
+def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT;
+
+def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
+def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>;
+
+def MTFSF : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
+def MTFSFo : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT;
+
+def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
"slbie $RB", IIC_SprSLBIE, []>;
@@ -3302,7 +3518,7 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
@@ -3312,6 +3528,17 @@ def : InstAlias<"waitimpl", (WAIT 2)>;
def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+def DCBTx : PPCAsmPseudo<"dcbt $dst", (ins memrr:$dst)>;
+def DCBTSTx : PPCAsmPseudo<"dcbtst $dst", (ins memrr:$dst)>;
+
+def DCBTCT : PPCAsmPseudo<"dcbtct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTDS : PPCAsmPseudo<"dcbtds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTT : PPCAsmPseudo<"dcbtt $dst", (ins memrr:$dst)>;
+
+def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3544,6 +3771,9 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0,
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
@@ -3584,6 +3814,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
new file mode 100644
index 0000000..5c66b42
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -0,0 +1,1192 @@
+//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM (as updated in 2011).
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegQFRCAsmOperand : AsmOperandClass {
+ let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> {
+ let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+def PPCRegQSRCAsmOperand : AsmOperandClass {
+ let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> {
+ let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+def PPCRegQBRCAsmOperand : AsmOperandClass {
+ let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> {
+ let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - An AForm_1 intrinsic definition.
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - An AForm_1 intrinsic definition (simple instructions).
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - An AForm_2 intrinsic definition.
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - An AForm_3 intrinsic definition.
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - An AForm_4a intrinsic definition.
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - An XForm_18 intrinsic definition.
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - An XForm_19 intrinsic definition.
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
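+
+// Note the operand order in the fused forms above: as with the scalar
+// PowerPC fmadd family, the A-form computes FRT = FRA * FRC + FRB, so the
+// assembly strings list $FRA, $FRC, $FRB while the intrinsic patterns take
+// ($FRA, $FRB, $FRC) with $FRB as the addend.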
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val,
+ node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
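+
+// These fragments reflect how QPX models single precision: v4f32 values
+// live in the same 4 x f64 register file, so a v4f32 load is an extending
+// load into a v4f64 register (see QVLFSX below) and a v4f32 store is a
+// truncating store from one (see QVSTFSX below).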
+
+def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>;
+
+def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>;
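+
+// FP_ROUND carries a flag as its second operand: 0 means the rounding may
+// lose information, 1 means it is known to be exact. An exact
+// v4f64-to-v4f32 round is therefore just a register-class copy (see the
+// fround_exact pattern near the end of this file); an inexact one must go
+// through qvfrsp (see QVFRSPs below).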
+
+let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
+ def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+ // Add Instructions
+ let isCommutable = 1 in {
+ def QVFADD : AForm_2<4, 21,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+ def QVFADDSs : AForm_2<0, 21,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+ }
+ def QVFSUB : AForm_2<4, 20,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+ def QVFSUBSs : AForm_2<0, 20,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+ // Estimate Instructions
+ def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfre $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+ def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+ let isCodeGenOnly = 1 in
+ def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfres $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+ def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+ def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+ let isCodeGenOnly = 1 in
+ def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+ // Multiply Instructions
+ let isCommutable = 1 in {
+ def QVFMUL : AForm_3<4, 25,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+ def QVFMULSs : AForm_3<0, 25,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+ "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+ }
+ def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+ def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+ // Multiply-add instructions
+ def QVFMADD : AForm_1<4, 29,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+ def QVFMADDSs : AForm_1<0, 29,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+ def QVFNMADD : AForm_1<4, 31,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+ def QVFNMADDSs : AForm_1<0, 31,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ v4f32:$FRB)))]>;
+ def QVFMSUB : AForm_1<4, 28,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+ def QVFMSUBSs : AForm_1<0, 28,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB)))]>;
+ def QVFNMSUB : AForm_1<4, 30,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB))))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+ def QVFNMSUBSs : AForm_1<0, 30,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB))))]>;
+ def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
+ def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
+ def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+ def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+ def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+ def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+ def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+ def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+ // Select Instruction
+ let isCodeGenOnly = 1 in
+ def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+ def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+ (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (vselect v4i1:$FRA,
+ v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+ (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (vselect v4i1:$FRA,
+ v4f32:$FRC, v4f32:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+ (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4i1:$FRT, (vselect v4i1:$FRA,
+ v4i1:$FRC, v4i1:$FRB))]>;
+
+ // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+ // instruction selection into a branch sequence.
+ let usesCustomInserter = 1 in {
+ def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+ }
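+ // As with the other PPC SELECT_CC pseudos, the custom inserter presumably
+ // expands each of these into a compare-and-branch diamond: a conditional
+ // branch on $cond, one block per source operand, and a PHI merging $T and
+ // $F into $dst.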
+
+ // Convert and Round Instructions
+ def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+ let isCodeGenOnly = 1 in
+ def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+ def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+ def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+ def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+ def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+ def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+ def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+ def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+ let isCodeGenOnly = 1 in
+ def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+ def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+ def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+ let isCodeGenOnly = 1 in
+ def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+ def QVFRSPs : XForm_19<4, 12,
+ (outs qsrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+ def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+ def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+
+ def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+ def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+ // Move Instructions
+ def QVFMR : XForm_19<4, 72,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+ let isCodeGenOnly = 1 in {
+ def QVFMRs : XForm_19<4, 72,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+ def QVFMRb : XForm_19<4, 72,
+ (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+ }
+ def QVFNEG : XForm_19<4, 40,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNEGs : XForm_19<4, 40,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+ def QVFABS : XForm_19<4, 264,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFABSs : XForm_19<4, 264,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+ def QVFNABS : XForm_19<4, 136,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNABSs : XForm_19<4, 136,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+ def QVFCPSGN : XForm_18<4, 8,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCPSGNs : XForm_18<4, 8,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+ def QVALIGNI : Z23Form_1<4, 5,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIs : Z23Form_1<4, 5,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIb : Z23Form_1<4, 5,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+ (i32 imm:$idx)))]>;
+
+ def QVESPLATI : Z23Form_2<4, 37,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIs : Z23Form_2<4, 37,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIb : Z23Form_2<4, 37,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+ def QVFPERM : AForm_1<4, 6,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFPERMs : AForm_1<4, 6,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def QVGPCI : Z23Form_3<4, 133,
+ (outs qfrc:$FRT), (ins u12imm:$idx),
+ "qvgpci $FRT, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+ // Compare Instruction
+ let isCodeGenOnly = 1 in
+ def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+ def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+ def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+ def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+ def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVFLOGICAL : XForm_20<4, 4,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ def QVFLOGICALb : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ let isCodeGenOnly = 1 in
+ def QVFLOGICALs : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+ // Load indexed instructions
+ let mayLoad = 1 in {
+ def QVLFDX : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (load xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFDXb : XForm_1<31, 583,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFDXA : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFDUX : XForm_1<31, 615,
+ (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+ let RC = 1 in
+ def QVLFDUXA : XForm_1<31, 615,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSX : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVLFSXb : XForm_1<31, 519,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFSXs : XForm_1<31, 519,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+ let RC = 1 in
+ def QVLFSXA : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSUX : XForm_1<31, 551,
+ (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+ let RC = 1 in
+ def QVLFSUXA : XForm_1<31, 551,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDX : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDXA : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDUX : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDUXA : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSX : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLFCSXs : XForm_1<31, 7,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFCSXA : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSUX : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCSUXA : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWAX : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWAXA : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWZX : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWZXA : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+ }
+
+
+ def QVLPCLDX : XForm_1<31, 582,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCLSX : XForm_1<31, 518,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLPCLSXint : XForm_11<31, 518,
+ (outs qfrc:$FRT), (ins G8RC:$src),
+ "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+ def QVLPCRDX : XForm_1<31, 70,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCRSX : XForm_1<31, 6,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def QVSTFDX : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+ [(store qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFDXb : XForm_8<31, 711,
+ (outs), (ins qbrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFDXA : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFDUXA : XForm_8<31, 743,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDXI : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDXIA : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUXI : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDUXIA : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSX : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFSXs : XForm_8<31, 647,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+ let RC = 1 in
+ def QVSTFSXA : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+ let isCodeGenOnly = 1 in
+ def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFSUXA : XForm_8<31, 679,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSXI : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSXIA : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUXI : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSUXIA : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDX : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXA : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSX : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVSTFCSXs : XForm_8<31, 135,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFCSXA : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUX : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXA : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUX : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXA : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDXI : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXIA : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSXI : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSXIA : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUXI : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXIA : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUXI : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXIA : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFIWX : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFIWXA : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+ }
+}
+
+} // hasSideEffects = 0
+}
+
+def : InstAlias<"qvfclr $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
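+
+// The $tttt immediate of qvflogical selects one of the sixteen two-input
+// boolean functions by truth table, as the aliases above show: 0 = clear,
+// 1 = and, 4 = andc, 6 = xor, 7 = or, 8 = nor, 9 = eqv, 10 = not ($FRA),
+// 13 = orc, 14 = nand, 15 = set. The setcc and v4i1 patterns below combine
+// qvfcmp*/qvftstnan results using these same encodings.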
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERM $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERMs $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
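+
+// The variable-index extracts above work by permutation: RLDICR scales the
+// index (a left shift by 2), QVLPCLSXint turns that scaled value, treated
+// as an address, into a qvfperm control vector, and the permute then moves
+// the selected element into lane 0, where EXTRACT_SUBREG reads it out as a
+// scalar.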
+
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+ (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+ (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
+ (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+ (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+ (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+ (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMSUB $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+ (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+ (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+ (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+ (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+ (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+ (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+ (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+ (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+ (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+ (QVLFIWZX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+ (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+ (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+ (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+ (QVLPCRSX xoaddr:$src)>;
+
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+ (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+ (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+ (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+ (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+ (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+ (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+ (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+ (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+ (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+ (QVSTFSXA $T, xoaddr:$dst)>;
+
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+ (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+ (QVGPCI imm:$idx)>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+ (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+ (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+ (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+ (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+ (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+ (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+ (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+def : Pat<(v4f64 (fextend v4f32:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+ (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation.
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
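+
+// Because of that encoding, recovering the values needs no conversion
+// instruction: each lane of the v4i1 already holds -1.0 or 1.0 as an
+// ordinary double, so a register-class copy suffices.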
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+ (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+ (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+ (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+ (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+ (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+ (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we
+// need to explicitly OR with a NaN test on the second operand.
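+// For example, in fminnum(A, B) with B NaN: QVFCMPLTb(A, B) is false in
+// every lane, QVFTSTNANb(B, B) is true, and OR-ing them (tttt = 7) yields
+// true, so QVFSELb returns A, the non-NaN operand. With A NaN and B
+// ordinary, both OR inputs are false and B is returned instead.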
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index b21b251..9685bac 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -11,6 +11,21 @@
//
//===----------------------------------------------------------------------===//
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
+
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
@@ -25,6 +40,13 @@ def vsfrc : RegisterOperand<VSFRC> {
let ParserMatchClass = PPCRegVSFRCAsmOperand;
}
+def PPCRegVSSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vssrc : RegisterOperand<VSSRC> {
+ let ParserMatchClass = PPCRegVSSRCAsmOperand;
+}
+
// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -41,6 +63,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -66,7 +91,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
@@ -85,7 +110,7 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD,
[(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
- }
+ } // mayLoad
// Store indexed instructions
let mayStore = 1 in {
@@ -97,13 +122,14 @@ let Uses = [RM] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
+ [(store v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
- [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
- }
+ [(store v4i32:$XT, xoaddr:$dst)]>;
+
+ } // mayStore
// Add/Mul Instructions
let isCommutable = 1 in {
@@ -773,6 +799,15 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
} // usesCustomInserter
} // AddedComplexity
@@ -872,6 +907,11 @@ def : Pat<(v2f64 (bitconvert v2i64:$A)),
def : Pat<(v2i64 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
// sign extension patterns
// To extend "in place" from v2i32 to v2i64, we have input data like:
// | undef | i32 | undef | i32 |
@@ -891,9 +931,11 @@ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
// Stores.
-def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
// Permutes.
@@ -938,3 +980,124 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
} // AddedComplexity
} // HasVSX
+// The following VSX instructions were introduced in Power ISA 2.07
+/* FIXME: If the operands are v2i64, these patterns will not match.
+   We should define new patterns, or otherwise match the same patterns,
+   when the elements are larger than i32.
+*/
+def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
+let Predicates = [HasP8Vector] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+ let isCommutable = 1 in {
+ def XXLEQV : XX3Form<60, 186,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+ def XXLNAND : XX3Form<60, 178,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+ v4i32:$XB)))]>;
+ } // isCommutable
+
+ def XXLORC : XX3Form<60, 170,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
+ // VSX scalar loads introduced in ISA 2.07
+ let mayLoad = 1 in {
+ def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+ "lxsspx $XT, $src", IIC_LdStLFD,
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwax $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwzx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ } // mayLoad
+
+ // VSX scalar stores introduced in ISA 2.07
+ let mayStore = 1 in {
+ def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
+ "stxsspx $XT, $dst", IIC_LdStSTFD,
+ [(store f32:$XT, xoaddr:$dst)]>;
+ def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsiwx $XT, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ } // mayStore
+
+ def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+ def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
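The SELECT_VSSRC patterns above fold an i1 comparison directly into a single CR logical operation. The correspondence rests on simple boolean identities over 0/1 operands; the following self-contained check (names are illustrative, not from the patch) verifies each mapping over all four input combinations:

    // Boolean identities behind the selectcc-to-CR-logical patterns:
    // with the i1 operands viewed as 0/1, each comparison is one CR op.
    #include <cassert>

    int main() {
      for (int lhs = 0; lhs <= 1; ++lhs)
        for (int rhs = 0; rhs <= 1; ++rhs) {
          assert((lhs <  rhs) == ((rhs & ~lhs) & 1)); // SETLT -> CRANDC(rhs, lhs)
          assert((lhs <= rhs) == ((rhs | ~lhs) & 1)); // SETLE -> CRORC(rhs, lhs)
          assert((lhs == rhs) == (~(lhs ^ rhs) & 1)); // SETEQ -> CREQV
          assert((lhs >= rhs) == ((lhs | ~rhs) & 1)); // SETGE -> CRORC(lhs, rhs)
          assert((lhs >  rhs) == ((lhs & ~rhs) & 1)); // SETGT -> CRANDC(lhs, rhs)
          assert((lhs != rhs) == ((lhs ^ rhs) & 1));  // SETNE -> CRXOR
        }
      return 0;
    }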
+ // VSX Elementary Scalar FP arithmetic (SP)
+ let isCommutable = 1 in {
+ def XSADDSP : XX3Form<60, 0,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+ def XSMULSP : XX3Form<60, 16,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+ } // isCommutable
+
+ def XSDIVSP : XX3Form<60, 24,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+ def XSRESP : XX2Form<60, 26,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsresp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfre f32:$XB))]>;
+ def XSSQRTSP : XX2Form<60, 11,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set f32:$XT, (fsqrt f32:$XB))]>;
+ def XSRSQRTESP : XX2Form<60, 10,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+} // AddedComplexity = 400
+} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+ // VSX direct move instructions
+ def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+ def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+ def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
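The direct-move definitions let an i64 or i32 travel between GPRs and VSRs without a stack round trip. As a hedged illustration (my example; the exact lowering depends on the subtarget and code-generation flags), a bit-pattern move like the following is the kind of code that can now use a single mtvsrd/mfvsrd instead of a store/load pair:

    // An i64 <-> f64 bit move, a candidate for the new direct moves.
    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    double bitsToDouble(std::uint64_t bits) {
      double d;
      std::memcpy(&d, &bits, sizeof d); // candidate for a single mtvsrd
      return d;
    }

    std::uint64_t doubleToBits(double d) {
      std::uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits); // candidate for a single mfvsrd
      return bits;
    }

    int main() {
      std::printf("%f\n", bitsToDouble(0x3FF0000000000000ULL)); // 1.0
      std::printf("%llx\n", (unsigned long long)doubleToBits(1.0));
      return 0;
    }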
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
new file mode 100644
index 0000000..b4e1c09
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -0,0 +1,232 @@
+//===-------- PPCLoopDataPrefetch.cpp - Loop Data Prefetching Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Loop Data Prefetching Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-data-prefetch"
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, only read (load) addresses are prefetched; prefetching of
+// store addresses must be requested explicitly with this option.
+static cl::opt<bool>
+PrefetchWrites("ppc-loop-prefetch-writes", cl::Hidden, cl::init(false),
+ cl::desc("Prefetch write addresses"));
+
+// This seems like a reasonable default for the BG/Q (this pass is enabled, by
+// default, only on the BG/Q).
+static cl::opt<unsigned>
+PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300),
+ cl::desc("The loop prefetch distance"));
+
+static cl::opt<unsigned>
+CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
+ cl::desc("The loop prefetch cache line size"));
+
+namespace llvm {
+ void initializePPCLoopDataPrefetchPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopDataPrefetch : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopDataPrefetch() : FunctionPass(ID) {
+ initializePPCLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+ bool runOnLoop(Loop *L);
+
+ private:
+ AssumptionCache *AC;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ };
+}
+
+char PPCLoopDataPrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+
+FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPrefetch(); }
+
+bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = &F.getParent()->getDataLayout();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prefetch in the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ // Calculate the number of iterations ahead to prefetch
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+
+ // If the loop already has prefetches, then assume that the user knows
+ // what he or she is doing and don't add any more.
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J)
+ if (CallInst *CI = dyn_cast<CallInst>(J))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::prefetch)
+ return MadeChange;
+
+ Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
+ }
+ unsigned LoopSize = Metrics.NumInsts;
+ if (!LoopSize)
+ LoopSize = 1;
+
+ unsigned ItersAhead = PrefDist/LoopSize;
+ if (!ItersAhead)
+ ItersAhead = 1;
+
+ SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ if (!PrefetchWrites) continue;
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec)
+ continue;
+
+ // We don't want to double prefetch individual cache lines. If this load
+ // is known to be within one cache line of some other load that has
+ // already been prefetched, then don't prefetch this one as well.
+ bool DupPref = false;
+ for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>,
+ 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end();
+ K != KE; ++K) {
+ const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
+ if (const SCEVConstant *ConstPtrDiff =
+ dyn_cast<SCEVConstant>(PtrDiff)) {
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
+ if (PD < (int64_t) CacheLineSize) {
+ DupPref = true;
+ break;
+ }
+ }
+ }
+ if (DupPref)
+ continue;
+
+ const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
+ SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
+ LSCEVAddRec->getStepRecurrence(*SE)));
+ if (!isSafeToExpand(NextLSCEV, *SE))
+ continue;
+
+ PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
+
+ Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
+ Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
+
+ IRBuilder<> Builder(MemI);
+ Module *M = (*I)->getParent()->getParent();
+ Type *I32 = Type::getInt32Ty((*I)->getContext());
+ Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Builder.CreateCall(
+ PrefetchFunc,
+ {PrefPtrValue,
+ ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
+ ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
+
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
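The arithmetic in runOnLoop above is easiest to see with concrete numbers. A standalone sketch, using the cl::opt defaults from this file (LoopSize, Stride, and Diff are made-up example values):

    // Prefetch-distance arithmetic from PPCLoopDataPrefetch::runOnLoop,
    // with the option defaults above and example loop parameters.
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const unsigned PrefDist = 300;     // ppc-loop-prefetch-distance
      const unsigned CacheLineSize = 64; // ppc-loop-prefetch-cache-line
      unsigned LoopSize = 25;            // instructions in the loop body
      unsigned ItersAhead = PrefDist / LoopSize;
      if (!ItersAhead)
        ItersAhead = 1;

      std::int64_t Stride = 8;           // AddRec step, e.g. a double* walk
      std::printf("prefetch %u iterations (%lld bytes) ahead\n",
                  ItersAhead, (long long)(ItersAhead * Stride));

      // Two streams whose AddRecs differ by less than a cache line are
      // deduplicated: only the first one gets a prefetch.
      std::int64_t Diff = 48;
      if (std::llabs(Diff) < (std::int64_t)CacheLineSize)
        std::printf("second stream skipped (same cache line)\n");
      return 0;
    }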
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
new file mode 100644
index 0000000..b6e7799
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -0,0 +1,390 @@
+//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to prepare loops for pre-increment addressing
+// modes. Additional PHIs are created for loop induction variables used by
+// load/store instructions so that the pre-increment forms can be used.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i)
+// array[i] = c;
+// to look like this:
+// T *p = &array[-1];
+// for (int i = 0; i < n; ++i)
+// *++p = c;
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-preinc-prep"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, we limit this to creating 16 PHIs (which is a little over half
+// of the allocatable register set).
+static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
+ cl::Hidden, cl::init(16),
+ cl::desc("Potential PHI threshold for PPC preinc loop prep"));
+
+namespace llvm {
+ void initializePPCLoopPreIncPrepPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopPreIncPrep : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+ PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ bool runOnLoop(Loop *L);
+ void simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ };
+}
+
+char PPCLoopPreIncPrep::ID = 0;
+static const char *name = "Prepare loop for pre-inc. addressing modes";
+INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+
+FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
+ return new PPCLoopPreIncPrep(TM);
+}
+
+namespace {
+ struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool>
+ {
+ SCEVLess(ScalarEvolution *SE) : SE(SE) {}
+
+ bool operator() (const SCEV *X, const SCEV *Y) const {
+ const SCEV *Diff = SE->getMinusSCEV(X, Y);
+ return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0;
+ }
+
+ protected:
+ ScalarEvolution *SE;
+ };
+}
+
+static bool IsPtrInBounds(Value *BasePtr) {
+ Value *StrippedBasePtr = BasePtr;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
+ StrippedBasePtr = BC->getOperand(0);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
+ return GEP->isInBounds();
+
+ return false;
+}
+
+static Value *GetPointerOperand(Value *MemI) {
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
+ return LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
+ return SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+ return IMemI->getArgOperand(0);
+ }
+
+ return 0;
+}
+
+bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolution>();
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prep. the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
+ BasicBlock *Header = L->getHeader();
+
+ const PPCSubtarget *ST =
+ TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
+
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
+
+ // Collect buckets of comparable addresses used by loads and stores.
+ typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
+ SmallVector<Bucket, 16> Buckets;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(0);
+ } else continue;
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ // There are no update forms for Altivec vector load/stores.
+ if (ST && ST->hasAltivec() &&
+ PtrValue->getType()->getPointerElementType()->isVectorTy())
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
+ continue;
+ }
+
+ bool FoundBucket = false;
+ for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
+ for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
+ K != KE; ++K) {
+ const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
+ if (isa<SCEVConstant>(Diff)) {
+ Buckets[i].insert(std::make_pair(LSCEV, MemI));
+ FoundBucket = true;
+ break;
+ }
+ }
+
+ if (!FoundBucket) {
+ Buckets.push_back(Bucket(SCEVLess(SE)));
+ Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
+ }
+ }
+ }
+
+ if (Buckets.empty() || Buckets.size() > MaxVars)
+ return MadeChange;
+
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+ // If there is no loop predecessor, or the loop predecessor's terminator
+ // returns a value (which might contribute to determining the loop's
+ // iteration space), insert a new preheader for the loop.
+ if (!LoopPredecessor ||
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
+ LoopPredecessor = InsertPreheaderForLoop(L, this);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
+ if (!LoopPredecessor)
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
+ // The base address of each bucket is transformed into a phi and the others
+ // are rewritten as offsets of that variable.
+
+ const SCEVAddRecExpr *BasePtrSCEV =
+ cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
+ if (!BasePtrSCEV->isAffine())
+ continue;
+
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
+ Instruction *MemI = Buckets[i].begin()->second;
+ Value *BasePtr = GetPointerOperand(MemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+ Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
+ if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ continue;
+
+ const SCEVConstant *BasePtrIncSCEV =
+ dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+ if (!BasePtrIncSCEV)
+ continue;
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+ if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+ continue;
+
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ MemI->hasName() ? MemI->getName() + ".phi" : "",
+ Header->getFirstNonPHI());
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *InsPoint = Header->getFirstInsertionPt();
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
+ PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, *PI);
+ }
+
+ Instruction *NewBasePtr;
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
+ PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
+ else
+ NewBasePtr = PtrInc;
+
+ if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+ BBChanged.insert(IDel->getParent());
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+ Value *LastNewPtr = NewBasePtr;
+ for (Bucket::iterator I = std::next(Buckets[i].begin()),
+ IE = Buckets[i].end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->second);
+ assert(Ptr && "No pointer operand");
+ if (Ptr == LastNewPtr)
+ continue;
+
+ Instruction *RealNewPtr;
+ const SCEVConstant *Diff =
+ cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
+ if (Diff->isZero()) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = 0;
+ // Handle a null PtrIP first: Ptr need not be an Instruction at all
+ // (it may be an argument or a global), so check before using isa<>.
+ else if (!PtrIP)
+ PtrIP = I->second;
+ else if (isa<PHINode>(PtrIP))
+ PtrIP = PtrIP->getParent()->getFirstInsertionPt();
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, Diff->getValue(),
+ I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ Ptr->hasName() ? Ptr->getName() + ".cast" : "");
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+ LastNewPtr = RealNewPtr;
+ }
+
+ MadeChange = true;
+ }
+
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ if (BBChanged.count(*I))
+ DeleteDeadPHIs(*I);
+ }
+
+ return MadeChange;
+}
+
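Written out as compilable code, the transformation sketched in the file's header comment looks like the following (my rendering; the pass works on IR, and the &array[-1] start mirrors the BasePtrStartSCEV = Start - Inc computation above, even though forming that address is formally out of bounds in ISO C++):

    // Before/after shape of the pre-increment preparation. The "after"
    // form is what update-form PPC accesses (e.g. stbu) want: the address
    // is bumped and used in one instruction.
    #include <cstdio>

    void before(char *array, int n, char c) {
      for (int i = 0; i < n; ++i)
        array[i] = c; // separate index arithmetic on every iteration
    }

    void after(char *array, int n, char c) {
      char *p = &array[-1]; // start one element before the first store
      for (int i = 0; i < n; ++i)
        *++p = c; // pre-increment + store: one update-form access
    }

    int main() {
      char a[4] = {}, b[4] = {};
      before(a, 4, 'x');
      after(b, 4, 'x');
      std::printf("%.4s %.4s\n", a, b); // xxxx xxxx
      return 0;
    }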
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index c417199..f1e2865 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
MCContext &Ctx = AP.OutContext;
bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
@@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
// If the target flags on the operand changes the name of the symbol, do that
@@ -85,7 +85,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
} else {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
+ StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
}
return Sym;
}
@@ -137,12 +137,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
- case PPCII::MO_TLSGD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
- break;
- case PPCII::MO_TLSLD:
- RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
- break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
@@ -173,7 +167,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
}
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
@@ -190,13 +184,16 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
- MCOp = MCOperand::CreateReg(MO.getReg());
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), AP.OutContext));
break;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 4aff95a..ec4e0a5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
void PPCFunctionInfo::anchor() { }
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
- const DataLayout *DL = MF.getSubtarget().getDataLayout();
- return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Twine(MF.getFunctionNumber())+"$poff");
+ const DataLayout *DL = MF.getTarget().getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ Twine(MF.getFunctionNumber()) +
+ "$poff");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 37b2ff8..607cdf6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -62,6 +62,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// entry, even though LR may otherwise apparently not be used.
bool LRStoreRequired;
+ /// This function makes use of the PPC64 ELF TOC base pointer (register r2).
+ bool UsesTOCBasePtr;
+
/// MinReservedArea - This is the frame size that is at least reserved in a
/// potential caller (parameter+linkage area).
unsigned MinReservedArea;
@@ -112,6 +115,7 @@ public:
SpillsCR(false),
SpillsVRSAVE(false),
LRStoreRequired(false),
+ UsesTOCBasePtr(false),
MinReservedArea(0),
TailCallSPDelta(0),
HasFastCall(false),
@@ -164,6 +168,9 @@ public:
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
+ void setUsesTOCBasePtr() { UsesTOCBasePtr = true; }
+ bool usesTOCBasePtr() const { return UsesTOCBasePtr; }
+
void setHasFastCall() { HasFastCall = true; }
bool hasFastCall() const { return HasFastCall;}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index aa0da7a..656376c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -56,11 +57,11 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
- : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
- ST.isPPC64() ? 0 : 1,
- ST.isPPC64() ? 0 : 1),
- Subtarget(ST) {
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
// Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RC_NOX0RegClass;
return &PPC::GPRC_NOR0RegClass;
}
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
}
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
@@ -108,23 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_SaveList :
- CSR_Darwin64_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_SaveList :
- CSR_Darwin32_SaveList);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_SaveList :
- CSR_SVR464_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_SaveList :
- CSR_SVR432_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
+
+ // On PPC64, we might need to save r2 (but only if it is not reserved).
+ bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
}
-const uint32_t*
-PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
@@ -134,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_RegMask :
- CSR_Darwin64_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_RegMask :
- CSR_Darwin32_RegMask);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_RegMask :
- CSR_SVR464_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_RegMask :
- CSR_SVR432_RegMask);
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
}
const uint32_t*
@@ -155,17 +158,15 @@ PPCRegisterInfo::getNoPreservedMask() const {
}
void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
- unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
- for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
- unsigned Reg = PseudoRegs[i];
- Mask[Reg / 32] &= ~(1u << (Reg % 32));
- }
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -202,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (Subtarget.isPPC64()) {
+ if (TM.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::X1);
@@ -216,7 +217,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
- Reserved.set(PPC::X2);
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+ // inline asm block, then we can treat r2 as an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ Reserved.set(PPC::X2);
+ else
+ Reserved.reset(PPC::R2);
}
}
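A hedged illustration of what the new r2 logic means at the source level (my example; whether a given access really goes through the TOC depends on the code model and ABI):

    // Illustrative only: which functions keep r2 reserved under the new rule.
    int GlobalCounter = 0;

    int leaf(int a, int b) {
      // No TOC-based addressing and no inline asm: usesTOCBasePtr() stays
      // false, so r2 can be allocated as an ordinary callee-saved register.
      return a * b + a;
    }

    int bump() {
      // Materializing a global's address typically goes through the TOC on
      // 64-bit SVR4, so functions like this keep r2 reserved.
      return ++GlobalCounter;
    }

    int main() { return leaf(2, 3) + bump() - 9; } // returns 0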
@@ -224,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R29);
else
Reserved.set(PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -244,10 +254,10 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-unsigned
-PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -262,6 +272,9 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
+ case PPC::QFRCRegClassID:
+ case PPC::QSRCRegClassID:
+ case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
@@ -269,14 +282,17 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 32 - DefaultSafety;
case PPC::VSRCRegClassID:
case PPC::VSFRCRegClassID:
+ case PPC::VSSRCRegClassID:
return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
}
-const TargetRegisterClass*
-PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (Subtarget.hasVSX()) {
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
@@ -285,9 +301,11 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
return &PPC::VSFRCRegClass;
else if (RC == &PPC::VRRCRegClass)
return &PPC::VSRCRegClass;
+ else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector())
+ return &PPC::VSSRCRegClass;
}
- return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
//===----------------------------------------------------------------------===//
@@ -310,10 +328,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Determine whether 64-bit pointers are used.
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
@@ -322,10 +341,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
+ unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -430,10 +446,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -474,10 +491,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -509,37 +527,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-static unsigned getCRFromCRBit(unsigned SrcReg) {
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- assert(Reg != 0 && "Invalid CR bit register");
- return Reg;
-}
-
void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -547,10 +534,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -590,10 +578,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -637,7 +626,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -662,7 +652,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -684,14 +675,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we have
// previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
FrameIdx = 0;
else {
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -744,8 +735,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -811,8 +803,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) {
+ if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
if (!(hasBasePointer(MF) && FrameIndex < 0))
Offset += MFI->getStackSize();
}
@@ -835,7 +826,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- bool is64Bit = Subtarget.isPPC64();
+ bool is64Bit = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -873,23 +864,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- if (!Subtarget.isPPC64())
+ if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return PPC::X30;
if (Subtarget.isSVR4ABI() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ TM.getRelocationModel() == Reloc::PIC_)
return PPC::R29;
return PPC::R30;
@@ -915,16 +908,12 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
+ unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -957,9 +946,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
-
- const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
unsigned StackEst =
PPCFI->determineFrameLayout(MF, false, true);
@@ -976,7 +965,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The frame pointer will point to the end of the stack, so estimate the
// offset as the difference between the object offset and the FP location.
- return !isFrameOffsetLegal(MI, Offset);
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
}
/// Insert defining instruction(s) for BaseReg to
@@ -985,7 +974,7 @@ void PPCRegisterInfo::
materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -993,7 +982,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -1018,7 +1008,8 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.constrainRegClass(BaseReg,
@@ -1026,6 +1017,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef90..d304e1d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -22,15 +22,44 @@
#include "PPCGenRegisterInfo.inc"
namespace llvm {
-class PPCSubtarget;
-class TargetInstrInfo;
-class Type;
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
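The eight-way chain above is deliberately explicit: the CR-bit register numbers come from TableGen-generated code, so nothing guarantees a layout that arithmetic could exploit. A sketch of the arithmetic alternative, under the (unverified) assumption that the 32 CR bits were numbered contiguously in LT/GT/EQ/UN groups of four, shows why the explicit chain is the safer choice:

    // Sketch only: arithmetic CR-bit -> CR mapping, ASSUMING the enum lays
    // out the 32 CR bits contiguously in groups of four. The generated PPC
    // register enum makes no such promise, hence the explicit if/else
    // chain in the real getCRFromCRBit().
    #include <cassert>

    enum Reg { CR0 = 100, CR0LT = 200 /* ...31 more CR bits... */ };

    unsigned crFromBitAssumingLayout(unsigned SrcReg) {
      assert(SrcReg >= CR0LT && SrcReg < CR0LT + 32 && "not a CR bit");
      return CR0 + (SrcReg - CR0LT) / 4;
    }

    int main() {
      // With the assumed layout, bits 4..7 all map to CR1 (== CR0 + 1).
      assert(crFromBitAssumingLayout(CR0LT + 5) == CR0 + 1);
      return 0;
    }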
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
- const PPCSubtarget &Subtarget;
+ const PPCTargetMachine &TM;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget);
+ PPCRegisterInfo(const PPCTargetMachine &TM);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -40,13 +69,14 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -97,7 +127,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 2fdf04e..e5f363c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@@ -114,6 +121,12 @@ foreach Index = 0-31 in {
def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
}
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+ def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -219,17 +232,50 @@ def RM: PPCReg<"**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, FP, BP)>;
+ R31, R0, R1, FP, BP)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, FP8, BP8)>;
+ X31, X13, X0, X1, FP8, BP8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
// prevent r0 from being allocated for use by those instructions.
-def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
-def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
@@ -242,7 +288,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
-def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
@@ -270,6 +316,19 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
VF22, VF21, VF20)>;
def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+// Register class for single precision scalars in VSX registers
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+ (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+ // These are actually stored as floating-point values where a positive
+ // number is true and anything else (including NaN) is false.
+ let Size = 256;
+}
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
@@ -285,6 +344,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 3ae3793..d0954a1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -13,6 +13,7 @@
def IIC_IntSimple : InstrItinClass;
def IIC_IntGeneral : InstrItinClass;
def IIC_IntCompare : InstrItinClass;
+def IIC_IntISEL : InstrItinClass;
def IIC_IntDivD : InstrItinClass;
def IIC_IntDivW : InstrItinClass;
def IIC_IntMFFS : InstrItinClass;
@@ -123,400 +124,3 @@ include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction to itinerary class map - When add new opcodes to the supported
-// set, refer to the following table to determine which itinerary class the
-// opcode belongs.
-//
-// opcode itinerary class
-// ====== ===============
-// add IIC_IntSimple
-// addc IIC_IntGeneral
-// adde IIC_IntGeneral
-// addi IIC_IntSimple
-// addic IIC_IntGeneral
-// addic. IIC_IntGeneral
-// addis IIC_IntSimple
-// addme IIC_IntGeneral
-// addze IIC_IntGeneral
-// and IIC_IntSimple
-// andc IIC_IntSimple
-// andi. IIC_IntGeneral
-// andis. IIC_IntGeneral
-// b IIC_BrB
-// bc IIC_BrB
-// bcctr IIC_BrB
-// bclr IIC_BrB
-// cmp IIC_IntCompare
-// cmpi IIC_IntCompare
-// cmpl IIC_IntCompare
-// cmpli IIC_IntCompare
-// cntlzd IIC_IntRotateD
-// cntlzw IIC_IntGeneral
-// crand IIC_BrCR
-// crandc IIC_BrCR
-// creqv IIC_BrCR
-// crnand IIC_BrCR
-// crnor IIC_BrCR
-// cror IIC_BrCR
-// crorc IIC_BrCR
-// crxor IIC_BrCR
-// dcba IIC_LdStDCBA
-// dcbf IIC_LdStDCBF
-// dcbi IIC_LdStDCBI
-// dcbst IIC_LdStDCBF
-// dcbt IIC_LdStLoad
-// dcbtst IIC_LdStLoad
-// dcbz IIC_LdStDCBF
-// divd IIC_IntDivD
-// divdu IIC_IntDivD
-// divw IIC_IntDivW
-// divwu IIC_IntDivW
-// dss IIC_LdStDSS
-// dst IIC_LdStDSS
-// dstst IIC_LdStDSS
-// eciwx IIC_LdStLoad
-// ecowx IIC_LdStLoad
-// eieio IIC_LdStLoad
-// eqv IIC_IntSimple
-// extsb IIC_IntSimple
-// extsh IIC_IntSimple
-// extsw IIC_IntSimple
-// fabs IIC_FPGeneral
-// fadd IIC_FPAddSub
-// fadds IIC_FPGeneral
-// fcfid IIC_FPGeneral
-// fcmpo IIC_FPCompare
-// fcmpu IIC_FPCompare
-// fctid IIC_FPGeneral
-// fctidz IIC_FPGeneral
-// fctiw IIC_FPGeneral
-// fctiwz IIC_FPGeneral
-// fdiv IIC_FPDivD
-// fdivs IIC_FPDivS
-// fmadd IIC_FPFused
-// fmadds IIC_FPGeneral
-// fmr IIC_FPGeneral
-// fmsub IIC_FPFused
-// fmsubs IIC_FPGeneral
-// fmul IIC_FPFused
-// fmuls IIC_FPGeneral
-// fnabs IIC_FPGeneral
-// fneg IIC_FPGeneral
-// fnmadd IIC_FPFused
-// fnmadds IIC_FPGeneral
-// fnmsub IIC_FPFused
-// fnmsubs IIC_FPGeneral
-// fres IIC_FPRes
-// frsp IIC_FPGeneral
-// frsqrte IIC_FPGeneral
-// fsel IIC_FPGeneral
-// fsqrt IIC_FPSqrtD
-// fsqrts IIC_FPSqrtS
-// fsub IIC_FPAddSub
-// fsubs IIC_FPGeneral
-// icbi IIC_LdStICBI
-// isync IIC_SprISYNC
-// lbz IIC_LdStLoad
-// lbzu IIC_LdStLoadUpd
-// lbzux IIC_LdStLoadUpdX
-// lbzx IIC_LdStLoad
-// ld IIC_LdStLD
-// ldarx IIC_LdStLDARX
-// ldu IIC_LdStLDU
-// ldux IIC_LdStLDUX
-// ldx IIC_LdStLD
-// lfd IIC_LdStLFD
-// lfdu IIC_LdStLFDU
-// lfdux IIC_LdStLFDUX
-// lfdx IIC_LdStLFD
-// lfs IIC_LdStLFD
-// lfsu IIC_LdStLFDU
-// lfsux IIC_LdStLFDUX
-// lfsx IIC_LdStLFD
-// lha IIC_LdStLHA
-// lhau IIC_LdStLHAU
-// lhaux IIC_LdStLHAUX
-// lhax IIC_LdStLHA
-// lhbrx IIC_LdStLoad
-// lhz IIC_LdStLoad
-// lhzu IIC_LdStLoadUpd
-// lhzux IIC_LdStLoadUpdX
-// lhzx IIC_LdStLoad
-// lmw IIC_LdStLMW
-// lswi IIC_LdStLMW
-// lswx IIC_LdStLMW
-// lvebx IIC_LdStLVecX
-// lvehx IIC_LdStLVecX
-// lvewx IIC_LdStLVecX
-// lvsl IIC_LdStLVecX
-// lvsr IIC_LdStLVecX
-// lvx IIC_LdStLVecX
-// lvxl IIC_LdStLVecX
-// lwa IIC_LdStLWA
-// lwarx IIC_LdStLWARX
-// lwaux IIC_LdStLHAUX
-// lwax IIC_LdStLHA
-// lwbrx IIC_LdStLoad
-// lwz IIC_LdStLoad
-// lwzu IIC_LdStLoadUpd
-// lwzux IIC_LdStLoadUpdX
-// lwzx IIC_LdStLoad
-// mcrf IIC_BrMCR
-// mcrfs IIC_FPGeneral
-// mcrxr IIC_BrMCRX
-// mfcr IIC_SprMFCR
-// mffs IIC_IntMFFS
-// mfmsr IIC_SprMFMSR
-// mfspr IIC_SprMFSPR
-// mfsr IIC_SprMFSR
-// mfsrin IIC_SprMFSR
-// mftb IIC_SprMFTB
-// mfvscr IIC_IntMFVSCR
-// mtcrf IIC_BrMCRX
-// mtfsb0 IIC_IntMTFSB0
-// mtfsb1 IIC_IntMTFSB0
-// mtfsf IIC_IntMTFSB0
-// mtfsfi IIC_IntMTFSB0
-// mtmsr IIC_SprMTMSR
-// mtmsrd IIC_LdStLD
-// mtspr IIC_SprMTSPR
-// mtsr IIC_SprMTSR
-// mtsrd IIC_IntMTSRD
-// mtsrdin IIC_IntMTSRD
-// mtsrin IIC_SprMTSRIN
-// mtvscr IIC_IntMFVSCR
-// mulhd IIC_IntMulHD
-// mulhdu IIC_IntMulHD
-// mulhw IIC_IntMulHW
-// mulhwu IIC_IntMulHWU
-// mulld IIC_IntMulHD
-// mulli IIC_IntMulLI
-// mullw IIC_IntMulHW
-// nand IIC_IntSimple
-// neg IIC_IntSimple
-// nor IIC_IntSimple
-// or IIC_IntSimple
-// orc IIC_IntSimple
-// ori IIC_IntSimple
-// oris IIC_IntSimple
-// rfi IIC_SprRFI
-// rfid IIC_IntRFID
-// rldcl IIC_IntRotateD
-// rldcr IIC_IntRotateD
-// rldic IIC_IntRotateDI
-// rldicl IIC_IntRotateDI
-// rldicr IIC_IntRotateDI
-// rldimi IIC_IntRotateDI
-// rlwimi IIC_IntRotate
-// rlwinm IIC_IntGeneral
-// rlwnm IIC_IntGeneral
-// sc IIC_SprSC
-// slbia IIC_LdStSLBIA
-// slbie IIC_LdStSLBIE
-// sld IIC_IntRotateD
-// slw IIC_IntGeneral
-// srad IIC_IntRotateD
-// sradi IIC_IntRotateDI
-// sraw IIC_IntShift
-// srawi IIC_IntShift
-// srd IIC_IntRotateD
-// srw IIC_IntGeneral
-// stb IIC_LdStStore
-// stbu IIC_LdStStoreUpd
-// stbux IIC_LdStStoreUpd
-// stbx IIC_LdStStore
-// std IIC_LdStSTD
-// stdcx. IIC_LdStSTDCX
-// stdu IIC_LdStSTDU
-// stdux IIC_LdStSTDUX
-// stdx IIC_LdStSTD
-// stfd IIC_LdStSTFD
-// stfdu IIC_LdStSTFDU
-// stfdux IIC_LdStSTFDU
-// stfdx IIC_LdStSTFD
-// stfiwx IIC_LdStSTFD
-// stfs IIC_LdStSTFD
-// stfsu IIC_LdStSTFDU
-// stfsux IIC_LdStSTFDU
-// stfsx IIC_LdStSTFD
-// sth IIC_LdStStore
-// sthbrx IIC_LdStStore
-// sthu IIC_LdStStoreUpd
-// sthux IIC_LdStStoreUpd
-// sthx IIC_LdStStore
-// stmw IIC_LdStLMW
-// stswi IIC_LdStLMW
-// stswx IIC_LdStLMW
-// stvebx IIC_LdStSTVEBX
-// stvehx IIC_LdStSTVEBX
-// stvewx IIC_LdStSTVEBX
-// stvx IIC_LdStSTVEBX
-// stvxl IIC_LdStSTVEBX
-// stw IIC_LdStStore
-// stwbrx IIC_LdStStore
-// stwcx. IIC_LdStSTWCX
-// stwu IIC_LdStStoreUpd
-// stwux IIC_LdStStoreUpd
-// stwx IIC_LdStStore
-// subf IIC_IntGeneral
-// subfc IIC_IntGeneral
-// subfe IIC_IntGeneral
-// subfic IIC_IntGeneral
-// subfme IIC_IntGeneral
-// subfze IIC_IntGeneral
-// sync IIC_LdStSync
-// td IIC_IntTrapD
-// tdi IIC_IntTrapD
-// tlbia IIC_LdStSLBIA
-// tlbie IIC_LdStDCBF
-// tlbsync IIC_SprTLBSYNC
-// tw IIC_IntTrapW
-// twi IIC_IntTrapW
-// vaddcuw IIC_VecGeneral
-// vaddfp IIC_VecFP
-// vaddsbs IIC_VecGeneral
-// vaddshs IIC_VecGeneral
-// vaddsws IIC_VecGeneral
-// vaddubm IIC_VecGeneral
-// vaddubs IIC_VecGeneral
-// vadduhm IIC_VecGeneral
-// vadduhs IIC_VecGeneral
-// vadduwm IIC_VecGeneral
-// vadduws IIC_VecGeneral
-// vand IIC_VecGeneral
-// vandc IIC_VecGeneral
-// vavgsb IIC_VecGeneral
-// vavgsh IIC_VecGeneral
-// vavgsw IIC_VecGeneral
-// vavgub IIC_VecGeneral
-// vavguh IIC_VecGeneral
-// vavguw IIC_VecGeneral
-// vcfsx IIC_VecFP
-// vcfux IIC_VecFP
-// vcmpbfp IIC_VecFPCompare
-// vcmpeqfp IIC_VecFPCompare
-// vcmpequb IIC_VecGeneral
-// vcmpequh IIC_VecGeneral
-// vcmpequw IIC_VecGeneral
-// vcmpgefp IIC_VecFPCompare
-// vcmpgtfp IIC_VecFPCompare
-// vcmpgtsb IIC_VecGeneral
-// vcmpgtsh IIC_VecGeneral
-// vcmpgtsw IIC_VecGeneral
-// vcmpgtub IIC_VecGeneral
-// vcmpgtuh IIC_VecGeneral
-// vcmpgtuw IIC_VecGeneral
-// vctsxs IIC_VecFP
-// vctuxs IIC_VecFP
-// vexptefp IIC_VecFP
-// vlogefp IIC_VecFP
-// vmaddfp IIC_VecFP
-// vmaxfp IIC_VecFPCompare
-// vmaxsb IIC_VecGeneral
-// vmaxsh IIC_VecGeneral
-// vmaxsw IIC_VecGeneral
-// vmaxub IIC_VecGeneral
-// vmaxuh IIC_VecGeneral
-// vmaxuw IIC_VecGeneral
-// vmhaddshs IIC_VecComplex
-// vmhraddshs IIC_VecComplex
-// vminfp IIC_VecFPCompare
-// vminsb IIC_VecGeneral
-// vminsh IIC_VecGeneral
-// vminsw IIC_VecGeneral
-// vminub IIC_VecGeneral
-// vminuh IIC_VecGeneral
-// vminuw IIC_VecGeneral
-// vmladduhm IIC_VecComplex
-// vmrghb IIC_VecPerm
-// vmrghh IIC_VecPerm
-// vmrghw IIC_VecPerm
-// vmrglb IIC_VecPerm
-// vmrglh IIC_VecPerm
-// vmrglw IIC_VecPerm
-// vmsubfp IIC_VecFP
-// vmsummbm IIC_VecComplex
-// vmsumshm IIC_VecComplex
-// vmsumshs IIC_VecComplex
-// vmsumubm IIC_VecComplex
-// vmsumuhm IIC_VecComplex
-// vmsumuhs IIC_VecComplex
-// vmulesb IIC_VecComplex
-// vmulesh IIC_VecComplex
-// vmuleub IIC_VecComplex
-// vmuleuh IIC_VecComplex
-// vmulosb IIC_VecComplex
-// vmulosh IIC_VecComplex
-// vmuloub IIC_VecComplex
-// vmulouh IIC_VecComplex
-// vnor IIC_VecGeneral
-// vor IIC_VecGeneral
-// vperm IIC_VecPerm
-// vpkpx IIC_VecPerm
-// vpkshss IIC_VecPerm
-// vpkshus IIC_VecPerm
-// vpkswss IIC_VecPerm
-// vpkswus IIC_VecPerm
-// vpkuhum IIC_VecPerm
-// vpkuhus IIC_VecPerm
-// vpkuwum IIC_VecPerm
-// vpkuwus IIC_VecPerm
-// vrefp IIC_VecFPRound
-// vrfim IIC_VecFPRound
-// vrfin IIC_VecFPRound
-// vrfip IIC_VecFPRound
-// vrfiz IIC_VecFPRound
-// vrlb IIC_VecGeneral
-// vrlh IIC_VecGeneral
-// vrlw IIC_VecGeneral
-// vrsqrtefp IIC_VecFP
-// vsel IIC_VecGeneral
-// vsl IIC_VecVSL
-// vslb IIC_VecGeneral
-// vsldoi IIC_VecPerm
-// vslh IIC_VecGeneral
-// vslo IIC_VecPerm
-// vslw IIC_VecGeneral
-// vspltb IIC_VecPerm
-// vsplth IIC_VecPerm
-// vspltisb IIC_VecPerm
-// vspltish IIC_VecPerm
-// vspltisw IIC_VecPerm
-// vspltw IIC_VecPerm
-// vsr IIC_VecVSR
-// vsrab IIC_VecGeneral
-// vsrah IIC_VecGeneral
-// vsraw IIC_VecGeneral
-// vsrb IIC_VecGeneral
-// vsrh IIC_VecGeneral
-// vsro IIC_VecPerm
-// vsrw IIC_VecGeneral
-// vsubcuw IIC_VecGeneral
-// vsubfp IIC_VecFP
-// vsubsbs IIC_VecGeneral
-// vsubshs IIC_VecGeneral
-// vsubsws IIC_VecGeneral
-// vsububm IIC_VecGeneral
-// vsububs IIC_VecGeneral
-// vsubuhm IIC_VecGeneral
-// vsubuhs IIC_VecGeneral
-// vsubuwm IIC_VecGeneral
-// vsubuws IIC_VecGeneral
-// vsum2sws IIC_VecComplex
-// vsum4sbs IIC_VecComplex
-// vsum4shs IIC_VecComplex
-// vsum4ubs IIC_VecComplex
-// vsumsws IIC_VecComplex
-// vupkhpx IIC_VecPerm
-// vupkhsb IIC_VecPerm
-// vupkhsh IIC_VecPerm
-// vupklpx IIC_VecPerm
-// vupklsb IIC_VecPerm
-// vupklsh IIC_VecPerm
-// vxor IIC_VecGeneral
-// xor IIC_IntSimple
-// xori IIC_IntSimple
-// xoris IIC_IntSimple
-//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 218fed2..04a43bc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -121,6 +121,14 @@ def PPC440Itineraries : ProcessorItineraries<
[2, 0, 0],
[P440_GPR_Bypass,
P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass, NoBypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_IRACC, P440_LRACC]>,
InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 1447696..21a357a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -29,6 +29,8 @@ def PPCA2Itineraries : ProcessorItineraries<
[1, 0, 0]>,
InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
[2, 0, 0]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
[2, 0, 0]>,
InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index dab89e3..36b8517 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -54,6 +54,12 @@ def PPCE500mcItineraries : ProcessorItineraries<
[4, 1, 1], // Latency = 1
[E500_GPR_Bypass,
E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass,
+ E500_CR_Bypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SFX0, E500_SFX1]>],
[5, 1, 1], // Latency = 1 or 2
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index de097d9..7c2693e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -58,6 +58,12 @@ def PPCE5500Itineraries : ProcessorItineraries<
[5, 2, 2], // Latency = 1
[E5500_GPR_Bypass,
E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass,
+ E5500_CR_Bypass]>,
InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
[6, 2, 2], // Latency = 1 or 2
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 03693cb..635d154 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -89,6 +89,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [1, 1, 1, 1]>,
InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 0797180..020739b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -66,6 +66,10 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
P8_LU2, P8_LSU1, P8_LSU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+ InstrStage<1, [P8_BRU]>],
+ [1, 1, 1, 1]>,
InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index d8ba1c7..f313b0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -14,13 +14,13 @@
#include "PPCSubtarget.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -36,39 +36,9 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const Triple &T) {
- bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
- std::string Ret;
-
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (T.getArch() == Triple::ppc64le)
- Ret = "e";
- else
- Ret = "E";
-
- Ret += DataLayout::getManglingComponent(T);
-
- // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
- // pointers.
- if (!is64Bit || T.getOS() == Triple::Lv2)
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (is64Bit || !T.isOSDarwin())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (is64Bit)
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+ cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+ cl::Hidden);
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
@@ -80,12 +50,10 @@ PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const PPCTargetMachine &TM)
: PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- DL(getDataLayoutString(TargetTriple)),
IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le),
- TargetABI(PPC_ABI_UNKNOWN),
- FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
- TLInfo(TM), TSInfo(&DL) {}
+ TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {}
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
@@ -99,6 +67,8 @@ void PPCSubtarget::initializeEnvironment() {
HasQPX = false;
HasVSX = false;
HasP8Vector = false;
+ HasP8Altivec = false;
+ HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -112,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
@@ -122,18 +94,24 @@ void PPCSubtarget::initializeEnvironment() {
DeprecatedMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
+ HasICBT = false;
+ HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
+ HasDirectMove = false;
+ IsQPXStackUnaligned = false;
+ HasHTM = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
-#if (defined(__APPLE__) || defined(__linux__)) && \
- (defined(__ppc__) || defined(__powerpc__))
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
+ if (CPUName.empty()) {
+ // If cross-compiling with -march=ppc64le without -mcpu, default to ppc64le.
+ if (TargetTriple.getArch() == Triple::ppc64le)
+ CPUName = "ppc64le";
+ else
+ CPUName = "generic";
+ }
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -153,28 +131,18 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
- if (hasQPX() || isBGQ())
- StackAlignment = 32;
+ IsQPXStackUnaligned = QPXStackUnaligned;
+ StackAlignment = getPlatformStackAlignment();
// Determine endianness.
+ // FIXME: Part of the TargetMachine.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
-
- // Determine default ABI.
- if (TargetABI == PPC_ABI_UNKNOWN) {
- if (!isDarwin() && IsPPC64) {
- if (IsLittleEndian)
- TargetABI = PPC_ABI_ELFv2;
- else
- TargetABI = PPC_ABI_ELFv1;
- }
- }
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
-bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
- const TargetMachine &TM) const {
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
@@ -242,3 +210,5 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
+bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
+bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index a1b644d..8d95508 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -68,9 +68,6 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- // Calculates type size & alignment
- const DataLayout DL;
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -92,6 +89,8 @@ protected:
bool HasQPX;
bool HasVSX;
bool HasP8Vector;
+ bool HasP8Altivec;
+ bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -102,6 +101,8 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
@@ -113,13 +114,18 @@ protected:
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
-
- enum {
- PPC_ABI_UNKNOWN,
- PPC_ABI_ELFv1,
- PPC_ABI_ELFv2
- } TargetABI;
-
+ bool HasICBT;
+ bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasDirectMove;
+ bool HasHTM;
+
+ /// When targeting QPX on a stock PPC64 Linux kernel, where the stack
+ /// alignment has not been changed, we need to keep the default 16-byte
+ /// alignment of the stack.
+ bool IsQPXStackUnaligned;
+
+ const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
PPCTargetLowering TLInfo;
@@ -154,7 +160,6 @@ public:
const PPCFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const DataLayout *getDataLayout() const override { return &DL; }
const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const PPCTargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -165,6 +170,7 @@ public:
const PPCRegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const PPCTargetMachine &getTargetMachine() const { return TM; }
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
@@ -177,7 +183,7 @@ private:
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
///
- bool isPPC64() const { return IsPPC64; }
+ bool isPPC64() const;
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
@@ -195,8 +201,7 @@ public:
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV,
- const TargetMachine &TM) const;
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -218,9 +223,13 @@ public:
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
+ bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
@@ -230,6 +239,21 @@ public:
bool isE500() const { return IsE500; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
+ bool hasICBT() const { return HasICBT; }
+ bool hasInvariantFunctionDescriptors() const {
+ return HasInvariantFunctionDescriptors;
+ }
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
+
+ bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
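+ // The BG/Q requires a 32-byte-aligned stack when QPX is in use, unless
+ // -qpx-stack-unaligned was given; all other configurations use 16 bytes.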
+ unsigned getPlatformStackAlignment() const {
+ if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+ return 32;
+
+ return 16;
+ }
+ bool hasHTM() const { return HasHTM; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -241,9 +265,9 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
- bool isDarwinABI() const { return isDarwin(); }
- bool isSVR4ABI() const { return !isDarwin(); }
- bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
+ bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwinABI(); }
+ bool isELFv2ABI() const;
bool enableEarlyIfConversion() const override { return hasISEL(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
new file mode 100644
index 0000000..2dc0d82
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -0,0 +1,170 @@
+//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
+// separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of
+// which define GPR3. A copy is added from GPR3 to the target virtual
+// register of the original instruction. The GETtlsADDR[32] is really
+// a call instruction, so its target register is constrained to be GPR3.
+// This is not true of ADDItls[gd]L[32], but there is a legacy linker
+// optimization bug that requires the target register of the addi of
+// a local- or general-dynamic TLS access sequence to be GPR3.
+//
+// This is done in a late pass so that TLS variable accesses can be
+// fully commoned by MachineCSE.
+//
+//===----------------------------------------------------------------------===//
+
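+// A sketch of the expansion for the 64-bit general-dynamic form (operand
+// syntax illustrative):
+//
+//   %vreg0 = ADDItlsgdLADDR %vreg1, @sym@got@tlsgd@l, @sym
+// becomes
+//   %X3    = ADDItlsgdL %vreg1, @sym@got@tlsgd@l
+//   %X3    = GETtlsADDR %X3, @sym
+//   %vreg0 = COPY %X3
+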
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-tls-dynamic-call"
+
+namespace llvm {
+ void initializePPCTLSDynamicCallPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCTLSDynamicCall : public MachineFunctionPass {
+ static char ID;
+ PPCTLSDynamicCall() : MachineFunctionPass(ID) {
+ initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCInstrInfo *TII;
+ LiveIntervals *LIS;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE;) {
+ MachineInstr *MI = I;
+
+ if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR &&
+ MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR32) {
+ ++I;
+ continue;
+ }
+
+ DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+
+ unsigned OutReg = MI->getOperand(0).getReg();
+ unsigned InReg = MI->getOperand(1).getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+ unsigned Opc1, Opc2;
+ SmallVector<unsigned, 4> OrigRegs;
+ OrigRegs.push_back(OutReg);
+ OrigRegs.push_back(InReg);
+ OrigRegs.push_back(GPR3);
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Opcode inconsistency error");
+ case PPC::ADDItlsgdLADDR:
+ Opc1 = PPC::ADDItlsgdL;
+ Opc2 = PPC::GETtlsADDR;
+ break;
+ case PPC::ADDItlsldLADDR:
+ Opc1 = PPC::ADDItlsldL;
+ Opc2 = PPC::GETtlsldADDR;
+ break;
+ case PPC::ADDItlsgdLADDR32:
+ Opc1 = PPC::ADDItlsgdL32;
+ Opc2 = PPC::GETtlsADDR32;
+ break;
+ case PPC::ADDItlsldLADDR32:
+ Opc1 = PPC::ADDItlsldL32;
+ Opc2 = PPC::GETtlsldADDR32;
+ break;
+ }
+
+ // Expand into two ops built prior to the existing instruction.
+ MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
+ .addReg(InReg);
+ Addi->addOperand(MI->getOperand(2));
+
+ // The ADDItls* instruction is the first instruction in the
+ // repair range.
+ MachineBasicBlock::iterator First = I;
+ --First;
+
+ MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+ .addReg(GPR3));
+ Call->addOperand(MI->getOperand(3));
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+ .addReg(GPR3);
+
+ // The COPY is the last instruction in the repair range.
+ MachineBasicBlock::iterator Last = I;
+ --Last;
+
+ // Move past the original instruction and remove it.
+ ++I;
+ MI->removeFromParent();
+
+ // Repair the live intervals.
+ LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
new file mode 100644
index 0000000..bf165c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -0,0 +1,156 @@
+//===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When resolving an address using the ELF ABI TOC pointer, two relocations are
+// generally required: one for the high part and one for the low part. Only
+// the high part generally explicitly depends on r2 (the TOC pointer). And, so,
+// we might produce code like this:
+//
+// .Ltmp526:
+// addis 3, 2, .LC12@toc@ha
+// .Ltmp1628:
+// std 2, 40(1)
+// ld 5, 0(27)
+// ld 2, 8(27)
+// ld 11, 16(27)
+// ld 3, .LC12@toc@l(3)
+// rldicl 4, 4, 0, 32
+// mtctr 5
+// bctrl
+// ld 2, 40(1)
+//
+// And there is nothing wrong with this code, as such, but there is a linker bug
+// in binutils (https://sourceware.org/bugzilla/show_bug.cgi?id=18414) that will
+// misoptimize this code sequence to this:
+// nop
+// std r2,40(r1)
+// ld r5,0(r27)
+// ld r2,8(r27)
+// ld r11,16(r27)
+// ld r3,-32472(r2)
+// clrldi r4,r4,32
+// mtctr r5
+// bctrl
+// ld r2,40(r1)
+// because the linker does not know (and does not check) that the value in r2
+// changed in between the instruction using the .LC12@toc@ha (TOC-relative)
+// relocation and the instruction using the .LC12@toc@l(3) relocation.
+// Because it finds these instructions using the relocations (and not by
+// scanning the instructions), it has been asserted that there is no good way
+// to detect the change of r2 in between. As a result, this bug may never be
+// fixed (i.e. it may become part of the definition of the ABI). GCC was
+// updated to add extra dependencies on r2 to instructions using the @toc@l
+// relocations to avoid this problem, and we'll do the same here.
+//
+// This is done as a separate pass because:
+// 1. These extra r2 dependencies are not really properties of the
+// instructions, but rather due to a linker bug, and maybe one day we'll be
+// able to get rid of them when targeting linkers without this bug (and,
+// thus, keeping the logic centralized here will make that
+// straightforward).
+// 2. There are ISel-level peephole optimizations that propagate the @toc@l
+// relocations to some user instructions, and so the extra dependencies do
+// not apply only to a fixed set of instructions (without undesirable
+// definition replication).
+//
+//===----------------------------------------------------------------------===//
+
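+// Concretely, the pass rewrites, e.g. (MI syntax illustrative):
+//   %vreg3 = LDtocL @obj, %vreg2
+// into
+//   %vreg3 = LDtocL @obj, %vreg2, implicit %X2
+// so the TOC pointer is visibly live at the instruction using @toc@l.
+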
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-toc-reg-deps"
+
+namespace llvm {
+ void initializePPCTOCRegDepsPass(PassRegistry&);
+}
+
+namespace {
+ // PPCTOCRegDeps pass - Add an implicit use of the TOC pointer (X2) to
+ // every instruction that carries a @toc@l relocation, to work around the
+ // binutils bug described above.
+ struct PPCTOCRegDeps : public MachineFunctionPass {
+ static char ID;
+ PPCTOCRegDeps() : MachineFunctionPass(ID) {
+ initializePPCTOCRegDepsPass(*PassRegistry::getPassRegistry());
+ }
+
+protected:
+ bool hasTOCLoReloc(const MachineInstr &MI) {
+ if (MI.getOpcode() == PPC::LDtocL ||
+ MI.getOpcode() == PPC::ADDItocL)
+ return true;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if ((MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) == PPCII::MO_TOC_LO)
+ return true;
+ }
+
+ return false;
+ }
+
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (auto &MI : MBB) {
+ if (!hasTOCLoReloc(MI))
+ continue;
+
+ MI.addOperand(MachineOperand::CreateReg(PPC::X2,
+ false /*IsDef*/,
+ true /*IsImp*/));
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
+ "PowerPC TOC Register Dependencies", false, false)
+
+char PPCTOCRegDeps::ID = 0;
+FunctionPass*
+llvm::createPPCTOCRegDepsPass() { return new PPCTOCRegDeps(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e07fd05..50d4395 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,10 +14,11 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
#include "PPCTargetObjectFile.h"
+#include "PPCTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -29,15 +30,33 @@ static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
+static cl::
+opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
+ cl::desc("Disable PPC loop preinc prep"));
+
static cl::opt<bool>
VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+static cl::
+opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
+ cl::desc("Disable VSX Swap Removal for PPC"));
+
static cl::opt<bool>
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
cl::init(true));
+static cl::opt<bool>
+EnablePrefetch("enable-ppc-prefetching",
+ cl::desc("disable software prefetching on PPC"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
+ cl::desc("Add extra TOC register dependencies"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -45,6 +64,40 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (T.getArch() == Triple::ppc64le)
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!is64Bit || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (is64Bit || !T.isOSDarwin())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (is64Bit)
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
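+
+// Illustrative results of the above (mangling components assumed):
+//   ppc64le-linux:        "e-m:e-i64:64-n32:64"
+//   powerpc64-linux:      "E-m:e-i64:64-n32:64"
+//   powerpc-apple-darwin: "E-m:o-p:32:32-f64:32:64-n32"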
+
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
std::string FullFS = FS;
Triple TargetTriple(TT);
@@ -64,6 +117,14 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, String
else
FullFS = "+crbits";
}
+
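+ // E.g. at -O1 and above a feature string of "+vsx" becomes
+ // "+invariant-function-descriptors,+vsx" (illustrative).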
+ if (OL != CodeGenOpt::None) {
+ if (!FullFS.empty())
+ FullFS = "+invariant-function-descriptors," + FullFS;
+ else
+ FullFS = "+invariant-function-descriptors";
+ }
+
return FullFS;
}
@@ -76,6 +137,30 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<PPC64LinuxTargetObjectFile>();
}
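+
+// Pick the target ABI: an explicit -target-abi=elfv1/elfv2 option wins;
+// otherwise ppc64le ELF targets default to ELFv2, big-endian ppc64 to
+// ELFv1, and everything else is left unknown.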
+static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
+ const TargetOptions &Options) {
+ if (Options.MCOptions.getABIName().startswith("elfv1"))
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ else if (Options.MCOptions.getABIName().startswith("elfv2"))
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+
+ assert(Options.MCOptions.getABIName().empty() &&
+ "Unknown target-abi option!");
+
+ if (!TT.isMacOSX()) {
+ switch (TT.getArch()) {
+ case Triple::ppc64le:
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ case Triple::ppc64:
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ default:
+ // Fallthrough.
+ ;
+ }
+ }
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
+}
+
// The FeatureString here is a little subtle. We are modifying the feature string
// with what are (currently) non-function specific overrides as it goes into the
// LLVMTargetMachine constructor and then using the stored value in the
@@ -84,10 +169,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
- CM, OL),
+ : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, TargetFS, *this) {
+ TargetABI(computeTargetABI(Triple(TT), Options)) {
initAsmInfo();
}
@@ -115,11 +200,8 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
const PPCSubtarget *
PPCTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -134,7 +216,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+ // FIXME: It would be good to make the subtarget additions here
+ // unnecessary. Anything that turns them on/off (overrides) ends
+ // up being put at the end of the feature string, but the defaults
+ // shouldn't require adding them. Fixing this means pulling Feature64Bit
+ // out of most of the target cpus in the .td file and making it set only
+ // as part of initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
}
return I.get();
}
@@ -154,14 +244,11 @@ public:
return getTM<PPCTargetMachine>();
}
- const PPCSubtarget &getPPCSubtarget() const {
- return *getPPCTargetMachine().getSubtargetImpl();
- }
-
void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
+ void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -175,6 +262,16 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
void PPCPassConfig::addIRPasses() {
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ // For the BG/Q (or if explicitly requested), add explicit data prefetch
+ // intrinsics.
+ bool UsePrefetching =
+ Triple(TM->getTargetTriple()).getVendor() == Triple::BGQ &&
+ getOptLevel() != CodeGenOpt::None;
+ if (EnablePrefetch.getNumOccurrences() > 0)
+ UsePrefetching = EnablePrefetch;
+ if (UsePrefetching)
+ addPass(createPPCLoopDataPrefetchPass());
+
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
@@ -192,6 +289,9 @@ void PPCPassConfig::addIRPasses() {
}
bool PPCPassConfig::addPreISel() {
+ if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
+
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -216,15 +316,26 @@ bool PPCPassConfig::addInstSelector() {
return false;
}
+void PPCPassConfig::addMachineSSAOptimization() {
+ TargetPassConfig::addMachineSSAOptimization();
+ // For little endian, remove where possible the vector swap instructions
+ // introduced at code generation to normalize vector element order.
+ if (Triple(TM->getTargetTriple()).getArch() == Triple::ppc64le &&
+ !DisableVSXSwapRemoval)
+ addPass(createPPCVSXSwapRemovalPass());
+}
+
void PPCPassConfig::addPreRegAlloc() {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
+ if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
+ addPass(createPPCTLSDynamicCallPass());
+ if (EnableExtraTOCRegDeps)
+ addPass(createPPCTOCRegDepsPass());
}
void PPCPassConfig::addPreSched2() {
- addPass(createPPCVSXCopyCleanupPass(), false);
-
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
@@ -236,11 +347,7 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCBranchSelectionPass(), false);
}
-void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our PPC pass. This
- // allows the PPC pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createPPCTargetTransformInfoPass(this));
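+// Construct TTI per function on demand via a callback, rather than
+// registering it up front as an immutable analysis pass.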
+TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
}
-
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 5095d73..7a49058 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -24,30 +24,35 @@ namespace llvm {
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
class PPCTargetMachine : public LLVMTargetMachine {
+public:
+ enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 };
+private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- PPCSubtarget Subtarget;
-
+ PPCABI TargetABI;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
- PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
~PPCTargetMachine() override;
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- /// \brief Register PPC analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
+ bool isPPC64() const {
+ Triple TT(getTargetTriple());
+ return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
+ }
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 2903cc1..9ad1340 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -22,7 +22,7 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
InitializeELF(TM.Options.UseInitArray);
}
-const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
// Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
index cd84da2..d248791 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -22,9 +22,9 @@ namespace llvm {
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 6493713..8aaf5e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
- virtual ~PPCTargetStreamer();
+ ~PPCTargetStreamer() override;
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index a7bb252..25d563a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,17 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// PPC target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "PPCTargetMachine.h"
+#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -28,115 +21,23 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializePPCTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
- const TargetMachine *TM;
- const PPCSubtarget *ST;
- const PPCTargetLowering *TLI;
-
-public:
- PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializePPCTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
-
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override;
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
- "PPC Target Transform Info", true, true, false)
-char PPCTTI::ID = 0;
-
-ImmutablePass *
-llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
- return new PPCTTI(TM);
-}
-
-
//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//
-PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
if (ST->hasPOPCNTD() && TyWidth <= 64)
- return PSK_FastHardware;
- return PSK_Software;
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
}
-unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(Imm, Ty);
+ return BaseT::getIntImmCost(Imm, Ty);
assert(Ty->isIntegerTy());
@@ -145,28 +46,28 @@ unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return ~0U;
if (Imm == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
if (Imm.getBitWidth() <= 64) {
if (isInt<16>(Imm.getSExtValue()))
- return TCC_Basic;
+ return TTI::TCC_Basic;
if (isInt<32>(Imm.getSExtValue())) {
// A constant that can be materialized using lis.
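+    // For example (values illustrative): 0x12340000 is a single lis, while
+    // 0x12345678 takes lis plus ori, which is costed as 2 * TCC_Basic below.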
if ((Imm.getZExtValue() & 0xFFFF) == 0)
- return TCC_Basic;
+ return TTI::TCC_Basic;
- return 2 * TCC_Basic;
+ return 2 * TTI::TCC_Basic;
}
}
- return 4 * TCC_Basic;
+ return 4 * TTI::TCC_Basic;
}
-unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -175,31 +76,32 @@ unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return ~0U;
switch (IID) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return PPCTTI::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const {
+unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
- return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
assert(Ty->isIntegerTy());
@@ -211,14 +113,15 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
ZeroFree = false;
switch (Opcode) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr. This prevents the
// creation of new constants for every base constant that gets constant
// folded with the offset.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::And:
RunFree = true; // (for the rotate-and-mask instructions)
// Fallthrough...
@@ -250,54 +153,62 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
if (ZeroFree && Imm == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
if (isInt<16>(Imm.getSExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
if (RunFree) {
if (Imm.getBitWidth() <= 32 &&
(isShiftedMask_32(Imm.getZExtValue()) ||
isShiftedMask_32(~Imm.getZExtValue())))
- return TCC_Free;
-
+ return TTI::TCC_Free;
if (ST->isPPC64() &&
(isShiftedMask_64(Imm.getZExtValue()) ||
isShiftedMask_64(~Imm.getZExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
}
if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
}
- return PPCTTI::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
+
+ // We unroll a lot on the A2 (hundreds of instructions), and the benefits
+ // often outweigh the cost of a division to compute the trip count.
+ UP.AllowExpensiveTripCount = true;
}
- TargetTransformInfo::getUnrollingPreferences(F, L, UP);
+ BaseT::getUnrollingPreferences(L, UP);
}
-unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
- if (Vector && !ST->hasAltivec())
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ return LoopHasReductions;
+}
+
+unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
return ST->hasVSX() ? 64 : 32;
}
-unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
+ if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@@ -308,7 +219,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTI::getMaxInterleaveFactor() const {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
unsigned Directive = ST->getDarwinDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -324,40 +235,46 @@ unsigned PPCTTI::getMaxInterleaveFactor() const {
if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
return 1;
+ // For P7 and P8, floating-point instructions have a 6-cycle latency and
+ // there are two execution units, so unroll by 12x for latency hiding.
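+  // (The factor is latency times units: 6 x 2 = 12 independent operations
+  // in flight.)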
+ if (Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8)
+ return 12;
+
// For most things, modern systems have two execution units (and
// out-of-order execution).
return 2;
}
-unsigned PPCTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned PPCTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
-unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -368,7 +285,13 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == 0)
return 0;
- return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+ // Floating point scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
// Estimated cost of a load-hit-store delay. This was obtained
@@ -385,21 +308,20 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
- return LHSPenalty +
- TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
- return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- unsigned Cost =
- TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
// VSX loads/stores support unaligned access.
if (ST->hasVSX()) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
new file mode 100644
index 0000000..35e7a14
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -0,0 +1,104 @@
+//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a TargetTransformInfo::Concept conforming object
+/// specific to the PPC target machine. It uses the target's detailed
+/// information to provide more precise answers to certain TTI queries,
+/// while letting the target independent and default TTI implementations
+/// handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
+ typedef BasicTTIImplBase<PPCTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ const PPCSubtarget *getST() const { return ST; }
+ const PPCTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ PPCTTIImpl(const PPCTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ PPCTTIImpl(PPCTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ PPCTTIImpl &operator=(const PPCTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ PPCTTIImpl &operator=(PPCTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
new file mode 100644
index 0000000..5e3ae2a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -0,0 +1,176 @@
+//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass which deals with the complexity of generating legal VSX register
+// copies to/from register classes which partially overlap with the VSX
+// register file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+      if (IsVSReg(DstMO.getReg(), MRI) &&
+          !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
new file mode 100644
index 0000000..f352fa6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -0,0 +1,335 @@
+//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mutates the form of VSX FMA instructions to avoid unnecessary
+// copies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace llvm { namespace PPC {
+ int getAltVSXFMAOpcode(uint16_t Opcode);
+} }
+
+namespace {
+  // PPCVSXFMAMutate pass - Mutate A-type VSX FMA instructions into M-type
+  // ones when one of the product operands is killed, so that the copy that
+  // would otherwise be needed to preserve the addend can be removed.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ bool OtherUsers = false, KillsAddendSrc = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J) {
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
+
+ if (OtherUsers || KillsAddendSrc)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // For virtual registers, verify that the addend source register
+ // is live here (as should have been assured above).
+ assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+ LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+ "Addend source register is not live!");
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned AddReg = AddendMI->getOperand(1).getReg();
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.setReg(KilledProdReg);
+ UseMO.setSubReg(KilledProdSubReg);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+    // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
new file mode 100644
index 0000000..e238669
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -0,0 +1,821 @@
+//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass analyzes vector computations and removes unnecessary
+// doubleword swaps (xxswapd instructions). This pass is performed
+// only for little-endian VSX code generation.
+//
+// For this specific case, loads and stores of v4i32, v4f32, v2i64,
+// and v2f64 vectors are inefficient. These are implemented using
+// the lxvd2x and stxvd2x instructions, which invert the order of
+// doublewords in a vector register. Thus code generation inserts
+// an xxswapd after each such load, and prior to each such store.
+//
+// The extra xxswapd instructions reduce performance. The purpose
+// of this pass is to reduce the number of xxswapd instructions
+// required for correctness.
+//
+// The primary insight is that much code that operates on vectors
+// does not care about the relative order of elements in a register,
+// so long as the correct memory order is preserved. If we have a
+// computation where all input values are provided by lxvd2x/xxswapd,
+// all outputs are stored using xxswapd/stxvd2x, and all intermediate
+// computations are lane-insensitive (independent of element order),
+// then all the xxswapd instructions associated with the loads and
+// stores may be removed without changing observable semantics.
+//
+// This pass uses standard equivalence class infrastructure to create
+// maximal webs of computations fitting the above description. Each
+// such web is then optimized by removing its unnecessary xxswapd
+// instructions.
+//
+// There are some lane-sensitive operations for which we can still
+// permit the optimization, provided we modify those operations
+// accordingly. Such operations are identified as using "special
+// handling" within this module.
+//
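+// As a simplified sketch (instruction sequence hypothetical), a web such as
+//
+//   %v0 = LXVD2X <addr>          (load; doublewords arrive swapped)
+//   %v1 = XXPERMDI %v0, %v0, 2   (swap inserted for correctness)
+//   %v2 = XVADDDP %v1, %v1       (lane-insensitive arithmetic)
+//   %v3 = XXPERMDI %v2, %v2, 2   (swap inserted before the store)
+//   STXVD2X %v3, <addr>          (store; doublewords re-swapped)
+//
+// can have both XXPERMDI instructions removed: the swapped values flow
+// through the addition unchanged, and the store restores memory order.
+//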
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-swaps"
+
+namespace llvm {
+ void initializePPCVSXSwapRemovalPass(PassRegistry&);
+}
+
+namespace {
+
+// A PPCVSXSwapEntry is created for each machine instruction that
+// is relevant to a vector computation.
+struct PPCVSXSwapEntry {
+ // Pointer to the instruction.
+ MachineInstr *VSEMI;
+
+ // Unique ID (position in the swap vector).
+ int VSEId;
+
+ // Attributes of this node.
+ unsigned int IsLoad : 1;
+ unsigned int IsStore : 1;
+ unsigned int IsSwap : 1;
+ unsigned int MentionsPhysVR : 1;
+ unsigned int HasImplicitSubreg : 1;
+ unsigned int IsSwappable : 1;
+ unsigned int SpecialHandling : 3;
+ unsigned int WebRejected : 1;
+ unsigned int WillRemove : 1;
+};
+
+enum SHValues {
+ SH_NONE = 0,
+ SH_EXTRACT,
+ SH_INSERT,
+ SH_NOSWAP_LD,
+ SH_NOSWAP_ST,
+ SH_SPLAT
+};
+
+struct PPCVSXSwapRemoval : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+  // Swap entries are allocated in a vector so they can be addressed by
+  // their VSEId index and traversed with good locality.
+ std::vector<PPCVSXSwapEntry> SwapVector;
+
+ // A mapping is maintained between machine instructions and
+ // their swap entries. The key is the address of the MI.
+ DenseMap<MachineInstr*, int> SwapMap;
+
+ // Equivalence classes are used to gather webs of related computation.
+ // Swap entries are represented by their VSEId fields.
+ EquivalenceClasses<int> *EC;
+
+ PPCVSXSwapRemoval() : MachineFunctionPass(ID) {
+ initializePPCVSXSwapRemovalPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize data structures.
+ void initialize(MachineFunction &MFParm);
+
+ // Walk the machine instructions to gather vector usage information.
+ // Return true iff vector mentions are present.
+ bool gatherVectorInstructions();
+
+ // Add an entry to the swap vector and swap map.
+ int addSwapEntry(MachineInstr *MI, PPCVSXSwapEntry &SwapEntry);
+
+ // Hunt backwards through COPY and SUBREG_TO_REG chains for a
+ // source register. VecIdx indicates the swap vector entry to
+ // mark as mentioning a physical register if the search leads
+ // to one.
+ unsigned lookThruCopyLike(unsigned SrcReg, unsigned VecIdx);
+
+ // Generate equivalence classes for related computations (webs).
+ void formWebs();
+
+ // Analyze webs and determine those that cannot be optimized.
+ void recordUnoptimizableWebs();
+
+ // Record which swap instructions can be safely removed.
+ void markSwapsForRemoval();
+
+ // Remove swaps and update other instructions requiring special
+ // handling. Return true iff any changes are made.
+ bool removeSwaps();
+
+ // Update instructions requiring special handling.
+ void handleSpecialSwappables(int EntryIdx);
+
+ // Dump a description of the entries in the swap vector.
+ void dumpSwapVector();
+
+ // Return true iff the given register is in the given class.
+ bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return RC->hasSubClassEq(MRI->getRegClass(Reg));
+ if (RC->contains(Reg))
+ return true;
+ return false;
+ }
+
+ // Return true iff the given register is a full vector register.
+ bool isVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSRCRegClass) ||
+ isRegInClass(Reg, &PPC::VRRCRegClass));
+ }
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ bool Changed = false;
+ initialize(MF);
+
+ if (gatherVectorInstructions()) {
+ formWebs();
+ recordUnoptimizableWebs();
+ markSwapsForRemoval();
+ Changed = removeSwaps();
+ }
+
+ // FIXME: See the allocation of EC in initialize().
+ delete EC;
+ return Changed;
+ }
+};
+
+// Initialize data structures for this pass. In particular, clear the
+// swap vector and allocate the equivalence class mapping before
+// processing each function.
+void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = static_cast<const PPCInstrInfo*>(MF->getSubtarget().getInstrInfo());
+
+ // An initial vector size of 256 appears to work well in practice.
+ // Small/medium functions with vector content tend not to incur a
+ // reallocation at this size. Three of the vector tests in
+ // projects/test-suite reallocate, which seems like a reasonable rate.
+ const int InitialVectorSize(256);
+ SwapVector.clear();
+ SwapVector.reserve(InitialVectorSize);
+
+ // FIXME: Currently we allocate EC each time because we don't have
+ // access to the set representation on which to call clear(). Should
+ // consider adding a clear() method to the EquivalenceClasses class.
+ EC = new EquivalenceClasses<int>;
+}
+
+// Create an entry in the swap vector for each instruction that mentions
+// a full vector register, recording various characteristics of the
+// instructions there.
+bool PPCVSXSwapRemoval::gatherVectorInstructions() {
+ bool RelevantFunction = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ bool RelevantInstr = false;
+ bool ImplicitSubreg = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (isVecReg(Reg)) {
+ RelevantInstr = true;
+ if (MO.getSubReg() != 0)
+ ImplicitSubreg = true;
+ break;
+ }
+ }
+
+ if (!RelevantInstr)
+ continue;
+
+ RelevantFunction = true;
+
+ // Create a SwapEntry initialized to zeros, then fill in the
+ // instruction and ID fields before pushing it to the back
+ // of the swap vector.
+ PPCVSXSwapEntry SwapEntry{};
+ int VecIdx = addSwapEntry(&MI, SwapEntry);
+
+ if (ImplicitSubreg)
+ SwapVector[VecIdx].HasImplicitSubreg = 1;
+
+ switch(MI.getOpcode()) {
+ default:
+ // Unless noted otherwise, an instruction is considered
+ // safe for the optimization. There are a large number of
+ // such true-SIMD instructions (all vector math, logical,
+ // select, compare, etc.).
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::XXPERMDI:
+ // This is a swap if it is of the form XXPERMDI t, s, s, 2.
+ // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
+ // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
+ // for example. We have to look through chains of COPY and
+ // SUBREG_TO_REG to find the real source value for comparison.
+ // If the real source value is a physical register, then mark the
+ // XXPERMDI as mentioning a physical register.
+ // Any other form of XXPERMDI is lane-sensitive and unsafe
+ // for the optimization.
+ if (MI.getOperand(3).getImm() == 2) {
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2)
+ SwapVector[VecIdx].IsSwap = 1;
+ }
+ break;
+ case PPC::LVX:
+ // Non-permuting loads are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsLoad = 1;
+ break;
+ case PPC::LXVD2X:
+ case PPC::LXVW4X:
+ // Permuting loads are marked as both load and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::STVX:
+ // Non-permuting stores are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsStore = 1;
+ break;
+ case PPC::STXVD2X:
+ case PPC::STXVW4X:
+ // Permuting stores are marked as both store and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsStore = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::SUBREG_TO_REG:
+ // These are fine provided they are moving between full vector
+ // register classes. For example, the VRs are a subset of the
+ // VSRs, but each VR and each VSR is a full 128-bit register.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::COPY:
+ // These are fine provided they are moving between full vector
+ // register classes.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::VSPLTB:
+ case PPC::VSPLTH:
+ case PPC::VSPLTW:
+        // Splats are lane-sensitive, but we can use special handling to
+        // adjust the source lane for the splat (see handleSpecialSwappables),
+        // so mark them swappable here.
+        SwapVector[VecIdx].IsSwappable = 1;
+        SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
+ break;
+ // The presence of the following lane-sensitive operations in a
+ // web will kill the optimization, at least for now. For these
+ // we do nothing, causing the optimization to fail.
+ // FIXME: Some of these could be permitted with special handling,
+ // and will be phased in as time permits.
+ // FIXME: There is no simple and maintainable way to express a set
+ // of opcodes having a common attribute in TableGen. Should this
+ // change, this is a prime candidate to use such a mechanism.
+ case PPC::INLINEASM:
+ case PPC::EXTRACT_SUBREG:
+ case PPC::INSERT_SUBREG:
+ case PPC::COPY_TO_REGCLASS:
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::LVXL:
+ case PPC::LXVDSX:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVXL:
+ case PPC::STXSDX:
+ case PPC::VCIPHER:
+ case PPC::VCIPHERLAST:
+ case PPC::VMRGHB:
+ case PPC::VMRGHH:
+ case PPC::VMRGHW:
+ case PPC::VMRGLB:
+ case PPC::VMRGLH:
+ case PPC::VMRGLW:
+ case PPC::VMULESB:
+ case PPC::VMULESH:
+ case PPC::VMULESW:
+ case PPC::VMULEUB:
+ case PPC::VMULEUH:
+ case PPC::VMULEUW:
+ case PPC::VMULOSB:
+ case PPC::VMULOSH:
+ case PPC::VMULOSW:
+ case PPC::VMULOUB:
+ case PPC::VMULOUH:
+ case PPC::VMULOUW:
+ case PPC::VNCIPHER:
+ case PPC::VNCIPHERLAST:
+ case PPC::VPERM:
+ case PPC::VPERMXOR:
+ case PPC::VPKPX:
+ case PPC::VPKSHSS:
+ case PPC::VPKSHUS:
+ case PPC::VPKSDSS:
+ case PPC::VPKSDUS:
+ case PPC::VPKSWSS:
+ case PPC::VPKSWUS:
+ case PPC::VPKUDUM:
+ case PPC::VPKUDUS:
+ case PPC::VPKUHUM:
+ case PPC::VPKUHUS:
+ case PPC::VPKUWUM:
+ case PPC::VPKUWUS:
+ case PPC::VPMSUMB:
+ case PPC::VPMSUMD:
+ case PPC::VPMSUMH:
+ case PPC::VPMSUMW:
+ case PPC::VRLB:
+ case PPC::VRLD:
+ case PPC::VRLH:
+ case PPC::VRLW:
+ case PPC::VSBOX:
+ case PPC::VSHASIGMAD:
+ case PPC::VSHASIGMAW:
+ case PPC::VSL:
+ case PPC::VSLDOI:
+ case PPC::VSLO:
+ case PPC::VSR:
+ case PPC::VSRO:
+ case PPC::VSUM2SWS:
+ case PPC::VSUM4SBS:
+ case PPC::VSUM4SHS:
+ case PPC::VSUM4UBS:
+ case PPC::VSUMSWS:
+ case PPC::VUPKHPX:
+ case PPC::VUPKHSB:
+ case PPC::VUPKHSH:
+ case PPC::VUPKHSW:
+ case PPC::VUPKLPX:
+ case PPC::VUPKLSB:
+ case PPC::VUPKLSH:
+ case PPC::VUPKLSW:
+ case PPC::XXMRGHW:
+ case PPC::XXMRGLW:
+ case PPC::XXSPLTW:
+ break;
+ }
+ }
+ }
+
+ if (RelevantFunction) {
+ DEBUG(dbgs() << "Swap vector when first built\n\n");
+ dumpSwapVector();
+ }
+
+ return RelevantFunction;
+}
+
+// Add an entry to the swap vector and swap map, and make a
+// singleton equivalence class for the entry.
+int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
+ PPCVSXSwapEntry& SwapEntry) {
+ SwapEntry.VSEMI = MI;
+ SwapEntry.VSEId = SwapVector.size();
+ SwapVector.push_back(SwapEntry);
+ EC->insert(SwapEntry.VSEId);
+ SwapMap[MI] = SwapEntry.VSEId;
+ return SwapEntry.VSEId;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search and
+// flag the swap entry indicated by VecIdx (the original
+// XXPERMDI) as mentioning a physical register. Similarly
+// for implicit subregister mentions (which should never
+// happen).
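+//
+// For example (virtual register numbers hypothetical), given
+//   %vreg2 = SUBREG_TO_REG 1, %F1, sub_64
+//   %vreg3 = COPY %vreg2
+// a query on %vreg3 chains through the COPY and the SUBREG_TO_REG to %F1;
+// since %F1 is physical, the entry at VecIdx is flagged MentionsPhysVR.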
+unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
+ unsigned VecIdx) {
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg, CopySrcSubreg;
+ if (MI->isCopy()) {
+ CopySrcReg = MI->getOperand(1).getReg();
+ CopySrcSubreg = MI->getOperand(1).getSubReg();
+ } else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ CopySrcSubreg = MI->getOperand(2).getSubReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
+ SwapVector[VecIdx].MentionsPhysVR = 1;
+ return CopySrcReg;
+ }
+
+ if (CopySrcSubreg != 0) {
+ SwapVector[VecIdx].HasImplicitSubreg = 1;
+ return CopySrcReg;
+ }
+
+ return lookThruCopyLike(CopySrcReg, VecIdx);
+}
+
+// Generate equivalence classes for related computations (webs) by
+// def-use relationships of virtual registers. Mention of a physical
+// register terminates the generation of equivalence classes as this
+// indicates a use of a parameter, definition of a return value, use
+// of a value returned from a call, or definition of a parameter to a
+// call. Computations with physical register mentions are flagged
+// as such so their containing webs will not be optimized.
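+//
+// For example (entry numbers hypothetical), if swap-vector entry 0 defines
+// %vreg1 and entry 1 is
+//   %vreg2 = XXPERMDI %vreg1, %vreg1, 2
+// then entry 1's use of %vreg1 unions entries 0 and 1 into a single web.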
+void PPCVSXSwapRemoval::formWebs() {
+
+ DEBUG(dbgs() << "\n*** Forming webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "\n" << SwapVector[EntryIdx].VSEId << " ");
+ DEBUG(MI->dump());
+
+ // It's sufficient to walk vector uses and join them to their unique
+ // definitions. In addition, check *all* vector register operands
+ // for physical regs.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!isVecReg(Reg))
+ continue;
+
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
+ continue;
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ MachineInstr* DefMI = MRI->getVRegDef(Reg);
+ assert(SwapMap.find(DefMI) != SwapMap.end() &&
+ "Inconsistency: def of vector reg not found in swap map!");
+ int DefIdx = SwapMap[DefMI];
+ (void)EC->unionSets(SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId);
+
+ DEBUG(dbgs() << format("Unioning %d with %d\n", SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(dbgs() << " Def: ");
+ DEBUG(DefMI->dump());
+ }
+ }
+}
+
+// Walk the swap vector entries looking for conditions that prevent their
+// containing computations from being optimized. When such conditions are
+// found, mark the representative of the computation's equivalence class
+// as rejected.
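+//
+// For example, a web in which a swapping load feeds an arithmetic
+// instruction directly (rather than a swap) must be rejected: removing the
+// load's swap would change the lane order that instruction observes.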
+void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
+
+ DEBUG(dbgs() << "\n*** Rejecting webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ // Reject webs containing mentions of physical registers or implicit
+ // subregs, or containing operations that we don't know how to handle
+ // in a lane-permuted region.
+ if (SwapVector[EntryIdx].MentionsPhysVR ||
+ SwapVector[EntryIdx].HasImplicitSubreg ||
+ !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for physreg, subreg, or not swap[pable]\n",
+ Repr));
+ DEBUG(dbgs() << " in " << EntryIdx << ": ");
+ DEBUG(SwapVector[EntryIdx].VSEMI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+
+    // Reject webs that contain swapping loads that feed something other
+    // than a swap instruction.
+ else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // We skip debug instructions in the analysis. (Note that debug
+ // location information is still maintained by this optimization
+ // because it remains on the LXVD2X and STXVD2X instructions after
+ // the XXPERMDIs are removed.)
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+
+ if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
+ SwapVector[UseIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for load not feeding swap\n", Repr));
+ DEBUG(dbgs() << " def " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << " use " << UseIdx << ": ");
+ DEBUG(UseMI.dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+    // Reject webs that contain swapping stores that are fed by something
+    // other than a swap instruction.
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+
+ if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
+ SwapVector[DefIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for store not fed by swap\n", Repr));
+ DEBUG(dbgs() << " def " << DefIdx << ": ");
+ DEBUG(DefMI->dump());
+ DEBUG(dbgs() << " use " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
+ dumpSwapVector();
+}
+
+// Walk the swap vector entries looking for swaps fed by permuting loads
+// and swaps that feed permuting stores. If the containing computation
+// has not been marked rejected, mark each such swap for removal.
+// (Removal is delayed in case optimization has disturbed the pattern,
+// such that multiple loads feed the same swap, etc.)
+void PPCVSXSwapRemoval::markSwapsForRemoval() {
+
+ DEBUG(dbgs() << "\n*** Marking swaps for removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+ SwapVector[UseIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap fed by load for removal: ");
+ DEBUG(UseMI.dump());
+ }
+ }
+
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+ SwapVector[DefIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap feeding store for removal: ");
+ DEBUG(DefMI->dump());
+ }
+
+ } else if (SwapVector[EntryIdx].IsSwappable &&
+ SwapVector[EntryIdx].SpecialHandling != 0) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected)
+ handleSpecialSwappables(EntryIdx);
+ }
+ }
+}
+
+// The identified swap entry requires special handling to allow its
+// containing computation to be optimized. Perform that handling
+// here.
+// FIXME: This code is to be phased in with subsequent patches.
+void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
+ switch (SwapVector[EntryIdx].SpecialHandling) {
+
+ default:
+ assert(false && "Unexpected special handling type");
+ break;
+
+ // For splats based on an index into a vector, add N/2 modulo N
+ // to the index, where N is the number of vector elements.
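+  // For example, a VSPLTW (N = 4) of element 1 becomes a splat of element
+  // (1 + 2) % 4 = 3, which names the same value once the register's
+  // doublewords have been swapped.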
+ case SHValues::SH_SPLAT: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned NElts;
+
+ DEBUG(dbgs() << "Changing splat: ");
+ DEBUG(MI->dump());
+
+ switch (MI->getOpcode()) {
+ default:
+ assert(false && "Unexpected splat opcode");
+ case PPC::VSPLTB: NElts = 16; break;
+ case PPC::VSPLTH: NElts = 8; break;
+ case PPC::VSPLTW: NElts = 4; break;
+ }
+
+ unsigned EltNo = MI->getOperand(1).getImm();
+ EltNo = (EltNo + NElts / 2) % NElts;
+ MI->getOperand(1).setImm(EltNo);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ }
+}
+
+// Walk the swap vector and replace each entry marked for removal with
+// a copy operation.
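+// (The copy simply forwards the swap's input to its output register;
+// the register allocator can usually coalesce it away entirely.)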
+bool PPCVSXSwapRemoval::removeSwaps() {
+
+ DEBUG(dbgs() << "\n*** Removing swaps ***\n\n");
+
+ bool Changed = false;
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ if (SwapVector[EntryIdx].WillRemove) {
+ Changed = true;
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ MachineBasicBlock *MBB = MI->getParent();
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(1));
+
+ DEBUG(dbgs() << format("Replaced %d with copy: ",
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(MI->dump());
+
+ MI->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+// For debug purposes, dump the contents of the swap vector.
+void PPCVSXSwapRemoval::dumpSwapVector() {
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ int ID = SwapVector[EntryIdx].VSEId;
+
+ DEBUG(dbgs() << format("%6d", ID));
+ DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
+ DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
+ DEBUG(dbgs() << format(" %14s ", TII->getName(MI->getOpcode())));
+
+ if (SwapVector[EntryIdx].IsLoad)
+ DEBUG(dbgs() << "load ");
+ if (SwapVector[EntryIdx].IsStore)
+ DEBUG(dbgs() << "store ");
+ if (SwapVector[EntryIdx].IsSwap)
+ DEBUG(dbgs() << "swap ");
+ if (SwapVector[EntryIdx].MentionsPhysVR)
+ DEBUG(dbgs() << "physreg ");
+ if (SwapVector[EntryIdx].HasImplicitSubreg)
+ DEBUG(dbgs() << "implsubreg ");
+
+ if (SwapVector[EntryIdx].IsSwappable) {
+ DEBUG(dbgs() << "swappable ");
+ switch (SwapVector[EntryIdx].SpecialHandling) {
+ default:
+ DEBUG(dbgs() << "special:**unknown**");
+ break;
+ case SH_NONE:
+ break;
+ case SH_EXTRACT:
+ DEBUG(dbgs() << "special:extract ");
+ break;
+ case SH_INSERT:
+ DEBUG(dbgs() << "special:insert ");
+ break;
+ case SH_NOSWAP_LD:
+ DEBUG(dbgs() << "special:load ");
+ break;
+ case SH_NOSWAP_ST:
+ DEBUG(dbgs() << "special:store ");
+ break;
+ case SH_SPLAT:
+ DEBUG(dbgs() << "special:splat ");
+ break;
+ }
+ }
+
+ if (SwapVector[EntryIdx].WebRejected)
+ DEBUG(dbgs() << "rejected ");
+ if (SwapVector[EntryIdx].WillRemove)
+ DEBUG(dbgs() << "remove ");
+
+ DEBUG(dbgs() << "\n");
+
+ // For no-asserts builds.
+ (void)MI;
+ (void)ID;
+ }
+
+ DEBUG(dbgs() << "\n");
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+
+char PPCVSXSwapRemoval::ID = 0;
+FunctionPass*
+llvm::createPPCVSXSwapRemovalPass() { return new PPCVSXSwapRemoval(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5727dbc..5b2fe19 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -14,7 +14,7 @@ using namespace llvm;
Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
-extern "C" void LLVMInitializePowerPCTargetInfo() {
+extern "C" void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc, /*HasJIT=*/true>
X(ThePPC32Target, "ppc32", "PowerPC 32");
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
index c660055..9b36063 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.h
@@ -43,6 +43,7 @@ FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
@@ -64,9 +65,8 @@ Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
ModulePass *createAMDGPUAlwaysInlinePass();
-/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
-ImmutablePass *
-createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
+void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
+extern char &SIFixControlFlowLiveIntervalsID;
void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
extern char &SIFixSGPRLiveRangesID;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.td b/contrib/llvm/lib/Target/R600/AMDGPU.td
index 03f2bbe..2e7e39a 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.td
@@ -1,11 +1,11 @@
-//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
+//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//==-----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
@@ -20,6 +20,11 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
"EnableIRStructurizer",
"false",
@@ -48,6 +53,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
"Enable double precision denormal handling",
[FeatureFP64]>;
+def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
+ "FastFMAF32",
+ "true",
+ "Assuming f32 fma is at least as fast as mul + add",
+ []>;
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -121,12 +132,47 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
+ "ldsbankcount"#Value,
+ "LDSBankCount",
+ !cast<string>(Value),
+ "The number of LDS banks per compute unit.">;
+
+def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
+def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
+
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
!cast<string>(Value),
"The size of local memory in bytes">;
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI">;
+
+def FeatureCIInsts : SubtargetFeature<"ci-insts",
+ "CIInsts",
+ "true",
+ "Additional intstructions for CI+">;
+
+// Dummy feature used to disable assembler instructions.
+def FeatureDisable : SubtargetFeature<"",
+ "FeatureDisable","true",
+ "Dummy feature to disable assembler"
+ " instructions">;
+
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
@@ -152,20 +198,24 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+ FeatureLDSBankCount32]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding, FeatureCIInsts]>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
let guessInstructionProperties = 1;
+ let noNamedPositionallyEncodedOperands = 1;
}
def AMDGPUAsmParser : AsmParser {
@@ -188,10 +238,20 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//
+def TruePredicate : Predicate<"true">;
+def isSICI : Predicate<
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+
class PredicateControl {
Predicate SubtargetPredicate;
+ Predicate SIAssemblerPredicate = isSICI;
+ list<Predicate> AssemblerPredicates = [];
+ Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate],
+ list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
+ AssemblerPredicates,
OtherPredicates);
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 1fae26e..56b50a9 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -58,7 +59,7 @@ using namespace llvm;
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(const MachineFunction &F) {
- const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
// TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
@@ -73,9 +74,10 @@ static uint32_t getFPMode(const MachineFunction &F) {
FP_DENORM_MODE_DP(FP64Denormals);
}
-static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
- MCStreamer &Streamer) {
- return new AMDGPUAsmPrinter(tm, Streamer);
+static AsmPrinter *
+createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ return new AMDGPUAsmPrinter(tm, std::move(Streamer));
}
extern "C" void LLVMInitializeR600AsmPrinter() {
@@ -83,19 +85,18 @@ extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
}
-AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
-}
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
// This label is used to mark the end of the .text section.
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer.SwitchSection(TLOF.getTextSection());
+ OutStreamer->SwitchSection(TLOF.getTextSection());
MCSymbol *EndOfTextLabel =
- OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer.EmitLabel(EndOfTextLabel);
+ OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ OutStreamer->EmitLabel(EndOfTextLabel);
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -105,20 +106,17 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
- const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(ConfigSection);
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.isAmdHsaOS()) {
getSIProgramInfo(KernelInfo, MF);
EmitAmdKernelCodeT(MF, KernelInfo);
- OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1));
+ OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
} else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
@@ -130,49 +128,45 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
- const MCSectionELF *CommentSection
- = Context.getELFSection(".AMDGPU.csdata",
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
- OutStreamer.SwitchSection(CommentSection);
+ MCSectionELF *CommentSection =
+ Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(CommentSection);
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- OutStreamer.emitRawComment(" Kernel info:", false);
- OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
- false);
- OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
- false);
- OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
- false);
- OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
- false);
- OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
- false);
- OutStreamer.emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
- false);
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
+ false);
+ OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
+ false);
+ OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
+ false);
+ OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+ false);
+ OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+ false);
+ OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- OutStreamer.emitRawComment(
+ OutStreamer->emitRawComment(
Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
}
}
- if (STM.dumpCode() && DisasmEnabled) {
+ if (STM.dumpCode()) {
- OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
- ELF::SHT_NOTE, 0,
- SectionKind::getReadOnly()));
+ OutStreamer->SwitchSection(
+ Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
for (size_t i = 0; i < DisasmLines.size(); ++i) {
std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
Comment += " ; " + HexLines[i] + "\n";
- OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
- OutStreamer.EmitBytes(StringRef(Comment));
+ OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer->EmitBytes(StringRef(Comment));
}
}
@@ -182,10 +176,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const R600RegisterInfo *RI = static_cast<const R600RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const R600RegisterInfo *RI =
+ static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
@@ -227,29 +221,29 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
}
}
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
S_STACK_SIZE(MFI->StackSize), 4);
- OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
- OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+ OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+ OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
+ OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
+ OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
}
}
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) const {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
bool FlatUsed = false;
- const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const SIRegisterInfo *RI =
+ static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
@@ -427,45 +421,45 @@ static unsigned getRsrcReg(unsigned ShaderType) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
if (MFI->getShaderType() == ShaderType::COMPUTE) {
- OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+ OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
- OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
- OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
+ OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
+ OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
- OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
- OutStreamer.EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
// 0" comment but I don't see a corresponding field in the register spec.
} else {
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
- S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
+ OutStreamer->EmitIntValue(RsrcReg, 4);
+ OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
+ S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
if (STM.isVGPRSpillingEnabled(MFI)) {
- OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
}
}
if (MFI->getShaderType() == ShaderType::PIXEL) {
- OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
- OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+ OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
+ OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
}
}
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
memset(&header, 0, sizeof(header));
@@ -515,69 +509,92 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.wavefront_size = STM.getWavefrontSize();
- const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version",
- ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly());
- OutStreamer.SwitchSection(VersionSection);
- OutStreamer.EmitBytes(Twine("HSA Code Unit:" +
- Twine(header.hsail_version_major) + "." +
- Twine(header.hsail_version_minor) + ":" +
- "AMD:" +
- Twine(header.amd_code_version_major) + "." +
- Twine(header.amd_code_version_minor) + ":" +
- "GFX8.1:0").str());
+ MCSectionELF *VersionSection =
+ OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(VersionSection);
+ OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
+ Twine(header.hsail_version_major) + "." +
+ Twine(header.hsail_version_minor) + ":" +
+ "AMD:" +
+ Twine(header.amd_code_version_major) + "." +
+ Twine(header.amd_code_version_minor) + ":" +
+ "GFX8.1:0").str());
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
if (isVerbose()) {
- OutStreamer.emitRawComment("amd_code_version_major = " +
- Twine(header.amd_code_version_major), false);
- OutStreamer.emitRawComment("amd_code_version_minor = " +
- Twine(header.amd_code_version_minor), false);
- OutStreamer.emitRawComment("struct_byte_size = " +
- Twine(header.struct_byte_size), false);
- OutStreamer.emitRawComment("target_chip = " +
- Twine(header.target_chip), false);
- OutStreamer.emitRawComment(" compute_pgm_rsrc1: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc1), false);
- OutStreamer.emitRawComment(" compute_pgm_rsrc2: " +
- Twine::utohexstr(KernelInfo.ComputePGMRSrc2), false);
- OutStreamer.emitRawComment("enable_sgpr_private_segment_buffer = " +
+ OutStreamer->emitRawComment("amd_code_version_major = " +
+ Twine(header.amd_code_version_major), false);
+ OutStreamer->emitRawComment("amd_code_version_minor = " +
+ Twine(header.amd_code_version_minor), false);
+ OutStreamer->emitRawComment("struct_byte_size = " +
+ Twine(header.struct_byte_size), false);
+ OutStreamer->emitRawComment("target_chip = " +
+ Twine(header.target_chip), false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
+ false);
+ OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
+ Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
+ false);
+ OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
- OutStreamer.emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
+ OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
- OutStreamer.emitRawComment("private_element_size = 2 ", false);
- OutStreamer.emitRawComment("is_ptr64 = " +
+ OutStreamer->emitRawComment("private_element_size = 2 ", false);
+ OutStreamer->emitRawComment("is_ptr64 = " +
Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
- OutStreamer.emitRawComment("workitem_private_segment_byte_size = " +
- Twine(header.workitem_private_segment_byte_size),
- false);
- OutStreamer.emitRawComment("workgroup_group_segment_byte_size = " +
- Twine(header.workgroup_group_segment_byte_size),
- false);
- OutStreamer.emitRawComment("gds_segment_byte_size = " +
- Twine(header.gds_segment_byte_size), false);
- OutStreamer.emitRawComment("kernarg_segment_byte_size = " +
- Twine(header.kernarg_segment_byte_size), false);
- OutStreamer.emitRawComment("wavefront_sgpr_count = " +
- Twine(header.wavefront_sgpr_count), false);
- OutStreamer.emitRawComment("workitem_vgpr_count = " +
- Twine(header.workitem_vgpr_count), false);
- OutStreamer.emitRawComment("code_type = " + Twine(header.code_type), false);
- OutStreamer.emitRawComment("wavefront_size = " +
- Twine((int)header.wavefront_size), false);
- OutStreamer.emitRawComment("optimization_level = " +
- Twine(header.optimization_level), false);
- OutStreamer.emitRawComment("hsail_profile = " +
- Twine(header.hsail_profile), false);
- OutStreamer.emitRawComment("hsail_machine_model = " +
- Twine(header.hsail_machine_model), false);
- OutStreamer.emitRawComment("hsail_version_major = " +
- Twine(header.hsail_version_major), false);
- OutStreamer.emitRawComment("hsail_version_minor = " +
- Twine(header.hsail_version_minor), false);
+ OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
+ Twine(header.workitem_private_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
+ Twine(header.workgroup_group_segment_byte_size),
+ false);
+ OutStreamer->emitRawComment("gds_segment_byte_size = " +
+ Twine(header.gds_segment_byte_size), false);
+ OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
+ Twine(header.kernarg_segment_byte_size), false);
+ OutStreamer->emitRawComment("wavefront_sgpr_count = " +
+ Twine(header.wavefront_sgpr_count), false);
+ OutStreamer->emitRawComment("workitem_vgpr_count = " +
+ Twine(header.workitem_vgpr_count), false);
+ OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
+ OutStreamer->emitRawComment("wavefront_size = " +
+ Twine((int)header.wavefront_size), false);
+ OutStreamer->emitRawComment("optimization_level = " +
+ Twine(header.optimization_level), false);
+ OutStreamer->emitRawComment("hsail_profile = " +
+ Twine(header.hsail_profile), false);
+ OutStreamer->emitRawComment("hsail_machine_model = " +
+ Twine(header.hsail_machine_model), false);
+ OutStreamer->emitRawComment("hsail_version_major = " +
+ Twine(header.hsail_version_major), false);
+ OutStreamer->emitRawComment("hsail_version_minor = " +
+ Twine(header.hsail_version_minor), false);
+ }
+
+ OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
}
- OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
+ AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+ return false;
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
index b360ae8..1acff3a 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -85,7 +85,8 @@ private:
const SIProgramInfo &KernelInfo) const;
public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
+ explicit AMDGPUAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -98,8 +99,11 @@ public:
void EmitEndOfAsmFile(Module &M) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
protected:
- bool DisasmEnabled;
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
};
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
index 9e8302e..8175786 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -99,9 +99,8 @@ AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
NumEntries = 0;
return nullptr;
}
-void
-AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-}
+void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
void
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
index 15a6636..9f31be1 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUFrameLowering.h
@@ -37,7 +37,7 @@ public:
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
};
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 68d557a..df4461e 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -39,18 +39,17 @@ namespace {
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
- const AMDGPUSubtarget &Subtarget;
+ const AMDGPUSubtarget *Subtarget;
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
-
+ bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
void PostprocessISelDAG() override;
private:
bool isInlineImmediate(SDNode *N) const;
- inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -79,6 +78,8 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N) const;
+
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -95,9 +96,10 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
+ SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
@@ -120,6 +122,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -132,7 +139,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+ : SelectionDAGISel(TM) {}
+
+bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
@@ -156,7 +167,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
switch (N->getMachineOpcode()) {
default: {
const MCInstrDesc &Desc =
- TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
+ Subtarget->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
return nullptr;
@@ -164,42 +175,38 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (RegClass == -1)
return nullptr;
- return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
+ return Subtarget->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
const TargetRegisterClass *SuperRC =
- TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
+ Subtarget->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
- SuperRC, SubRegIdx);
+ return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
+ SubRegIdx);
}
}
}
-SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
-}
-
bool AMDGPUDAGToDAGISel::SelectADDRParam(
SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
}
return true;
}
@@ -222,21 +229,47 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
} else if (Addr.getOpcode() == ISD::ADD) {
R1 = Addr.getOperand(0);
R2 = Addr.getOperand(1);
} else {
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
}
return true;
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
+ AMDGPUAS::LOCAL_ADDRESS))
+ return N;
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ // Write max value to m0 before each load operation
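+ // (m0 bounds local-memory addressing for DS instructions on these
+ // targets, so an all-ones value keeps the limit from ever firing.)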
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
+ CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+
+ SDValue Glue = M0.getValue(1);
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ Ops.push_back(Glue);
+ CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
+
+ return N;
+}
+
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -244,7 +277,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
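+ // (Atomic operations may address local memory; glueCopyToM0 itself
+ // checks the address space and leaves non-local accesses untouched.)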
+ if (isa<AtomicSDNode>(N))
+ N = glueCopyToM0(N);
+
switch (Opc) {
default: break;
// We are selecting i64 ADD here instead of custom lower it during
@@ -253,7 +288,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::ADD:
case ISD::SUB: {
if (N->getValueType(0) != MVT::i64 ||
- ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
return SelectADD_SUB_I64(N);
@@ -262,15 +297,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsEq(MVT::i32));
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
bool UseVReg = true;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
@@ -281,7 +313,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (!RC) {
continue;
}
- if (SIRI->isSGPRClass(RC)) {
+ if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
UseVReg = false;
}
}
@@ -320,7 +352,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
}
- SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ SDLoc DL(N);
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
if (NumVectorElts == 1) {
return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
@@ -334,18 +367,19 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 1 = Vector Register Class
SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
- if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
+ MVT::i32);
}
if (NOps != NumVectorElts) {
@@ -353,11 +387,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- SDLoc(N), EltVT);
+ DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
- CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
}
}
@@ -368,30 +402,30 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
break;
}
+ SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
- SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
} else {
llvm_unreachable("Unhandled value type for BUILD_PAIR");
}
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
N->getOperand(1), SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
case ISD::Constant:
case ISD::ConstantFP: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
break;
@@ -403,38 +437,46 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Imm = C->getZExtValue();
}
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(Imm >> 32, MVT::i32));
+ SDLoc DL(N);
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
+ MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
+ N = glueCopyToM0(N);
+ break;
+ }
+
// To simplify the TableGen patterns, we replace all i64 loads with
// v2i32 loads. Alternatively, we could promote i64 loads to v2i32
// during DAG legalization; however, some places in the DAG legalizer
// (e.g. ExpandUnalignedLoad) make assumptions about loads of legal
// types like i64, so doing this promotion early can cause problems.
- EVT VT = N->getValueType(0);
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
- break;
SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
MVT::i64, NewLoad);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SelectCode(NewLoad.getNode());
+ SDNode *Load = glueCopyToM0(NewLoad.getNode());
+ SelectCode(Load);
N = BitCast.getNode();
break;
}
@@ -443,63 +485,68 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// Handle i64 stores here for the same reason mentioned above for loads.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
- if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
- break;
+ if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
}
- // getNode() may fold the bitcast if its input was another bitcast. If that
- // happens we should only select the new store.
- N = NewStore.getNode();
+ N = glueCopyToM0(N);
break;
}
case AMDGPUISD::REGISTER_LOAD: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
+ SDLoc DL(N);
SelectADDRIndirect(N->getOperand(1), Addr, Offset);
const SDValue Ops[] = {
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
- CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
+ CurDAG->getVTList(MVT::i32, MVT::i64,
+ MVT::Other),
Ops);
}
case AMDGPUISD::REGISTER_STORE: {
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ SDLoc DL(N);
const SDValue Ops[] = {
N->getOperand(1),
Addr,
Offset,
- CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
N->getOperand(0),
};
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
CurDAG->getVTList(MVT::Other),
Ops);
}
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
// There is a scalar version available, but unlike the vector version which
@@ -520,21 +567,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
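+ // (Packing the offset into bits [5:0] and the width into bits [22:16]
+ // of the second source operand now happens inside getS_BFE.)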
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -548,6 +585,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
@@ -604,13 +649,11 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
- if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- N->getMemoryVT().bitsLT(MVT::i32)) {
+ if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getMemoryVT().bitsLT(MVT::i32))
return true;
- }
- }
+
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
@@ -681,7 +724,8 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+ true);
return true;
}
return false;
@@ -691,7 +735,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, true);
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
@@ -706,7 +750,8 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
// If the pointer address is constant, we can move it to the offset field.
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
@@ -714,30 +759,32 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
AMDGPU::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
}
// Default case, no offset
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDValue &Offset) {
ConstantSDNode *C;
+ SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else {
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
}
return true;
@@ -750,8 +797,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
bool IsAdd = (N->getOpcode() == ISD::ADD);
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
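+ // (sub0/sub1 select the low and high 32-bit halves; the i64 add is
+ // decomposed below into a 32-bit add plus an add-with-carry.)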
SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL, MVT::i32, LHS, Sub0);
@@ -777,7 +824,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
@@ -808,12 +855,11 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
- if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
return true;
// On Southern Islands, an instruction with a negative base value and an offset
@@ -835,15 +881,17 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
}
}
+ SDLoc DL(Addr);
+
// If we have a constant address, prefer to put the constant into the
// offset. This can save moves to load the constant address since multiple
// operations can share the zero base address register, and enables merging
// into read2 / write2 instructions.
if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
if (isUInt<16>(CAddr->getZExtValue())) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
Offset = Addr;
return true;
@@ -852,13 +900,15 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// default case
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
+ SDLoc DL(Addr);
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -868,8 +918,8 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// (add n0, c0)
if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
Base = N0;
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
@@ -880,21 +930,21 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
assert(4 * DWordOffset0 == CAddr->getZExtValue());
if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero
= CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(Addr), MVT::i32, Zero);
+ DL, MVT::i32, Zero);
Base = SDValue(MovZero, 0);
- Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
}
// default case
Base = Addr;
- Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
- Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
return true;
}
@@ -910,62 +960,70 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &TFE) const {
SDLoc DL(Addr);
- GLC = CurDAG->getTargetConstant(0, MVT::i1);
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
- TFE = CurDAG->getTargetConstant(0, MVT::i1);
+ GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
- Idxen = CurDAG->getTargetConstant(0, MVT::i1);
- Offen = CurDAG->getTargetConstant(0, MVT::i1);
- Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+ Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isLegalMUBUFImmOffset(C1)) {
-
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
- Ptr = N2;
- VAddr = N3;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return;
- }
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ } else {
// (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ }
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return;
+ } else if (isUInt<32>(C1->getZExtValue())) {
+ // Illegal offset; store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return;
}
}
+
if (Addr.getOpcode() == ISD::ADD) {
// (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
Ptr = N0;
VAddr = N1;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return;
}
// default case -> offset
- VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
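// Editor's sketch (not part of the patch): the constant-offset handling in
// SelectMUBUF above, in scalar form. Offsets accepted by isLegalMUBUFImmOffset
// go in the immediate Offset field; any other offset that fits in 32 bits is
// materialized into the SOffset register via S_MOV_B32. The 12-bit field
// width below is an illustrative assumption, since the legality check itself
// is not shown in this hunk.
#include <cstdint>
struct MubufOffset { uint16_t Imm; uint32_t SOff; };
static MubufOffset splitMubufOffset(uint64_t C) {
  if (C < (1u << 12))                        // assumed immediate field width
    return { static_cast<uint16_t>(C), 0 };  // legal immediate offset
  return { 0, static_cast<uint32_t>(C) };    // spill to soffset (S_MOV_B32)
}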
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr,
- SDValue &Offset) const {
- SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -985,11 +1043,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &Offset,
- SDValue &SLC) const {
- SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -999,7 +1059,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -1017,11 +1077,11 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
};
SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, RsrcOps), 0);
@@ -1036,14 +1096,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
if (isLegalMUBUFImmOffset(C1)) {
VAddr = Addr.getOperand(0);
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
VAddr = Addr;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1053,7 +1113,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &TFE) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -1087,7 +1147,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
SDLoc DL(N);
- assert(Subtarget.hasFlatAddressSpace() &&
+ assert(Subtarget->hasFlatAddressSpace() &&
"addrspacecast only supported with flat address space!");
assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
@@ -1116,7 +1176,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
DL,
DestVT,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
}
@@ -1125,25 +1185,115 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
// FIXME: This is probably wrong; we should never be defining
// a register class with both VGPRs and SGPRs
- SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, MVT::i32);
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
+ MVT::i32);
const SDValue Ops[] = {
RC,
Src,
- CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
- CurDAG->getConstant(0, MVT::i32)), 0),
- CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(0, DL, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- SDLoc(N), N->getValueType(0), Ops);
+ DL, N->getValueType(0), Ops);
}
assert(SrcSize == 64 && DestSize == 64);
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function: pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32 instructions. In the
+ // second source operand, bits [5:0] contain the offset and bits [22:16]
+ // the width.
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
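// Editor's standalone sketch of the operand packing performed by getS_BFE:
// bits [5:0] of the packed word carry the offset, bits [22:16] the width.
#include <cstdint>
static uint32_t packBFEOperand(uint32_t Offset, uint32_t Width) {
  return Offset | (Width << 16);
}
// packBFEOperand(8, 4) == 0x00040008: extract 4 bits starting at bit 8.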
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+ // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+ // Predicate: 0 < b <= c < 32
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
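// Editor's check of the identity matched above, on plain 32-bit unsigned
// values: for 0 < b <= c < 32, ((a << b) >> c) yields the (32 - c) bits of a
// starting at bit (c - b), which is exactly BFE_U32 a, (c - b), (32 - c).
#include <cstdint>
static uint32_t bfeU32(uint32_t A, uint32_t Off, uint32_t W) {
  return (A >> Off) & ((W < 32) ? ((1u << W) - 1) : ~0u);
}
static uint32_t viaShiftPair(uint32_t A, uint32_t B, uint32_t C) {
  return (A << B) >> C;  // logical shifts on an unsigned operand
}
// For 0 < b <= c < 32: viaShiftPair(a, b, c) == bfeU32(a, c - b, 32 - c).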
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
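// Editor's sketch of the two AND/shift shapes matched above; both collapse
// to an unsigned BFE whose width is the popcount of the (shifted) mask,
// provided the mask is a contiguous run of low bits.
#include <bit>      // std::popcount (C++20)
#include <cstdint>
static bool isMask32(uint32_t V) { return V != 0 && ((V + 1) & V) == 0; }
// (a >> b) & mask  == BFE_U32(a, b, std::popcount(mask))       if isMask32(mask)
// (a & mask) >> b  == BFE_U32(a, b, std::popcount(mask >> b))  if isMask32(mask >> b)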
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
@@ -1161,7 +1311,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -1169,9 +1319,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
+ SDLoc DL(In);
// FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1180,7 +1331,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
// FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1189,7 +1340,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Clamp,
SDValue &Omod) const {
- Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32);
+ Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
return SelectVOP3Mods(In, Src, SrcMods);
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index b137053..d00ae78 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -102,11 +102,9 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
- TargetLowering(TM) {
-
- Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
-
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
setOperationAction(ISD::Constant, MVT::i32, Legal);
setOperationAction(ISD::Constant, MVT::i64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
@@ -127,12 +125,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FROUND, MVT::f64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Custom);
setOperationAction(ISD::FREM, MVT::f64, Custom);
+ // v_mad_f32 does not support denormals according to some sources.
+ if (!Subtarget->hasFP32Denormals())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+ // Expand to fneg + fadd.
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -384,6 +393,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -497,6 +509,12 @@ bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
return VT == MVT::f32 || VT == MVT::f64;
}
+bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const {
+ return true;
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
@@ -601,6 +619,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
@@ -660,14 +679,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
+ return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(InitTy));
}
@@ -675,7 +694,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
EVT VT = EVT::getEVT(CFP->getType());
PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
+ return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(CFP->getType()));
}
@@ -687,7 +706,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
SmallVector<SDValue, 8> Chains;
for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
- SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT);
+ SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
Constant *Elt = Init->getAggregateElement(I);
@@ -711,7 +730,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0; i < NumElements; ++i) {
- SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
+ SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
Constant *Elt = Init->getAggregateElement(i);
@@ -748,7 +767,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
@@ -773,7 +792,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
Offset = MFI->LocalMemoryObjects[GV];
}
- return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
+ return DAG.getConstant(Offset, SDLoc(Op),
+ getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
@@ -849,14 +869,13 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
Op.getValueType());
}
@@ -931,9 +950,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
- DAG.getConstantFP(Max, VT));
+ DAG.getConstantFP(Max, DL, VT));
return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
- DAG.getConstantFP(Min, VT));
+ DAG.getConstantFP(Min, DL, VT));
} else {
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
}
@@ -1028,8 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
- Op.getOperand(1));
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(1));
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
@@ -1041,7 +1060,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
- DAG.getConstantFP(1.0f, MVT::f32),
+ DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
@@ -1189,7 +1208,7 @@ SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
- DAG.getConstant(i * MemEltSize, PtrVT));
+ DAG.getConstant(i * MemEltSize, SL, PtrVT));
SDValue NewLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
@@ -1240,7 +1259,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
Load->isInvariant(), Load->getAlignment());
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
SDValue HiLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
@@ -1280,18 +1300,18 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
unsigned MemEltBits = MemEltVT.getSizeInBits();
unsigned MemNumElements = MemVT.getVectorNumElements();
unsigned PackedSize = MemVT.getStoreSizeInBits();
- SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
+ SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
assert(Value.getValueType().getScalarSizeInBits() >= 32);
SDValue PackedValue;
for (unsigned i = 0; i < MemNumElements; ++i) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, DL, MVT::i32));
Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
- SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
if (i == 0) {
@@ -1333,9 +1353,9 @@ SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Store->getValue(),
- DAG.getConstant(i, MVT::i32));
+ DAG.getConstant(i, SL, MVT::i32));
- SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
+ SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), SL, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
SDValue NewStore =
DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
@@ -1374,7 +1394,8 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
EVT PtrVT = BasePtr.getValueType();
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+ DAG.getConstant(LoMemVT.getStoreSize(), SL,
+ PtrVT));
MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
SDValue LoStore
@@ -1430,22 +1451,34 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
return SDValue();
+ // Pre-SI, a private-address extending load narrower than 32 bits is
+ // lowered to a byte/short extract from a 32-bit register.
+ // Get the register holding the target dword.
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
+ // Load the Register.
SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32),
+ DAG.getTargetConstant(0, DL, MVT::i32),
Op.getOperand(2));
+
+ // Get offset within the register.
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
Load->getBasePtr(),
- DAG.getConstant(0x3, MVT::i32));
+ DAG.getConstant(0x3, DL, MVT::i32));
+
+ // Bit offset of target byte (byteIdx * 8).
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, MVT::i32));
+ DAG.getConstant(3, DL, MVT::i32));
+ // Shift to the right.
Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
+ // Eliminate the upper bits by setting them to ...
EVT MemEltVT = MemVT.getScalarType();
+
+ // ... ones.
if (ExtType == ISD::SEXTLOAD) {
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
@@ -1457,6 +1490,7 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
+ // ... or zeros.
SDValue Ops[] = {
DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
Load->getChain()
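// Editor's model of the lowering above for a byte-addressed private load:
// fetch the containing dword, shift the addressed byte down, then sign- or
// zero-extend it. Names here are illustrative only.
#include <cstdint>
static int32_t loadPrivateByte(const uint32_t *Regs, uint32_t ByteAddr,
                               bool SignExt) {
  uint32_t Word = Regs[ByteAddr >> 2];          // REGISTER_LOAD of the dword
  uint32_t Shift = (ByteAddr & 0x3) * 8;        // bit offset of the target byte
  uint32_t Byte = (Word >> Shift) & 0xFF;
  return SignExt ? static_cast<int8_t>(Byte)    // upper bits set to ones
                 : static_cast<int32_t>(Byte);  // ... or zeros
}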
@@ -1491,15 +1525,16 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue BasePtr = Store->getBasePtr();
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
+ Chain, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
- DAG.getConstant(0x3, MVT::i32));
+ DAG.getConstant(0x3, DL, MVT::i32));
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, MVT::i32));
+ DAG.getConstant(3, DL, MVT::i32));
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
Store->getValue());
@@ -1509,15 +1544,17 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
MaskedValue, ShiftAmt);
- SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
+ SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(Mask, DL, MVT::i32),
ShiftAmt);
DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
- DAG.getConstant(0xffffffff, MVT::i32));
+ DAG.getConstant(0xffffffff, DL, MVT::i32));
Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
+ Chain, Value, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
}
return SDValue();
}
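// Editor's model of the matching read-modify-write store above: clear the
// target byte lane in the dword, OR in the shifted value, write the dword back.
#include <cstdint>
static void storePrivateByte(uint32_t *Regs, uint32_t ByteAddr, uint8_t Val) {
  uint32_t &Word = Regs[ByteAddr >> 2];             // REGISTER_LOAD
  uint32_t Shift = (ByteAddr & 0x3) * 8;            // lane bit offset
  uint32_t DstMask = ~(0xFFu << Shift);             // SHL mask, then XOR ~0
  Word = (Word & DstMask) | (uint32_t(Val) << Shift);  // REGISTER_STORE
}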
@@ -1544,17 +1581,18 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
unsigned BitSize = VT.getScalarType().getSizeInBits();
- SDValue jq = DAG.getConstant(1, IntVT);
+ SDValue jq = DAG.getConstant(1, DL, IntVT);
if (sign) {
// char|short jq = ia ^ ib;
jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
// jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
+ jq = DAG.getNode(ISD::SRA, DL, VT, jq,
+ DAG.getConstant(BitSize - 2, DL, VT));
// jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
+ jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
// jq = (int)jq
jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
@@ -1603,7 +1641,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
// jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
+ jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
// dst = trunc/extend to legal type
iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
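// Editor's note on the jq computation above, in scalar form for 32-bit
// operands: an arithmetic shift of (ia ^ ib) by (BitSize - 2) followed by
// OR 1 yields +1 when the operand signs agree and -1 when they differ.
#include <cstdint>
static int32_t quotientSign(int32_t IA, int32_t IB) {
  int32_t JQ = IA ^ IB;  // sign bit set iff the signs differ
  JQ >>= 30;             // arithmetic shift: 0 or 1 vs. -2 or -1
  return JQ | 1;         // +1 or -1
}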
@@ -1631,8 +1669,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
EVT VT = Op.getValueType();
EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
+ SDValue one = DAG.getConstant(1, DL, HalfVT);
+ SDValue zero = DAG.getConstant(0, DL, HalfVT);
//HiLo split
SDValue LHS = Op.getOperand(0);
@@ -1643,12 +1681,26 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+ if (VT == MVT::i64 &&
+ DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
+ DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
+
+ SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ return;
+ }
+
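// Editor's model of the fast path above: when MaskedValueIsZero proves both
// operands fit in 32 bits, a single 32-bit divide/remainder pair, zero-padded
// back to 64 bits by BUILD_PAIR, gives the full result.
#include <cstdint>
static void udivrem64Narrow(uint64_t L, uint64_t R,
                            uint64_t &Div, uint64_t &Rem) {
  // Precondition: (L >> 32) == 0 && (R >> 32) == 0.
  Div = static_cast<uint32_t>(L) / static_cast<uint32_t>(R);
  Rem = static_cast<uint32_t>(L) % static_cast<uint32_t>(R);
}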
// Get Speculative values
SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Hi = zero;
SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, zero);
SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
SDValue DIV_Lo = zero;
@@ -1656,42 +1708,28 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const unsigned halfBitWidth = HalfVT.getSizeInBits();
for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ const unsigned bitPos = halfBitWidth - i - 1;
+ SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
+ // Get value of high bit
+ SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
+
+ // Shift
+ REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
+ // Add LHS high bit
+ REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
+
+ SDValue BIT = DAG.getConstant(1 << bitPos, DL, HalfVT);
SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
// Update REM
-
SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
}
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
Results.push_back(DIV);
Results.push_back(REM);
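// Editor's model of the rewritten loop above: classic restoring division.
// Starting from the speculative remainder, one bit of LHS_Lo is shifted in
// per iteration, and RHS is subtracted whenever the running remainder reaches
// it. This sketches only the loop, not the preceding speculation on LHS_Hi.
#include <cstdint>
static uint32_t restoringDivLoop(uint64_t REM, uint32_t LHSLo, uint64_t RHS) {
  uint32_t DIVLo = 0;
  for (unsigned i = 0; i < 32; ++i) {
    const unsigned bitPos = 31 - i;
    REM = (REM << 1) | ((LHSLo >> bitPos) & 1);  // shift in the next bit
    if (REM >= RHS) {                            // the SETUGE select
      DIVLo |= 1u << bitPos;                     // set the quotient bit
      REM -= RHS;                                // and restore the remainder
    }
  }
  return DIVLo;
}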
@@ -1712,8 +1750,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Den = Op.getOperand(1);
if (VT == MVT::i32) {
- if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
- DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
+ if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
+ DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
// TODO: We technically could do this for i64, but shouldn't that just be
// handled by something generally reducing 64-bit division on 32-bit
// values to 32-bit?
@@ -1732,11 +1770,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
// NEG_RCP_LO = -RCP_LO
- SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
RCP_LO);
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
NEG_RCP_LO, RCP_LO,
ISD::SETEQ);
// Calculate the rounding error from the URECIP instruction
@@ -1750,7 +1788,7 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
RCP_A_E, RCP_S_E,
ISD::SETEQ);
// Quotient = mulhu(Tmp0, Num)
@@ -1764,14 +1802,14 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
ISD::SETUGE);
// Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
Num_S_Remainder,
- DAG.getConstant(-1, VT),
- DAG.getConstant(0, VT),
+ DAG.getConstant(-1, DL, VT),
+ DAG.getConstant(0, DL, VT),
ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
@@ -1781,18 +1819,18 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
// Quotient_A_One = Quotient + 1
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
// Quotient_S_One = Quotient - 1
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
Quotient, Quotient_A_One, ISD::SETEQ);
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
Quotient_S_One, Div, ISD::SETEQ);
// Calculate Rem result:
@@ -1804,11 +1842,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
Remainder, Remainder_S_Den, ISD::SETEQ);
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
SDValue Ops[2] = {
Div,
@@ -1825,19 +1863,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if (VT == MVT::i32) {
- if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
- DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
- // TODO: We technically could do this for i64, but shouldn't that just be
- // handled by something generally reducing 64-bit division on 32-bit
- // values to 32-bit?
- return LowerDIVREM24(Op, DAG, true);
- }
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegOne = DAG.getConstant(-1, DL, VT);
+
+ if (VT == MVT::i32 &&
+ DAG.ComputeNumSignBits(LHS) > 8 &&
+ DAG.ComputeNumSignBits(RHS) > 8) {
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ if (VT == MVT::i64 &&
+ DAG.ComputeNumSignBits(LHS) > 32 &&
+ DAG.ComputeNumSignBits(RHS) > 32) {
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ // Hi/Lo split
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
+ SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+ SDValue Res[2] = {
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
+ };
+ return DAG.getMergeValues(Res, DL);
}
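// Editor's model of the new i64 path above: more than 32 sign bits on each
// operand means both values are faithfully represented by their low halves,
// so one 32-bit signed divide/remainder, sign-extended back, suffices.
#include <cstdint>
static void sdivrem64Narrow(int64_t L, int64_t R,
                            int64_t &Div, int64_t &Rem) {
  Div = static_cast<int32_t>(L) / static_cast<int32_t>(R);  // SIGN_EXTEND
  Rem = static_cast<int32_t>(L) % static_cast<int32_t>(R);
}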
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue NegOne = DAG.getConstant(-1, VT);
-
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
@@ -1889,8 +1939,8 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
@@ -1902,14 +1952,28 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
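// Editor's scalar model of this f64 ceil lowering: truncate, then add 1.0
// exactly when the input was positive and not already integral.
#include <cmath>
static double ceilModel(double Src) {
  double Trunc = std::trunc(Src);
  bool NeedAdd = (Src > 0.0) && (Src != Trunc);  // ordered compares
  return Trunc + (NeedAdd ? 1.0 : 0.0);
}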
+static SDValue extractF64Exponent(SDValue Hi, SDLoc SL, SelectionDAG &DAG) {
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, SL, MVT::i32),
+ DAG.getConstant(ExpBits, SL, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, SL, MVT::i32));
+
+ return Exp;
+}
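// Editor's standalone version of extractF64Exponent: bits [62:52] of an
// IEEE-754 double hold the biased exponent; subtracting 1023 unbiases it.
#include <cstdint>
#include <cstring>
static int32_t f64Exponent(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
  uint32_t ExpPart = (Hi >> 20) & 0x7FF;  // BFE_U32(Hi, 52 - 32, 11)
  return static_cast<int32_t>(ExpPart) - 1023;
}
// f64Exponent(8.0) == 3; f64Exponent(0.75) == -1.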
+
SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
assert(Op.getValueType() == MVT::f64);
- const SDValue Zero = DAG.getConstant(0, MVT::i32);
- const SDValue One = DAG.getConstant(1, MVT::i32);
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
@@ -1917,19 +1981,12 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
// exponent.
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
- const unsigned FractBits = 52;
- const unsigned ExpBits = 11;
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
- // Extract the exponent.
- SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
- Hi,
- DAG.getConstant(FractBits - 32, MVT::i32),
- DAG.getConstant(ExpBits, MVT::i32));
- SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
- DAG.getConstant(1023, MVT::i32));
+ const unsigned FractBits = 52;
// Extract the sign bit.
- const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
// Extend back to 64 bits.
@@ -1939,7 +1996,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
const SDValue FractMask
- = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
@@ -1947,7 +2004,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
- const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
@@ -1965,7 +2022,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64);
APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
- SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+ SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
@@ -1974,7 +2031,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
- SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+ SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
@@ -1989,6 +2046,101 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
}
+// XXX - May require not supporting f32 denormals?
+SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
+ SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+
+ SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
+
+ SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
+
+ SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+
+ SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
+
+ SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
+}
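// Editor's scalar model of the f32 lowering above: trunc(x), plus a
// copysigned 1.0 when the dropped fraction is at least one half, i.e.
// round-half-away-from-zero (roundf semantics).
#include <cmath>
static float froundModel(float X) {
  float T = std::trunc(X);
  float Sel = (std::fabs(X - T) >= 0.5f) ? std::copysign(1.0f, X) : 0.0f;
  return T + Sel;
}
// froundModel(2.5f) == 3.0f and froundModel(-2.5f) == -3.0f.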
+
+SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+
+ SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+ const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
+ const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
+
+ SDValue Exp = extractF64Exponent(Hi, SL, DAG);
+
+ const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
+ MVT::i64);
+
+ SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
+ SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
+ DAG.getConstant(INT64_C(0x0008000000000000), SL,
+ MVT::i64),
+ Exp);
+
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
+ SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
+ DAG.getConstant(0, SL, MVT::i64), Tmp0,
+ ISD::SETNE);
+
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
+ D, DAG.getConstant(0, SL, MVT::i64));
+ SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
+
+ K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
+ K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+ SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
+
+ SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
+ ExpEqNegOne,
+ DAG.getConstantFP(1.0, SL, MVT::f64),
+ DAG.getConstantFP(0.0, SL, MVT::f64));
+
+ SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
+
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
+ K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
+
+ return K;
+}
+
+SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFROUND32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFROUND64(Op, DAG);
+
+ llvm_unreachable("unhandled type");
+}
+
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -1999,8 +2151,8 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
- const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
+ const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
@@ -2020,9 +2172,9 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, SL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, SL, MVT::i32));
SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
SL, MVT::f64, Hi);
@@ -2030,7 +2182,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, SL, MVT::i32));
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
@@ -2051,13 +2203,13 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
// f32 uint_to_fp i64
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
- DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
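// Editor's scalar model of the path above: convert each 32-bit half to f32
// and recombine with a 2^32 scale on the high half.
#include <cstdint>
static float u64ToF32(uint64_t S0) {
  float Lo = static_cast<float>(static_cast<uint32_t>(S0));
  float Hi = static_cast<float>(static_cast<uint32_t>(S0 >> 32));
  return Hi * 4294967296.0f + Lo;  // FloatHi * 2^32 + FloatLo
}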
@@ -2078,10 +2230,10 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
- SDValue K0
- = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
- SDValue K1
- = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
+ SDValue K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), SL,
+ MVT::f64);
+ SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
+ MVT::f64);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
@@ -2180,14 +2332,14 @@ static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
- uint32_t Offset, uint32_t Width) {
+ uint32_t Offset, uint32_t Width, SDLoc DL) {
if (Width + Offset < 32) {
uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
- return DAG.getConstant(Result, MVT::i32);
+ return DAG.getConstant(Result, DL, MVT::i32);
}
- return DAG.getConstant(Src0 >> Offset, MVT::i32);
+ return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
}
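// Editor's standalone version of constantFoldBFE: move the field to the top
// of the word, then shift back down arithmetically (IntTy = int32_t) for
// sign extension or logically (IntTy = uint32_t) for zero extension.
#include <cstdint>
template <typename IntTy>
static IntTy foldBFE(IntTy Src0, uint32_t Offset, uint32_t Width) {
  if (Width + Offset < 32) {
    uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
    return static_cast<IntTy>(Shl) >> (32 - Width);
  }
  return Src0 >> Offset;
}
// foldBFE<int32_t>(0xF0, 4, 4) == -1; foldBFE<uint32_t>(0xF0, 4, 4) == 15.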
static bool usesAllNormalStores(SDNode *LoadVal) {
@@ -2292,7 +2444,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
- SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
@@ -2323,7 +2474,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
uint32_t WidthVal = Width->getZExtValue() & 0x1f;
if (WidthVal == 0)
- return DAG.getConstant(0, MVT::i32);
+ return DAG.getConstant(0, DL, MVT::i32);
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Offset)
@@ -2362,17 +2513,19 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return constantFoldBFE<int32_t>(DAG,
CVal->getSExtValue(),
OffsetVal,
- WidthVal);
+ WidthVal,
+ DL);
}
return constantFoldBFE<uint32_t>(DAG,
CVal->getZExtValue(),
OffsetVal,
- WidthVal);
+ WidthVal,
+ DL);
}
if ((OffsetVal + WidthVal) >= 32) {
- SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
+ SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
}
@@ -2476,8 +2629,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((AMDGPUISD::NodeType)Opcode) {
+ case AMDGPUISD::FIRST_NUMBER: break;
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
@@ -2488,7 +2641,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(MAD)
+ NODE_NAME_CASE(COS_HW)
+ NODE_NAME_CASE(SIN_HW)
NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
@@ -2513,6 +2667,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
+ NODE_NAME_CASE(CARRY)
+ NODE_NAME_CASE(BORROW)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
@@ -2522,6 +2678,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
+ NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
@@ -2538,9 +2695,16 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
+ case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
+ NODE_NAME_CASE(SENDMSG)
+ NODE_NAME_CASE(INTERP_MOV)
+ NODE_NAME_CASE(INTERP_P1)
+ NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
+ return nullptr;
}
SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
@@ -2638,6 +2802,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
KnownZero, KnownOne, DAG, Depth);
break;
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW: {
+ KnownZero = APInt::getHighBitsSet(32, 31);
+ break;
+ }
+
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -2680,6 +2850,10 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
}
+ case AMDGPUISD::CARRY:
+ case AMDGPUISD::BORROW:
+ return 31;
+
default:
return 1;
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 15f529c..c9f1981 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -43,12 +43,15 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
@@ -86,6 +89,7 @@ protected:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
@@ -106,7 +110,7 @@ protected:
const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
- AMDGPUTargetLowering(TargetMachine &TM);
+ AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
@@ -129,6 +133,10 @@ public:
EVT ExtVT) const override;
bool isLoadBitCastBeneficial(EVT, EVT) const override;
+
+ bool storeOfVectorConstantIsCheap(EVT MemVT,
+ unsigned NumElem,
+ unsigned AS) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
@@ -203,7 +211,7 @@ public:
namespace AMDGPUISD {
-enum {
+enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CALL, // Function call based on a single integer
@@ -214,7 +222,6 @@ enum {
DWORDADDR,
FRACT,
CLAMP,
- MAD, // Multiply + add with same result as the separate operations.
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
// Denormals handled on some parts.
@@ -247,6 +254,8 @@ enum {
LDEXP,
FP_CLASS,
DOT4,
+ CARRY,
+ BORROW,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
@@ -283,6 +292,10 @@ enum {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
+ SENDMSG,
+ INTERP_MOV,
+ INTERP_P1,
+ INTERP_P2,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
index e34a7b7..f0f10ca 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
// TODO: Implement this function
return false;
}
@@ -319,10 +315,7 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getFrameIndexOffset(MF, -1);
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -353,7 +346,7 @@ enum SISubtarget {
VI = 1
};
-enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
+static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
switch (Gen) {
default:
return SI;
@@ -363,8 +356,8 @@ enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode,
- AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
index 202183c..07042b5 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h
@@ -85,14 +85,13 @@ public:
const TargetRegisterInfo *TRI) const override;
protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const override;
+
public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
@@ -103,7 +102,7 @@ public:
int getIndirectIndexEnd(const MachineFunction &MF) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const override;
+ ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
index d657ad0..790f34c 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -78,7 +78,6 @@ def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
-def AMDGPUmad : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp, []>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
@@ -137,6 +136,13 @@ def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
[/*SDNPCommutative, SDNPAssociative*/]
>;
+// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
+def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
+
+// out = (src1 > src0) ? 1 : 0
+def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
+
+
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
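
The carry/borrow comments above, restated as plain C++ (a hedged illustration of the node semantics, not backend code):

#include <cstdint>

// CARRY: 1 when the 32-bit add src0 + src1 wraps around.
static uint32_t carry(uint32_t Src0, uint32_t Src1) {
  return (uint64_t(Src0) + uint64_t(Src1)) > 0xFFFFFFFFull ? 1 : 0;
}

// BORROW: 1 when src0 - src1 would underflow, i.e. src1 > src0.
static uint32_t borrow(uint32_t Src0, uint32_t Src1) {
  return Src1 > Src0 ? 1 : 0;
}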
@@ -213,6 +219,22 @@ def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
[]
>;
+def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPInGlue]>;
+
+def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
+def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
+ SDTypeProfile<1, 3, [SDTCisFP<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
+ SDTypeProfile<1, 4, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
+
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
index 34b1fc8..72cab39 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -23,8 +23,6 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
let Pattern = pattern;
let Itinerary = NullALU;
- let isCodeGenOnly = 1;
-
let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;
}
@@ -185,12 +183,15 @@ def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+ (ld_node node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
return L->getExtensionType() == ISD::ZEXTLOAD ||
L->getExtensionType() == ISD::EXTLOAD;
}]>;
+def az_extload : AZExtLoadBase <unindexedload>;
+
def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
@@ -360,25 +361,29 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-
-def atomic_cmp_swap_32_local :
- PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
+
+ def _32_local : PatFrag <
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i32 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+
+ def _64_local : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i64 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ }]>;
+}
-def atomic_cmp_swap_64_local :
- PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
- (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
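
The mskor predicates above switch dyn_cast to cast for good reason: dyn_cast returns null on a kind mismatch, so dereferencing its result unchecked can crash, while cast asserts. A hedged illustration:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// cast<> documents (and asserts) that N really is a MemSDNode here;
// the old dyn_cast<MemSDNode>(N)->... would have dereferenced null on
// a mismatch instead of failing loudly.
static unsigned addrSpaceOf(const llvm::SDNode *N) {
  return llvm::cast<llvm::MemSDNode>(N)->getAddressSpace();
}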
@@ -391,7 +396,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr),
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
@@ -415,11 +420,6 @@ def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
-def fmad : PatFrag <
- (ops node:$src0, node:$src1, node:$src2),
- (fadd (fmul node:$src0, node:$src1), node:$src2)
->;
-
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
@@ -440,6 +440,11 @@ def FP_ONE : PatLeaf <
[{return N->isExactlyValue(1.0);}]
>;
+def FP_HALF : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(0.5);}]
+>;
+
let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
@@ -580,22 +585,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
- SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
-class BFEPattern <Instruction BFE> : Pat <
- (and (srl i32:$x, legalshift32:$y), bfemask:$z),
- (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
-*/
-
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
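
What the new bitfield-extract predicates above compute, in plain C++ (hedged sketch):

#include <cstdint>

// isMask_32: the value is a contiguous mask starting at bit 0
// (0x1, 0x3, 0x7, ..., 0xFFFFFFFF).
static bool isMask32(uint32_t V) {
  return V != 0 && ((V + 1) & V) == 0;
}

// The popcount of such a mask is the field width, so
// (x >> shift) & mask can lower to BFE x, shift, popcount(mask).
static unsigned popCount32(uint32_t V) {
  unsigned N = 0;
  for (; V; V &= V - 1)
    ++N;
  return N;
}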
@@ -605,6 +608,20 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
+// Special conversion patterns
+
+def cvt_rpi_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+ [{ (void) N; return TM.Options.NoNaNsFPMath; }]
+>;
+
+def cvt_flr_i32_f32 : PatFrag <
+ (ops node:$src),
+ (fp_to_sint (ffloor $src)),
+ [{ (void)N; return TM.Options.NoNaNsFPMath; }]
+>;
+
/*
class UMUL24Pattern <Instruction UMUL24> : Pat <
(mul U24:$x, U24:$y),
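
Scalar semantics of the two conversion fragments above, as a hedged C++ sketch; both are only selected under no-NaNs FP math, which is what the TM.Options.NoNaNsFPMath predicate checks:

#include <cmath>
#include <cstdint>

static int32_t cvtRpi(float X) { return (int32_t)std::floor(X + 0.5f); }
static int32_t cvtFlr(float X) { return (int32_t)std::floor(X); }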
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
index 03aa32d..9565e3f 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -58,32 +58,32 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
default:
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_Register:
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx));
break;
}
case MachineOperand::MO_TargetIndex: {
assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- MCOp = MCOperand::CreateExpr(Expr);
+ MCOp = MCOperand::createExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- MCOp = MCOperand::CreateExpr(Expr);
+ MCOp = MCOperand::createExpr(Expr);
break;
}
}
@@ -92,12 +92,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
}
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- AMDGPUMCInstLower MCInstLowering(OutContext,
- MF->getTarget().getSubtarget<AMDGPUSubtarget>());
+ const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+ AMDGPUMCInstLower MCInstLowering(OutContext, STI);
#ifdef _DEBUG
StringRef Err;
- if (!TM.getSubtargetImpl()->getInstrInfo()->verifyInstruction(MI, Err)) {
+ if (!STI.getInstrInfo()->verifyInstruction(MI, Err)) {
errs() << "Warning: Illegal instruction detected: " << Err << "\n";
MI->dump();
}
@@ -113,28 +113,29 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
- if (DisasmEnabled) {
+ if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);
std::string &DisasmLine = DisasmLines.back();
raw_string_ostream DisasmStream(DisasmLine);
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *TM.getSubtargetImpl()->getInstrInfo(),
- *TM.getSubtargetImpl()->getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
+ MF->getSubtarget());
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
raw_svector_ostream CodeStream(CodeBytes);
- MCObjectStreamer &ObjStreamer = (MCObjectStreamer &)OutStreamer;
+ auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
- InstEmitter.EncodeInstruction(TmpInst, CodeStream, Fixups,
- TM.getSubtarget<MCSubtargetInfo>());
+ InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
+ MF->getSubtarget<MCSubtargetInfo>());
CodeStream.flush();
HexLines.resize(HexLines.size() + 1);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
index 0f3f9e2..21c7da6 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -15,9 +15,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
LDSSize(0),
ScratchSize(0),
IsKernel(true) {
- AttributeSet Set = MF.getFunction()->getAttributes();
- Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
- ShaderTypeAttribute);
+ Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index b81fef4..4a65bfc 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F)
LocalMemAvailable -=
- Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
}
}
}
@@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
// value from the reqd_work_group_size function attribute if it is
// available.
unsigned WorkGroupSize = 256;
- int AllocaSize = WorkGroupSize *
- Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
if (AllocaSize > LocalMemAvailable) {
DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
@@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
- GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
FunctionType *FTy = FunctionType::get(
@@ -315,12 +316,11 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Value *ReadTIDIGZ = Mod->getOrInsertFunction(
"llvm.r600.read.tidig.z", FTy, AttrSet);
-
- Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
- Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
- Value *TIdX = Builder.CreateCall(ReadTIDIGX);
- Value *TIdY = Builder.CreateCall(ReadTIDIGY);
- Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);
+ Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
+ Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
+ Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
+ Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
+ Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
Tmp0 = Builder.CreateMul(Tmp0, TIdX);
@@ -332,7 +332,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
Indices.push_back(TID);
- Value *Offset = Builder.CreateGEP(GV, Indices);
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
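
A hedged sketch of the IRBuilder API used above: CreateGEP now takes the pointee type explicitly instead of recovering it from the pointer operand's type.

#include "llvm/IR/IRBuilder.h"

// Illustrative helper, not the pass itself: index element Idx of an
// array-typed global, with the array type passed in explicitly.
static llvm::Value *elementAddr(llvm::IRBuilder<> &B, llvm::Type *ArrTy,
                                llvm::Value *Base, llvm::Value *Idx) {
  llvm::Value *Indices[] = {B.getInt32(0), Idx};
  return B.CreateGEP(ArrTy, Base, Indices);
}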
@@ -365,8 +365,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
- Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
- F->getAttributes());
+ Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
+ NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
index 57b054b..3ca0eca 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,10 +17,7 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPUGenRegisterInfo(0),
- ST(st)
- { }
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
diff --git a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
index f27576a..cfd800b 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,9 +30,8 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
static const MCPhysReg CalleeSavedReg;
- const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(const AMDGPUSubtarget &st);
+ AMDGPURegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index 87cdb5f..5288866 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -31,22 +31,9 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
- std::string Ret = "e-p:32:32";
-
- if (ST.is64bit()) {
- // 32-bit private, local, and region pointers. 64-bit global and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
- }
-
- Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
- "-v512:512-v1024:1024-v2048:2048-n32:64";
-
- return Ret;
-}
-
AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
+AMDGPUSubtarget::initializeSubtargetDependencies(StringRef TT, StringRef GPU,
+ StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
@@ -59,6 +46,9 @@ AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
FullFS += FS;
+ if (GPU == "" && Triple(TT).getArch() == Triple::amdgcn)
+ GPU = "SI";
+
ParseSubtargetFeatures(GPU, FullFS);
// FIXME: I don't think Evergreen has any useful support for
@@ -76,23 +66,26 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
: AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
- FlatAddressSpace(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false), EnableIfCvt(true),
- EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),SGPRInitBug(false),
- DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
+ FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
+ CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
+ EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
+ WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ EnableVGPRSpilling(false), SGPRInitBug(false),
+ IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
+ LDSBankCount(0),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
- InstrItins(getInstrItineraryForCPU(GPU)),
- TargetTriple(TT) {
+ InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
+
+ initializeSubtargetDependencies(TT, GPU, FS);
+
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM));
+ TLInfo.reset(new R600TargetLowering(TM, *this));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM));
+ TLInfo.reset(new SITargetLowering(TM, *this));
}
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
index eeb41d3..b262cdf 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -22,7 +22,6 @@
#include "R600ISelLowering.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
@@ -60,6 +59,7 @@ private:
bool FP64;
bool FP64Denormals;
bool FP32Denormals;
+ bool FastFMAF32;
bool CaymanISA;
bool FlatAddressSpace;
bool EnableIRStructurizer;
@@ -71,8 +71,13 @@ private:
int LocalMemorySize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
+ bool IsGCN;
+ bool GCN1Encoding;
+ bool GCN3Encoding;
+ bool CIInsts;
+ bool FeatureDisable;
+ int LDSBankCount;
- const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
@@ -81,7 +86,8 @@ private:
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
- AMDGPUSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS);
+ AMDGPUSubtarget &initializeSubtargetDependencies(StringRef TT, StringRef GPU,
+ StringRef FS);
const AMDGPUFrameLowering *getFrameLowering() const override {
return &FrameLowering;
@@ -95,7 +101,6 @@ public:
AMDGPUTargetLowering *getTargetLowering() const override {
return TLInfo.get();
}
- const DataLayout *getDataLayout() const override { return &DL; }
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
@@ -134,6 +139,10 @@ public:
return FP64Denormals;
}
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
@@ -177,6 +186,14 @@ public:
return (getGeneration() >= EVERGREEN);
}
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
bool IsIRStructurizerEnabled() const {
return EnableIRStructurizer;
}
@@ -212,10 +229,14 @@ public:
return SGPRInitBug;
}
+ int getLDSBankCount() const {
+ return LDSBankCount;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
- return getGeneration() <= NORTHERN_ISLANDS;
+ return true;
}
void overrideSchedPolicy(MachineSchedPolicy &Policy,
@@ -249,6 +270,10 @@ public:
// FIXME: Not sure what this is for other subtargets.
llvm_unreachable("do not know max waves per CU for this subtarget.");
}
+
+ bool enableSubRegLiveness() const override {
+ return false;
+ }
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
index a1da717..44c2abd 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
@@ -27,7 +28,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -38,7 +39,7 @@ using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
// Register the target
- RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+ RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
}
@@ -50,14 +51,30 @@ static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
+static std::string computeDataLayout(StringRef TT) {
+ Triple Triple(TT);
+ std::string Ret = "e-p:32:32";
+
+ if (Triple.getArch() == Triple::amdgcn) {
+ // 32-bit private, local, and region pointers. 64-bit global and constant.
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+ }
+
+ Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+ "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+ return Ret;
+}
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- TLOF(new TargetLoweringObjectFileELF()),
- Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
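
With the data-layout string now owned by the target machine (computed from the triple above), pointer widths fall out per address space. A hedged sketch of what the string encodes:

#include "llvm/IR/DataLayout.h"

// On amdgcn the string above yields 32-bit private (AS 0) and 64-bit
// global (AS 1) pointers; on r600 every address space stays 32-bit.
static void checkPointerWidths(const llvm::DataLayout &DL) {
  unsigned PrivateBits = DL.getPointerSizeInBits(0); // 32
  unsigned GlobalBits = DL.getPointerSizeInBits(1);  // 64 on amdgcn
  (void)PrivateBits;
  (void)GlobalBits;
}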
@@ -66,10 +83,33 @@ AMDGPUTargetMachine::~AMDGPUTargetMachine() {
delete TLOF;
}
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) :
+ AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) :
+ AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
- AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
@@ -78,7 +118,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
return nullptr;
@@ -86,6 +126,25 @@ public:
void addIRPasses() override;
void addCodeGenPrepare() override;
+ virtual bool addPreISel() override;
+ virtual bool addInstSelector() override;
+};
+
+class R600PassConfig : public AMDGPUPassConfig {
+public:
+ R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
+
+ bool addPreISel() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+class GCNPassConfig : public AMDGPUPassConfig {
+public:
+ GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) { }
bool addPreISel() override;
bool addInstSelector() override;
void addPreRegAlloc() override;
@@ -93,22 +152,12 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // End of anonymous namespace
-
-TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new AMDGPUPassConfig(this, PM);
-}
-//===----------------------------------------------------------------------===//
-// AMDGPU Analysis Pass Setup
-//===----------------------------------------------------------------------===//
+} // End of anonymous namespace
-void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
- // allows the AMDGPU pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAMDGPUTargetTransformInfoPass(this));
+TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
}
void AMDGPUPassConfig::addIRPasses() {
@@ -125,108 +174,119 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(ST));
addPass(createSROAPass());
}
-
TargetPassConfig::addCodeGenPrepare();
}
bool
AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createFlattenCFGPass());
if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSinkingPass());
- addPass(createSITypeRewriter());
- addPass(createSIAnnotateControlFlowPass());
- } else {
- addPass(createR600TextureIntrinsicsReplacer());
- }
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSILowerI1CopiesPass());
- addPass(createSIFixSGPRCopiesPass(*TM));
- addPass(createSIFoldOperandsPass());
- }
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+bool R600PassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createR600TextureIntrinsicsReplacer());
return false;
}
-void AMDGPUPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+void R600PassConfig::addPreRegAlloc() {
+ addPass(createR600VectorRegMerger(*TM));
+}
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createR600VectorRegMerger(*TM));
- } else {
- if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
+void R600PassConfig::addPreSched2() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ addPass(createR600EmitClauseMarkers(), false);
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID, false);
+ addPass(createR600ClauseMergePass(*TM), false);
+}
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
- }
+void R600PassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGStructurizerPass(), false);
+ addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+ addPass(&FinalizeMachineBundlesID, false);
+ addPass(createR600Packetizer(*TM), false);
+ addPass(createR600ControlFlowFinalizer(*TM), false);
+}
- addPass(createSIShrinkInstructionsPass(), false);
- addPass(createSIFixSGPRLiveRangesPass(), false);
- }
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new R600PassConfig(this, PM);
}
-void AMDGPUPassConfig::addPostRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+//===----------------------------------------------------------------------===//
+// GCN Pass Setup
+//===----------------------------------------------------------------------===//
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createSIPrepareScratchRegs(), false);
- addPass(createSIShrinkInstructionsPass(), false);
- }
+bool GCNPassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
+ addPass(createSIAnnotateControlFlowPass());
+ return false;
}
-void AMDGPUPassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+bool GCNPassConfig::addInstSelector() {
+ AMDGPUPassConfig::addInstSelector();
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(createSIFoldOperandsPass());
+ return false;
+}
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600EmitClauseMarkers(), false);
- if (ST.isIfCvtEnabled())
- addPass(&IfConverterID, false);
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- addPass(createR600ClauseMergePass(*TM), false);
- if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- addPass(createSIInsertWaits(*TM), false);
+void GCNPassConfig::addPreRegAlloc() {
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+
+ // This needs to be run directly before register allocation because
+ // earlier passes might recompute live intervals.
+ // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
+ if (getOptLevel() > CodeGenOpt::None) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
-}
-void AMDGPUPassConfig::addPreEmitPass() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(*TM), false);
- addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(*TM), false);
- addPass(createR600ControlFlowFinalizer(*TM), false);
- } else {
- addPass(createSILowerControlFlowPass(*TM), false);
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
}
+ addPass(createSIShrinkInstructionsPass(), false);
+ addPass(createSIFixSGPRLiveRangesPass(), false);
}
+void GCNPassConfig::addPostRegAlloc() {
+ addPass(createSIPrepareScratchRegs(), false);
+ addPass(createSIShrinkInstructionsPass(), false);
+}
-//===----------------------------------------------------------------------===//
-// GCN Target Machine (SI+)
-//===----------------------------------------------------------------------===//
+void GCNPassConfig::addPreSched2() {
+ addPass(createSIInsertWaits(*TM), false);
+}
-GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
- StringRef CPU, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL) :
- AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+void GCNPassConfig::addPreEmitPass() {
+ addPass(createSILowerControlFlowPass(*TM), false);
+}
+
+TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new GCNPassConfig(this, PM);
+}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
index 66b3070..785c119 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h
@@ -29,6 +29,8 @@ namespace llvm {
//===----------------------------------------------------------------------===//
class AMDGPUTargetMachine : public LLVMTargetMachine {
+private:
+
protected:
TargetLoweringObjectFile *TLOF;
AMDGPUSubtarget Subtarget;
@@ -39,22 +41,36 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUSubtarget *getSubtargetImpl() const override {
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
- /// \brief Register R600 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF;
}
};
//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+class R600TargetMachine : public AMDGPUTargetMachine {
+
+public:
+ R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+};
+
+//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
@@ -64,6 +80,8 @@ public:
GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 0bc62d0..6dacc74 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -15,11 +15,12 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -27,78 +28,8 @@ using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeAMDGPUTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
- const AMDGPUTargetMachine *TM;
- const AMDGPUSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- AMDGPUTTI(const AMDGPUTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- bool hasBranchDivergence() const override;
-
- void getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const override;
-
- PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
- "AMDGPU Target Transform Info", true, true, false)
-char AMDGPUTTI::ID = 0;
-
-ImmutablePass *
-llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
- return new AMDGPUTTI(TM);
-}
-
-bool AMDGPUTTI::hasBranchDivergence() const { return true; }
-
-void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
- UnrollingPreferences &UP) const {
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
UP.Threshold = 300; // Twice the default.
UP.MaxCount = UINT_MAX;
UP.Partial = true;
@@ -106,13 +37,15 @@ void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
// TODO: Do we want runtime unrolling?
for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
if (Alloca) {
// We want to do whatever we can to limit the number of alloca
// instructions that make it through to the code generator. allocas
@@ -130,13 +63,7 @@ void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
}
}
-AMDGPUTTI::PopcntSupportKind
-AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
-}
-
-unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
if (Vec)
return 0;
@@ -147,11 +74,9 @@ unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
-unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
- return 32;
-}
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
-unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
+unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Semi-arbitrary large amount.
return 64;
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
new file mode 100644
index 0000000..791c84e
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h
@@ -0,0 +1,78 @@
+//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file describes a TargetTransformInfo::Concept conforming object
+/// specific to the AMDGPU target machine. It uses the target's detailed
+/// information to provide more precise answers to certain TTI queries,
+/// while letting the target-independent and default TTI implementations
+/// handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETTRANSFORMINFO_H
+
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> {
+ typedef BasicTTIImplBase<AMDGPUTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+
+ const AMDGPUSubtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ bool hasBranchDivergence() { return true; }
+
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? TTI::PSK_FastHardware : TTI::PSK_Software;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+};
+
+} // end namespace llvm
+
+#endif
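
A toy sketch (illustrative names, not LLVM's) of the CRTP layering this new header relies on: BasicTTIImplBase calls back into the concrete TTI through the derived type, replacing the old ImmutablePass plumbing with plain static dispatch.

// Base resolves queries through the derived class at compile time.
template <typename Derived> struct TTIBase {
  unsigned registerWidth() {
    return static_cast<Derived *>(this)->getRegisterBitWidth(false);
  }
};

struct ToyTTI : TTIBase<ToyTTI> {
  unsigned getRegisterBitWidth(bool /*Vector*/) { return 32; }
};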
diff --git a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6e8ec..c9b25a1 100644
--- a/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -10,8 +10,8 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <deque>
using namespace llvm;
@@ -165,6 +166,7 @@ public:
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
+ Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
@@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (instr->getDebugLoc().isUnknown() == false)
+ if (instr->getDebugLoc())
DL = instr->getDebugLoc();
}
return DL;
@@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
- ++It)
- for (MachineLoop *ML : depth_first(*It))
- NestedLoops.push_back(ML);
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It : *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
if (NestedLoops.size() == 0)
return 0;
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
+ // Process nested loops outside->inside (we used push_front),
+ // so a "continue" to an outer loop won't be mistaken for a "break"
+ // out of the current loop.
int Num = 0;
- for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
- E = NestedLoops.rend(); It != E; ++It) {
- MachineLoop *ExaminedLoop = *It;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
@@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
- if (UseContinueLogical == false) {
+ if (!UseContinueLogical) {
int BranchOpcode =
TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
getBranchZeroOpcode(OldOpcode);
diff --git a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 3b4ba1a..19bffd5 100644
--- a/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -27,77 +29,107 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
-class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
- MCAsmParser &Parser;
-
-
- /// @name Auto-generated Match Functions
- /// {
-
-#define GET_ASSEMBLER_HEADER
-#include "AMDGPUGenAsmMatcher.inc"
-
- /// }
-
-public:
- AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) override;
- bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override;
-
- bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
-};
+struct OptionalOperand;
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
- Immediate
+ Immediate,
+ Register,
+ Expression
} Kind;
+ SMLoc StartLoc, EndLoc;
+
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ MCContext *Ctx;
+
+ enum ImmTy {
+ ImmTyNone,
+ ImmTyDSOffset0,
+ ImmTyDSOffset1,
+ ImmTyGDS,
+ ImmTyOffset,
+ ImmTyGLC,
+ ImmTySLC,
+ ImmTyTFE,
+ ImmTyClamp,
+ ImmTyOMod
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
};
struct ImmOp {
+ bool IsFPImm;
+ ImmTy Type;
int64_t Val;
};
+ struct RegOp {
+ unsigned RegNo;
+ int Modifiers;
+ const MCRegisterInfo *TRI;
+ bool IsForcedVOP3;
+ };
+
union {
TokOp Tok;
ImmOp Imm;
+ RegOp Reg;
+ const MCExpr *Expr;
};
void addImmOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::CreateImm(getImm()));
- }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("addRegOperands");
+ Inst.addOperand(MCOperand::createImm(getImm()));
}
+
StringRef getToken() const {
return StringRef(Tok.Data, Tok.Length);
}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
+ if (isReg())
+ addRegOperands(Inst, N);
+ else
+ addImmOperands(Inst, N);
+ }
+
+ void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createImm(
+ Reg.Modifiers == -1 ? 0 : Reg.Modifiers));
+ addRegOperands(Inst, N);
+ }
+
+ void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
+ if (isImm())
+ addImmOperands(Inst, N);
+ else {
+ assert(isExpr());
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
+ }
+
bool isToken() const override {
return Kind == Token;
}
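
The isInlineImm predicate the parser gains in the next hunk encodes a hardware rule worth spelling out: SI can fold integers -16..64 and a short list of FP constants into the instruction word itself, avoiding an extra literal dword. A hedged standalone sketch of the same test:

#include <cstdint>
#include <cstring>

static bool isInlineImmediate(int64_t V) {
  if (V >= -16 && V <= 64)
    return true;
  float F;
  uint32_t Bits = uint32_t(V);
  std::memcpy(&F, &Bits, sizeof F); // reinterpret the low 32 bits
  return F == 0.0f || F == 0.5f || F == -0.5f || F == 1.0f ||
         F == -1.0f || F == 2.0f || F == -2.0f || F == 4.0f ||
         F == -4.0f;
}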
@@ -106,52 +138,384 @@ public:
return Kind == Immediate;
}
+ bool isInlineImm() const {
+ float F = BitsToFloat(Imm.Val);
+ // TODO: Add 0.5pi for VI
+ return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) ||
+ (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0));
+ }
+
+ bool isDSOffset0() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset0;
+ }
+
+ bool isDSOffset1() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset1;
+ }
+
int64_t getImm() const {
return Imm.Val;
}
+ enum ImmTy getImmTy() const {
+ assert(isImm());
+ return Imm.Type;
+ }
+
+ bool isRegKind() const {
+ return Kind == Register;
+ }
+
bool isReg() const override {
- return false;
+ return Kind == Register && Reg.Modifiers == -1;
+ }
+
+ bool isRegWithInputMods() const {
+ return Kind == Register && (Reg.IsForcedVOP3 || Reg.Modifiers != -1);
+ }
+
+ void setModifiers(unsigned Mods) {
+ assert(isReg());
+ Reg.Modifiers = Mods;
+ }
+
+ bool hasModifiers() const {
+ assert(isRegKind());
+ return Reg.Modifiers != -1;
}
unsigned getReg() const override {
- return 0;
+ return Reg.RegNo;
+ }
+
+ bool isRegOrImm() const {
+ return isReg() || isImm();
+ }
+
+ bool isRegClass(unsigned RCID) const {
+ return Reg.TRI->getRegClass(RCID).contains(getReg());
+ }
+
+ bool isSCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc64() const {
+ return isImm() || isInlineImm() ||
+ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+ }
+
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
}
bool isMem() const override {
return false;
}
+ bool isExpr() const {
+ return Kind == Expression;
+ }
+
+ bool isSoppBrTarget() const {
+ return isExpr() || isImm();
+ }
+
SMLoc getStartLoc() const override {
- return SMLoc();
+ return StartLoc;
}
SMLoc getEndLoc() const override {
- return SMLoc();
+ return EndLoc;
}
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ enum ImmTy Type = ImmTyNone,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Type = Type;
+ Op->StartLoc = Loc;
+ Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->StartLoc = Loc;
+ Res->EndLoc = Loc;
return Res;
}
+ static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
+ SMLoc E,
+ const MCRegisterInfo *TRI,
+ bool ForceVOP3) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ Op->Reg.RegNo = RegNo;
+ Op->Reg.TRI = TRI;
+ Op->Reg.Modifiers = -1;
+ Op->Reg.IsForcedVOP3 = ForceVOP3;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ Op->Expr = Expr;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ bool isDSOffset() const;
+ bool isDSOffset01() const;
bool isSWaitCnt() const;
+ bool isMubufOffset() const;
+};
+
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ const MCInstrInfo &MII;
+ MCAsmParser &Parser;
+
+ unsigned ForcedEncodingSize;
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0) {
+
+ if (STI.getFeatureBits().none()) {
+ // Set default features.
+ STI.ToggleFeature("SOUTHERN_ISLANDS");
+ }
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ unsigned getForcedEncodingSize() const {
+ return ForcedEncodingSize;
+ }
+
+ void setForcedEncodingSize(unsigned Size) {
+ ForcedEncodingSize = Size;
+ }
+
+ bool isForcedVOP3() const {
+ return ForcedEncodingSize == 64;
+ }
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default = 0);
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseOptionalOps(
+ const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands);
+
+ void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseOffset(OperandVector &Operands);
+ OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseGLC(OperandVector &Operands);
+ OperandMatchResultTy parseSLC(OperandVector &Operands);
+ OperandMatchResultTy parseTFE(OperandVector &Operands);
+
+ OperandMatchResultTy parseDMask(OperandVector &Operands);
+ OperandMatchResultTy parseUNorm(OperandVector &Operands);
+ OperandMatchResultTy parseR128(OperandVector &Operands);
+
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
+};
+
+struct OptionalOperand {
+ const char *Name;
+ AMDGPUOperand::ImmTy Type;
+ bool IsBit;
+ int64_t Default;
+ bool (*ConvertResult)(int64_t&);
};
}
+static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+ if (IsVgpr) {
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::VGPR_32RegClassID;
+ case 2: return AMDGPU::VReg_64RegClassID;
+ case 3: return AMDGPU::VReg_96RegClassID;
+ case 4: return AMDGPU::VReg_128RegClassID;
+ case 8: return AMDGPU::VReg_256RegClassID;
+ case 16: return AMDGPU::VReg_512RegClassID;
+ }
+ }
+
+ switch (RegWidth) {
+ default: llvm_unreachable("Unknown register width");
+ case 1: return AMDGPU::SGPR_32RegClassID;
+ case 2: return AMDGPU::SGPR_64RegClassID;
+ case 4: return AMDGPU::SReg_128RegClassID;
+ case 8: return AMDGPU::SReg_256RegClassID;
+ case 16: return AMDGPU::SReg_512RegClassID;
+ }
+}
+
+static unsigned getRegForName(const StringRef &RegName) {
+
+ return StringSwitch<unsigned>(RegName)
+ .Case("exec", AMDGPU::EXEC)
+ .Case("vcc", AMDGPU::VCC)
+ .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("m0", AMDGPU::M0)
+ .Case("scc", AMDGPU::SCC)
+ .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("vcc_lo", AMDGPU::VCC_LO)
+ .Case("vcc_hi", AMDGPU::VCC_HI)
+ .Case("exec_lo", AMDGPU::EXEC_LO)
+ .Case("exec_hi", AMDGPU::EXEC_HI)
+ .Default(0);
+}
+
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- return true;
+ const AsmToken Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ const StringRef &RegName = Tok.getString();
+ RegNo = getRegForName(RegName);
+
+ if (RegNo) {
+ Parser.Lex();
+ return false;
+ }
+
+ // Match vgprs and sgprs
+ if (RegName[0] != 's' && RegName[0] != 'v')
+ return true;
+
+ bool IsVgpr = RegName[0] == 'v';
+ unsigned RegWidth;
+ unsigned RegIndexInClass;
+ if (RegName.size() > 1) {
+ // We have a 32-bit register
+ RegWidth = 1;
+ if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+ return true;
+ Parser.Lex();
+ } else {
+ // We have a register wider than 32 bits.
+
+ int64_t RegLo, RegHi;
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegLo))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Colon))
+ return true;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(RegHi))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return true;
+
+ Parser.Lex();
+ RegWidth = (RegHi - RegLo) + 1;
+ if (IsVgpr) {
+ // VGPR registers aren't aligned.
+ RegIndexInClass = RegLo;
+ } else {
+ // SGPR registers are aligned. Max alignment is 4 dwords.
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ }
+ }
+
+ const MCRegisterInfo *TRC = getContext().getRegisterInfo();
+ unsigned RC = getRegClass(IsVgpr, RegWidth);
+ if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
+ return true;
+ RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
+ return false;
+}
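For reference, the width-to-class mapping and the SGPR alignment rule in ParseRegister can be exercised in isolation. A minimal standalone sketch, using hypothetical register ranges and mirroring the arithmetic above rather than calling into the parser:

    #include <algorithm>
    #include <cassert>
    #include <cstdio>

    // Mirrors the index computation in ParseRegister for a range reg[Lo:Hi].
    static unsigned indexInClass(bool IsVgpr, unsigned RegLo, unsigned RegHi) {
      unsigned RegWidth = (RegHi - RegLo) + 1;
      if (IsVgpr)
        return RegLo;                        // VGPR ranges are not aligned.
      return RegLo / std::min(RegWidth, 4u); // SGPRs align to at most 4 dwords.
    }

    int main() {
      assert(indexInClass(false, 4, 7) == 1);  // s[4:7]: index 1 in SReg_128
      assert(indexInClass(false, 8, 15) == 2); // s[8:15]: alignment capped at 4
      assert(indexInClass(true, 2, 3) == 2);   // v[2:3]: raw starting index
      std::printf("ok\n");
    }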
+
+unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+
+ if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
+ (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
+ return Match_InvalidOperand;
+
+ return Match_Success;
}
@@ -163,22 +527,52 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
- case Match_InvalidOperand: {
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction not supported on this GPU");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ if (isForcedVOP3()) {
+ // If 64-bit encoding has been forced, we can end up with no
+ // clamp or omod operands if none of the registers have modifiers,
+ // so we need to add these to the operand list.
+ AMDGPUOperand &LastOp =
+ ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
+ if (LastOp.isRegKind() ||
+ (LastOp.isImm() &&
+ LastOp.getImmTy() != AMDGPUOperand::ImmTyNone)) {
+ SMLoc S = Parser.getTok().getLoc();
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyClamp));
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOMod));
+ bool Res = MatchAndEmitInstruction(IDLoc, Opcode, Operands,
+ Out, ErrorInfo,
+ MatchingInlineAsm);
+ if (!Res)
+ return Res;
+ }
+
+ }
+ return Error(IDLoc, "too few operands for instruction");
+ }
+
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
}
- return Error(IDLoc, "invalid operand for instruction");
- }
}
llvm_unreachable("Implement any new match types added!");
}
@@ -187,6 +581,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return true;
}
+static bool operandsHaveModifiers(const OperandVector &Operands) {
+
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isRegKind() && Op.hasModifiers())
+ return true;
+ if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
+ Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
+ return true;
+ }
+ return false;
+}
+
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
@@ -195,17 +602,105 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// If we successfully parsed the operand or if there was an error parsing,
// we are done.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ //
+ // If we are parsing after we reach EndOfStatement then this means we
+ // are appending default values to the Operands list. This is only done
+ // by custom parser, so we shouldn't continue on to the generic parsing.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ getLexer().is(AsmToken::EndOfStatement))
return ResTy;
+ bool Negate = false, Abs = false;
+ if (getLexer().getKind()== AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
+ if (getLexer().getKind() == AsmToken::Pipe) {
+ Parser.Lex();
+ Abs = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
+ SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double-precision floating-point
+ // value is used. I'm not sure of the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)BitsToDouble(IntVal));
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Identifier: {
+ SMLoc S, E;
+ unsigned RegNo;
+ if (!ParseRegister(RegNo, S, E)) {
+
+ bool HasModifiers = operandsHaveModifiers(Operands);
+ unsigned Modifiers = 0;
+
+ if (Negate)
+ Modifiers |= 0x1;
+
+ if (Abs) {
+ if (getLexer().getKind() != AsmToken::Pipe)
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ Modifiers |= 0x2;
+ }
+
+ if (Modifiers && !HasModifiers) {
+ // We are adding a modifier to src1 or src2 and previous sources
+ // don't have modifiers, so we need to go back and set empty modifiers
+ // for each previous source.
+ for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
+ --PrevRegIdx) {
+
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
+ RegOp.setModifiers(0);
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateReg(
+ RegNo, S, E, getContext().getRegisterInfo(),
+ isForcedVOP3()));
+
+ if (HasModifiers || Modifiers) {
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
+ RegOp.setModifiers(Modifiers);
+
+ }
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
+ S));
+ Parser.Lex();
+ }
+ return MatchOperand_Success;
+ }
default:
return MatchOperand_NoMatch;
}
@@ -214,23 +709,283 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+
+ // Clear any forced encodings from the previous instruction.
+ setForcedEncodingSize(0);
+
+ if (Name.endswith("_e64"))
+ setForcedEncodingSize(64);
+ else if (Name.endswith("_e32"))
+ setForcedEncodingSize(32);
+
// Add the instruction mnemonic
Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
- if (getLexer().is(AsmToken::EndOfStatement))
- return false;
+ while (!getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+
+ // Eat the comma or space if there is one.
+ if (getLexer().is(AsmToken::Comma))
+ Parser.Lex();
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
- switch (Res) {
- case MatchOperand_Success: return false;
- case MatchOperand_ParseFail: return Error(NameLoc,
- "Failed parsing operand");
- case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ switch (Res) {
+ case MatchOperand_Success: break;
+ case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
+ "failed parsing operand.");
+ case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
+ "not a valid operand.");
+ }
}
- return true;
+
+ // Once we reach the end of the statement, continue parsing so we can add
+ // default values for optional arguments.
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Error(getLexer().getLoc(), "failed parsing operand.");
+ }
+ return false;
+}
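The loop above deliberately keeps parsing past EndOfStatement so the custom parsers can append defaults for optional operands, and the _e64/_e32 suffixes recorded here are what checkTargetMatchPredicate later enforces. A standalone sketch of the suffix rule, with assumed mnemonics for illustration:

    #include <cassert>
    #include <string>

    // Mirrors the mnemonic-suffix handling in ParseInstruction: "_e64" forces
    // the 64-bit (VOP3) encoding, "_e32" the 32-bit one, otherwise none.
    static unsigned forcedEncodingSize(const std::string &Name) {
      auto ends = [&](const std::string &Suf) {
        return Name.size() >= Suf.size() &&
               Name.compare(Name.size() - Suf.size(), Suf.size(), Suf) == 0;
      };
      if (ends("_e64")) return 64;
      if (ends("_e32")) return 32;
      return 0;
    }

    int main() {
      assert(forcedEncodingSize("v_add_f32_e64") == 64);
      assert(forcedEncodingSize("v_add_f32_e32") == 32);
      assert(forcedEncodingSize("v_add_f32") == 0); // matcher picks encoding
    }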
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default) {
+
+ // We are at the end of the statement, and this is a default argument, so
+ // use a default value.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Int = Default;
+ return MatchOperand_Success;
+ }
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_NoMatch;
+ case AsmToken::Identifier: {
+ StringRef OffsetName = Parser.getTok().getString();
+ if (!OffsetName.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ if (getParser().parseAbsoluteExpression(Int))
+ return MatchOperand_ParseFail;
+ break;
+ }
+ }
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Offset = 0;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
+ return MatchOperand_Success;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy) {
+ int64_t Bit = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // If we are at the end of the statement, this is a default argument and
+ // the default value is used; otherwise try to parse the named bit.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ switch(getLexer().getKind()) {
+ case AsmToken::Identifier: {
+ StringRef Tok = Parser.getTok().getString();
+ if (Tok == Name) {
+ Bit = 1;
+ Parser.Lex();
+ } else if (Tok.startswith("no") && Tok.endswith(Name)) {
+ Bit = 0;
+ Parser.Lex();
+ } else {
+ return MatchOperand_NoMatch;
+ }
+ break;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ return MatchOperand_Success;
+}
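A standalone sketch of the named-bit recognition above: the bare name sets the bit, a "no"-prefixed spelling clears it, and anything else is no match (-1 standing in for MatchOperand_NoMatch; glc is just the example name):

    #include <cstdio>
    #include <string>

    static int namedBit(const std::string &Tok, const std::string &Name) {
      if (Tok == Name)
        return 1;
      // Mirrors Tok.startswith("no") && Tok.endswith(Name) in the parser.
      if (Tok.size() >= Name.size() + 2 && Tok.compare(0, 2, "no") == 0 &&
          Tok.compare(Tok.size() - Name.size(), Name.size(), Name) == 0)
        return 0;
      return -1;
    }

    int main() {
      std::printf("%d %d %d\n", namedBit("glc", "glc"),  // 1
                  namedBit("noglc", "glc"),              // 0
                  namedBit("slc", "glc"));               // -1
    }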
+
+static bool operandsHasOptionalOp(const OperandVector &Operands,
+ const OptionalOperand &OOp) {
+ for (unsigned i = 0; i < Operands.size(); i++) {
+ const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]);
+ if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) ||
+ (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name))
+ return true;
+
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ for (const OptionalOperand &Op : OptionalOps) {
+ if (operandsHasOptionalOp(Operands, Op))
+ continue;
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ int64_t Value;
+ if (Op.IsBit) {
+ Res = parseNamedBit(Op.Name, Operands, Op.Type);
+ if (Res == MatchOperand_NoMatch)
+ continue;
+ return Res;
+ }
+
+ Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
+
+ if (Res == MatchOperand_NoMatch)
+ continue;
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ if (Op.ConvertResult && !Op.ConvertResult(Value)) {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
}
//===----------------------------------------------------------------------===//
+// ds
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand DSOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+static const OptionalOperand DSOptionalOpsOff01 [] = {
+ {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
+ {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOps, Operands);
+}
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOpsOff01, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
+ if (Res == MatchOperand_NoMatch) {
+ Operands.push_back(AMDGPUOperand::CreateImm(0, S,
+ AMDGPUOperand::ImmTyOffset));
+ Res = MatchOperand_Success;
+ }
+ return Res;
+}
+
+bool AMDGPUOperand::isDSOffset() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
+bool AMDGPUOperand::isDSOffset01() const {
+ return isImm() && isUInt<8>(getImm());
+}
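The two predicates reflect the DS encodings: single-offset forms carry one 16-bit field, while read2/write2-style forms split it into independent 8-bit offset0/offset1 fields (printed by the InstPrinter changes later in this patch). A minimal standalone equivalent of the bounds checks:

    #include <cassert>
    #include <cstdint>

    // Same role as isUInt<16>/isUInt<8> above; renamed to avoid the LLVM name.
    template <unsigned N> static bool isUIntN(int64_t V) {
      return V >= 0 && V < (int64_t(1) << N);
    }

    int main() {
      assert(isUIntN<16>(65535) && !isUIntN<16>(65536)); // isDSOffset bound
      assert(isUIntN<8>(255) && !isUIntN<8>(256));       // isDSOffset01 bound
    }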
+
+void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
+ const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
+ unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+
+ ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
+ ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+ bool GDSOnly = false;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "gds") {
+ GDSOnly = true;
+ continue;
+ }
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
+
+ if (!GDSOnly) {
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ }
+ Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+}
+
+//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
@@ -284,6 +1039,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// expcnt [6:4]
// lgkmcnt [10:8]
int64_t CntVal = 0x77f;
+ SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
@@ -300,7 +1056,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
return MatchOperand_Success;
}
@@ -308,6 +1064,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
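The packing behind the 0x77f default is easy to reproduce: vmcnt occupies bits [3:0], expcnt [6:4], and lgkmcnt [10:8], so 0x77f leaves every counter at its maximum, i.e. it waits on nothing. A small standalone sketch:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Packs the three s_waitcnt fields: vmcnt [3:0], expcnt [6:4],
    // lgkmcnt [10:8].
    static uint32_t encodeWaitcnt(uint32_t Vm, uint32_t Exp, uint32_t Lgkm) {
      return (Vm & 0xF) | ((Exp & 0x7) << 4) | ((Lgkm & 0x7) << 8);
    }

    int main() {
      assert(encodeWaitcnt(15, 7, 7) == 0x77f); // the parser's default
      // "s_waitcnt vmcnt(0)" waits on vector memory only.
      std::printf("0x%x\n", encodeWaitcnt(0, 7, 7)); // prints 0x770
    }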
+//===----------------------------------------------------------------------===//
+// sopp branch targets
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ switch (getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer: {
+ int64_t Imm;
+ if (getParser().parseAbsoluteExpression(Imm))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ return MatchOperand_Success;
+ }
+
+ case AsmToken::Identifier:
+ Operands.push_back(AMDGPUOperand::CreateExpr(
+ MCSymbolRefExpr::Create(getContext().getOrCreateSymbol(
+ Parser.getTok().getString()), getContext()), S));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// mubuf
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand MubufOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
+};
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(MubufOptionalOps, Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
+ return parseIntWithPrefix("offset", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
+ return parseNamedBit("glc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
+ return parseNamedBit("slc", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
+ return parseNamedBit("tfe", Operands);
+}
+
+bool AMDGPUOperand::isMubufOffset() const {
+ return isImm() && isUInt<12>(getImm());
+}
+
+void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
+ const OperandVector &Operands) {
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ assert(OptionalIdx.size() == 4);
+
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC];
+ unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC];
+ unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE];
+
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
+ return parseIntWithPrefix("dmask", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
+ return parseNamedBit("unorm", Operands);
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseR128(OperandVector &Operands) {
+ return parseNamedBit("r128", Operands);
+}
+
+//===----------------------------------------------------------------------===//
+// vop3
+//===----------------------------------------------------------------------===//
+
+static bool ConvertOmodMul(int64_t &Mul) {
+ if (Mul != 1 && Mul != 2 && Mul != 4)
+ return false;
+
+ Mul >>= 1;
+ return true;
+}
+
+static bool ConvertOmodDiv(int64_t &Div) {
+ if (Div == 1) {
+ Div = 0;
+ return true;
+ }
+
+ if (Div == 2) {
+ Div = 3;
+ return true;
+ }
+
+ return false;
+}
+
+static const OptionalOperand VOP3OptionalOps [] = {
+ {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
+ {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
+ {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
+};
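The two converters fold the assembler's mul:/div: spellings into the 2-bit VOP3 omod field (0 = none, 1 = *2, 2 = *4, 3 = /2). A standalone copy of that logic with a few checks:

    #include <cassert>
    #include <cstdint>

    static bool omodMul(int64_t &Mul) {
      if (Mul != 1 && Mul != 2 && Mul != 4)
        return false;
      Mul >>= 1; // 1 -> 0 (none), 2 -> 1, 4 -> 2
      return true;
    }

    static bool omodDiv(int64_t &Div) {
      if (Div == 1) { Div = 0; return true; }
      if (Div == 2) { Div = 3; return true; }
      return false;
    }

    int main() {
      int64_t V = 4;
      assert(omodMul(V) && V == 2); // mul:4 -> field value 2
      V = 2;
      assert(omodDiv(V) && V == 3); // div:2 -> field value 3
      V = 3;
      assert(!omodMul(V));          // mul:3 is rejected
    }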
+
+static bool isVOP3(OperandVector &Operands) {
+ if (operandsHaveModifiers(Operands))
+ return true;
+
+ AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
+
+ if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
+ return true;
+
+ if (Operands.size() >= 5)
+ return true;
+
+ if (Operands.size() > 3) {
+ AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
+ if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
+ Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
+ return true;
+ }
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
+
+ // The value returned by operandsHaveModifiers() may change after parsing
+ // an operand, so store the original value here.
+ bool HasModifiers = operandsHaveModifiers(Operands);
+
+ bool IsVOP3 = isVOP3(Operands);
+ if (HasModifiers || IsVOP3 ||
+ getLexer().isNot(AsmToken::EndOfStatement) ||
+ getForcedEncodingSize() == 64) {
+
+ AMDGPUAsmParser::OperandMatchResultTy Res =
+ parseOptionalOps(VOP3OptionalOps, Operands);
+
+ if (!HasModifiers && Res == MatchOperand_Success) {
+ // We have added a modifier operation, so we need to make sure all
+ // previous register operands have modifiers
+ for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
+ if (Op.isReg())
+ Op.setModifiers(0);
+ }
+ }
+ return Res;
+ }
+ return MatchOperand_NoMatch;
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
+ unsigned i = 2;
+
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ if (operandsHaveModifiers(Operands)) {
+ for (unsigned e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ if (Op.isRegWithInputMods()) {
+ ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2);
+ continue;
+ }
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp];
+ unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod];
+
+ ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1);
+ ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1);
+ } else {
+ for (unsigned e = Operands.size(); i != e; ++i)
+ ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1);
+ }
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeR600AsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
diff --git a/contrib/llvm/lib/Target/R600/CIInstructions.td b/contrib/llvm/lib/Target/R600/CIInstructions.td
index fdb58bb..560aa78 100644
--- a/contrib/llvm/lib/Target/R600/CIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/CIInstructions.td
@@ -11,9 +11,9 @@
def isCIVI : Predicate <
- "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>, AssemblerPredicate<"FeatureCIInsts">;
//===----------------------------------------------------------------------===//
// VOP1 Instructions
diff --git a/contrib/llvm/lib/Target/R600/CaymanInstructions.td b/contrib/llvm/lib/Target/R600/CaymanInstructions.td
index 433c3fc..ba4df82 100644
--- a/contrib/llvm/lib/Target/R600/CaymanInstructions.td
+++ b/contrib/llvm/lib/Target/R600/CaymanInstructions.td
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
+def isCayman : Predicate<"Subtarget->hasCaymanISA()">;
//===----------------------------------------------------------------------===//
// Cayman Instructions
diff --git a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td b/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
index 299d1fa..7adcd46 100644
--- a/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/R600/EvergreenInstructions.td
@@ -14,14 +14,14 @@
//===----------------------------------------------------------------------===//
def isEG : Predicate<
- "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
- "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "!Subtarget.hasCaymanISA()"
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
+ "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "!Subtarget->hasCaymanISA()"
>;
def isEGorCayman : Predicate<
- "Subtarget.getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
- "Subtarget.getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
+ "Subtarget->getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS"
>;
//===----------------------------------------------------------------------===//
@@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-// XXX: This pattern is broken, disabling for now. See comment in
-// AMDGPUInstructions.td for more info.
-// def : BFEPattern <BFE_UINT_eg>;
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
VecALU
@@ -336,6 +335,9 @@ defm CUBE_eg : CUBE_Common<0xC0>;
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
+def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
+
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
@@ -590,8 +592,6 @@ def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
-def : FROUNDPat <CNDGE_eg, CNDGT_eg>;
-
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 0; // VALID_PIXEL_MODE
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index b66ed10..279c3eb 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
OS.flush();
printInstruction(MI, OS);
@@ -89,14 +89,24 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset0:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset1:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
}
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
@@ -117,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
O << " tfe";
}
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+ const MCRegisterInfo &MRI) {
switch (reg) {
case AMDGPU::VCC:
O << "vcc";
@@ -208,6 +219,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
@@ -277,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
default:
- printRegOperand(Op.getReg(), O);
+ printRegOperand(Op.getReg(), O, MRI);
break;
}
} else if (Op.isImm()) {
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d43c7a..14fb511 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -29,7 +29,10 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ static void printRegOperand(unsigned RegNo, raw_ostream &O,
+ const MCRegisterInfo &MRI);
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -44,10 +47,12 @@ private:
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printImmediate32(uint32_t I, raw_ostream &O);
void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 5fb311b..2605ca5 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -24,12 +24,12 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
- AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
@@ -67,7 +67,7 @@ public:
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
- Asm.writeSectionData(I, Layout);
+ Asm.writeSectionData(&*I, Layout);
}
}
@@ -115,8 +115,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
}
bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- for (unsigned i = 0; i < Count; ++i)
- OW->Write8(0);
+ OW->WriteZeros(Count);
return true;
}
@@ -131,7 +130,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
public:
ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAMDGPUELFObjectWriter(OS);
}
};
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 5fb94d5..59f45ff 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -33,7 +33,7 @@ protected:
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
: MCELFObjectTargetWriter(false, 0, 0, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 83403ba..1bc205d 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -59,63 +60,31 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, MRI, STI);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
-}
-
extern "C" void LLVMInitializeR600TargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index bc8cd53..9a7548e 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -27,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheAMDGPUTarget;
@@ -34,17 +36,16 @@ extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index dc1344f..a809564 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
+ void operator=(const R600MCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
@@ -41,7 +41,7 @@ public:
: MCII(mcii), MRI(mri) { }
/// \brief Encode the instruction and write it to the OS.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -81,12 +81,12 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
-void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -99,7 +99,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) {
+ if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
InstWord2 |= 1 << 19; // Mega-Fetch bit
}
@@ -132,7 +132,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit((uint32_t) 0, OS);
} else {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
- if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
((Desc.TSFlags & R600_InstFlag::OP1) ||
Desc.TSFlags & R600_InstFlag::OP2)) {
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 640de3f..65a0eeb 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -31,15 +31,9 @@ using namespace llvm;
namespace {
-/// \brief Helper type used in encoding
-typedef union {
- int32_t I;
- float F;
-} IntFloatUnion;
-
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
+ void operator=(const SIMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
MCContext &Ctx;
@@ -48,17 +42,17 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
/// \brief Encode an fp or int literal
- uint32_t getLitEncoding(const MCOperand &MO) const;
+ uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
: MCII(mcii), MRI(mri), Ctx(ctx) { }
- ~SIMCCodeEmitter() { }
+ ~SIMCCodeEmitter() override {}
/// \brief Encode the instruction and write it to the OS.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -78,7 +72,6 @@ public:
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
@@ -91,52 +84,102 @@ bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
-uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+// Returns the encoding value to use if the given integer is an integer inline
+// immediate value, or 0 if it is not.
+template <typename IntTy>
+static uint32_t getIntInlineImmEncoding(IntTy Imm) {
+ if (Imm >= 0 && Imm <= 64)
+ return 128 + Imm;
- IntFloatUnion Imm;
- if (MO.isImm())
- Imm.I = MO.getImm();
- else if (MO.isFPImm())
- Imm.F = MO.getFPImm();
- else if (MO.isExpr())
- return 255;
- else
- return ~0;
+ if (Imm >= -16 && Imm <= -1)
+ return 192 + std::abs(Imm);
+
+ return 0;
+}
+
+static uint32_t getLit32Encoding(uint32_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == FloatToBits(0.5f))
+ return 240;
+
+ if (Val == FloatToBits(-0.5f))
+ return 241;
+
+ if (Val == FloatToBits(1.0f))
+ return 242;
+
+ if (Val == FloatToBits(-1.0f))
+ return 243;
+
+ if (Val == FloatToBits(2.0f))
+ return 244;
+
+ if (Val == FloatToBits(-2.0f))
+ return 245;
- if (Imm.I >= 0 && Imm.I <= 64)
- return 128 + Imm.I;
+ if (Val == FloatToBits(4.0f))
+ return 246;
+
+ if (Val == FloatToBits(-4.0f))
+ return 247;
+
+ return 255;
+}
- if (Imm.I >= -16 && Imm.I <= -1)
- return 192 + abs(Imm.I);
+static uint32_t getLit64Encoding(uint64_t Val) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
- if (Imm.F == 0.5f)
+ if (Val == DoubleToBits(0.5))
return 240;
- if (Imm.F == -0.5f)
+ if (Val == DoubleToBits(-0.5))
return 241;
- if (Imm.F == 1.0f)
+ if (Val == DoubleToBits(1.0))
return 242;
- if (Imm.F == -1.0f)
+ if (Val == DoubleToBits(-1.0))
return 243;
- if (Imm.F == 2.0f)
+ if (Val == DoubleToBits(2.0))
return 244;
- if (Imm.F == -2.0f)
+ if (Val == DoubleToBits(-2.0))
return 245;
- if (Imm.F == 4.0f)
+ if (Val == DoubleToBits(4.0))
return 246;
- if (Imm.F == -4.0f)
+ if (Val == DoubleToBits(-4.0))
return 247;
return 255;
}
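The encoding values chosen here can be sanity-checked standalone: 128+v for integers 0..64, 192+|v| for -16..-1, 240..247 for the eight inline float constants in the order listed above, and 255 when a 32-bit literal must follow the instruction. A minimal sketch, with std::memcpy standing in for FloatToBits:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t floatBits(float F) {
      uint32_t V;
      std::memcpy(&V, &F, sizeof(V));
      return V;
    }

    static uint32_t lit32(uint32_t Val) {
      int32_t S = static_cast<int32_t>(Val);
      if (S >= 0 && S <= 64)
        return 128 + S;
      if (S >= -16 && S <= -1)
        return 192 - S; // 192 + |S|
      static const float FP[] = {0.5f, -0.5f, 1.0f, -1.0f,
                                 2.0f, -2.0f, 4.0f, -4.0f};
      for (int i = 0; i < 8; ++i)
        if (Val == floatBits(FP[i]))
          return 240 + i;
      return 255;
    }

    int main() {
      assert(lit32(64) == 192);                         // top inline integer
      assert(lit32(static_cast<uint32_t>(-16)) == 208); // 192 + 16
      assert(lit32(floatBits(-4.0f)) == 247);
      assert(lit32(1000) == 255); // needs a trailing literal dword
    }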
-void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isExpr())
+ return 255;
+
+ assert(!MO.isFPImm());
+
+ if (!MO.isImm())
+ return ~0;
+
+ if (OpSize == 4)
+ return getLit32Encoding(static_cast<uint32_t>(MO.getImm()));
+
+ assert(OpSize == 8);
+
+ return getLit64Encoding(static_cast<uint64_t>(MO.getImm()));
+}
+
+void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -158,25 +201,24 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (!isSrcOperand(Desc, i))
continue;
+ int RCID = Desc.OpInfo[i].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
// Is this operand a literal immediate?
const MCOperand &Op = MI.getOperand(i);
- if (getLitEncoding(Op) != 255)
+ if (getLitEncoding(Op, RC.getSize()) != 255)
continue;
// Yes! Encode it
- IntFloatUnion Imm;
+ int64_t Imm = 0;
+
if (Op.isImm())
- Imm.I = Op.getImm();
- else if (Op.isFPImm())
- Imm.F = Op.getFPImm();
- else {
- assert(Op.isExpr());
- // This will be replaced with a fixup value.
- Imm.I = 0;
- }
+ Imm = Op.getImm();
+ else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
+ llvm_unreachable("Must be immediate or expr");
for (unsigned j = 0; j < 4; j++) {
- OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
}
// Only one literal value allowed
@@ -192,7 +234,7 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
return 0;
}
@@ -210,7 +252,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
MCFixupKind Kind;
const MCSymbol *Sym =
- Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
if (&Expr->getSymbol() == Sym) {
// Add the offset to the beginning of the constant values.
@@ -219,7 +261,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
// This is used for constant data stored in .rodata.
Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
}
- Fixups.push_back(MCFixup::Create(4, Expr, Kind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
}
// Figure out the operand number, needed for isSrcOperand check
@@ -231,7 +273,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (isSrcOperand(Desc, OpNo)) {
- uint32_t Enc = getLitEncoding(MO);
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ const MCRegisterClass &RC = MRI.getRegClass(RCID);
+
+ uint32_t Enc = getLitEncoding(MO, RC.getSize());
if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
return Enc;
diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td
index e5fef0c..c0ffede 100644
--- a/contrib/llvm/lib/Target/R600/Processors.td
+++ b/contrib/llvm/lib/Target/R600/Processors.td
@@ -83,9 +83,13 @@ def : Proc<"cayman", R600_VLIW4_Itin,
// Southern Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"SI", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
-def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+ [FeatureSouthernIslands, FeatureFastFMAF32]
+>;
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
@@ -99,15 +103,24 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
// Sea Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"kabini", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]
+>;
-def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+ [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+>;
-def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
+def : ProcessorModel<"mullins", SIQuarterSpeedModel,
+ [FeatureSeaIslands, FeatureLDSBankCount16]>;
//===----------------------------------------------------------------------===//
// Volcanic Islands
diff --git a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp b/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
index f07be00..3cb9021 100644
--- a/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
@@ -14,11 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index edaf278..c8f37f6 100644
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -39,14 +39,14 @@ struct CFStack {
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
- const AMDGPUSubtarget &ST;
+ const AMDGPUSubtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget &st, unsigned ShaderType) : ST(st),
+ CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
@@ -76,11 +76,11 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
+ if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
getLoopDepth() > 1)
return true;
- if (!ST.hasCFAluBug())
+ if (!ST->hasCFAluBug())
return false;
switch(Opcode) {
@@ -91,7 +91,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
case AMDGPU::CF_ALU_CONTINUE:
if (CurrentSubEntries == 0)
return false;
- if (ST.getWavefrontSize() == 64) {
+ if (ST->getWavefrontSize() == 64) {
// We are being conservative here. We only require this work-around if
// CurrentSubEntries > 3 &&
// (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
@@ -102,7 +102,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
// resources without any problems.
return CurrentSubEntries > 3;
} else {
- assert(ST.getWavefrontSize() == 32);
+ assert(ST->getWavefrontSize() == 32);
// We are being conservative here. We only require the work-around if
// CurrentSubEntries > 7 &&
// (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
@@ -118,8 +118,8 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
default:
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
- assert(!ST.hasCaymanISA());
- if (ST.getGeneration() <= AMDGPUSubtarget::R700) {
+ assert(!ST->hasCaymanISA());
+ if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
@@ -132,7 +132,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
@@ -153,13 +153,14 @@ void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
case AMDGPU::CF_PUSH_EG:
case AMDGPU::CF_ALU_PUSH_BEFORE:
if (!isWQM) {
- if (!ST.hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+ if (!ST->hasCaymanISA() &&
+ !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
- ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
- !ST.hasCaymanISA() &&
+ ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ !ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
else
@@ -219,7 +220,7 @@ private:
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
- const AMDGPUSubtarget &ST;
+ const AMDGPUSubtarget *ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -233,7 +234,7 @@ private:
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
@@ -266,7 +267,7 @@ private:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
break;
case CF_END:
- if (ST.hasCaymanISA()) {
+ if (ST->hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
break;
}
@@ -467,17 +468,14 @@ private:
}
public:
- R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (nullptr), TRI(nullptr),
- ST(tm.getSubtarget<AMDGPUSubtarget>()) {
- const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
- MaxFetchInst = ST.getTexVTXClauseSize();
- }
+ R600ControlFlowFinalizer(TargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ MaxFetchInst = ST->getTexVTXClauseSize();
+ TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
+ TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MFI->getShaderType());
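Note: the hunks above show a pattern repeated throughout this merge — passes stop caching a subtarget reference from the TargetMachine at pass construction and instead fetch it per function, since subtargets may now vary from function to function. A minimal sketch of the idiom, with member names mirroring the fields of the pass above:

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Query the subtarget through the function, not the TargetMachine:
      // anything cached at pass construction may describe the wrong
      // CPU/feature set once subtargets are per-function.
      ST = &MF.getSubtarget<AMDGPUSubtarget>();
      TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
      TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
      MaxFetchInst = ST->getTexVTXClauseSize();
      // ... pass body ...
      return false;
    }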
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
index 2e1b094..8357b6d 100644
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -30,9 +30,9 @@
using namespace llvm;
-R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM),
- Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+R600TargetLowering::R600TargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -40,7 +40,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
// Set condition code actions
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
@@ -91,6 +91,14 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ // ADD, SUB overflow.
+ // TODO: turn these into Legal?
+ if (Subtarget->hasCARRY())
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+
+ if (Subtarget->hasBORROW())
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
// Expand sign extension of vectors
if (!Subtarget->hasBFE())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -163,15 +171,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setOperationAction(ISD::SUB, MVT::i64, Expand);
-
- // These should be replaced by UDVIREM, but it does not happen automatically
- // during Type Legalization
- setOperationAction(ISD::UDIV, MVT::i64, Custom);
- setOperationAction(ISD::UREM, MVT::i64, Custom);
- setOperationAction(ISD::SDIV, MVT::i64, Custom);
- setOperationAction(ISD::SREM, MVT::i64, Custom);
-
// We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
// to be Legal/Custom in order to avoid library calls.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -197,7 +196,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
switch (MI->getOpcode()) {
default:
@@ -585,6 +584,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
+ case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
+ case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -611,17 +612,18 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
}
case AMDGPUIntrinsic::R600_store_swizzle: {
+ SDLoc DL(Op);
const SDValue Args[8] = {
Chain,
Op.getOperand(2), // Export Value
Op.getOperand(3), // ArrayBase
Op.getOperand(4), // Type
- DAG.getConstant(0, MVT::i32), // SWZ_X
- DAG.getConstant(1, MVT::i32), // SWZ_Y
- DAG.getConstant(2, MVT::i32), // SWZ_Z
- DAG.getConstant(3, MVT::i32) // SWZ_W
+ DAG.getConstant(0, DL, MVT::i32), // SWZ_X
+ DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, DL, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -652,11 +654,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
MachineSDNode *interp;
if (ijb < 0) {
- const MachineFunction &MF = DAG.getMachineFunction();
- const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
- MF.getSubtarget().getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
- MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
return DAG.getTargetExtractSubreg(
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
DL, MVT::f32, SDValue(interp, 0));
@@ -674,11 +675,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
if (slot % 4 < 2)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
RegisterJNode, RegisterINode);
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
RegisterJNode, RegisterINode);
return SDValue(interp, slot % 2);
}
@@ -691,11 +692,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
RegisterJNode, RegisterINode);
else
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
- MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
RegisterJNode, RegisterINode);
return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
SDValue(interp, 0), SDValue(interp, 1));
@@ -751,19 +752,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
SDValue TexArgs[19] = {
- DAG.getConstant(TextureOp, MVT::i32),
+ DAG.getConstant(TextureOp, DL, MVT::i32),
Op.getOperand(1),
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- DAG.getConstant(2, MVT::i32),
- DAG.getConstant(3, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- DAG.getConstant(2, MVT::i32),
- DAG.getConstant(3, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(1, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32),
+ DAG.getConstant(3, DL, MVT::i32),
Op.getOperand(5),
Op.getOperand(6),
Op.getOperand(7),
@@ -776,21 +777,21 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::AMDGPU_dp4: {
SDValue Args[8] = {
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(0, MVT::i32)),
+ DAG.getConstant(0, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(0, MVT::i32)),
+ DAG.getConstant(0, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(1, MVT::i32)),
+ DAG.getConstant(1, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(1, MVT::i32)),
+ DAG.getConstant(1, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(2, MVT::i32)),
+ DAG.getConstant(2, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(2, MVT::i32)),
+ DAG.getConstant(2, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
- DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, DL, MVT::i32)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
- DAG.getConstant(3, MVT::i32))
+ DAG.getConstant(3, DL, MVT::i32))
};
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
@@ -871,42 +872,6 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Result);
return;
}
- case ISD::UDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM);
- break;
- }
- case ISD::UREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM.getValue(1));
- break;
- }
- case ISD::SDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(SDIVREM);
- break;
- }
- case ISD::SREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(SDIVREM.getValue(1));
- break;
- }
case ISD::SDIVREM: {
SDValue Op = SDValue(N, 1);
SDValue RES = LowerSDIVREM(Op, DAG);
@@ -932,8 +897,8 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
for (unsigned i = 0, e = VecVT.getVectorNumElements();
i != e; ++i) {
- Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
- Vector, DAG.getConstant(i, getVectorIdxTy())));
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
+ DAG.getConstant(i, DL, getVectorIdxTy())));
}
return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
@@ -977,11 +942,12 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
- DAG.getNode(ISD::FADD, SDLoc(Op), VT,
- DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
- DAG.getConstantFP(0.15915494309, MVT::f32)),
- DAG.getConstantFP(0.5, MVT::f32)));
+ SDLoc DL(Op);
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
+ DAG.getConstantFP(0.5, DL, MVT::f32)));
unsigned TrigNode;
switch (Op.getOpcode()) {
case ISD::FCOS:
@@ -993,14 +959,14 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("Wrong trig opcode");
}
- SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
- DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
- DAG.getConstantFP(-0.5, MVT::f32)));
+ SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
+ DAG.getNode(ISD::FADD, DL, VT, FractPart,
+ DAG.getConstantFP(-0.5, DL, MVT::f32)));
if (Gen >= AMDGPUSubtarget::R700)
return TrigVal;
// On R600 hw, COS/SIN input must be between -Pi and Pi.
- return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
- DAG.getConstantFP(3.14159265359, MVT::f32));
+ return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
+ DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
@@ -1010,11 +976,11 @@ SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue One = DAG.getConstant(1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
@@ -1046,13 +1012,13 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shift = Op.getOperand(2);
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue One = DAG.getConstant(1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
- SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
@@ -1077,12 +1043,31 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
+SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+
+ SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
+ // Extend sign.
+ OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
+ DAG.getValueType(MVT::i1));
+
+ SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
+}
+
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
- SDLoc(Op),
+ DL,
MVT::i1,
- Op, DAG.getConstantFP(0.0f, MVT::f32),
+ Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
}
@@ -1098,7 +1083,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
@@ -1235,11 +1220,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
SDValue HWTrue, HWFalse;
if (CompareVT == MVT::f32) {
- HWTrue = DAG.getConstantFP(1.0f, CompareVT);
- HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
} else if (CompareVT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, CompareVT);
- HWFalse = DAG.getConstant(0, CompareVT);
+ HWTrue = DAG.getConstant(-1, DL, CompareVT);
+ HWFalse = DAG.getConstant(0, DL, CompareVT);
}
else {
llvm_unreachable("Unhandled value type in LowerSELECT_CC");
@@ -1277,8 +1262,9 @@ SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
default: llvm_unreachable("Invalid stack width");
}
- return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
- DAG.getConstant(SRLPad, MVT::i32));
+ SDLoc DL(Ptr);
+ return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, DL, MVT::i32));
}
void R600TargetLowering::getStackAddress(unsigned StackWidth,
@@ -1329,26 +1315,26 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = StoreNode->getMemoryVT();
SDValue MaskConstant;
if (MemVT == MVT::i8) {
- MaskConstant = DAG.getConstant(0xFF, MVT::i32);
+ MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
} else {
assert(MemVT == MVT::i16);
- MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
+ MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
}
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(0x00000003, VT));
+ DAG.getConstant(0x00000003, DL, VT));
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
- DAG.getConstant(3, VT));
+ DAG.getConstant(3, DL, VT));
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
// XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
// vector instead.
SDValue Src[4] = {
ShiftedValue,
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
Mask
};
SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
@@ -1361,7 +1347,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(2, MVT::i32)));
+ Ptr, DAG.getConstant(2, DL, MVT::i32)));
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
llvm_unreachable("Truncated and indexed stores not supported yet");
@@ -1385,8 +1371,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1403,13 +1389,13 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, MVT::i32));
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
- Value, DAG.getConstant(i, MVT::i32));
+ Value, DAG.getConstant(i, DL, MVT::i32));
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
Chain, Elem, Ptr,
- DAG.getTargetConstant(Channel, MVT::i32));
+ DAG.getTargetConstant(Channel, DL, MVT::i32));
}
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
} else {
@@ -1417,7 +1403,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, MVT::i32)); // Channel
+ DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
}
return Chain;
@@ -1484,16 +1470,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lower loads constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
LoadNode->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+ DAG.getTargetConstant(0, DL, MVT::i32),
+ Op.getOperand(2));
}
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
@@ -1520,7 +1507,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
// then div by 4 at the ISel step
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
}
EVT NewVT = MVT::v4i32;
@@ -1534,15 +1521,16 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
} else {
// non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
- DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+ DAG.getConstant(4, DL, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
if (!VT.isVector()) {
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
SDValue MergedValues[2] = {
@@ -1562,18 +1550,16 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
EVT MemVT = LoadNode->getMemoryVT();
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
- SDValue ShiftAmount =
- DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
LoadNode->getPointerInfo(), MemVT,
LoadNode->isVolatile(),
LoadNode->isNonTemporal(),
LoadNode->isInvariant(),
LoadNode->getAlignment());
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
- SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
+ SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
+ DAG.getValueType(MemVT));
- SDValue MergedValues[2] = { Sra, Chain };
+ SDValue MergedValues[2] = { Res, Chain };
return DAG.getMergeValues(MergedValues, DL);
}
@@ -1583,8 +1569,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- getTargetMachine().getSubtargetImpl()->getFrameLowering());
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1601,10 +1587,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(PtrIncr, MVT::i32));
+ DAG.getConstant(PtrIncr, DL, MVT::i32));
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
Chain, Ptr,
- DAG.getTargetConstant(Channel, MVT::i32),
+ DAG.getTargetConstant(Channel, DL, MVT::i32),
Op.getOperand(2));
}
for (unsigned i = NumElemVT; i < 4; ++i) {
@@ -1615,7 +1601,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
- DAG.getTargetConstant(0, MVT::i32), // Channel
+ DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Op.getOperand(2));
}
@@ -1704,7 +1690,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
- DAG.getConstant(Offset, MVT::i32),
+ DAG.getConstant(Offset, DL, MVT::i32),
DAG.getUNDEF(MVT::i32),
PtrInfo,
MemVT, false, true, true, 4);
@@ -1805,24 +1791,25 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
-SDValue Swz[4], SelectionDAG &DAG) const {
+ SDValue Swz[4], SelectionDAG &DAG,
+ SDLoc DL) const {
assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
SwizzleRemap.clear();
BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
- Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
+ Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
return BuildVector;
@@ -1868,11 +1855,12 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ SDLoc dl(N);
+ return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
- DAG.getConstant(-1, MVT::i32), // True
- DAG.getConstant(0, MVT::i32), // Flase
+ DAG.getConstant(-1, dl, MVT::i32), // True
+ DAG.getConstant(0, dl, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
@@ -2015,7 +2003,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(7) // SWZ_W
};
SDLoc DL(N);
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
@@ -2044,9 +2032,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(17),
N->getOperand(18),
};
- NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
- NewArgs);
+ SDLoc DL(N);
+ NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
}
@@ -2065,13 +2053,13 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
if (!Neg.getNode())
return false;
Src = Src.getOperand(0);
- Neg = DAG.getTargetConstant(1, MVT::i32);
+ Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::FABS_R600:
if (!Abs.getNode())
return false;
Src = Src.getOperand(0);
- Abs = DAG.getTargetConstant(1, MVT::i32);
+ Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::CONST_COPY: {
unsigned Opcode = ParentNode->getMachineOpcode();
@@ -2167,7 +2155,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
assert(C);
if (C->getZExtValue())
return false;
- Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
+ Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
}
Src = DAG.getRegister(ImmReg, MVT::i32);
return true;
@@ -2188,9 +2176,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned Opcode = Node->getMachineOpcode();
SDValue FakeOp;
- std::vector<SDValue> Ops;
- for (const SDUse &I : Node->ops())
- Ops.push_back(I);
+ std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
if (Opcode == AMDGPU::DOT_4) {
int OperandIdx[] = {
@@ -2252,13 +2238,11 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
AMDGPU::OpName::clamp);
if (ClampIdx < 0)
return Node;
- std::vector<SDValue> Ops;
- unsigned NumOp = Src.getNumOperands();
- for(unsigned i = 0; i < NumOp; ++i)
- Ops.push_back(Src.getOperand(i));
- Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
- return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
- Node->getVTList(), Ops);
+ SDLoc DL(Node);
+ std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
+ Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
+ return DAG.getMachineNode(Src.getMachineOpcode(), DL,
+ Node->getVTList(), Ops);
} else {
if (!TII->hasInstrModifiers(Opcode))
return Node;
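Note: two threads run through the R600ISelLowering.cpp changes above. First, every getConstant/getConstantFP/getTargetConstant call now threads an SDLoc through, so constants carry a debug location. Second, i32 UADDO/USUBO gain a custom lowering when the subtarget has CARRY/BORROW. That lowering reduces to three nodes; restated as a sketch with the same shape as LowerUADDSUBO above (MainOp is ISD::ADD or ISD::SUB, OvfOp is AMDGPUISD::CARRY or AMDGPUISD::BORROW):

    // Result = LHS op RHS; Overflow = the hardware carry/borrow bit,
    // sign-extended in-register from i1. MERGE_VALUES pairs (Res, Ovf).
    SDValue Ovf = DAG.getNode(OvfOp, DL, VT, LHS, RHS);
    Ovf = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Ovf,
                      DAG.getValueType(MVT::i1));
    SDValue Res = DAG.getNode(MainOp, DL, VT, LHS, RHS);
    return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, Ovf);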
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.h b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
index 10ebc10..c06d3c4 100644
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.h
@@ -23,7 +23,7 @@ class R600InstrInfo;
class R600TargetLowering : public AMDGPUTargetLowering {
public:
- R600TargetLowering(TargetMachine &TM);
+ R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -50,7 +50,8 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
- SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const;
+ SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG,
+ SDLoc DL) const;
SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
@@ -63,6 +64,8 @@ private:
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
+ unsigned mainop, unsigned ovf) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
index 653fd0d..5f0bdf3 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
#include "AMDGPUGenDFAPacketizer.inc"
R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st)
- { }
+ : AMDGPUInstrInfo(st), RI() {}
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
@@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
-#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
@@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
- for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
- if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
}
}
return -1;
}
-#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
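Note: the getSelIdx change above swaps a row-count macro for a range-based for over the array itself, so the loop bound is deduced from the initializer and can never drift out of sync with it. A self-contained illustration of the idiom (toy data, not real operand indices):

    #include <cstdio>

    static const unsigned SrcSelTable[][2] = {
        {1, 10}, {2, 20}, {3, 30}, // illustrative rows only
    };

    int main() {
      for (const auto &Row : SrcSelTable) // bound comes from the array type
        std::printf("%u -> %u\n", Row[0], Row[1]);
      return 0;
    }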
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
index 05957d2..7126c82 100644
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@@ -335,10 +335,11 @@ def load_param : LoadParamFrag<load>;
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;
-def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">;
+def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
-def isR600toCayman : Predicate<
- "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
+def isR600toCayman
+ : Predicate<
+ "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
//===----------------------------------------------------------------------===//
// R600 SDNodes
@@ -579,6 +580,7 @@ i32imm:$COUNT, i32imm:$Enabled),
let ALT_CONST = 0;
let WHOLE_QUAD_MODE = 0;
let BARRIER = 1;
+ let isCodeGenOnly = 1;
let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
@@ -641,6 +643,7 @@ def FETCH_CLAUSE : AMDGPUInst <(outs),
field bits<8> Inst;
bits<8> num;
let Inst = num;
+ let isCodeGenOnly = 1;
}
def ALU_CLAUSE : AMDGPUInst <(outs),
@@ -648,10 +651,13 @@ def ALU_CLAUSE : AMDGPUInst <(outs),
field bits<8> Inst;
bits<8> num;
let Inst = num;
+ let isCodeGenOnly = 1;
}
def LITERALS : AMDGPUInst <(outs),
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+ let isCodeGenOnly = 1;
+
field bits<64> Inst;
bits<32> literal1;
bits<32> literal2;
@@ -677,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
// TODO: Do these actually match the regular fmin/fmax behavior?
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
+// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx
+// DX10 min/max returns the other operand if one is NaN,
+// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
+def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
+def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
@@ -914,7 +925,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
- [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
+ [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
>;
class FMA_Common <bits<5> inst> : R600_3OP <
@@ -1142,16 +1153,6 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
-// FROUND pattern
-class FROUNDPat<Instruction CNDGE, Instruction CNDGT> : Pat <
- (AMDGPUround f32:$x),
- (CNDGE $x,
- (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)),
- (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
- )
->;
-
-
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1195,8 +1196,6 @@ let Predicates = [isR600] in {
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
- def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
-
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
@@ -1249,6 +1248,7 @@ let Predicates = [isR600] in {
def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR),
"PUSH_ELSE @$ADDR"> {
let CNT = 0;
+ let POP_COUNT = 0; // FIXME?
}
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
diff --git a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
index d782713..bcde5fb 100644
--- a/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/R600/R600MachineScheduler.cpp
@@ -16,7 +16,7 @@
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,17 +26,16 @@ using namespace llvm;
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
DAG = static_cast<ScheduleDAGMILive*>(dag);
+ const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
- VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !ST.hasCaymanISA();
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 31;
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
InstKindLimit[IDOther] = 32;
-
- const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
AluInstCount = 0;
FetchInstCount = 0;
diff --git a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 742c0e0..0c06ccc 100644
--- a/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,10 +27,9 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
index ddf68c9..deee5bc 100644
--- a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
@@ -153,7 +153,7 @@ public:
TII(static_cast<const R600InstrInfo *>(
MF.getSubtarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
// initPacketizerState - initialize some internal flags.
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
index dc95675..fb0359c 100644
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,14 +20,16 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { RCW.RegWeight = 0; RCW.WeightLimit = 0;}
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
index f1a8a41..9713e60 100644
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.h
@@ -24,7 +24,7 @@ class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
RegClassWeight RCW;
- R600RegisterInfo(const AMDGPUSubtarget &st);
+ R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 419ec8b..2fc7b02 100644
--- a/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(2);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
@@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(5);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
diff --git a/contrib/llvm/lib/Target/R600/R700Instructions.td b/contrib/llvm/lib/Target/R600/R700Instructions.td
index 9aad85d..613a0d7 100644
--- a/contrib/llvm/lib/Target/R600/R700Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R700Instructions.td
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">;
+def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
let Predicates = [isR700] in {
def SIN_r700 : SIN_Common<0x6E>;
diff --git a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
index b8165fb..ccfbf1b 100644
--- a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -102,7 +102,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
@@ -312,7 +312,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
Preds.push_back(*PI);
}
- BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this);
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
+ LI, false);
}
CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
@@ -322,7 +323,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h
index b540140..4727d97 100644
--- a/contrib/llvm/lib/Target/R600/SIDefines.h
+++ b/contrib/llvm/lib/Target/R600/SIDefines.h
@@ -36,7 +36,8 @@ enum {
DS = 1 << 17,
MIMG = 1 << 18,
FLAT = 1 << 19,
- WQM = 1 << 20
+ WQM = 1 << 20,
+ VGPRSpill = 1 << 21
};
}
diff --git a/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
new file mode 100644
index 0000000..5fe8d19
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIFixControlFlowLiveIntervals.cpp
@@ -0,0 +1,96 @@
+//===-- SIFixControlFlowLiveIntervals.cpp - Fix CF live intervals ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Spilling of EXEC masks used for control flow messes up control flow
+/// lowering, so mark all live intervals associated with CF instructions as
+/// non-spillable.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-cf-live-intervals"
+
+namespace {
+
+class SIFixControlFlowLiveIntervals : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixControlFlowLiveIntervals() : MachineFunctionPass(ID) {
+ initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Fix CF Live Intervals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIFixControlFlowLiveIntervals, DEBUG_TYPE,
+ "SI Fix CF Live Intervals", false, false)
+
+char SIFixControlFlowLiveIntervals::ID = 0;
+
+char &llvm::SIFixControlFlowLiveIntervalsID = SIFixControlFlowLiveIntervals::ID;
+
+FunctionPass *llvm::createSIFixControlFlowLiveIntervalsPass() {
+ return new SIFixControlFlowLiveIntervals();
+}
+
+bool SIFixControlFlowLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ case AMDGPU::SI_END_CF: {
+ unsigned Reg = MI.getOperand(0).getReg();
+ LIS->getInterval(Reg).markNotSpillable();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
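Note, for context on the new pass above (from general knowledge of LLVM's register-allocation API, not from this patch): LiveInterval::markNotSpillable() pins the interval's spill weight to infinity, which is how the spiller is told to leave these EXEC-mask intervals alone. Roughly:

    // Paraphrase of the LiveInterval API the pass relies on; see
    // include/llvm/CodeGen/LiveInterval.h in the merged tree.
    void markNotSpillable() { weight = llvm::huge_valf; }
    bool isSpillable() const { return weight != llvm::huge_valf; }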
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
index cd1b3ac..23502b4 100644
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -140,7 +140,7 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
const TargetRegisterClass *RC
= TargetRegisterInfo::isVirtualRegister(Reg) ?
MRI.getRegClass(Reg) :
- TRI->getRegClass(Reg);
+ TRI->getPhysRegClass(Reg);
RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_instr_iterator
@@ -183,15 +183,17 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
unsigned SrcReg = Copy.getOperand(1).getReg();
unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
- const TargetRegisterClass *DstRC
- = TargetRegisterInfo::isVirtualRegister(DstReg) ?
- MRI.getRegClass(DstReg) :
- TRI->getRegClass(DstReg);
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // If the destination register is a physical register there isn't really
+ // much we can do to fix this.
+ return false;
+ }
+
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass ||
MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
diff --git a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index f34c375..0c54446 100644
--- a/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -54,6 +54,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp b/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
index cb24bba..d14e37a 100644
--- a/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/R600/SIFoldOperands.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
@@ -172,6 +173,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!isSafeToFold(MI.getOpcode()))
continue;
+ unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm();
@@ -180,10 +182,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!FoldingImm && !OpToFold.isReg())
continue;
- // Folding immediates with more than one use will increase program side.
+ // Folding immediates with more than one use will increase program size.
// FIXME: This will also reduce register usage, which may be better
// in some cases. A better heuristic is needed.
- if (FoldingImm && !TII->isInlineConstant(OpToFold) &&
+ if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
!MRI.hasOneUse(MI.getOperand(0).getReg()))
continue;
@@ -202,7 +204,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
// FIXME: Fold operands with subregs.
- if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
continue;
}
@@ -213,7 +216,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
MRI.getRegClass(UseReg) :
- TRI.getRegClass(UseReg);
+ TRI.getPhysRegClass(UseReg);
Imm = APInt(64, OpToFold.getImm());
@@ -237,7 +240,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
MRI.getRegClass(DestReg) :
- TRI.getRegClass(DestReg);
+ TRI.getPhysRegClass(DestReg);
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
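Note: the getRegClass-to-getPhysRegClass switch in this file (and in SIFixSGPRCopies.cpp above) fixes a genuine type confusion: TargetRegisterInfo::getRegClass(unsigned) takes a register *class ID*, so passing it a physical register number indexes an unrelated class. The corrected lookup splits on register kind; a sketch of the idiom (LLVM-context code, not standalone):

    const TargetRegisterClass *RC =
        TargetRegisterInfo::isVirtualRegister(Reg)
            ? MRI.getRegClass(Reg)       // virtual: class recorded in MRI
            : TRI.getPhysRegClass(Reg);  // physical: SIRegisterInfo helper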
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
index 32ae605..52bf2ae 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -35,8 +35,9 @@
using namespace llvm;
-SITargetLowering::SITargetLowering(TargetMachine &TM) :
- AMDGPUTargetLowering(TM) {
+SITargetLowering::SITargetLowering(TargetMachine &TM,
+ const AMDGPUSubtarget &STI)
+ : AMDGPUTargetLowering(TM, STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
@@ -59,7 +60,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
@@ -75,8 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FSIN, MVT::f32, Custom);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -171,16 +170,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- MVT VecTypes[] = {
- MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
- };
-
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
- for (MVT VT : VecTypes) {
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -205,10 +200,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -368,8 +363,8 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
return TII->isInlineConstant(Imm);
}
@@ -389,7 +384,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
+ DAG.getConstant(Offset, SL, MVT::i64));
SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
@@ -402,16 +397,11 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
}
SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- const TargetMachine &TM = getTargetMachine();
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
@@ -581,8 +571,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// Fill up the missing vector elements
NumElements = Arg.VT.getVectorNumElements() - NumElements;
- for (unsigned j = 0; j != NumElements; ++j)
- Regs.push_back(DAG.getUNDEF(VT));
+ Regs.append(NumElements, DAG.getUNDEF(VT));
InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
continue;
@@ -592,8 +581,8 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (Info->getShaderType() != ShaderType::COMPUTE) {
- unsigned ScratchIdx = CCInfo.getFirstUnallocated(
- AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs());
+ unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
+ AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
return Chain;
@@ -603,25 +592,14 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::BRANCH: return BB;
- case AMDGPU::V_SUB_F64: {
- unsigned DestReg = MI->getOperand(0).getReg();
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
- .addImm(0) // SRC0 modifiers
- .addReg(MI->getOperand(1).getReg())
- .addImm(1) // SRC1 modifiers
- .addReg(MI->getOperand(2).getReg())
- .addImm(0) // CLAMP
- .addImm(0); // OMOD
- MI->eraseFromParent();
- break;
- }
+ case AMDGPU::BRANCH:
+ return BB;
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -638,6 +616,17 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
+bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ // This currently forces unfolding various combinations of fsub into fma with
+ // free fneg'd operands. As long as we have fast FMA (controlled by
+ // isFMAFasterThanFMulAndFAdd), we should perform these.
+
+ // When fma is quarter rate, for f64 where add / sub are at best half rate,
+ // most of these combines appear to be cycle neutral but save on instruction
+ // count / code size.
+ return true;
+}
+
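As a side note on the unfolding described above: with fast FMA, (fsub (fmul a, b), c) can become a single fma with a negated addend, since fneg is a free source modifier on this hardware. A minimal host-side sketch (not part of the patch, assuming std::fma maps to a hardware fma):

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.5, b = 2.25, c = 0.75;
      double unfused = a * b - c;        // fmul then fsub: two roundings
      double fused = std::fma(a, b, -c); // one fma: single rounding
      std::printf("unfused=%g fused=%g\n", unfused, fused);
      return 0;
    }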
EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
@@ -649,6 +638,21 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
return MVT::i32;
}
+// Answering this is somewhat tricky and depends on the specific device, since
+// different devices have different rates for fma and for all f64 operations.
+//
+// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
+// regardless of which device (although the number of cycles differs between
+// devices), so it is always profitable for f64.
+//
+// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
+// only on full rate devices. Normally, we should prefer selecting v_mad_f32
+// which we can always do even without fused FP ops since it returns the same
+// result as the separate operations and since it is always full
+// rate. Therefore, we lie and report that it is not faster for f32. However,
+// v_mad_f32 does not support denormals, so we do report fma as faster if we
+// have a fast fma device and denormals are required.
+//
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
@@ -657,7 +661,11 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
- return false; /* There is V_MAD_F32 for f32 */
+ // f32 fma is as fast as mad on some subtargets. However, we always have
+ // full rate f32 mad available, which returns the same result as the
+ // separate operations and which we should prefer over fma. We can't use
+ // mad if we want to support denormals, so only report fma as faster in
+ // that case.
+ return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
case MVT::f64:
return true;
default:
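The denormal caveat can be reproduced on any IEEE-754 host; a small sketch (assumptions: the mad variable models an intermediate-rounding multiply-add, while std::fmaf rounds once like a true fma; the exact v_mad_f32 denormal-flushing behavior is hardware-specific):

    #include <cmath>
    #include <cstdio>

    int main() {
      // a * b underflows below the smallest subnormal; the separately rounded
      // multiply loses it, while the fused path keeps it and rounds once.
      float a = 0x1p-75f, b = 0x1p-75f, c = 0x1p-149f;
      float mad = a * b + c;          // a * b rounds to 0 first
      float fma = std::fmaf(a, b, c); // exact 2^-150 + 2^-149, one rounding
      std::printf("mad=%a fma=%a\n", mad, fma); // the two results differ
      return 0;
    }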
@@ -753,15 +761,12 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
// Build the result types of the new intrinsic call
- SmallVector<EVT, 4> Res;
- for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
- Res.push_back(Intr->getValueType(i));
+ ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// Build the operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;
Ops.push_back(BRCOND.getOperand(0));
- for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
- Ops.push_back(Intr->getOperand(i));
+ Ops.append(Intr->op_begin() + 1, Intr->op_end());
Ops.push_back(Target);
// build the new intrinsic call
@@ -821,23 +826,40 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
PtrLo, GA);
SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, MVT::i32),
+ PtrHi, DAG.getConstant(0, DL, MVT::i32),
SDValue(Lo.getNode(), 1));
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
+SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
+ SDValue V) const {
+ // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
+ // so we will end up with redundant moves to m0.
+ //
+ // We can't use S_MOV_B32, because there is no way to specify m0 as the
+ // destination register.
+ //
+ // We have to use them both: MachineCSE will combine all the S_MOV_B32
+ // instructions, and the register coalescer will eliminate the extra copies.
+ SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
+ return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
+ SDValue(M0, 0), SDValue()); // A null SDValue creates a glue result.
+}
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -926,7 +948,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
-
+ case AMDGPUIntrinsic::SI_fs_constant: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
+ DAG.getConstant(2, DL, MVT::i32), // P0
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
+ case AMDGPUIntrinsic::SI_fs_interp: {
+ SDValue IJ = Op.getOperand(4);
+ SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
+ DAG.getConstant(1, DL, MVT::i32));
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
+ SDValue Glue = M0.getValue(1);
+ SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
+ DAG.getVTList(MVT::f32, MVT::Glue),
+ I, Op.getOperand(1), Op.getOperand(2), Glue);
+ Glue = SDValue(P1.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
+ Op.getOperand(1), Op.getOperand(2), Glue);
+ }
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
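For orientation, a scalar model of the two interpolation stages the intrinsic expands to (assumed INTERP_P1/INTERP_P2 semantics; p0/p10/p20 are the per-primitive attribute coefficients, which come from setup rather than from this patch):

    // tmp    = p10 * i + p0   (INTERP_P1 stage)
    // result = p20 * j + tmp  (INTERP_P2 stage)
    static float interp(float p0, float p10, float p20, float i, float j) {
      float tmp = p10 * i + p0;
      return p20 * j + tmp;
    }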
@@ -935,12 +978,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_sendmsg: {
+ Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
+ Op.getOperand(2), Glue);
+ }
case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDLoc DL(Op);
SDValue Ops[] = {
Chain,
Op.getOperand(2),
@@ -1013,8 +1062,8 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue One = DAG.getConstant(1, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
@@ -1089,12 +1138,12 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
- const SDValue K0 = DAG.getConstantFP(K0Val, MVT::f32);
+ const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
- const SDValue K1 = DAG.getConstantFP(K1Val, MVT::f32);
+ const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
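The two magic bit patterns above decode to plain powers of two, 2^96 and 2^-32; comparing against 2^96 detects denominators large enough to need pre-scaling by 2^-32 before the reciprocal (that reading of the constants is an inference, not stated in the patch). A quick host-side decode:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    int main() {
      for (uint32_t bits : {0x6f800000u, 0x2f800000u}) {
        float f;
        std::memcpy(&f, &bits, sizeof f);      // same idea as BitsToFloat
        std::printf("0x%08x = %g\n", bits, f); // prints 2^96 and 2^-32
      }
      return 0;
    }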
@@ -1119,7 +1168,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
- const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
@@ -1149,7 +1198,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
- const SDValue Hi = DAG.getConstant(1, MVT::i32);
+ const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
@@ -1218,11 +1267,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
- DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
- DAG.getConstantFP(0.5 / M_PI, VT)));
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Arg,
+ DAG.getConstantFP(0.5/M_PI, DL,
+ VT)));
switch (Op.getOpcode()) {
case ISD::FCOS:
@@ -1341,6 +1392,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+/// \brief Return true if the given offset size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return false;
+ }
+}
+
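A standalone restatement of the CONSTANT_ADDRESS rule above, useful for sanity-checking the bit widths (a sketch; the boolean parameter stands in for the STI.getGeneration() test):

    #include <cassert>
    #include <cstdint>

    static bool fitsSMRDOffset(uint64_t Bytes, bool IsVolcanicIslands) {
      if (IsVolcanicIslands)
        return Bytes < (1ull << 20);            // 20-bit byte offset on VI
      return Bytes % 4 == 0 && Bytes / 4 < 256; // 8-bit dword offset on SI
    }

    int main() {
      assert(fitsSMRDOffset(1020, false));   // 255 dwords: fits on SI
      assert(!fitsSMRDOffset(1022, false));  // not dword-aligned: rejected
      assert(!fitsSMRDOffset(1024, false));  // 256 dwords: one too many
      assert(fitsSMRDOffset(1 << 19, true)); // fits in 20 bits on VI
      return 0;
    }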
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
@@ -1372,13 +1452,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -1386,7 +1463,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
EVT VT = N->getValueType(0);
SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
- SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+ SDValue COffset = DAG.getConstant(Offset, SL, MVT::i32);
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
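The rewrite relies on the identity in the comment introducing performSHLPtrCombine, which holds unconditionally in two's-complement arithmetic; a one-line check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u, c1 = 40, c2 = 4;
      // (shl (add x, c1), c2) == add (shl x, c2), (shl c1, c2), modulo 2^32
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
      return 0;
    }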
@@ -1435,8 +1512,9 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
"mask not equal");
- return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
- X, DAG.getConstant(Mask, MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ X, DAG.getConstant(Mask, DL, MVT::i32));
}
}
}
@@ -1466,8 +1544,9 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
static const uint32_t MaxMask = 0x3ff;
uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
- return DAG.getNode(AMDGPUISD::FP_CLASS, SDLoc(N), MVT::i1,
- Src, DAG.getConstant(NewMask, MVT::i32));
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
return SDValue();
@@ -1481,7 +1560,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
if (CMask->isNullValue())
- return DAG.getConstant(0, MVT::i1);
+ return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
return SDValue();
@@ -1565,8 +1644,8 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
- return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1,
- LHS.getOperand(0), DAG.getConstant(Mask, MVT::i32));
+ return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
+ DAG.getConstant(Mask, SL, MVT::i32));
}
}
@@ -1628,6 +1707,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::f32)
break;
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (Subtarget->hasFP32Denormals())
+ break;
+
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -1638,8 +1722,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::FADD) {
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, RHS);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
}
}
@@ -1647,12 +1731,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (RHS.getOpcode() == ISD::FADD) {
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, LHS);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
}
}
- break;
+ return SDValue();
}
case ISD::FSUB: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
@@ -1662,39 +1746,22 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
// Try to get the fneg to fold into the source modifier. This undoes generic
// DAG combines and folds them into the mad.
- if (VT == MVT::f32) {
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if (VT == MVT::f32 &&
+ !Subtarget->hasFP32Denormals()) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
-
- if (LHS.getOpcode() == ISD::FMUL) {
- // (fsub (fmul a, b), c) -> mad a, b, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- SDValue B = LHS.getOperand(1);
- SDValue C = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
- if (RHS.getOpcode() == ISD::FMUL) {
- // (fsub c, (fmul a, b)) -> mad (fneg a), b, c
-
- SDValue A = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
- SDValue B = RHS.getOperand(1);
- SDValue C = LHS;
-
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
- }
-
if (LHS.getOpcode() == ISD::FADD) {
// (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, MVT::f32);
+ const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, NegRHS);
+ return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
}
}
@@ -1703,10 +1770,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
- const SDValue NegTwo = DAG.getConstantFP(-2.0, MVT::f32);
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, NegTwo, A, LHS);
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
+ return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
}
}
+
+ return SDValue();
}
break;
@@ -1740,9 +1809,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
- SmallVector<SDValue, 8> NewOps;
- for (unsigned I = 0, E = MemNode->getNumOperands(); I != E; ++I)
- NewOps.push_back(MemNode->getOperand(I));
+ SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
@@ -1760,33 +1827,21 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-/// \brief Test if RegClass is one of the VSrc classes
-static bool isVSrc(unsigned RegClass) {
- switch(RegClass) {
- default: return false;
- case AMDGPU::VS_32RegClassID:
- case AMDGPU::VS_64RegClassID:
- return true;
- }
-}
-
/// \brief Analyze the possible immediate value Op
///
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate,
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
- if (Node->getZExtValue() >> 32)
- return -1;
-
if (TII->isInlineConstant(Node->getAPIntValue()))
return 0;
- return Node->getZExtValue();
+ uint64_t Val = Node->getZExtValue();
+ return isUInt<32>(Val) ? Val : -1;
}
if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
@@ -1802,69 +1857,6 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
return -1;
}
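To make the three-way contract of analyzeImmediate concrete, a hedged sketch (classifyImm and its IsInlineConstant parameter are illustrative stand-ins, not the real API):

    #include <cstdint>

    // -1: not usable as an immediate; 0: inline immediate (encoded for free);
    // otherwise: the 32-bit literal value that must be emitted.
    static int64_t classifyImm(uint64_t Val, bool IsInlineConstant) {
      if (IsInlineConstant)
        return 0;
      return Val <= UINT32_MAX ? static_cast<int64_t>(Val) : -1;
    }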
-const TargetRegisterClass *SITargetLowering::getRegClassForNode(
- SelectionDAG &DAG, const SDValue &Op) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
-
- if (!Op->isMachineOpcode()) {
- switch(Op->getOpcode()) {
- case ISD::CopyFromReg: {
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- return MRI.getRegClass(Reg);
- }
- return TRI.getPhysRegClass(Reg);
- }
- default: return nullptr;
- }
- }
- const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
- int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
- if (OpClassID != -1) {
- return TRI.getRegClass(OpClassID);
- }
- switch(Op.getMachineOpcode()) {
- case AMDGPU::COPY_TO_REGCLASS:
- // Operand 1 is the register class id for COPY_TO_REGCLASS instructions.
- OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
-
- // If the COPY_TO_REGCLASS instruction is copying to a VSrc register
- // class, then the register class for the value could be either a
- // VReg or and SReg. In order to get a more accurate
- if (isVSrc(OpClassID))
- return getRegClassForNode(DAG, Op.getOperand(0));
-
- return TRI.getRegClass(OpClassID);
- case AMDGPU::EXTRACT_SUBREG: {
- int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- const TargetRegisterClass *SuperClass =
- getRegClassForNode(DAG, Op.getOperand(0));
- return TRI.getSubClassWithSubReg(SuperClass, SubIdx);
- }
- case AMDGPU::REG_SEQUENCE:
- // Operand 0 is the register class id for REG_SEQUENCE instructions.
- return TRI.getRegClass(
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue());
- default:
- return getRegClassFor(Op.getSimpleValueType());
- }
-}
-
-/// \brief Does "Op" fit into register class "RegClass" ?
-bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
- unsigned RegClass) const {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
- if (!RC) {
- return false;
- }
- return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
-}
-
/// \brief Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
@@ -1921,15 +1913,15 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Adjust the writemask in the node
std::vector<SDValue> Ops;
- Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(Node->getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
+ Ops.insert(Ops.end(), Node->op_begin() + 1, Node->op_end());
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)
if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
+ MVT::i32);
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
SDLoc(), Users[Lane]->getValueType(0),
SDValue(Node, 0), RC);
@@ -1944,7 +1936,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (!User)
continue;
- SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
switch (Idx) {
@@ -1981,9 +1973,8 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- Node = AdjustRegClass(Node, DAG);
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
@@ -2000,8 +1991,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
TII->legalizeOperands(MI);
@@ -2040,26 +2031,26 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
}
static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
- SDValue K = DAG.getTargetConstant(Val, MVT::i32);
+ SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
}
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
#if 1
// XXX - Workaround for moveToVALU not handling different register class
// inserts for REG_SEQUENCE.
// Build the half of the subregister with the constants.
const SDValue Ops0[] = {
- DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
@@ -2067,11 +2058,11 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
// Combine the constants and the pointer.
const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
- DAG.getTargetConstant(AMDGPU::sub2_sub3, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
@@ -2104,7 +2095,8 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
if (RsrcDword1) {
PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
- DAG.getConstant(RsrcDword1, MVT::i32)), 0);
+ DAG.getConstant(RsrcDword1, DL, MVT::i32)),
+ 0);
}
SDValue DataLo = buildSMovImm32(DAG, DL,
@@ -2112,15 +2104,15 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
PtrLo,
- DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
PtrHi,
- DAG.getTargetConstant(AMDGPU::sub1, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
DataLo,
- DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
DataHi,
- DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
@@ -2129,64 +2121,14 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
-MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
- SelectionDAG &DAG) const {
-
- SDLoc DL(N);
- unsigned NewOpcode = N->getMachineOpcode();
-
- switch (N->getMachineOpcode()) {
- default: return N;
- case AMDGPU::S_LOAD_DWORD_IMM:
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
- // Fall-through
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- if (NewOpcode == N->getMachineOpcode()) {
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
- }
- // Fall-through
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
- if (NewOpcode == N->getMachineOpcode()) {
- NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
- }
- if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) {
- return N;
- }
- ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
-
- const SDValue Zero64 = DAG.getTargetConstant(0, MVT::i64);
- SDValue Ptr(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Zero64), 0);
- MachineSDNode *RSrc = wrapAddr64Rsrc(DAG, DL, Ptr);
-
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(SDValue(RSrc, 0));
- Ops.push_back(N->getOperand(0));
-
- // The immediate offset is in dwords on SI and in bytes on VI.
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue(), MVT::i32));
- else
- Ops.push_back(DAG.getTargetConstant(Offset->getSExtValue() << 2, MVT::i32));
-
- // Copy remaining operands so we keep any chain and glue nodes that follow
- // the normal operands.
- for (unsigned I = 2, E = N->getNumOperands(); I != E; ++I)
- Ops.push_back(N->getOperand(I));
-
- return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
- }
- }
-}
-
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
@@ -2195,3 +2137,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
cast<RegisterSDNode>(VReg)->getReg(), VT);
}
+
+//===----------------------------------------------------------------------===//
+// SI Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass *>
+SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint == "r") {
+ switch(VT.SimpleTy) {
+ default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
+ case MVT::i64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case MVT::i32:
+ return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ }
+ }
+
+ if (Constraint.size() > 1) {
+ const TargetRegisterClass *RC = nullptr;
+ if (Constraint[1] == 'v') {
+ RC = &AMDGPU::VGPR_32RegClass;
+ } else if (Constraint[1] == 's') {
+ RC = &AMDGPU::SGPR_32RegClass;
+ }
+
+ if (RC) {
+ unsigned Idx = std::atoi(Constraint.substr(2).c_str());
+ if (Idx < RC->getNumRegs())
+ return std::make_pair(RC->getRegister(Idx), RC);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
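A hedged user-level illustration of what the constraint handler accepts: "r" picks an SGPR class by value type, and explicit names such as {v1} or {s3} pin a specific register via the Constraint[1]/atoi path above. The asm string and register choice below are invented for the example, not taken from this change:

    int shl2(int x) {
      int r;
      __asm__("s_lshl_b32 %0, %1, 2" // r = x << 2 on the scalar unit
              : "=r"(r)              // "r" -> SGPR_32 for an i32 result
              : "r"(x));
      return r;
    }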
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h
index 876fd8c..a95354c 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h
@@ -42,13 +42,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
- const SDValue &Op) const;
- bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
- unsigned RegClass) const;
-
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
- MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const;
@@ -63,7 +57,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
public:
- SITargetLowering(TargetMachine &tm);
+ SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
EVT /*VT*/) const override;
@@ -95,6 +89,7 @@ public:
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
+ bool enableAggressiveFMAFusion(EVT VT) const override;
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
MVT getScalarShiftAmountTy(EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
@@ -118,6 +113,11 @@ public:
MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const;
+
+ std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI,
+ const std::string &Constraint, MVT VT) const override;
+ SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
index 50f20ac..90a37f1 100644
--- a/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInsertWaits.cpp
@@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
return;
}
- if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
@@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
index b825208..3dddd24 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
@@ -39,6 +39,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
field bits<1> WQM = 0;
+ field bits<1> VGPRSpill = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@@ -66,6 +67,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;
+ let TSFlags{21} = VGPRSpill;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
@@ -74,17 +76,18 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
}
class Enc32 {
-
field bits<32> Inst;
int Size = 4;
}
class Enc64 {
-
field bits<64> Inst;
int Size = 8;
}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
let Uses = [EXEC] in {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -98,7 +101,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
let DisableEncoding = "$dst";
let VOPC = 1;
@@ -129,6 +132,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;
let VOP3 = 1;
+ let VALU = 1;
+
+ let AsmMatchConverter = "cvtVOP3";
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -139,53 +147,61 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
//===----------------------------------------------------------------------===//
class SOP1e <bits<8> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
- bits<7> SDST;
- bits<8> SSRC0;
-
- let Inst{7-0} = SSRC0;
+ let Inst{7-0} = ssrc0;
let Inst{15-8} = op;
- let Inst{22-16} = SDST;
+ let Inst{22-16} = sdst;
let Inst{31-23} = 0x17d; //encoding;
}
class SOP2e <bits<7> op> : Enc32 {
+ bits<7> sdst;
+ bits<8> ssrc0;
+ bits<8> ssrc1;
- bits<7> SDST;
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = SDST;
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
+ let Inst{22-16} = sdst;
let Inst{29-23} = op;
let Inst{31-30} = 0x2; // encoding
}
class SOPCe <bits<7> op> : Enc32 {
+ bits<8> ssrc0;
+ bits<8> ssrc1;
- bits<8> SSRC0;
- bits<8> SSRC1;
-
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
+ let Inst{7-0} = ssrc0;
+ let Inst{15-8} = ssrc1;
let Inst{22-16} = op;
let Inst{31-23} = 0x17e;
}
class SOPKe <bits<5> op> : Enc32 {
+ bits <7> sdst;
+ bits <16> simm16;
- bits <7> SDST;
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = SDST;
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
let Inst{27-23} = op;
let Inst{31-28} = 0xb; //encoding
}
-class SOPPe <bits<7> op> : Enc32 {
+class SOPK64e <bits<5> op> : Enc64 {
+ bits <7> sdst = 0;
+ bits <16> simm16;
+ bits <32> imm;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb;
+
+ let Inst{63-32} = imm;
+}
+class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
let Inst{15-0} = simm16;
@@ -194,15 +210,14 @@ class SOPPe <bits<7> op> : Enc32 {
}
class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
+ bits<7> sdst;
+ bits<7> sbase;
+ bits<8> offset;
- bits<7> SDST;
- bits<7> SBASE;
- bits<8> OFFSET;
-
- let Inst{7-0} = OFFSET;
+ let Inst{7-0} = offset;
let Inst{8} = imm;
- let Inst{14-9} = SBASE{6-1};
- let Inst{21-15} = SDST;
+ let Inst{14-9} = sbase{6-1};
+ let Inst{21-15} = sdst;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
}
@@ -213,6 +228,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP1 = 1;
}
@@ -223,6 +239,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP2 = 1;
@@ -238,6 +255,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
+ let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
}
@@ -260,7 +278,6 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let isCodeGenOnly = 0;
let SALU = 1;
let SOPP = 1;
@@ -286,13 +303,12 @@ class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
//===----------------------------------------------------------------------===//
class VOP1e <bits<8> op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
- bits<8> VDST;
- bits<9> SRC0;
-
- let Inst{8-0} = SRC0;
+ let Inst{8-0} = src0;
let Inst{16-9} = op;
- let Inst{24-17} = VDST;
+ let Inst{24-17} = vdst;
let Inst{31-25} = 0x3f; //encoding
}
@@ -324,8 +340,7 @@ class VOP2_MADKe <bits<6> op> : Enc64 {
}
class VOP3e <bits<9> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -335,7 +350,7 @@ class VOP3e <bits<9> op> : Enc64 {
bits<1> clamp;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{8} = src0_modifiers{1};
let Inst{9} = src1_modifiers{1};
let Inst{10} = src2_modifiers{1};
@@ -352,8 +367,7 @@ class VOP3e <bits<9> op> : Enc64 {
}
class VOP3be <bits<9> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -363,7 +377,7 @@ class VOP3be <bits<9> op> : Enc64 {
bits<7> sdst;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{14-8} = sdst;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; //encoding
@@ -377,33 +391,30 @@ class VOP3be <bits<9> op> : Enc64 {
}
class VOPCe <bits<8> op> : Enc32 {
+ bits<9> src0;
+ bits<8> vsrc1;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e;
}
class VINTRPe <bits<2> op> : Enc32 {
+ bits<8> vdst;
+ bits<8> vsrc;
+ bits<2> attrchan;
+ bits<6> attr;
- bits<8> VDST;
- bits<8> VSRC;
- bits<2> ATTRCHAN;
- bits<6> ATTR;
-
- let Inst{7-0} = VSRC;
- let Inst{9-8} = ATTRCHAN;
- let Inst{15-10} = ATTR;
+ let Inst{7-0} = vsrc;
+ let Inst{9-8} = attrchan;
+ let Inst{15-10} = attr;
let Inst{17-16} = op;
- let Inst{25-18} = VDST;
+ let Inst{25-18} = vdst;
let Inst{31-26} = 0x32; // encoding
}
class DSe <bits<8> op> : Enc64 {
-
bits<8> vdst;
bits<1> gds;
bits<8> addr;
@@ -424,7 +435,6 @@ class DSe <bits<8> op> : Enc64 {
}
class MUBUFe <bits<7> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -455,67 +465,65 @@ class MUBUFe <bits<7> op> : Enc64 {
}
class MTBUFe <bits<3> op> : Enc64 {
+ bits<8> vdata;
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<4> DFMT;
- bits<3> NFMT;
- bits<8> VADDR;
- bits<7> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{15} = addr64;
let Inst{18-16} = op;
- let Inst{22-19} = DFMT;
- let Inst{25-23} = NFMT;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC{6-2};
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
}
class MIMGe <bits<7> op> : Enc64 {
-
- bits<8> VDATA;
- bits<4> DMASK;
- bits<1> UNORM;
- bits<1> GLC;
- bits<1> DA;
- bits<1> R128;
- bits<1> TFE;
- bits<1> LWE;
- bits<1> SLC;
- bits<8> VADDR;
- bits<7> SRSRC;
- bits<7> SSAMP;
-
- let Inst{11-8} = DMASK;
- let Inst{12} = UNORM;
- let Inst{13} = GLC;
- let Inst{14} = DA;
- let Inst{15} = R128;
- let Inst{16} = TFE;
- let Inst{17} = LWE;
+ bits<8> vdata;
+ bits<4> dmask;
+ bits<1> unorm;
+ bits<1> glc;
+ bits<1> da;
+ bits<1> r128;
+ bits<1> tfe;
+ bits<1> lwe;
+ bits<1> slc;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<7> ssamp;
+
+ let Inst{11-8} = dmask;
+ let Inst{12} = unorm;
+ let Inst{13} = glc;
+ let Inst{14} = da;
+ let Inst{15} = r128;
+ let Inst{16} = tfe;
+ let Inst{17} = lwe;
let Inst{24-18} = op;
- let Inst{25} = SLC;
+ let Inst{25} = slc;
let Inst{31-26} = 0x3c;
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC{6-2};
- let Inst{57-53} = SSAMP{6-2};
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{57-53} = ssamp{6-2};
}
class FLATe<bits<7> op> : Enc64 {
@@ -539,36 +547,40 @@ class FLATe<bits<7> op> : Enc64 {
}
class EXPe : Enc64 {
- bits<4> EN;
- bits<6> TGT;
- bits<1> COMPR;
- bits<1> DONE;
- bits<1> VM;
- bits<8> VSRC0;
- bits<8> VSRC1;
- bits<8> VSRC2;
- bits<8> VSRC3;
-
- let Inst{3-0} = EN;
- let Inst{9-4} = TGT;
- let Inst{10} = COMPR;
- let Inst{11} = DONE;
- let Inst{12} = VM;
+ bits<4> en;
+ bits<6> tgt;
+ bits<1> compr;
+ bits<1> done;
+ bits<1> vm;
+ bits<8> vsrc0;
+ bits<8> vsrc1;
+ bits<8> vsrc2;
+ bits<8> vsrc3;
+
+ let Inst{3-0} = en;
+ let Inst{9-4} = tgt;
+ let Inst{10} = compr;
+ let Inst{11} = done;
+ let Inst{12} = vm;
let Inst{31-26} = 0x3e;
- let Inst{39-32} = VSRC0;
- let Inst{47-40} = VSRC1;
- let Inst{55-48} = VSRC2;
- let Inst{63-56} = VSRC3;
+ let Inst{39-32} = vsrc0;
+ let Inst{47-40} = vsrc1;
+ let Inst{55-48} = vsrc2;
+ let Inst{63-56} = vsrc3;
}
let Uses = [EXEC] in {
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op>;
+ VOP1e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@@ -594,7 +606,14 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let LGKM_CNT = 1;
let DS = 1;
let UseNamedOperandTable = 1;
- let DisableEncoding = "$m0";
+ let Uses = [M0];
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtDS";
let SchedRW = [WriteLDS];
}
@@ -607,6 +626,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ let AsmMatchConverter = "cvtMubuf";
let SchedRW = [WriteVMEM];
}
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
index 5ab33b4..d647c25 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -28,8 +28,7 @@
using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st) { }
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -75,6 +74,20 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
+bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ // TODO: The generic check fails for VALU instructions that should be
+ // rematerializable due to implicit reads of exec. We really want all of the
+ // generic logic for this except for the exec check.
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
@@ -427,7 +440,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
@@ -566,7 +581,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -592,10 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
@@ -729,6 +741,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
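A scalar model of the V_CNDMASK_B64_PSEUDO expansion above (assuming V_CNDMASK_B32 selects src1 when the condition bit is set): one condition drives two independent 32-bit selects.

    #include <cstdint>

    static uint64_t cndmask64(uint64_t Src0, uint64_t Src1, bool Cond) {
      uint32_t Lo = Cond ? uint32_t(Src1) : uint32_t(Src0);
      uint32_t Hi = Cond ? uint32_t(Src1 >> 32) : uint32_t(Src0 >> 32);
      return (uint64_t(Hi) << 32) | Lo;
    }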
@@ -759,7 +791,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
(!isOperandLegal(MI, Src0Idx, &Src1) ||
!isOperandLegal(MI, Src1Idx, &Src0))) {
return nullptr;
- }
+ }
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
@@ -801,7 +833,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
@@ -868,6 +900,134 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
+static void removeModOperands(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src0_modifiers);
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src1_modifiers);
+ int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
+ AMDGPU::OpName::src2_modifiers);
+
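+ // RemoveOperand shifts later operand indices down, so erase from the
+ // highest index to the lowest to keep the cached indices valid.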
+ MI.RemoveOperand(Src2ModIdx);
+ MI.RemoveOperand(Src1ModIdx);
+ MI.RemoveOperand(Src0ModIdx);
+}
+
+bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned Opc = UseMI->getOpcode();
+ if (Opc == AMDGPU::V_MAD_F32) {
+ // Don't fold if we are using source modifiers. The new VOP2 instructions
+ // don't have them.
+ if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
+ hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
+ return false;
+ }
+
+ MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
+ MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);
+
+ // Multiplied part is the constant: Use v_madmk_f32
+ // We should only expect these to be on src0 due to canonicalizations.
+ if (Src0->isReg() && Src0->getReg() == Reg) {
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ if (!Src2->isReg() ||
+ (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
+ return false;
+
+ // We need to do some weird-looking operand shuffling since the madmk
+ // operands are out of the normal expected order with the multiplied
+ // constant as the last operand.
+ //
+ // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
+ // src0 -> src2 K
+ // src1 -> src0
+ // src2 -> src1
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ unsigned Src1Reg = Src1->getReg();
+ unsigned Src1SubReg = Src1->getSubReg();
+ unsigned Src2Reg = Src2->getReg();
+ unsigned Src2SubReg = Src2->getSubReg();
+ Src0->setReg(Src1Reg);
+ Src0->setSubReg(Src1SubReg);
+ Src0->setIsKill(Src1->isKill());
+
+ Src1->setReg(Src2Reg);
+ Src1->setSubReg(Src2SubReg);
+ Src1->setIsKill(Src2->isKill());
+
+ Src2->ChangeToImmediate(Imm);
+
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+
+ // Added part is the constant: Use v_madak_f32
+ if (Src2->isReg() && Src2->getReg() == Reg) {
+ // Not allowed to use constant bus for another operand.
+ // We can however allow an inline immediate as src0.
+ if (!Src0->isImm() &&
+ (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+
+ if (!Src1->isReg() ||
+ (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+
+ const int64_t Imm = DefMI->getOperand(1).getImm();
+
+ // FIXME: This would be a lot easier if we could return a new instruction
+ // instead of having to modify in place.
+
+ // Remove these first since they are at the end.
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::omod));
+ UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
+ AMDGPU::OpName::clamp));
+
+ Src2->ChangeToImmediate(Imm);
+
+ // These come before src2.
+ removeModOperands(*UseMI);
+ UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
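A scalar model of the two folds above (assumed VOP2 semantics; K is the 32-bit literal operand):

    // v_mad_f32   d = s0 * s1 + s2
    // v_madmk_f32 d = s0 * K  + s1   (multiplied operand is the literal)
    // v_madak_f32 d = s0 * s1 + K    (added operand is the literal)
    static float mad(float s0, float s1, float s2) { return s0 * s1 + s2; }
    static float madmk(float s0, float k, float s1) { return s0 * k + s1; }
    static float madak(float s0, float s1, float k) { return s0 * s1 + k; }
    // Hence mad(K, b, c) == madmk(b, K, c) and mad(a, b, K) == madak(a, b, K),
    // which is exactly the operand shuffling performed in the code.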
bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const {
@@ -1001,15 +1161,25 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
(FloatToBits(-4.0f) == Val);
}
-bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
- if (MO.isImm())
- return isInlineConstant(APInt(32, MO.getImm(), true));
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ if (MO.isImm()) {
+ // MachineOperand provides no way to tell the true operand size, since it
+ // only records a 64-bit value. We need to know the size to determine if a
+ // 32-bit floating point immediate bit pattern is legal for an integer
+ // immediate. It would be for any 32-bit integer operand, but would not be
+ // for a 64-bit one.
+
+ unsigned BitSize = 8 * OpSize;
+ return isInlineConstant(APInt(BitSize, MO.getImm(), true));
+ }
return false;
}
-bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
- return MO.isImm() && !isInlineConstant(MO);
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
+ unsigned OpSize) const {
+ return MO.isImm() && !isInlineConstant(MO, OpSize);
}
static bool compareMachineOp(const MachineOperand &Op0,
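To see why the operand size matters, a sketch using only the integer part of the inline range (assumed here to be [-16, 64]; the float bit patterns are omitted): the same 64-bit MachineOperand payload can be inline at 32 bits yet only a literal at 64.

    #include <cassert>
    #include <cstdint>

    static bool isInlineAtWidth(int64_t Imm, unsigned BitSize) {
      int64_t V = BitSize == 32 ? int64_t(int32_t(Imm)) : Imm;
      return V >= -16 && V <= 64; // integer inline range only (sketch)
    }

    int main() {
      int64_t Payload = 0x100000005; // low 32 bits encode the value 5
      assert(isInlineAtWidth(Payload, 32));  // inline as a 32-bit operand
      assert(!isInlineAtWidth(Payload, 64)); // literal as a 64-bit operand
      return 0;
    }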
@@ -1039,38 +1209,13 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- if (isLiteralConstant(MO))
+ unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
+ if (isLiteralConstant(MO, OpSize))
return RI.opCanUseLiteralConstant(OpInfo.OperandType);
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
@@ -1094,9 +1239,10 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
}
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO) const {
+ const MachineOperand &MO,
+ unsigned OpSize) const {
// Literal constants use the constant bus.
- if (isLiteralConstant(MO))
+ if (isLiteralConstant(MO, OpSize))
return true;
if (!MO.isReg() || !MO.isUse())
@@ -1152,7 +1298,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
- if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
+ if (MI->getOperand(i).isImm()) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1160,7 +1306,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
- if (isLiteralConstant(MI->getOperand(i))) {
+ if (isLiteralConstant(MI->getOperand(i),
+ RI.getRegClass(RegClass)->getSize())) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1207,9 +1354,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
break;
-
const MachineOperand &MO = MI->getOperand(OpIdx);
- if (usesConstantBus(MRI, MO)) {
+ if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
if (MO.isReg()) {
if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
@@ -1277,6 +1423,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
@@ -1313,7 +1460,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
return MRI.getRegClass(Reg);
- return RI.getRegClass(Reg);
+ return RI.getPhysRegClass(Reg);
}
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
@@ -1457,14 +1604,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (!MO)
MO = &MI->getOperand(OpIdx);
- if (isVALU(InstDesc.Opcode) && usesConstantBus(MRI, *MO)) {
+ if (isVALU(InstDesc.Opcode) &&
+ usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
unsigned SGPRUsed =
MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
- if (usesConstantBus(MRI, MI->getOperand(i)) &&
- MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
+ const MachineOperand &Op = MI->getOperand(i);
+ if (Op.isReg() && Op.getReg() != SGPRUsed &&
+ usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
return false;
}
}
@@ -1557,7 +1706,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// We can use one SGPR in each VOP3 instruction.
continue;
}
- } else if (!isLiteralConstant(MO)) {
+ } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
// If it is not a register and not a literal constant, then it must be
// an inline constant which is always legal.
continue;
@@ -1730,20 +1879,21 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
- assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
- "with non-zero soffset is not implemented");
- (void)SOffset;
// Create the new instruction.
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*Offset);
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
@@ -1787,14 +1937,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
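+  // SBase is now read by both halves of the split, so only the last (Hi) use
+  // may carry the original kill flag; the Lo uses clear it explicitly.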
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+             // to make sure the kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
@@ -1803,25 +1959,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
@@ -1876,7 +2035,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
// SMRD instructions take a dword offset on SI and a byte offset on VI
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -1916,12 +2076,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
+  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // soffset
+  MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); // offset
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
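+  // At this point the rewritten instruction carries the remaining MUBUF
+  // operands: srsrc plus the soffset, offset, glc, slc and tfe immediates.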
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
index 1298030..64b5120 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
@@ -72,6 +72,9 @@ public:
return RI;
}
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+ AliasAnalysis *AA) const override;
+
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const override;
@@ -114,7 +117,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(unsigned Opcode) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
@@ -136,6 +139,11 @@ public:
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const final;
+
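+  // A much larger look-ahead than the generic default, presumably so
+  // MachineCSE still finds reuse across the long straight-line blocks that
+  // are common in shader code.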
+ unsigned getMachineCSELookAheadLimit() const override { return 500; }
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
@@ -208,24 +216,25 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
+ bool isVGPRSpill(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
+ }
+
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const MachineOperand &MO) const;
- bool isLiteralConstant(const MachineOperand &MO) const;
+ bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
+ bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
- /// \brief Return true if the given offset Size in bytes can be folded into
- /// the immediate offsets of a memory instruction for the given address space.
- bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
-
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
/// \brief Returns true if this operand uses the constant bus.
bool usesConstantBus(const MachineRegisterInfo &MRI,
- const MachineOperand &MO) const;
+ const MachineOperand &MO,
+ unsigned OpSize) const;
/// \brief Return true if this instruction has any modifiers.
/// e.g. src[012]_mod, omod, clamp.
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
index a749e7f..4fc2498 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -6,6 +6,13 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+def isCI : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
+
+def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
class vop {
field bits<9> SI3;
@@ -117,9 +124,111 @@ def SIconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
>;
+//===----------------------------------------------------------------------===//
+// SDNodes and PatFrags for local loads and stores to enable s_mov_b32 m0, -1
+// to be glued to the memory instructions.
+//===----------------------------------------------------------------------===//
+
+def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
+ return isLocalLoad(cast<LoadSDNode>(N));
+}]>;
+
+def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+def si_load_local_align8 : Aligned8Bytes <
+ (ops node:$ptr), (si_load_local node:$ptr)
+>;
+
+def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def si_az_extload_local : AZExtLoadBase <si_ld_local>;
+
+multiclass SIExtLoadLocal <PatFrag ld_node> {
+
+ def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
+ >;
+
+ def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
+ [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
+ >;
+}
+
+defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
+defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
+
+def SIst_local : SDNode <"ISD::STORE", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
+>;
+
+def si_st_local : PatFrag <
+ (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
+ return isLocalStore(cast<StoreSDNode>(N));
+}]>;
+
+def si_store_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
+ !cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_store_local_align8 : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
+>;
+
+def si_truncstore_local : PatFrag <
+ (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def si_truncstore_local_i8 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def si_truncstore_local_i16 : PatFrag <
+ (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+multiclass SIAtomicM0Glue2 <string op_name> {
+
+ def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+ >;
+
+ def _local : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+}
+
+defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
+defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
+defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
+defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
+defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
+defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
+defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
+defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
+defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
+defm si_atomic_swap : SIAtomicM0Glue2 <"SWAP">;
+
+def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
+defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
+
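+// The SDNodeXForms below all gain an SDLoc argument: getTargetConstant and
+// getTargetConstantFP now also take the location of the node being replaced,
+// so the new constant keeps its debug location.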
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
+ MVT::i32);
}]>;
def LO32f : SDNodeXForm<fpimm, [{
@@ -129,12 +238,13 @@ def LO32f : SDNodeXForm<fpimm, [{
// Transformation function, extract the upper 32bit of a 64bit immediate
def HI32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, SDLoc(N), MVT::i32);
}]>;
def HI32f : SDNodeXForm<fpimm, [{
APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
- return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32);
+ return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
+ MVT::f32);
}]>;
def IMM8bitDWORD : PatLeaf <(imm),
@@ -142,39 +252,39 @@ def IMM8bitDWORD : PatLeaf <(imm),
>;
def as_dword_i32imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 2, MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 2, SDLoc(N), MVT::i32);
}]>;
def as_i1imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
def as_i8imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;
def as_i16imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
def as_i32imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def as_i64imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
- N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def IMM8bit : PatLeaf <(imm),
@@ -211,12 +321,11 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
- if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
- AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
@@ -234,14 +343,88 @@ def FRAMEri32 : Operand<iPTR> {
let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
+def SoppBrTarget : AsmOperandClass {
+ let Name = "SoppBrTarget";
+ let ParserMethod = "parseSOppBrTarget";
+}
+
def sopp_brtarget : Operand<OtherVT> {
let EncoderMethod = "getSOPPBrEncoding";
let OperandType = "OPERAND_PCREL";
+ let ParserMatchClass = SoppBrTarget;
}
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+def MubufOffsetMatchClass : AsmOperandClass {
+ let Name = "MubufOffset";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "DSOffset"#parser;
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset";
+}
+
+def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
+def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
+
+def DSOffset01MatchClass : AsmOperandClass {
+ let Name = "DSOffset1";
+ let ParserMethod = "parseDSOff01OptionalOps";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset01";
+}
+
+class GDSBaseMatchClass <string parser> : AsmOperandClass {
+ let Name = "GDS"#parser;
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
+def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
+
+def GLCMatchClass : AsmOperandClass {
+ let Name = "GLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def SLCMatchClass : AsmOperandClass {
+ let Name = "SLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def TFEMatchClass : AsmOperandClass {
+ let Name = "TFE";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def OModMatchClass : AsmOperandClass {
+ let Name = "OMod";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def ClampMatchClass : AsmOperandClass {
+ let Name = "Clamp";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
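+// Each match class above teaches the assembler to parse one of the named,
+// optional operands (e.g. "offset:16" or "gds" on DS instructions, "glc slc
+// tfe" on MUBUF); the operand definitions below hook them up through
+// ParserMatchClass.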
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -255,36 +438,58 @@ def addr64 : Operand<i1> {
}
def mbuf_offset : Operand<i16> {
let PrintMethod = "printMBUFOffset";
+ let ParserMatchClass = MubufOffsetMatchClass;
}
-def ds_offset : Operand<i16> {
+class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
let PrintMethod = "printDSOffset";
+ let ParserMatchClass = mc;
}
+def ds_offset : ds_offset_base <DSOffsetMatchClass>;
+def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+
def ds_offset0 : Operand<i8> {
let PrintMethod = "printDSOffset0";
+ let ParserMatchClass = DSOffset01MatchClass;
}
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
+ let ParserMatchClass = DSOffset01MatchClass;
}
+class gds_base <AsmOperandClass mc> : Operand <i1> {
+ let PrintMethod = "printGDS";
+ let ParserMatchClass = mc;
+}
+def gds : gds_base <GDSMatchClass>;
+
+def gds01 : gds_base <GDS01MatchClass>;
+
def glc : Operand <i1> {
let PrintMethod = "printGLC";
+ let ParserMatchClass = GLCMatchClass;
}
def slc : Operand <i1> {
let PrintMethod = "printSLC";
+ let ParserMatchClass = SLCMatchClass;
}
def tfe : Operand <i1> {
let PrintMethod = "printTFE";
+ let ParserMatchClass = TFEMatchClass;
}
def omod : Operand <i32> {
let PrintMethod = "printOModSI";
+ let ParserMatchClass = OModMatchClass;
}
def ClampMod : Operand <i1> {
let PrintMethod = "printClampSI";
+ let ParserMatchClass = ClampMatchClass;
}
} // End OperandType = "OPERAND_IMMEDIATE"
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@@ -293,8 +498,8 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
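+// (The addr64 selection now also produces soffset/glc/slc/tfe operands, hence
+// the larger operand counts; the atomic form is smaller, presumably because
+// glc encodes the return-value variant and is set elsewhere.)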
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -316,6 +521,7 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
+ int NEG = 1;
}
def DSTCLAMP {
@@ -364,7 +570,7 @@ class EXPCommon : InstSI<
multiclass EXP_m {
- let isPseudo = 1 in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
}
@@ -381,83 +587,109 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOP1 <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+}
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+}
-multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- pattern>;
+multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
- def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0">;
+ def "" : SOP1_Pseudo <opName, outs, ins, pattern>;
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
- opName#" $dst, $src0">;
-}
+ def _si : SOP1_Real_si <op, opName, outs, ins, asm>;
-multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- pattern>;
+ def _vi : SOP1_Real_vi <op, opName, outs, ins, asm>;
- def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
-
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
}
+multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
// no input, 64-bit output.
multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
def "" : SOP1_Pseudo <opName, (outs SReg_64:$dst), (ins), pattern>;
def _si : SOP1_Real_si <op, opName, (outs SReg_64:$dst), (ins),
opName#" $dst"> {
- let SSRC0 = 0;
+ let ssrc0 = 0;
}
def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$dst), (ins),
opName#" $dst"> {
- let SSRC0 = 0;
+ let ssrc0 = 0;
}
}
-// 64-bit input, 32-bit output.
-multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- pattern>;
+// 64-bit input, no output
+multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
+ def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
- def _si : SOP1_Real_si <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
+ def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
- opName#" $dst, $src0">;
+ def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
+ opName#" $src0"> {
+ let sdst = 0;
+ }
}
+// 64-bit input, 32-bit output.
+multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
SOP2<outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
let Size = 4;
+
+ // Pseudo instructions have no encodings, but adding this field here allows
+ // us to do:
+ // let sdst = xxx in {
+ // for multiclasses that include both real and pseudo instructions.
+ field bits<7> sdst = 0;
}
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
@@ -472,44 +704,36 @@ multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
opName#" $dst, $src0, $src1 [$scc]">;
}
-multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), pattern>;
+multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
- def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-}
+ def "" : SOP2_Pseudo <opName, outs, ins, pattern>;
-multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), pattern>;
+ def _si : SOP2_Real_si <op, opName, outs, ins, asm>;
- def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
+ def _vi : SOP2_Real_vi <op, opName, outs, ins, asm>;
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
}
-multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), pattern>;
-
- def _si : SOP2_Real_si <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
+multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_64:$dst),
- (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
-}
+multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
+ op, opName, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $dst, $src0, $src1", []>;
+ opName#" $src0, $src1", []>;
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -521,17 +745,34 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOPK <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+}
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+}
+
+multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
+ string asm = opName#opAsm> {
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
+
+}
multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
@@ -548,13 +789,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
(ins SReg_32:$src0, u16imm:$src1), pattern>;
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ let DisableEncoding = "$dst" in {
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ }
}
+multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
+ op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
+ " $sdst, $simm16"
+>;
+
+multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
+ string argAsm, string asm = opName#argAsm> {
+
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+ }
+
+ def _vi : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+ }
+}
//===----------------------------------------------------------------------===//
// SMRD classes
//===----------------------------------------------------------------------===//
@@ -563,19 +830,24 @@ class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SMRD <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMRDe <op, imm>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMEMe_vi <op, imm>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
@@ -584,7 +856,11 @@ multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
- def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ // glc is only applicable to scalar stores, which are not yet
+ // implemented.
+ let glc = 0 in {
+ def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ }
}
multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
@@ -610,8 +886,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
let PrintMethod = "printOperandAndMods";
}
+
+def InputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithInputMods";
+}
+
def InputModsNoDefault : Operand <i32> {
let PrintMethod = "printOperandAndMods";
+ let ParserMatchClass = InputModsMatchClass;
}
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
@@ -624,9 +906,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
- !if(!eq(VT.Size, 64), VReg_64,
- SReg_64)); // else VT == i1
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
@@ -641,31 +923,12 @@ class getVOPSrc1ForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
}
-// Returns the register classes for the source arguments of a VOP[12C]
-// instruction for the given SrcVTs.
-class getInRC32 <list<ValueType> SrcVT> {
- list<DAGOperand> ret = [
- getVOPSrc0ForVT<SrcVT[0]>.ret,
- getVOPSrc1ForVT<SrcVT[1]>.ret
- ];
-}
-
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
}
-// Returns the register classes for the source arguments of a VOP3
-// instruction for the given SrcVTs.
-class getInRC64 <list<ValueType> SrcVT> {
- list<DAGOperand> ret = [
- getVOP3SrcForVT<SrcVT[0]>.ret,
- getVOP3SrcForVT<SrcVT[1]>.ret,
- getVOP3SrcForVT<SrcVT[2]>.ret
- ];
-}
-
// Returns 1 if the source arguments have modifiers, 0 if they do not.
class hasModifiers<ValueType SrcVT> {
bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
@@ -723,7 +986,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getAsm32 <int NumSrcArgs> {
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = " $dst, $src0"#
+ string ret = "$dst, $src0"#
!if(!eq(NumSrcArgs, 1), "", src1)#
!if(!eq(NumSrcArgs, 3), src2, "");
}
@@ -739,7 +1002,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<NumSrcArgs>.ret,
- " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
@@ -751,7 +1014,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src0VT = ArgVT[1];
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
- field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -767,10 +1030,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
}
+// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
+// for the instruction patterns to work.
+def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
+
+def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -794,22 +1067,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
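+// (v_cndmask_b32 selects per lane between src0 and src1 using the mask in
+// src2: VCC in the VOP2 form, an arbitrary SGPR pair in the VOP3 form, hence
+// the custom operand lists instead of the defaults.)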
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = " $dst, $src0, $vsrc1, $src2";
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -833,34 +1111,62 @@ class AtomicNoRet <string noRetOp, bit isRet> {
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ field bits<8> vdst;
+ field bits<9> src0;
+}
+
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicate = SIAssemblerPredicate;
+}
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
}
multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- // No VI instruction. This class is for SI only.
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
}
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -868,8 +1174,7 @@ multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -877,49 +1182,70 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
- bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ? ,0) ,0);
+ bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
bits<2> omod = !if(HasModifiers, ?, 0);
bits<1> clamp = !if(HasModifiers, ?, 0);
bits<9> src1 = !if(HasSrc1, ?, 0);
bits<9> src2 = !if(HasSrc2, ?, 0);
}
+class VOP3DisableModFields <bit HasSrc0Mods,
+ bit HasSrc1Mods = 0,
+ bit HasSrc2Mods = 0,
+ bit HasOutputMods = 0> {
+ bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
+ bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
+ bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
+ bits<2> omod = !if(HasOutputMods, ?, 0);
+ bits<1> clamp = !if(HasOutputMods, ?, 0);
+}
+
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP3Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -937,14 +1263,16 @@ multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
}
// VOP3_m without source modifiers
-multiclass VOP3_m_nosrcmod <vop op, dag outs, dag ins, string asm, list<dag> pattern,
+multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
let src0_modifiers = 0,
src1_modifiers = 0,
- src2_modifiers = 0 in {
+ src2_modifiers = 0,
+ clamp = 0,
+ omod = 0 in {
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
}
@@ -1034,9 +1362,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec> {
+ bit HasMods, bit defExec, string revOp> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
@@ -1052,18 +1381,22 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
string asm, list<dag> pattern = []> {
- let isPseudo = 1 in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : VOPAnyCommon <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE>;
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI>;
+ SIMCInstr <opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ }
}
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
@@ -1073,7 +1406,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs,
defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -1108,7 +1441,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1132,7 +1465,7 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
string revOp = opName> {
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1150,7 +1483,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1176,7 +1509,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
string revOp, bit HasMods> {
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
revOp, HasMods>;
}
@@ -1204,53 +1537,75 @@ let isCodeGenOnly = 0 in {
def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
!strconcat(opName, VOP_MADK.Asm), []>,
SIMCInstr <opName#"_e32", SISubtarget.SI>,
- VOP2_MADKe <op.SI>;
+ VOP2_MADKe <op.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins,
!strconcat(opName, VOP_MADK.Asm), []>,
SIMCInstr <opName#"_e32", SISubtarget.VI>,
- VOP2_MADKe <op.VI>;
+ VOP2_MADKe <op.VI> {
+ let AssemblerPredicates = [isVI];
+ }
} // End isCodeGenOnly = 0
}
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec> {
+ string opName, bit DefExec, string revOpName = ""> {
def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
def _si : VOPC<op.SI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
}
multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>;
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
+}
+
+// Special case for class instructions which only have modifiers on
+// the 1st source operand.
+multiclass VOPC_Class_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec, string revOp> {
+ defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
+ VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
bit DefExec = 0> : VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1258,54 +1613,55 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, revOp
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Helper <
+ bit DefExec = 0> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, opName
>;
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL>
- : VOPCInst <op, opName, P, cond, 1>;
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1322,7 +1678,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, P.Outs, P.Ins64, P.Asm64,
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
!if(!eq(P.NumSrcArgs, 3),
!if(P.HasModifiers,
[(set P.DstVT:$dst,
@@ -1354,7 +1710,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
op, opName,
- P.Outs,
+ (outs P.DstRC.RegClass:$dst),
(ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
@@ -1407,6 +1763,7 @@ class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
VINTRPCommon <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
@@ -1421,17 +1778,13 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
VINTRPe_vi <op>,
SIMCInstr<opName, SISubtarget.VI>;
-multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm,
- string disableEncoding = "", string constraints = "",
+multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
list<dag> pattern = []> {
- let DisableEncoding = disableEncoding,
- Constraints = constraints in {
- def "" : VINTRP_Pseudo <opName, outs, ins, pattern>;
+ def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
- def _si : VINTRP_Real_si <op, opName, outs, ins, asm>;
+ def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
- def _vi : VINTRP_Real_vi <op, opName, outs, ins, asm>;
- }
+ def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
}
//===----------------------------------------------------------------------===//
@@ -1442,33 +1795,33 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
DS <outs, ins, "", pattern>,
SIMCInstr <opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+}
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe_vi <op>,
SIMCInstr <opName, SISubtarget.VI>;
-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_si <op, opName, outs, ins, asm> {
  // A single load interprets the two i8imm operands as a single i16 offset.
bits<16> offset;
let offset0 = offset{7-0};
let offset1 = offset{15-8};
+ let isCodeGenOnly = 0;
}
-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
  // A single load interprets the two i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1476,178 +1829,166 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
- let data0 = 0, data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Load_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset"#" [M0]",
- []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
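+// 1 address, no data; returns a value, with two independent 8-bit offsets
+// (the ds_read2* variants).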
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds01:$gds),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Load2_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
- []>;
-
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
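+// 1 address, 1 data; no return value (plain stores and no-return atomics).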
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset"#" [M0]",
- []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let vdst = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
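+// 1 address with two data operands and two independent 8-bit offsets;
+// no return value (the ds_write2* variants).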
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
- []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
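+// 1 address, 1 data; returns a value (the _RTN atomics).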
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A1D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
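+// 1 address, 2 data; returns a value.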
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A2D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
- [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds)
+>;
+
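+// 1 address, 2 data; no return value.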
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A2D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
- [], noRetOp>;
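+// No address, no data; returns a value (ds_consume/ds_append).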
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ def "" : DS_Pseudo <opName, outs, ins, []>;
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
- }
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
}
-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A1D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset"#" [M0]",
- [], noRetOp>;
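+// 1 address, no data; returns a value; GDS-only (the gds bit is forced to 1).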
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
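+// 1 address, no data, no return value; GDS-only.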
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
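+// 1 address, no data, no return value (used for the ds_*_src2_*
+// instructions below).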
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1657,6 +1998,7 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MTBUF <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
@@ -1718,6 +2060,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
field bits<7> VI = vi;
}
+let isCodeGenOnly = 0 in {
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+} // End let isCodeGenOnly = 0
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
@@ -1727,6 +2083,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MUBUF <outs, ins, "", pattern>,
SIMCInstr<opName, SISubtarget.NONE> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
// dummy fields, so that we can use let statements around multiclasses
bits<1> offen;
@@ -1760,7 +2117,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
- let addr64 = 0 in {
+ let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1773,7 +2130,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <1>;
- let addr64 = 1 in {
+ let addr64 = 1, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1781,11 +2138,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
// for VI appropriately.
}
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
string asm, list<dag> pattern, bit is_return> {
@@ -1809,7 +2161,7 @@ multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
AtomicNoRet<NAME#"_ADDR64", is_return>;
- let offen = 0, idxen = 0, addr64 = 1, tfe = 0, soffset = 128 in {
+ let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1828,14 +2180,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", [], 0
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
defm _OFFSET : MUBUFAtomicOffset_m <
op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
} // glc = 0
@@ -1847,17 +2199,17 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
- i1:$slc), vt:$vdata_in))], 1
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), vt:$vdata_in))], 1
>;
defm _RTN_OFFSET : MUBUFAtomicOffset_m <
op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1876,9 +2228,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let mayLoad = 1, mayStore = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
@@ -1887,7 +2238,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1895,43 +2246,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 0, idxen = 1 in {
defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, i16:$offset)))]>;
+ i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
}
}
}
multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> {
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
let mayLoad = 0, mayStore = 1 in {
defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1939,30 +2295,52 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
"$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0,
- soffset = 128 /* ZERO */ in {
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>;
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
}
} // End mayLoad = 0, mayStore = 1
}
class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
- FLAT <op, (outs regClass:$data),
+ FLAT <op, (outs regClass:$vdst),
(ins VReg_64:$addr),
- asm#" $data, $addr, [M0, FLAT_SCRATCH]", []> {
+ asm#" $vdst, $addr, [M0, FLAT_SCRATCH]", []> {
let glc = 0;
let slc = 0;
let tfe = 0;
+ let data = 0;
let mayLoad = 1;
}
@@ -1978,6 +2356,7 @@ class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
let glc = 0;
let slc = 0;
let tfe = 0;
+ let vdst = 0;
}
class MIMG_Mask <string op, int channels> {
@@ -1996,7 +2375,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc",
[]> {
- let SSAMP = 0;
+ let ssamp = 0;
let mayLoad = 1;
let mayStore = 0;
let hasPostISelHook = 1;
@@ -2143,15 +2522,6 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4"]];
}
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
@@ -2169,6 +2539,33 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
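+// Maps a commuted compare opcode back to its original version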
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
index bbedef2..839c2e9 100644
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -26,20 +26,17 @@ def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
}
-def isGCN : Predicate<"Subtarget.getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-def isSICI : Predicate<
- "Subtarget.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->;
-def isCI : Predicate<"Subtarget.getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
+def isGCN : Predicate<"Subtarget->getGeneration() "
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
+def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
+def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
+
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";
@@ -103,7 +100,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//===----------------------------------------------------------------------===//
let isMoveImm = 1 in {
- let isReMaterializable = 1 in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
 } // End isReMaterializable = 1, isAsCheapAsAMove = 1
@@ -133,20 +130,20 @@ defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
let Defs = [SCC] in {
- //defm S_BCNT0_I32_B32 : SOP1_BCNT0 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
- //defm S_BCNT0_I32_B64 : SOP1_BCNT0 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
+ defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
+ defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
[(set i32:$dst, (ctpop i32:$src0))]
>;
defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
} // End Defs = [SCC]
-//defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
-//defm S_FF0_I32_B64 : SOP1_FF0 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
+defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
+defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
[(set i32:$dst, (cttz_zero_undef i32:$src0))]
>;
-////defm S_FF1_I32_B64 : SOP1_FF1 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
+defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
@@ -164,10 +161,10 @@ defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
>;
-////defm S_BITSET0_B32 : SOP1_BITSET0 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
-////defm S_BITSET0_B64 : SOP1_BITSET0 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
-////defm S_BITSET1_B32 : SOP1_BITSET1 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
-////defm S_BITSET1_B64 : SOP1_BITSET1 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
+defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
+defm S_BITSET0_B64 : SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
+defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
+defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
@@ -192,7 +189,7 @@ defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
-//defm S_CBRANCH_JOIN : SOP1_ <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
+defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
let Defs = [SCC] in {
defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
@@ -240,9 +237,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>;
} // End Defs = [SCC]
-defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
let Uses = [SCC] in {
+ defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
} // End Uses = [SCC]
@@ -306,7 +303,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
@@ -321,7 +319,13 @@ defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
} // End Defs = [SCC]
-//defm S_CBRANCH_G_FORK : SOP2_ <sop2<0x2b, 0x29>, "s_cbranch_g_fork", []>;
+let sdst = 0 in {
+defm S_CBRANCH_G_FORK : SOP2_m <
+ sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs),
+ (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", []
+>;
+}
+
let Defs = [SCC] in {
defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
} // End Defs = [SCC]
@@ -378,6 +382,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
>;
*/
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
@@ -391,18 +396,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
} // End isCompare = 1
-let isCommutable = 1 in {
- let Defs = [SCC], isCommutable = 1 in {
- defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- }
- defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
}
-//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
+defm S_CBRANCH_I_FORK : SOPK_m <
+ sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
+ (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
-defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
-defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
+defm S_SETREG_B32 : SOPK_m <
+ sopk<0x13, 0x12>, "s_setreg_b32", (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+// FIXME: Not on SI?
+//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
+ sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
+ (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
+>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
@@ -477,13 +491,11 @@ def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
-let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
- [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
- > {
- let DisableEncoding = "$m0";
- }
-} // End Uses = [EXEC]
+let Uses = [EXEC, M0] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
+ [(AMDGPUsendmsg (i32 imm:$simm16))]
+ >;
+} // End Uses = [EXEC, M0]
def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
@@ -501,31 +513,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//
-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {
defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -539,233 +550,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-} // End hasSideEffects = 1
let SubtargetPredicate = isSICI in {
defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End SubtargetPredicate = isSICI
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1
//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -778,12 +763,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
@@ -796,20 +795,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32"
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-
-
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
@@ -822,7 +835,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
@@ -840,57 +858,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64"
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128
//} // End isCI
-// TODO: _SRC2_* forms
-
-defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
-defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
-defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-
-defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
-defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
-defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
-
-// 2 forms.
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1182,9 +1231,11 @@ def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-//def V_NOP : VOP1_ <0x00000000, "v_nop", []>;
+let vdst = 0, src0 = 0 in {
+defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+}
-let isMoveImm = 1 in {
+let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1
@@ -1222,16 +1273,17 @@ defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
VOP_I32_F32, fp_to_sint
>;
-defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
VOP_I32_F32, fp_to_f16
>;
defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
VOP_F32_I32, f16_to_fp
>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
+ VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
+ VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
VOP_F32_F64, fround
>;
@@ -1329,16 +1381,24 @@ defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
-//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "v_frexp_exp_i32_f64", VOP_I32_F32>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c, 0x30>, "v_frexp_exp_i32_f64",
+ VOP_I32_F64
+>;
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
VOP_F64_F64
>;
defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
-//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "v_frexp_exp_i32_f32", VOP_I32_F32>;
+defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
+ VOP_I32_F32
+>;
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32
>;
-//def V_CLREXCP : VOP1_ <0x00000041, "v_clrexcp", []>;
+let vdst = 0, src0 = 0 in {
+defm V_CLREXCP : VOP1_m <vop1<0x41, 0x35>, (outs), (ins), "v_clrexcp", [],
+ "v_clrexcp"
+>;
+}
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
@@ -1348,6 +1408,7 @@ let SubtargetPredicate = isSICI in {
let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
@@ -1375,40 +1436,68 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
// VINTRP Instructions
//===----------------------------------------------------------------------===//
+let Uses = [M0] in {
+
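+// m0 is modeled here as an implicit use of the VINTRP instructions rather
+// than an explicit $m0 operand, which is why the asm strings below print a
+// literal [m0] and the (ins) lists no longer carry M0Reg:$m0.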
// FIXME: Specify SchedRW for VINTRP instructions.
-defm V_INTERP_P1_F32 : VINTRP_m <
- 0x00000000, "v_interp_p1_f32",
+
+multiclass V_INTERP_P1_F32_m : VINTRP_m <
+ 0x00000000,
(outs VGPR_32:$dst),
- (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
- "$m0">;
+ (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]
+>;
+
+let OtherPredicates = [has32BankLDS] in {
+
+defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has32BankLDS]
+
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
+
+defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
+
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst"
+
+let DisableEncoding = "$src0", Constraints = "$src0 = $dst" in {
defm V_INTERP_P2_F32 : VINTRP_m <
- 0x00000001, "v_interp_p2_f32",
+ 0x00000001,
(outs VGPR_32:$dst),
- (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
- "$src0,$m0",
- "$src0 = $dst">;
+ (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
defm V_INTERP_MOV_F32 : VINTRP_m <
- 0x00000002, "v_interp_mov_f32",
+ 0x00000002,
(outs VGPR_32:$dst),
- (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
- "$m0">;
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
+ [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
+ (i32 imm:$attr)))]>;
+
+} // End Uses = [M0]
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32_e64 : VOP3_m_nosrcmod <vop3<0x100>, (outs VGPR_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
- "v_cndmask_b32_e64", 3
->;
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1434,11 +1523,18 @@ defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+
+defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa, 0x7>, "v_mul_hi_i32_i24",
+ VOP_I32_I32_I32
+>;
+
defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
+
+defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc, 0x9>, "v_mul_hi_u32_u24",
+ VOP_I32_I32_I32
+>;
defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
@@ -1545,8 +1641,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
@@ -1561,7 +1657,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
@@ -1615,14 +1710,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-}
defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
@@ -1810,6 +1903,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1 in {
+// For use in patterns
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
@@ -1950,17 +2048,6 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
-let usesCustomInserter = 1 in {
-
-def V_SUB_F64 : InstSI <
- (outs VReg_64:$dst),
- (ins VReg_64:$src0, VReg_64:$src1),
- "v_sub_f64 $dst, $src0, $src1",
- [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
->;
-
-} // end usesCustomInserter
-
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
@@ -1979,14 +2066,17 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
} // End UseNamedOperandTable = 1
}
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
+// It's unclear whether you can use M0 as the output of v_readlane_b32
+// instructions, so use the SGPR_32 register class for spills to prevent
+// this from happening.
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1 in {
+ let UseNamedOperandTable = 1, VGPRSpill = 1 in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
@@ -1999,7 +2089,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
- } // End UseNamedOperandTable = 1
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1
}
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
@@ -2040,7 +2130,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
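// Note the new MUBUF operand order used throughout these patterns: $vaddr
// now precedes $srsrc, and $soffset precedes the immediate offset, matching
// the updated MUBUF_Load_Helper definitions above.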
/* int_SI_export */
@@ -2187,6 +2277,11 @@ def : Pat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
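+// The operands above are swapped relative to the select because
+// v_cndmask_b32 returns src1 when the condition bit is set: select's true
+// value ($src1) stays in the src1 slot and the false value ($src2) becomes
+// src0.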
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
@@ -2659,27 +2754,6 @@ def : Pat <
(S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
>;
-/********** ===================== **********/
-/********** Interpolation Paterns **********/
-/********** ===================== **********/
-
-// The value of $params is constant through out the entire kernel.
-// We need to use S_MOV_B32 $params, because CSE ignores copies, so
-// without it we end up with a lot of redundant moves.
-
-def : Pat <
- (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
->;
-
-def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
- (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
- imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
- (EXTRACT_SUBREG $ij, sub1),
- imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
->;
-
/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/
@@ -2729,7 +2803,7 @@ def : Ext32Pat <anyext>;
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2763,9 +2837,6 @@ def : Pat <
(V_MUL_HI_I32 $src0, $src1)
>;
-def : Vop3ModPat<V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;
-
-
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -2775,52 +2846,52 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (as_i16imm $offset), (i1 0))
>;
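// Two changes here: the gds bit moved from the first operand to the last,
// and the explicit m0 operand ((S_MOV_B32 -1)) is gone now that m0 is an
// implicit use of the DS instructions.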
-def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-def : DSReadPat <DS_READ_B32, i32, local_load>;
+def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
+def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
+def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
+def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
+def : DSReadPat <DS_READ_B32, i32, si_load_local>;
let AddedComplexity = 100 in {
-def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
+def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
} // End AddedComplexity = 100
def : Pat <
- (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
>;
-def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
+def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
+def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
let AddedComplexity = 100 in {
-def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
+def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
} // End AddedComplexity = 100
def : Pat <
- (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1)),
- (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (S_MOV_B32 -1))
+ (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2836,53 +2907,53 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0))
>;
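// ds_inc/ds_dec wrap at the value of their data operand, so loading -1 (the
// maximum unsigned value) as the bound makes them behave as a plain modular
// +1/-1, which is what the add/sub-by-one fragments above require.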
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
>;
// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, atomic_load_add_local>;
+ S_MOV_B32, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, atomic_load_sub_local>;
+ S_MOV_B32, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, atomic_load_add_local>;
+ S_MOV_B64, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, atomic_load_sub_local>;
+ S_MOV_B64, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//
@@ -2892,8 +2963,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))),
- (Instr_ADDR64 $srsrc, $vaddr, $offset)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}
@@ -2910,7 +2982,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
@@ -2929,7 +3001,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2937,7 +3009,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2945,15 +3017,15 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
def : Pat <
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$offset, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
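// Besides the operand reorder, the bothen pattern fixes a bug: it used to
// drop the immediate offset entirely, and now captures it as $offset and
// passes it through as_i16imm.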
}
@@ -2968,7 +3040,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
@@ -3098,26 +3170,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
- (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (vector_extract vt:$vec, i32:$idx),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
+ (IndDst $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
+ (IndDst $vec, $idx, 0, $val)
>;
}
@@ -3263,10 +3335,71 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
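+// For example, with $a = 4 and $b = 8 the first pattern computes
+// ((1 << 4) - 1) << 8 = 0xf00, a 4-bit-wide mask at bit 8 -- which is
+// exactly what s_bfm_b32 encodes, so the shift/add sequence folds into a
+// single instruction.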
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
+// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
+// way to implement it is using V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
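+// For a tiny negative x, for instance, x - floor(x) = 1 - |x| rounds to
+// exactly 1.0 in f64, so the raw V_FRACT result can be 1.0; clamping with
+// min(fract(x), 0x3fefffffffffffff) (the largest f64 below 1.0) keeps the
+// result in [0, 1), and V_CMP_CLASS with mask 3 (signaling | quiet NaN)
+// routes NaN inputs through unchanged.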
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isSI]
+
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
@@ -3291,4 +3424,12 @@ def : Pat <
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"v_add_u32", "v_add_i32">;
+def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
+def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
+
} // End isGCN predicate
diff --git a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
index 0cb6746..9b1d256 100644
--- a/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -55,7 +56,6 @@ namespace {
class SILoadStoreOptimizer : public MachineFunctionPass {
private:
- const TargetMachine *TM;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
@@ -86,20 +86,11 @@ private:
public:
static char ID;
- SILoadStoreOptimizer() :
- MachineFunctionPass(ID),
- TM(nullptr),
- TII(nullptr),
- TRI(nullptr),
- MRI(nullptr),
- LIS(nullptr) {
+ SILoadStoreOptimizer()
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
+ LIS(nullptr) {}
- }
-
- SILoadStoreOptimizer(const TargetMachine &TM_) :
- MachineFunctionPass(ID),
- TM(&TM_),
- TII(static_cast<const SIInstrInfo*>(TM->getSubtargetImpl()->getInstrInfo())) {
+ SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -222,7 +213,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
unsigned DestReg1
@@ -259,41 +249,28 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
= BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addImm(0) // gds
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
- .addOperand(*M0Reg) // M0
+ .addImm(0) // gds
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
- LIS->InsertMachineInstrInMaps(Read2);
-
unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
LIS->RemoveMachineInstrFromMaps(I);
- LIS->RemoveMachineInstrFromMaps(Paired);
+ // Replacing Paired in the maps with Read2 allows us to avoid updating the
+ // live range for the m0 register.
+ LIS->ReplaceMachineInstrInMaps(Paired, Read2);
I->eraseFromParent();
Paired->eraseFromParent();
LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
LIS->shrinkToUses(&AddrRegLI);
- LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
- LIS->shrinkToUses(&M0RegLI);
-
- // Currently m0 is treated as a register class with one member instead of an
- // implicit physical register. We are using the virtual register for the first
- // one, but we still need to update the live range of the now unused second m0
- // virtual register to avoid verifier errors.
- const MachineOperand *PairedM0Reg
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
- LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
- LIS->shrinkToUses(&PairedM0RegLI);
-
LIS->getInterval(DestReg); // Create new LI
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
@@ -309,7 +286,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
const MachineOperand *Data1
= TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
@@ -340,29 +316,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = I->getDebugLoc();
+  // repairIntervalsInRange() doesn't handle physical registers, so we have
+ // to update the M0 range manually.
+ SlotIndex PairedIndex = LIS->getInstructionIndex(Paired);
+ LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
+ LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
+ bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
+
MachineInstrBuilder Write2
= BuildMI(*MBB, I, DL, Write2Desc)
- .addImm(0) // gds
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
- .addOperand(*M0Reg) // m0
+ .addImm(0) // gds
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
// XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
- M0Reg->getReg()};
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
LIS->RemoveMachineInstrFromMaps(I);
LIS->RemoveMachineInstrFromMaps(Paired);
I->eraseFromParent();
Paired->eraseFromParent();
+  // repairIntervalsInRange() doesn't handle physical registers like M0.
LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
+ if (UpdateM0Range) {
+ SlotIndex Write2Index = LIS->getInstructionIndex(Write2);
+ M0Segment->end = Write2Index.getRegSlot();
+ }
+
DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Write2.getInstr();
}
@@ -414,9 +401,9 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
}
bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo *STM = MF.getTarget().getSubtargetImpl();
- TRI = static_cast<const SIRegisterInfo*>(STM->getRegisterInfo());
- TII = static_cast<const SIInstrInfo*>(STM->getInstrInfo());
+ const TargetSubtargetInfo &STM = MF.getSubtarget();
+ TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
diff --git a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
index 198dd56..587ea63 100644
--- a/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -39,8 +39,8 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned FrameIndex,
unsigned SubIdx) {
const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(
- MF->getTarget().getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
@@ -70,7 +70,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
const MachineFunction &MF) const {
- const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should get this information from kernel attributes if it
// is available.
return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
diff --git a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
index 0a57a5b..0a7f684 100644
--- a/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
+++ b/contrib/llvm/lib/Target/R600/SIPrepareScratchRegs.cpp
@@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
- switch(MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_SPILL_V512_SAVE:
- case AMDGPU::SI_SPILL_V256_SAVE:
- case AMDGPU::SI_SPILL_V128_SAVE:
- case AMDGPU::SI_SPILL_V96_SAVE:
- case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- case AMDGPU::SI_SPILL_V32_RESTORE:
- case AMDGPU::SI_SPILL_V64_RESTORE:
- case AMDGPU::SI_SPILL_V128_RESTORE:
- case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE:
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
-
- break;
+ if (!TII->isVGPRSpill(MI.getOpcode()))
+ continue;
+
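+      // The long opcode switch that used to live here is replaced by this
+      // single check; isVGPRSpill is presumably backed by the new VGPRSpill
+      // bit set on the SI_SPILL_VGPR multiclass in SIInstructions.td.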
+ // Scratch resource
+ unsigned ScratchRsrcReg =
+ RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ // Scratch Offset
+ if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+ ScratchOffsetReg)
+ .addFrameIndex(ScratchOffsetFI)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+ } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+ MBB.addLiveIn(ScratchOffsetReg);
}
+
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
+ ScratchOffsetReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+ ScratchRsrcReg = AMDGPU::SGPR0;
+ ScratchOffsetReg = AMDGPU::SGPR0;
+ }
+ MI.getOperand(2).setReg(ScratchRsrcReg);
+ MI.getOperand(2).setIsKill(true);
+ MI.getOperand(2).setIsUndef(false);
+ MI.getOperand(3).setReg(ScratchOffsetReg);
+ MI.getOperand(3).setIsUndef(false);
+ MI.getOperand(3).setIsKill(false);
+ MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
}
}
return true;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
index f502991..db2ff0b 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -24,9 +24,7 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { }
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -48,7 +46,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
- if (ST.hasSGPRInitBug()) {
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
@@ -66,12 +64,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
- ST.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
@@ -142,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset,
RegScavenger *RS) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
MachineBasicBlock *MBB = MI->getParent();
const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
LLVMContext &Ctx = MF->getFunction()->getContext();
DebugLoc DL = MI->getDebugLoc();
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
@@ -179,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addImm(Offset)
.addReg(SOffset)
+ .addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -195,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -243,7 +245,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
&AMDGPU::SGPR_32RegClass, i);
- bool isM0 = SubReg == AMDGPU::M0;
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
@@ -252,23 +253,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
- if (isM0)
- SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane)
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
- if (isM0) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(SubReg);
- }
}
// TODO: only do this when it is needed
- switch (ST.getGeneration()) {
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
TII->insertNOPs(MI, 3);
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
index 1dfe530..bfdb67c 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.h
@@ -24,11 +24,12 @@ namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
- SIRegisterInfo(const AMDGPUSubtarget &st);
+ SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(unsigned Idx) const override;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
index c63f305..2a9017f 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
@@ -21,7 +21,7 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 106;
@@ -36,14 +36,14 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
let HWEncoding = 126;
}
-def SCC : SIReg<"SCC", 253>;
-def M0 : SIReg <"M0", 124>;
+def SCC : SIReg<"scc", 253>;
+def M0 : SIReg <"m0", 124>;
def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"FLAT_SCR", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 104;
@@ -66,7 +66,7 @@ foreach Index = 0-255 in {
//===----------------------------------------------------------------------===//
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
@@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
@@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
+}
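+// Each register-or-immediate operand below gets its own named match class so
+// the generated assembler matcher can accept either form, rendering it via
+// addRegOrImmOperands.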
+
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
let CopyCost = -1; // Theoretically it is possible to read from SCC,
@@ -177,11 +182,10 @@ def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
@@ -227,15 +231,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegImmOperand<SReg_32>;
+def SSrc_32 : RegImmOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SSrc32">;
+}
-def SSrc_64 : RegImmOperand<SReg_64>;
+def SSrc_64 : RegImmOperand<SReg_64> {
+ let ParserMatchClass = RegImmMatcher<"SSrc64">;
+}
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
-def SCSrc_32 : RegInlineOperand<SReg_32>;
+def SCSrc_32 : RegInlineOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SCSrc32">;
+}
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
@@ -245,21 +255,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
-def VSrc_32 : RegImmOperand<VS_32>;
+def VSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
+}
-def VSrc_64 : RegImmOperand<VS_64>;
+def VSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
+}
//===----------------------------------------------------------------------===//
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-def VCSrc_32 : RegInlineOperand<VS_32>;
-
-def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
+def VCSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
+}
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
- (add VReg_128, SReg_128)>;
+def VCSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+}
diff --git a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
index 6a34106..51e72cd 100644
--- a/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
// Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
if (Src2)
return false;
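The FIXME above is concrete enough to sketch. A minimal illustration, not part of this commit, using only the public MachineOperand API; src2AllowsShrinking is a hypothetical helper and the vcc comparison is the whole point:

// Sketch only: the special case the FIXME above describes.
#include "llvm/CodeGen/MachineOperand.h"

static bool src2AllowsShrinking(const llvm::MachineOperand *Src2,
                                unsigned VCCReg /* e.g. AMDGPU::VCC */) {
  if (!Src2)
    return true; // no third operand, nothing blocks shrinking
  // v_cndmask_b32 could still be shrunk, but only when src2 is vcc.
  return Src2->isReg() && Src2->getReg() == VCCReg;
}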
@@ -127,30 +133,31 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
TII->isVOPC(MI.getOpcode()));
const SIRegisterInfo &TRI = TII->getRegisterInfo();
- MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
// Only one literal constant is allowed per instruction, so if src0 is a
// literal constant then we can't do any folding.
- if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+ if (Src0.isImm() &&
+ TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
return;
-
// Literal constants and SGPRs can only be used in Src0, so if Src0 is an
// SGPR, we cannot commute the instruction, so we can't fold any literal
// constants.
- if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+ if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
return;
// Try to fold Src0
- if (Src0->isReg()) {
- unsigned Reg = Src0->getReg();
+ if (Src0.isReg()) {
+ unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
- Src0->ChangeToImmediate(MovSrc.getImm());
+ Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
if (ConstantFolded) {
@@ -189,7 +196,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &Src = MI.getOperand(1);
if (Src.isImm()) {
- if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src))
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
}
diff --git a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp b/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
index 9318dc1..591ce85 100644
--- a/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
@@ -61,8 +61,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
}
bool SITypeRewriter::runOnFunction(Function &F) {
- AttributeSet Set = F.getAttributes();
- Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");
+ Attribute A = F.getFnAttribute("ShaderType");
unsigned ShaderType = ShaderType::COMPUTE;
if (A.isStringAttribute()) {
@@ -105,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
- std::string Name = F->getName().str();
+ std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
diff --git a/contrib/llvm/lib/Target/R600/VIInstrFormats.td b/contrib/llvm/lib/Target/R600/VIInstrFormats.td
index c242235..d8738f9 100644
--- a/contrib/llvm/lib/Target/R600/VIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/VIInstrFormats.td
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
class DSe_vi <bits<8> op> : Enc64 {
-
bits<8> vdst;
bits<1> gds;
bits<8> addr;
@@ -33,7 +32,6 @@ class DSe_vi <bits<8> op> : Enc64 {
}
class MUBUFe_vi <bits<7> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -62,7 +60,6 @@ class MUBUFe_vi <bits<7> op> : Enc64 {
}
class MTBUFe_vi <bits<4> op> : Enc64 {
-
bits<12> offset;
bits<1> offen;
bits<1> idxen;
@@ -93,7 +90,6 @@ class MTBUFe_vi <bits<4> op> : Enc64 {
}
class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
-
bits<7> sbase;
bits<7> sdata;
bits<1> glc;
@@ -109,8 +105,7 @@ class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
}
class VOP3e_vi <bits<10> op> : Enc64 {
-
- bits<8> dst;
+ bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -120,7 +115,7 @@ class VOP3e_vi <bits<10> op> : Enc64 {
bits<1> clamp;
bits<2> omod;
- let Inst{7-0} = dst;
+ let Inst{7-0} = vdst;
let Inst{8} = src0_modifiers{1};
let Inst{9} = src1_modifiers{1};
let Inst{10} = src2_modifiers{1};
diff --git a/contrib/llvm/lib/Target/R600/VIInstructions.td b/contrib/llvm/lib/Target/R600/VIInstructions.td
index 4a6e933..5bf86e6 100644
--- a/contrib/llvm/lib/Target/R600/VIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/VIInstructions.td
@@ -9,6 +9,87 @@
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
+let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
+defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
+defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
+defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
+defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
+defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
+defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
+defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
+defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
+defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
+ VOP_F16_F16
+>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
+ VOP_I16_F16
+>;
+defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
+defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
+defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
+defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
+defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
+defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
+defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+
+defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
+defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
+defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
+ null_frag, "v_sub_f16"
+>;
+defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
+defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
+} // End isCommutable = 1
+defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16">;
+let isCommutable = 1 in {
+defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16">;
+defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
+defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16", VOP_I16_I16_I16>;
+defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
+defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
+defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
+defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
+let isCommutable = 1 in {
+defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
+defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
+defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
+defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
+defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
+defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
+
+// Aliases to simplify matching of floating-point instructions that are VOP2 on
+// SI and VOP3 on VI.
+
+class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+ name#" $dst, $src0, $src1",
+ (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
+>, PredicateControl {
+ let UseInstAsmMatchConverter = 0;
+}
+
+def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
+
+} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
//===----------------------------------------------------------------------===//
// SMEM Patterns
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 551189c..388cb65 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -77,6 +77,10 @@ class SparcAsmParser : public MCTargetAsmParser {
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool is64Bit() const { return STI.getTargetTriple().startswith("sparcv9"); }
+
+ void expandSET(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
public:
SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
@@ -124,6 +128,15 @@ public:
Sparc::Q8, Sparc::Q9, Sparc::Q10, Sparc::Q11,
Sparc::Q12, Sparc::Q13, Sparc::Q14, Sparc::Q15 };
+ static unsigned ASRRegs[32] = {
+ SP::Y, SP::ASR1, SP::ASR2, SP::ASR3,
+ SP::ASR4, SP::ASR5, SP::ASR6, SP::ASR7,
+ SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11,
+ SP::ASR12, SP::ASR13, SP::ASR14, SP::ASR15,
+ SP::ASR16, SP::ASR17, SP::ASR18, SP::ASR19,
+ SP::ASR20, SP::ASR21, SP::ASR22, SP::ASR23,
+ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
+ SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
@@ -135,9 +148,9 @@ public:
rk_FloatReg,
rk_DoubleReg,
rk_QuadReg,
- rk_CCReg,
- rk_Y
+ rk_Special,
};
+
private:
enum KindTy {
k_Token,
@@ -250,7 +263,7 @@ public:
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -262,26 +275,26 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const{
// Add as immediate when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addMEMrrOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+ Inst.addOperand(MCOperand::createReg(getMemBase()));
assert(getMemOffsetReg() != 0 && "Invalid offset");
- Inst.addOperand(MCOperand::CreateReg(getMemOffsetReg()));
+ Inst.addOperand(MCOperand::createReg(getMemOffsetReg()));
}
void addMEMriOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+ Inst.addOperand(MCOperand::createReg(getMemBase()));
const MCExpr *Expr = getMemOff();
addExpr(Inst, Expr);
@@ -360,11 +373,11 @@ public:
}
static std::unique_ptr<SparcOperand>
- CreateMEMri(unsigned Base, const MCExpr *Off, SMLoc S, SMLoc E) {
- auto Op = make_unique<SparcOperand>(k_MemoryImm);
+ CreateMEMr(unsigned Base, SMLoc S, SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_MemoryReg);
Op->Mem.Base = Base;
- Op->Mem.OffsetReg = 0;
- Op->Mem.Off = Off;
+ Op->Mem.OffsetReg = Sparc::G0; // always 0
+ Op->Mem.Off = nullptr;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -383,6 +396,49 @@ public:
} // end namespace
+void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCOperand MCRegOp = Inst.getOperand(0);
+ MCOperand MCValOp = Inst.getOperand(1);
+ assert(MCRegOp.isReg());
+ assert(MCValOp.isImm() || MCValOp.isExpr());
+
+ // The imm operand can be either an expression or an immediate.
+ bool IsImm = Inst.getOperand(1).isImm();
+ uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+ const MCExpr *ValExpr;
+ if (IsImm)
+ ValExpr = MCConstantExpr::Create(ImmValue, getContext());
+ else
+ ValExpr = MCValOp.getExpr();
+
+ MCOperand PrevReg = MCOperand::createReg(Sparc::G0);
+
+ if (!IsImm || (ImmValue & ~0x1fff)) {
+ MCInst TmpInst;
+ const MCExpr *Expr =
+ SparcMCExpr::Create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+ TmpInst.setLoc(IDLoc);
+ TmpInst.setOpcode(SP::SETHIi);
+ TmpInst.addOperand(MCRegOp);
+ TmpInst.addOperand(MCOperand::createExpr(Expr));
+ Instructions.push_back(TmpInst);
+ PrevReg = MCRegOp;
+ }
+
+ if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
+ MCInst TmpInst;
+ const MCExpr *Expr =
+ SparcMCExpr::Create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+ TmpInst.setLoc(IDLoc);
+ TmpInst.setOpcode(SP::ORri);
+ TmpInst.addOperand(MCRegOp);
+ TmpInst.addOperand(PrevReg);
+ TmpInst.addOperand(MCOperand::createExpr(Expr));
+ Instructions.push_back(TmpInst);
+ }
+}
+
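expandSET above follows the classic SPARC `set` synthetic: a value that fits in a signed 13-bit immediate becomes a single or, anything larger gets sethi %hi(...) plus an optional or %lo(...). A simplified, self-contained sketch of that split (illustrative only; the real code also routes non-constant expressions through %hi/%lo relocations):

// Simplified sketch of the %hi/%lo arithmetic behind expandSET.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 0x12345678;
  bool NeedsSethi = (Imm & ~0x1fffULL) != 0; // does not fit in simm13
  uint32_t Hi = (Imm >> 10) & 0x3fffff;      // %hi(imm): upper 22 bits
  uint32_t Lo = Imm & 0x3ff;                 // %lo(imm): lower 10 bits
  if (NeedsSethi)
    std::printf("sethi 0x%x, %%o0\n", Hi);   // SP::SETHIi
  // SP::ORri; its first source is %g0 when no sethi was emitted.
  if (!NeedsSethi || Lo != 0)
    std::printf("or %%o0, 0x%x, %%o0\n", Lo);
  return 0;
}

For set 0x12345678, %o0 this yields the sethi/or pair that MatchAndEmitInstruction below collects in Instructions and hands to Out.EmitInstruction one MCInst at a time.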
bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -394,8 +450,19 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ switch (Inst.getOpcode()) {
+ default:
+ Inst.setLoc(IDLoc);
+ Instructions.push_back(Inst);
+ break;
+ case SP::SET:
+ expandSET(Inst, IDLoc, Instructions);
+ break;
+ }
+
+ for (const MCInst &I : Instructions) {
+ Out.EmitInstruction(I, STI);
+ }
return false;
}
@@ -556,7 +623,7 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
case AsmToken::Comma:
case AsmToken::RBrac:
case AsmToken::EndOfStatement:
- Operands.push_back(SparcOperand::CreateMEMri(BaseReg, nullptr, S, E));
+ Operands.push_back(SparcOperand::CreateMEMr(BaseReg, S, E));
return MatchOperand_Success;
case AsmToken::Plus:
@@ -622,6 +689,15 @@ SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
Operands.push_back(SparcOperand::CreateToken("]",
Parser.getTok().getLoc()));
Parser.Lex(); // Eat the ]
+
+ // Parse an optional address-space identifier after the address.
+ if (getLexer().is(AsmToken::Integer)) {
+ std::unique_ptr<SparcOperand> Op;
+ ResTy = parseSparcAsmOperand(Op, false);
+ if (ResTy != MatchOperand_Success || !Op)
+ return MatchOperand_ParseFail;
+ Operands.push_back(std::move(Op));
+ }
return MatchOperand_Success;
}
@@ -661,10 +737,15 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
default:
Op = SparcOperand::CreateReg(RegNo, RegKind, S, E);
break;
- case Sparc::Y:
- Op = SparcOperand::CreateToken("%y", S);
+ case Sparc::PSR:
+ Op = SparcOperand::CreateToken("%psr", S);
+ break;
+ case Sparc::WIM:
+ Op = SparcOperand::CreateToken("%wim", S);
+ break;
+ case Sparc::TBR:
+ Op = SparcOperand::CreateToken("%tbr", S);
break;
-
case Sparc::ICC:
if (name == "xcc")
Op = SparcOperand::CreateToken("%xcc", S);
@@ -682,6 +763,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case AsmToken::Minus:
case AsmToken::Integer:
+ case AsmToken::LParen:
if (!getParser().parseExpression(EVal, E))
Op = SparcOperand::CreateImm(EVal, S, E);
break;
@@ -690,7 +772,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
StringRef Identifier;
if (!getParser().parseIdentifier(Identifier)) {
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
getContext());
@@ -752,20 +834,46 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
if (name.equals("y")) {
RegNo = Sparc::Y;
- RegKind = SparcOperand::rk_Y;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
+ if (name.substr(0, 3).equals_lower("asr")
+ && !name.substr(3).getAsInteger(10, intVal)
+ && intVal > 0 && intVal < 32) {
+ RegNo = ASRRegs[intVal];
+ RegKind = SparcOperand::rk_Special;
return true;
}
if (name.equals("icc")) {
RegNo = Sparc::ICC;
- RegKind = SparcOperand::rk_CCReg;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
+ if (name.equals("psr")) {
+ RegNo = Sparc::PSR;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
+ if (name.equals("wim")) {
+ RegNo = Sparc::WIM;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
+ if (name.equals("tbr")) {
+ RegNo = Sparc::TBR;
+ RegKind = SparcOperand::rk_Special;
return true;
}
if (name.equals("xcc")) {
// FIXME: check 64bit.
RegNo = Sparc::ICC;
- RegKind = SparcOperand::rk_CCReg;
+ RegKind = SparcOperand::rk_Special;
return true;
}
@@ -775,7 +883,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
&& intVal < 4) {
// FIXME: check 64bit and handle %fcc1 - %fcc3
RegNo = Sparc::FCC0 + intVal;
- RegKind = SparcOperand::rk_CCReg;
+ RegKind = SparcOperand::rk_Special;
return true;
}
@@ -906,10 +1014,10 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
return true;
}
-
extern "C" void LLVMInitializeSparcAsmParser() {
RegisterMCAsmParser<SparcAsmParser> A(TheSparcTarget);
RegisterMCAsmParser<SparcAsmParser> B(TheSparcV9Target);
+ RegisterMCAsmParser<SparcAsmParser> C(TheSparcelTarget);
}
#define GET_REGISTER_MATCHER
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 28369fd..38bff44 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -45,10 +45,7 @@ namespace {
const SparcSubtarget *Subtarget;
static char ID;
- Filler(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm),
- Subtarget(&TM.getSubtarget<SparcSubtarget>()) {
- }
+ Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) {}
const char *getPassName() const override {
return "SPARC Delay Slot Filler";
@@ -57,6 +54,7 @@ namespace {
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override {
bool Changed = false;
+ Subtarget = &F.getSubtarget<SparcSubtarget>();
// This pass invalidates liveness information when it reorders
// instructions to fill delay slot.
@@ -109,8 +107,8 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
///
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
-
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
+ Subtarget = &MBB.getParent()->getSubtarget<SparcSubtarget>();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
MachineBasicBlock::iterator MI = I;
@@ -187,7 +185,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (J->getOpcode() == SP::RESTORErr
|| J->getOpcode() == SP::RESTOREri) {
// change retl to ret.
- slot->setDesc(TM.getSubtargetImpl()->getInstrInfo()->get(SP::RET));
+ slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
return J;
}
}
@@ -329,8 +327,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
{
// Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getSubtargetImpl()->getRegisterInfo(),
- true);
+ for (MCRegAliasIterator AI(Reg, Subtarget->getRegisterInfo(), true);
AI.isValid(); ++AI)
if (RegSet.count(*AI))
return true;
@@ -483,7 +480,7 @@ bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
if (PrevInst->isBundledWithSucc())
return false;
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
switch (PrevInst->getOpcode()) {
default: break;
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 8bc4ca9..3e56b9e 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -16,6 +16,9 @@
#include "SparcSubtarget.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -38,17 +41,15 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
-
}
namespace llvm {
- extern Target TheSparcTarget, TheSparcV9Target;
+extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget;
}
-static MCDisassembler *createSparcDisassembler(
- const Target &T,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
+static MCDisassembler *createSparcDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
return new SparcDisassembler(STI, Ctx);
}
@@ -59,10 +60,10 @@ extern "C" void LLVMInitializeSparcDisassembler() {
createSparcDisassembler);
TargetRegistry::RegisterMCDisassembler(TheSparcV9Target,
createSparcDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheSparcelTarget,
+ createSparcDisassembler);
}
-
-
static const unsigned IntRegDecoderTable[] = {
SP::G0, SP::G1, SP::G2, SP::G3,
SP::G4, SP::G5, SP::G6, SP::G7,
@@ -106,6 +107,16 @@ static const unsigned QFPRegDecoderTable[] = {
static const unsigned FCCRegDecoderTable[] = {
SP::FCC0, SP::FCC1, SP::FCC2, SP::FCC3 };
+static const unsigned ASRRegDecoderTable[] = {
+ SP::Y, SP::ASR1, SP::ASR2, SP::ASR3,
+ SP::ASR4, SP::ASR5, SP::ASR6, SP::ASR7,
+ SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11,
+ SP::ASR12, SP::ASR13, SP::ASR14, SP::ASR15,
+ SP::ASR16, SP::ASR17, SP::ASR18, SP::ASR19,
+ SP::ASR20, SP::ASR21, SP::ASR22, SP::ASR23,
+ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
+ SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -113,7 +124,7 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -124,7 +135,7 @@ static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -136,7 +147,7 @@ static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = FPRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -148,7 +159,7 @@ static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst,
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = DFPRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -163,7 +174,7 @@ static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst,
unsigned Reg = QFPRegDecoderTable[RegNo];
if (Reg == ~0U)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -172,7 +183,16 @@ static DecodeStatus DecodeFCCRegsRegisterClass(MCInst &Inst, unsigned RegNo,
const void *Decoder) {
if (RegNo > 3)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(FCCRegDecoderTable[RegNo]));
+ Inst.addOperand(MCOperand::createReg(FCCRegDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(ASRRegDecoderTable[RegNo]));
return MCDisassembler::Success;
}
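The decoders in this file slice fixed-width fields out of the 32-bit instruction word with fieldFromInstruction; a stand-in with the assumed semantics, for reference only:

// Assumed semantics of fieldFromInstruction(insn, start, n): the n-bit
// field of insn beginning at bit `start`, LSB numbered 0 (n < 32 here).
#include <cstdint>

static uint32_t field(uint32_t Insn, unsigned Start, unsigned NumBits) {
  return (Insn >> Start) & ((1u << NumBits) - 1u);
}
// e.g. the ASI byte decoded below would be bits [12:5]: field(Insn, 5, 8).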
@@ -208,16 +228,19 @@ static DecodeStatus DecodeSWAP(MCInst &Inst, unsigned insn, uint64_t Address,
/// Read four bytes from the ArrayRef and return 32 bit word.
static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
- uint64_t &Size, uint32_t &Insn) {
+ uint64_t &Size, uint32_t &Insn,
+ bool IsLittleEndian) {
// We want to read exactly 4 Bytes of data.
if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
- // Encoded as a big-endian 32-bit word in the stream.
- Insn =
- (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
+ Insn = IsLittleEndian
+ ? (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+ (Bytes[3] << 24)
+ : (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) |
+ (Bytes[0] << 24);
return MCDisassembler::Success;
}
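With this change the same four bytes decode to different words depending on the target. A self-contained check of the two shift patterns used above (little-endian sparcel vs. traditional big-endian SPARC; the byte values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Bytes[4] = {0x01, 0x00, 0x00, 0x91};
  uint32_t LE = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
                ((uint32_t)Bytes[3] << 24);
  uint32_t BE = (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) |
                ((uint32_t)Bytes[0] << 24);
  assert(LE == 0x91000001u); // sparcel reading
  assert(BE == 0x01000091u); // big-endian reading
  return 0;
}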
@@ -228,12 +251,12 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
raw_ostream &VStream,
raw_ostream &CStream) const {
uint32_t Insn;
-
- DecodeStatus Result = readInstruction32(Bytes, Address, Size, Insn);
+ bool isLittleEndian = getContext().getAsmInfo()->isLittleEndian();
+ DecodeStatus Result =
+ readInstruction32(Bytes, Address, Size, Insn, isLittleEndian);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
-
// Calling the auto-generated decoder function.
Result =
decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
@@ -256,6 +279,8 @@ static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address,
unsigned rd = fieldFromInstruction(insn, 25, 5);
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
bool isImm = fieldFromInstruction(insn, 13, 1);
+ bool hasAsi = fieldFromInstruction(insn, 23, 1); // (in op3 field)
+ unsigned asi = fieldFromInstruction(insn, 5, 8);
unsigned rs2 = 0;
unsigned simm13 = 0;
if (isImm)
@@ -277,13 +302,16 @@ static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address,
// Decode imm|rs2.
if (isImm)
- MI.addOperand(MCOperand::CreateImm(simm13));
+ MI.addOperand(MCOperand::createImm(simm13));
else {
status = DecodeIntRegsRegisterClass(MI, rs2, Address, Decoder);
if (status != MCDisassembler::Success)
return status;
}
+ if (hasAsi)
+ MI.addOperand(MCOperand::createImm(asi));
+
if (!isLoad) {
status = DecodeRD(MI, rd, Address, Decoder);
if (status != MCDisassembler::Success)
@@ -355,14 +383,14 @@ static DecodeStatus DecodeCall(MCInst &MI, unsigned insn,
tgt <<= 2;
if (!tryAddingSymbolicOperand(tgt+Address, false, Address,
0, 30, MI, Decoder))
- MI.addOperand(MCOperand::CreateImm(tgt));
+ MI.addOperand(MCOperand::createImm(tgt));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn,
uint64_t Address, const void *Decoder) {
unsigned tgt = SignExtend32<13>(fieldFromInstruction(insn, 0, 13));
- MI.addOperand(MCOperand::CreateImm(tgt));
+ MI.addOperand(MCOperand::createImm(tgt));
return MCDisassembler::Success;
}
@@ -391,7 +419,7 @@ static DecodeStatus DecodeJMPL(MCInst &MI, unsigned insn, uint64_t Address,
// Decode RS1 | SIMM13.
if (isImm)
- MI.addOperand(MCOperand::CreateImm(simm13));
+ MI.addOperand(MCOperand::createImm(simm13));
else {
status = DecodeIntRegsRegisterClass(MI, rs2, Address, Decoder);
if (status != MCDisassembler::Success)
@@ -419,7 +447,7 @@ static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address,
// Decode RS2 | SIMM13.
if (isImm)
- MI.addOperand(MCOperand::CreateImm(simm13));
+ MI.addOperand(MCOperand::createImm(simm13));
else {
status = DecodeIntRegsRegisterClass(MI, rs2, Address, Decoder);
if (status != MCDisassembler::Success)
@@ -434,6 +462,8 @@ static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address,
unsigned rd = fieldFromInstruction(insn, 25, 5);
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
unsigned isImm = fieldFromInstruction(insn, 13, 1);
+ bool hasAsi = fieldFromInstruction(insn, 23, 1); // (in op3 field)
+ unsigned asi = fieldFromInstruction(insn, 5, 8);
unsigned rs2 = 0;
unsigned simm13 = 0;
if (isImm)
@@ -453,11 +483,15 @@ static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address,
// Decode RS1 | SIMM13.
if (isImm)
- MI.addOperand(MCOperand::CreateImm(simm13));
+ MI.addOperand(MCOperand::createImm(simm13));
else {
status = DecodeIntRegsRegisterClass(MI, rs2, Address, Decoder);
if (status != MCDisassembler::Success)
return status;
}
+
+ if (hasAsi)
+ MI.addOperand(MCOperand::createImm(asi));
+
return MCDisassembler::Success;
}
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 5975a51..bac2617 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -34,8 +34,8 @@ namespace Sparc {
#define PRINT_ALIAS_INSTR
#include "SparcGenAsmWriter.inc"
-bool SparcInstPrinter::isV9() const {
- return (STI.getFeatureBits() & Sparc::FeatureV9) != 0;
+bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
+ return (STI.getFeatureBits()[Sparc::FeatureV9]) != 0;
}
void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
@@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
}
void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot)
-{
- if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O))
- printInstruction(MI, O);
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
-bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
-{
+bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
switch (MI->getOpcode()) {
default: return false;
case SP::JMPLrr:
@@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::O7: O << "\tretl"; return true;
}
}
- O << "\tjmp "; printMemOperand(MI, 1, O);
+ O << "\tjmp "; printMemOperand(MI, 1, STI, O);
return true;
case SP::O7: // call $addr
- O << "\tcall "; printMemOperand(MI, 1, O);
+ O << "\tcall "; printMemOperand(MI, 1, STI, O);
return true;
}
}
case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ:
case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: {
- if (isV9()
+ if (isV9(STI)
|| (MI->getNumOperands() != 3)
|| (!MI->getOperand(0).isReg())
|| (MI->getOperand(0).getReg() != SP::FCC0))
@@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::V9FCMPED: O << "\tfcmped "; break;
case SP::V9FCMPEQ: O << "\tfcmpeq "; break;
}
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
return true;
}
}
}
void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand (opNum);
if (MO.isReg()) {
@@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
}
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
- printOperand(MI, opNum, O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, STI, O);
// If this is an ADD operand, emit it like normal operands.
if (Modifier && !strcmp(Modifier, "arith")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
return;
}
const MCOperand &MO = MI->getOperand(opNum+1);
@@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
}
void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int CC = (int)MI->getOperand(opNum).getImm();
switch (MI->getOpcode()) {
default: break;
@@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
}
bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index c96d5ad..0b01b88 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -22,32 +22,36 @@ namespace llvm {
class MCOperand;
class SparcInstPrinter : public MCInstPrinter {
- const MCSubtargetInfo &STI;
public:
- SparcInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &sti)
- : MCInstPrinter(MAI, MII, MRI), STI(sti) {}
+ SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS);
- bool isV9() const;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool isV9(const MCSubtargetInfo &STI) const;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS,
- const char *Modifier = nullptr);
- void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-
+ void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS, const char *Modifier = nullptr);
+ void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index dcd81e3..3792a59 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -98,16 +98,23 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
namespace {
class SparcAsmBackend : public MCAsmBackend {
+ protected:
const Target &TheTarget;
+ bool IsLittleEndian;
+ bool Is64Bit;
+
public:
- SparcAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
+ SparcAsmBackend(const Target &T)
+ : MCAsmBackend(), TheTarget(T),
+ IsLittleEndian(StringRef(TheTarget.getName()) == "sparcel"),
+ Is64Bit(StringRef(TheTarget.getName()) == "sparcv9") {}
unsigned getNumFixupKinds() const override {
return Sparc::NumTargetFixupKinds;
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[Sparc::NumTargetFixupKinds] = {
+ const static MCFixupKindInfo InfosBE[Sparc::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_sparc_call30", 2, 30, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_br22", 10, 22, MCFixupKindInfo::FKF_IsPCRel },
@@ -146,12 +153,54 @@ namespace {
{ "fixup_sparc_tls_le_lox10", 0, 0, 0 }
};
+ const static MCFixupKindInfo InfosLE[Sparc::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_sparc_call30", 0, 30, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br22", 0, 22, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br19", 0, 19, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br16_2", 20, 2, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br16_14", 0, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_hi22", 0, 22, 0 },
+ { "fixup_sparc_lo10", 0, 10, 0 },
+ { "fixup_sparc_h44", 0, 22, 0 },
+ { "fixup_sparc_m44", 0, 10, 0 },
+ { "fixup_sparc_l44", 0, 12, 0 },
+ { "fixup_sparc_hh", 0, 22, 0 },
+ { "fixup_sparc_hm", 0, 10, 0 },
+ { "fixup_sparc_pc22", 0, 22, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_pc10", 0, 10, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_got22", 0, 22, 0 },
+ { "fixup_sparc_got10", 0, 10, 0 },
+ { "fixup_sparc_wplt30", 0, 30, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_tls_gd_hi22", 0, 22, 0 },
+ { "fixup_sparc_tls_gd_lo10", 0, 10, 0 },
+ { "fixup_sparc_tls_gd_add", 0, 0, 0 },
+ { "fixup_sparc_tls_gd_call", 0, 0, 0 },
+ { "fixup_sparc_tls_ldm_hi22", 0, 22, 0 },
+ { "fixup_sparc_tls_ldm_lo10", 0, 10, 0 },
+ { "fixup_sparc_tls_ldm_add", 0, 0, 0 },
+ { "fixup_sparc_tls_ldm_call", 0, 0, 0 },
+ { "fixup_sparc_tls_ldo_hix22", 0, 22, 0 },
+ { "fixup_sparc_tls_ldo_lox10", 0, 10, 0 },
+ { "fixup_sparc_tls_ldo_add", 0, 0, 0 },
+ { "fixup_sparc_tls_ie_hi22", 0, 22, 0 },
+ { "fixup_sparc_tls_ie_lo10", 0, 10, 0 },
+ { "fixup_sparc_tls_ie_ld", 0, 0, 0 },
+ { "fixup_sparc_tls_ie_ldx", 0, 0, 0 },
+ { "fixup_sparc_tls_ie_add", 0, 0, 0 },
+ { "fixup_sparc_tls_le_hix22", 0, 0, 0 },
+ { "fixup_sparc_tls_le_lox10", 0, 0, 0 }
+ };
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ if (IsLittleEndian)
+ return InfosLE[Kind - FirstTargetFixupKind];
+
+ return InfosBE[Kind - FirstTargetFixupKind];
}
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
@@ -215,11 +264,6 @@ namespace {
return true;
}
-
- bool is64Bit() const {
- StringRef name = TheTarget.getName();
- return name == "sparcv9";
- }
};
class ELFSparcAsmBackend : public SparcAsmBackend {
@@ -239,14 +283,15 @@ namespace {
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
-
+ for (unsigned i = 0; i != 4; ++i) {
+ unsigned Idx = IsLittleEndian ? i : 3 - i;
+ Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
}
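The rewritten loop writes the fixup value's bytes at mirrored offsets for the two byte orders. A quick standalone check (illustrative; Offset is taken as 0):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t LE[4] = {}, BE[4] = {};
  uint64_t Value = 0x11223344; // stand-in fixup value
  for (unsigned i = 0; i != 4; ++i) {
    LE[i] |= uint8_t((Value >> (i * 8)) & 0xff);     // IsLittleEndian
    BE[3 - i] |= uint8_t((Value >> (i * 8)) & 0xff); // big-endian
  }
  assert(LE[0] == 0x44 && LE[3] == 0x11);
  assert(BE[0] == 0x11 && BE[3] == 0x44);
  return 0;
}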
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType);
- return createSparcELFObjectWriter(OS, is64Bit(), OSABI);
+ return createSparcELFObjectWriter(OS, Is64Bit, IsLittleEndian, OSABI);
}
};
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 5ba82f1..4f07ae2 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -26,7 +26,8 @@ namespace {
Is64Bit ? ELF::EM_SPARCV9 : ELF::EM_SPARC,
/*HasRelocationAddend*/ true) {}
- virtual ~SparcELFObjectWriter() {}
+ ~SparcELFObjectWriter() override {}
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -104,9 +105,10 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_SPARC_NONE;
}
-MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
+ bool IsLittleEndian,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 4269020..124cb3b 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -18,12 +18,12 @@
using namespace llvm;
-void SparcELFMCAsmInfo::anchor() { }
+void SparcELFMCAsmInfo::anchor() {}
SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
- IsLittleEndian = false;
Triple TheTriple(TT);
bool isV9 = (TheTriple.getArch() == Triple::sparcv9);
+ IsLittleEndian = (TheTriple.getArch() == Triple::sparcel);
if (isV9) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -42,8 +42,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- if (TheTriple.isOSSolaris() || TheTriple.isOSOpenBSD())
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
const MCExpr*
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index eea9626..34079ee 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -31,16 +32,16 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
class SparcMCCodeEmitter : public MCCodeEmitter {
- SparcMCCodeEmitter(const SparcMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const SparcMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
+ void operator=(const SparcMCCodeEmitter &) = delete;
MCContext &Ctx;
public:
SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {}
- ~SparcMCCodeEmitter() {}
+ ~SparcMCCodeEmitter() override {}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -74,21 +75,27 @@ public:
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SparcMCCodeEmitter(Ctx);
}
-void SparcMCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- // Output the constant in big endian byte order.
- for (unsigned i = 0; i != 4; ++i) {
- OS << (char)(Bits >> 24);
- Bits <<= 8;
+ if (Ctx.getAsmInfo()->isLittleEndian()) {
+ // Output the bits in little-endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)Bits;
+ Bits >>= 8;
+ }
+ } else {
+ // Output the bits in big-endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)(Bits >> 24);
+ Bits <<= 8;
+ }
}
unsigned tlsOpNo = 0;
switch (MI.getOpcode()) {
@@ -125,7 +132,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCExpr *Expr = MO.getExpr();
if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Expr)) {
MCFixupKind Kind = (MCFixupKind)SExpr->getFixupKind();
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::create(0, Expr, Kind));
return 0;
}
@@ -147,7 +154,7 @@ getCallTargetOpValue(const MCInst &MI, unsigned OpNo,
if (MI.getOpcode() == SP::TLS_CALL) {
// No fixups for __tls_get_addr. Will emit fixups for tls_symbol in
- // EncodeInstruction.
+ // encodeInstruction.
#ifndef NDEBUG
// Verify that the callee is actually __tls_get_addr.
const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(MO.getExpr());
@@ -167,7 +174,7 @@ getCallTargetOpValue(const MCInst &MI, unsigned OpNo,
fixupKind = (MCFixupKind)Sparc::fixup_sparc_wplt30;
}
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), fixupKind));
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), fixupKind));
return 0;
}
@@ -180,7 +187,7 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)Sparc::fixup_sparc_br22));
return 0;
}
@@ -193,7 +200,7 @@ getBranchPredTargetOpValue(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)Sparc::fixup_sparc_br19));
return 0;
}
@@ -205,9 +212,9 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)Sparc::fixup_sparc_br16_2));
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)Sparc::fixup_sparc_br16_14));
return 0;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index f72c6c4..116e104 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -62,8 +62,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit SparcMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit SparcMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
@@ -90,7 +90,7 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- const MCSection *FindAssociatedSection() const override {
+ MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 3cc4314..4d5672e 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -97,7 +97,7 @@ static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
case CodeModel::JITDefault: CM = CodeModel::Small; break;
}
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -118,93 +118,68 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
break;
}
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new SparcTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new SparcTargetELFStreamer(S);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new SparcTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new SparcTargetAsmStreamer(S, OS);
}
-static MCInstPrinter *createSparcMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new SparcInstPrinter(MAI, MII, MRI, STI);
+static MCInstPrinter *createSparcMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new SparcInstPrinter(MAI, MII, MRI);
}
extern "C" void LLVMInitializeSparcTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheSparcTarget, createSparcMCAsmInfo);
RegisterMCAsmInfoFn Y(TheSparcV9Target, createSparcV9MCAsmInfo);
+ RegisterMCAsmInfoFn Z(TheSparcelTarget, createSparcMCAsmInfo);
+
+ for (Target *T : {&TheSparcTarget, &TheSparcV9Target, &TheSparcelTarget}) {
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createSparcMCSubtargetInfo);
+
+ // Register the MC Code Emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createSparcMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createSparcAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter);
+ }
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
- createSparcMCCodeGenInfo);
+ createSparcMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
- createSparcV9MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheSparcV9Target, createSparcMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheSparcV9Target,
- createSparcMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
- createSparcMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcV9Target,
- createSparcMCSubtargetInfo);
-
- // Register the MC Code Emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheSparcTarget,
- createSparcMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheSparcV9Target,
- createSparcMCCodeEmitter);
-
- //Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheSparcTarget,
- createSparcAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheSparcV9Target,
- createSparcAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheSparcTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheSparcV9Target,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheSparcTarget,
- createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheSparcV9Target,
- createMCAsmStreamer);
-
- // Register the MCInstPrinter
- TargetRegistry::RegisterMCInstPrinter(TheSparcTarget,
- createSparcMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheSparcV9Target,
- createSparcMCInstPrinter);
+ createSparcV9MCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheSparcelTarget,
+ createSparcMCCodeGenInfo);
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index c31943d..28e2119 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -26,22 +26,20 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSparcTarget;
extern Target TheSparcV9Target;
+extern Target TheSparcelTarget;
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createSparcAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT,
- StringRef CPU);
-MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint8_t OSABI);
+MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
} // End llvm namespace
// Defines symbolic names for Sparc registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
index 3159a46..c34122e 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def SparcAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+}
+
def Sparc : Target {
// Pull in Instruction Info:
let InstructionSet = SparcInstrInfo;
let AssemblyParsers = [SparcAsmParser];
+ let AssemblyWriters = [SparcAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 6432003..9903bc5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -40,11 +40,12 @@ namespace {
class SparcAsmPrinter : public AsmPrinter {
SparcTargetStreamer &getTargetStreamer() {
return static_cast<SparcTargetStreamer &>(
- *OutStreamer.getTargetStreamer());
+ *OutStreamer->getTargetStreamer());
}
public:
- explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
+ explicit SparcAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
const char *getPassName() const override {
return "Sparc Assembly Printer";
@@ -57,7 +58,6 @@ namespace {
void EmitFunctionBodyStart() override;
void EmitInstruction(const MachineInstr *MI) override;
- void EmitEndOfAsmFile(Module &M) override;
static const char *getRegisterName(unsigned RegNo) {
return SparcInstPrinter::getRegisterName(RegNo);
@@ -81,7 +81,7 @@ static MCOperand createSparcMCOperand(SparcMCExpr::VariantKind Kind,
const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Sym,
OutContext);
const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, OutContext);
- return MCOperand::CreateExpr(expr);
+ return MCOperand::createExpr(expr);
}
static MCOperand createPCXCallOP(MCSymbol *Label,
@@ -104,7 +104,7 @@ static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind,
const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(GOT, Sub, OutContext);
const SparcMCExpr *expr = SparcMCExpr::Create(Kind,
Add, OutContext);
- return MCOperand::CreateExpr(expr);
+ return MCOperand::createExpr(expr);
}
static void EmitCall(MCStreamer &OutStreamer,
@@ -176,13 +176,13 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI)
{
MCSymbol *GOTLabel =
- OutContext.GetOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_"));
+ OutContext.getOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_"));
const MachineOperand &MO = MI->getOperand(0);
assert(MO.getReg() != SP::O7 &&
"%o7 is assigned as destination for getpcx!");
- MCOperand MCRegOP = MCOperand::CreateReg(MO.getReg());
+ MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
if (TM.getRelocationModel() != Reloc::PIC_) {
@@ -191,45 +191,45 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
default:
llvm_unreachable("Unsupported absolute code model");
case CodeModel::Small:
- EmitHiLo(OutStreamer, GOTLabel,
+ EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_HI, SparcMCExpr::VK_Sparc_LO,
MCRegOP, OutContext, STI);
break;
case CodeModel::Medium: {
- EmitHiLo(OutStreamer, GOTLabel,
+ EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_H44, SparcMCExpr::VK_Sparc_M44,
MCRegOP, OutContext, STI);
- MCOperand imm = MCOperand::CreateExpr(MCConstantExpr::Create(12,
+ MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(12,
OutContext));
- EmitSHL(OutStreamer, MCRegOP, imm, MCRegOP, STI);
+ EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI);
MCOperand lo = createSparcMCOperand(SparcMCExpr::VK_Sparc_L44,
GOTLabel, OutContext);
- EmitOR(OutStreamer, MCRegOP, lo, MCRegOP, STI);
+ EmitOR(*OutStreamer, MCRegOP, lo, MCRegOP, STI);
break;
}
case CodeModel::Large: {
- EmitHiLo(OutStreamer, GOTLabel,
+ EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_HH, SparcMCExpr::VK_Sparc_HM,
MCRegOP, OutContext, STI);
- MCOperand imm = MCOperand::CreateExpr(MCConstantExpr::Create(32,
+ MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(32,
OutContext));
- EmitSHL(OutStreamer, MCRegOP, imm, MCRegOP, STI);
+ EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI);
// Use register %o7 to load the lower 32 bits.
- MCOperand RegO7 = MCOperand::CreateReg(SP::O7);
- EmitHiLo(OutStreamer, GOTLabel,
+ MCOperand RegO7 = MCOperand::createReg(SP::O7);
+ EmitHiLo(*OutStreamer, GOTLabel,
SparcMCExpr::VK_Sparc_HI, SparcMCExpr::VK_Sparc_LO,
RegO7, OutContext, STI);
- EmitADD(OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
+ EmitADD(*OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
}
}
return;
}
- MCSymbol *StartLabel = OutContext.CreateTempSymbol();
- MCSymbol *EndLabel = OutContext.CreateTempSymbol();
- MCSymbol *SethiLabel = OutContext.CreateTempSymbol();
+ MCSymbol *StartLabel = OutContext.createTempSymbol();
+ MCSymbol *EndLabel = OutContext.createTempSymbol();
+ MCSymbol *SethiLabel = OutContext.createTempSymbol();
- MCOperand RegO7 = MCOperand::CreateReg(SP::O7);
+ MCOperand RegO7 = MCOperand::createReg(SP::O7);
// <StartLabel>:
// call <EndLabel>
@@ -239,20 +239,20 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
// or <MO>, %lo(_GLOBAL_OFFSET_TABLE_+(<EndLabel>-<StartLabel>))), <MO>
// add <MO>, %o7, <MO>
- OutStreamer.EmitLabel(StartLabel);
+ OutStreamer->EmitLabel(StartLabel);
MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
- EmitCall(OutStreamer, Callee, STI);
- OutStreamer.EmitLabel(SethiLabel);
+ EmitCall(*OutStreamer, Callee, STI);
+ OutStreamer->EmitLabel(SethiLabel);
MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
GOTLabel, StartLabel, SethiLabel,
OutContext);
- EmitSETHI(OutStreamer, hiImm, MCRegOP, STI);
- OutStreamer.EmitLabel(EndLabel);
+ EmitSETHI(*OutStreamer, hiImm, MCRegOP, STI);
+ OutStreamer->EmitLabel(EndLabel);
MCOperand loImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC10,
GOTLabel, StartLabel, EndLabel,
OutContext);
- EmitOR(OutStreamer, MCRegOP, loImm, MCRegOP, STI);
- EmitADD(OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
+ EmitOR(*OutStreamer, MCRegOP, loImm, MCRegOP, STI);
+ EmitADD(*OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
}
void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
@@ -272,12 +272,12 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
do {
MCInst TmpInst;
LowerSparcMachineInstrToMCInst(I, TmpInst, *this);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
void SparcAsmPrinter::EmitFunctionBodyStart() {
- if (!TM.getSubtarget<SparcSubtarget>().is64Bit())
+ if (!MF->getSubtarget<SparcSubtarget>().is64Bit())
return;
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand (opNum);
SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
@@ -441,26 +441,9 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Generate stubs for global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataSection());
- unsigned PtrSize =
- TM.getSubtargetImpl()->getDataLayout()->getPointerSize(0);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize);
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+ RegisterAsmPrinter<SparcAsmPrinter> Z(TheSparcelTarget);
}
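
The OutStreamer changes above follow from AsmPrinter taking ownership of its MCStreamer, now held by unique_ptr (hence `*OutStreamer` and `->`). A hypothetical printer written against this API mirrors the Sparc constructor:

    class MyAsmPrinter : public AsmPrinter { // hypothetical, for illustration
    public:
      MyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
          : AsmPrinter(TM, std::move(Streamer)) {} // ownership moves into the base
      const char *getPassName() const override { return "My Assembly Printer"; }
    };
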
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 1b67b4b..bccc6bd 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -82,10 +82,11 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
.addReg(SP::O6).addReg(SP::G1);
}
-void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+void SparcFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
- MachineBasicBlock &MBB = MF.front();
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -103,9 +104,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVEri = SP::ADDri;
SAVErr = SP::ADDrr;
}
- NumBytes =
- -MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
- NumBytes);
+ NumBytes = -MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
MachineModuleInfo &MMI = MF.getMMI();
@@ -168,8 +167,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes == 0)
return;
- NumBytes = MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
- NumBytes);
+ NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
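
The emitPrologue signature change comes from the shrink-wrapping work in the generic frame-lowering interface: the caller now passes the block to emit into, instead of the target assuming MF.front(). Targets without shrink-wrapping support keep the old behavior behind an assert, as above; a minimal override under that assumption:

    void MyFrameLowering::emitPrologue(MachineFunction &MF, // hypothetical target
                                       MachineBasicBlock &MBB) const {
      // Without shrink-wrapping, the prologue still goes in the entry block,
      // which callers previously implied by not passing a block at all.
      assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
      // Stack adjustment and register-save code would be emitted here.
    }
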
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 9e53994..bb3b788 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -26,7 +26,7 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index b3b029e..9c594a9 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -32,13 +32,13 @@ namespace {
class SparcDAGToDAGISel : public SelectionDAGISel {
/// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
/// make the right decision when generating code for different targets.
- const SparcSubtarget &Subtarget;
- SparcTargetMachine &TM;
+ const SparcSubtarget *Subtarget;
public:
- explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
- : SelectionDAGISel(tm),
- Subtarget(tm.getSubtarget<SparcSubtarget>()),
- TM(tm) {
+ explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<SparcSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
SDNode *Select(SDNode *N) override;
@@ -50,7 +50,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -66,8 +66,7 @@ private:
} // end anonymous namespace
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg =
- TM.getSubtargetImpl()->getInstrInfo()->getGlobalBaseReg(MF);
+ unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
}
@@ -75,7 +74,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
@@ -94,7 +93,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
} else {
Base = Addr.getOperand(0);
}
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
}
}
@@ -110,7 +110,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
}
}
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
@@ -163,12 +163,15 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDValue TopPart;
if (N->getOpcode() == ISD::SDIV) {
TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS,
- CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ CurDAG->getTargetConstant(31, dl, MVT::i32)),
+ 0);
} else {
TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
}
- TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
- CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRASRrr, dl, MVT::i32,
+ TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+ TopPart = CurDAG->getCopyToReg(TopPart, dl, SP::Y, TopPart, SDValue()).getValue(1);
// FIXME: Handle div by immediate.
unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
@@ -184,7 +187,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
MulLHS, MulRHS);
// The high part is in the Y register.
- return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ return CurDAG->SelectNodeTo(N, SP::RDASR, MVT::i32,
+ CurDAG->getRegister(SP::Y, MVT::i32),
+ SDValue(Mul, 1));
}
}
@@ -196,12 +201,13 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
/// inline asm expressions.
bool
SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m: // memory
if (!SelectADDRrr(Op, Op0, Op1))
SelectADDRri(Op, Op0, Op1);
break;
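
The SparcDAGToDAGISel rewrite above replaces a construction-time Subtarget reference with a pointer refreshed in runOnMachineFunction, since subtargets can now vary per function (per-function target attributes). The pattern, as a sketch:

    class MyDAGToDAGISel : public SelectionDAGISel { // hypothetical sketch
      const SparcSubtarget *Subtarget = nullptr;     // cached per function
    public:
      explicit MyDAGToDAGISel(TargetMachine &TM) : SelectionDAGISel(TM) {}
      bool runOnMachineFunction(MachineFunction &MF) override {
        // Look the subtarget up here, not in the constructor: each
        // MachineFunction may carry its own subtarget.
        Subtarget = &MF.getSubtarget<SparcSubtarget>();
        return SelectionDAGISel::runOnMachineFunction(MF);
      }
      SDNode *Select(SDNode *N) override; // selection logic unchanged
    };
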
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 0a3607e..0481676 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -57,7 +57,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
- if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
// Assign whole thing in stack.
@@ -68,7 +68,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
}
// Try to get second reg.
- if (unsigned Reg = State.AllocateReg(RegList, 6))
+ if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
@@ -229,7 +229,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
}
RetOps[0] = Chain; // Update chain.
- RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32);
+ RetOps[1] = DAG.getConstant(RetAddrOffset, DL, MVT::i32);
// Add the flag if we have it.
if (Flag.getNode())
@@ -261,7 +261,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain,
// The second operand on the return instruction is the return address offset.
// The return address is always %i7+8 with the 64-bit ABI.
- RetOps.push_back(DAG.getConstant(8, MVT::i32));
+ RetOps.push_back(DAG.getConstant(8, DL, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -289,7 +289,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain,
// in the high bits of the register.
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
@@ -471,7 +471,7 @@ LowerFormalArguments_32(SDValue Chain,
// Sparc is big endian, so add an offset based on the ObjectVT.
unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
- DAG.getConstant(Offset, MVT::i32));
+ DAG.getConstant(Offset, dl, MVT::i32));
Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
MachinePointerInfo(),
VA.getValVT(), false, false, false,0);
@@ -497,7 +497,7 @@ LowerFormalArguments_32(SDValue Chain,
static const MCPhysReg ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
- unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs);
const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
@@ -570,7 +570,7 @@ LowerFormalArguments_64(SDValue Chain,
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, DL, MVT::i32));
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
@@ -723,16 +723,17 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
int FI = MFI->CreateStackObject(Size, Align, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
- SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
false, // isVolatile,
- (Size <= 32), // AlwaysInline if size <= 32
+ (Size <= 32), // AlwaysInline if size <= 32,
+ false, // isTailCall
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, dl, true),
dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -775,7 +776,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
assert(VA.needsCustom());
// store SRet argument in %sp+64
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getIntPtrConstant(64);
+ SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
@@ -792,7 +793,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// if it is double-word aligned, just store.
if (Offset % 8 == 0) {
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
@@ -810,7 +811,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
MachinePointerInfo(), false, false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, dl));
// Load the low part.
SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
MachinePointerInfo(), false, false, false, 0);
@@ -825,7 +826,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Store the low part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
MachinePointerInfo(),
@@ -835,13 +836,13 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// Store the high part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
MachinePointerInfo(),
false, false, 0));
// Store the low part.
- PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
MachinePointerInfo(),
@@ -866,7 +867,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + StackOffset,
+ dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
@@ -908,17 +910,17 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Chain);
Ops.push_back(Callee);
if (hasStructRetAttr)
- Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const SparcRegisterInfo *TRI =
- getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
- const uint32_t *Mask = ((hasReturnsTwice)
- ? TRI->getRTCallPreservedMask(CallConv)
- : TRI->getCallPreservedMask(CallConv));
+ const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask =
+ ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CallConv)
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -928,8 +930,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Assign locations to each value returned by this call.
@@ -1081,7 +1083,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Adjust the stack pointer to make room for the arguments.
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
// with more than 6 arguments.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
DL);
// Collect the set of registers to pass to the function and their values.
@@ -1129,10 +1131,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
unsigned Offset = 8 * (VA.getLocReg() - SP::I0);
unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128;
SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
- SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset);
+ SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL);
HiPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
HiPtrOff);
- SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8);
+ SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL);
LoPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr,
LoPtrOff);
@@ -1158,7 +1160,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// passed in the high bits of the register.
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
@@ -1183,7 +1185,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// %sp+BIAS+128 in ours.
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
Subtarget->getStackPointerBias() +
- 128);
+ 128, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
MachinePointerInfo(),
@@ -1227,11 +1229,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const SparcRegisterInfo *TRI =
- getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
+ const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
- : TRI->getCallPreservedMask(CLI.CallConv));
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(),
+ CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1246,8 +1248,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
InGlue = Chain.getValue(1);
// Revert the stack pointer immediately after the call.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
- DAG.getIntPtrConstant(0, true), InGlue, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
InGlue = Chain.getValue(1);
// Now extract the return values. This is more or less the same as
@@ -1288,7 +1290,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
- DAG.getConstant(32, MVT::i32));
+ DAG.getConstant(32, DL, MVT::i32));
// The callee promoted the return value, so insert an Assert?ext SDNode so
// we won't promote the value again in this function.
@@ -1365,10 +1367,9 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
}
}
-SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<SparcSubtarget>();
-
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
+ const SparcSubtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
@@ -1672,12 +1673,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(2);
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
}
const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((SPISD::NodeType)Opcode) {
+ case SPISD::FIRST_NUMBER: break;
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
@@ -1700,6 +1701,7 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::TLS_LD: return "SPISD::TLS_LD";
case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
}
+ return nullptr;
}
EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
@@ -1831,7 +1833,7 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
// abs44.
SDValue H44 = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_H44,
SparcMCExpr::VK_Sparc_M44, DAG);
- H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32));
+ H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, DL, MVT::i32));
SDValue L44 = withTargetFlags(Op, SparcMCExpr::VK_Sparc_L44, DAG);
L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
@@ -1840,7 +1842,7 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
// abs64.
SDValue Hi = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HH,
SparcMCExpr::VK_Sparc_HM, DAG);
- Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32));
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, DL, MVT::i32));
SDValue Lo = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HI,
SparcMCExpr::VK_Sparc_LO, DAG);
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
@@ -1895,7 +1897,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDValue Chain = DAG.getEntryNode();
SDValue InFlag;
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, true), DL);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, DL, true), DL);
Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
InFlag = Chain.getValue(1);
SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
@@ -1907,17 +1909,15 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Ops.push_back(Callee);
Ops.push_back(Symbol);
Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
- const uint32_t *Mask = getTargetMachine()
- .getSubtargetImpl()
- ->getRegisterInfo()
- ->getCallPreservedMask(CallingConv::C);
+ const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+ DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true),
- DAG.getIntPtrConstant(0, true), InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
@@ -2100,54 +2100,54 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
switch(SPCC) {
default: {
- SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UL : {
- SDValue Mask = DAG.getTargetConstant(1, Result.getValueType());
+ SDValue Mask = DAG.getTargetConstant(1, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_ULE: {
- SDValue RHS = DAG.getTargetConstant(2, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(2, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UG : {
- SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_G;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UGE: {
- SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_U : {
- SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_O : {
- SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_LG : {
- SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UE : {
- SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
@@ -2319,7 +2319,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
}
}
return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
- DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+ DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
@@ -2355,7 +2355,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
}
}
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
- DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+ DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
@@ -2372,7 +2372,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
SDValue Offset =
DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(),
DAG.getRegister(SP::I6, TLI.getPointerTy()),
- DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset()));
+ DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
@@ -2390,7 +2390,8 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
MachinePointerInfo(SV), false, false, false, 0);
// Increment the pointer, VAList, to the next vaarg.
SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getIntPtrConstant(VT.getSizeInBits()/8));
+ DAG.getIntPtrConstant(VT.getSizeInBits()/8,
+ DL));
// Store the incremented VAList to the legalized pointer.
InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr,
VAListPtr, MachinePointerInfo(SV), false, false, 0);
@@ -2419,7 +2420,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
regSpillArea += Subtarget->getStackPointerBias();
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
- DAG.getConstant(regSpillArea, VT));
+ DAG.getConstant(regSpillArea, dl, VT));
SDValue Ops[2] = { NewVal, Chain };
return DAG.getMergeValues(Ops, dl);
}
@@ -2448,7 +2449,7 @@ static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
if (Subtarget->is64Bit())
FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(stackBias));
+ DAG.getIntPtrConstant(stackBias, dl));
return FrameAddr;
}
@@ -2460,13 +2461,13 @@ static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
while (depth--) {
SDValue Ptr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, dl));
FrameAddr = DAG.getLoad(VT, dl, Chain, Ptr, MachinePointerInfo(),
false, false, false, 0);
}
if (Subtarget->is64Bit())
FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(stackBias));
+ DAG.getIntPtrConstant(stackBias, dl));
return FrameAddr;
}
@@ -2509,7 +2510,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
SDValue Ptr = DAG.getNode(ISD::ADD,
dl, VT,
FrameAddr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, dl));
RetAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), Ptr,
MachinePointerInfo(), false, false, false, 0);
@@ -2565,7 +2566,7 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
EVT addrVT = LdNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
LdNode->getBasePtr(),
- DAG.getConstant(8, addrVT));
+ DAG.getConstant(8, dl, addrVT));
SDValue Lo64 = DAG.getLoad(MVT::f64,
dl,
LdNode->getChain(),
@@ -2573,8 +2574,8 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
LdNode->getPointerInfo(),
false, false, false, alignment);
- SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
- SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f128);
@@ -2601,8 +2602,8 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
assert(StNode && StNode->getOffset().getOpcode() == ISD::UNDEF
&& "Unexpected node type");
- SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
- SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl,
@@ -2629,7 +2630,7 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
EVT addrVT = StNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
StNode->getBasePtr(),
- DAG.getConstant(8, addrVT));
+ DAG.getConstant(8, dl, addrVT));
OutChains[1] = DAG.getStore(StNode->getChain(),
dl,
SDValue(Lo64, 0),
@@ -2680,13 +2681,13 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
SDValue Src1 = Op.getOperand(0);
SDValue Src1Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1);
SDValue Src1Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src1,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, dl, MVT::i64));
Src1Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1Hi);
SDValue Src2 = Op.getOperand(1);
SDValue Src2Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2);
SDValue Src2Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src2,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, dl, MVT::i64));
Src2Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2Hi);
@@ -2713,7 +2714,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Lo);
Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Hi);
Hi = DAG.getNode(ISD::SHL, dl, MVT::i64, Hi,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, dl, MVT::i64));
SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
SDValue Ops[2] = { Dst, Carry };
@@ -2737,7 +2738,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() != VT)
return Op;
- SDValue ShiftAmt = DAG.getConstant(63, VT);
+ SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
SDValue RHS = Op.getOperand(1);
SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
@@ -2748,14 +2749,14 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
RTLIB::MUL_I128, WideVT,
Args, 4, isSigned, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
- MulResult, DAG.getIntPtrConstant(0));
+ MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
- MulResult, DAG.getIntPtrConstant(1));
+ MulResult, DAG.getIntPtrConstant(1, dl));
if (isSigned) {
SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
} else {
- TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, VT),
+ TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, dl, VT),
ISD::SETNE);
}
// MulResult is a node with an illegal type. Because such things are not
@@ -2906,8 +2907,7 @@ MachineBasicBlock*
SparcTargetLowering::expandSelectCC(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned BROpcode) const {
- const TargetInstrInfo &TII =
- *getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
@@ -2968,8 +2968,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode,
unsigned CondCode) const {
- const TargetInstrInfo &TII =
- *getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3123,7 +3122,8 @@ LowerAsmOperandForConstraint(SDValue Op,
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<13>(C->getSExtValue())) {
- Result = DAG.getTargetConstant(C->getSExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
return;
@@ -3137,8 +3137,9 @@ LowerAsmOperandForConstraint(SDValue Op,
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-std::pair<unsigned, const TargetRegisterClass*>
-SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3163,11 +3164,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
char regIdx = '0' + (intVal % 8);
char tmp[] = { '{', regType, regIdx, '}', 0 };
std::string newConstraint = std::string(tmp);
- return TargetLowering::getRegForInlineAsmConstraint(newConstraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint,
+ VT);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
bool
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index a62d569..b6bc3d2 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -22,7 +22,7 @@ namespace llvm {
class SparcSubtarget;
namespace SPISD {
- enum {
+ enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CMPICC, // Compare two GPR operands, set icc+xcc.
CMPFCC, // Compare two FP operands, set fcc.
@@ -54,7 +54,7 @@ namespace llvm {
class SparcTargetLowering : public TargetLowering {
const SparcSubtarget *Subtarget;
public:
- SparcTargetLowering(TargetMachine &TM);
+ SparcTargetLowering(TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// computeKnownBitsForTargetNode - Determine which of the bits specified
@@ -80,8 +80,10 @@ namespace llvm {
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
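
getRegForInlineAsmConstraint now threads the TargetRegisterInfo through explicitly rather than having TargetLowering dig it out of the target machine. Overrides handle their own constraints and forward TRI when delegating; sketched here against the Sparc register classes (hypothetical override, not the patch's code):

    std::pair<unsigned, const TargetRegisterClass *>
    MyTargetLowering::getRegForInlineAsmConstraint(
        const TargetRegisterInfo *TRI, const std::string &Constraint,
        MVT VT) const {
      if (Constraint.size() == 1 && Constraint[0] == 'r')
        return std::make_pair(0U, &SP::IntRegsRegClass);
      // Delegate anything unhandled, forwarding TRI to the base class.
      return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
    }
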
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index 54d8240..419e8cc 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -63,7 +63,7 @@ defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
// The ALU instructions want their simm13 operands as i32 immediates.
def as_i32imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
@@ -83,11 +83,12 @@ def nimm33 : PatLeaf<(imm), [{
// Bits 10-31 inverted. Same as assembler's %hix.
def HIX22 : SDNodeXForm<imm, [{
uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
- return CurDAG->getTargetConstant(Val, MVT::i32);
+ return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;
// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
def LOX10 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+ return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), SDLoc(N),
+ MVT::i32);
}]>;
def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
Requires<[Is64Bit]>;
@@ -121,12 +122,12 @@ def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
// Bits 42-63, same as assembler's %hh.
def HH22 : SDNodeXForm<imm, [{
uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
- return CurDAG->getTargetConstant(Val, MVT::i32);
+ return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;
// Bits 32-41, same as assembler's %hm.
def HM10 : SDNodeXForm<imm, [{
uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
- return CurDAG->getTargetConstant(Val, MVT::i32);
+ return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;
def : Pat<(i64 imm:$val),
(ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)),
@@ -485,8 +486,8 @@ def SETHIXi : F2_1<0b100,
}
// ATOMICS.
-let Predicates = [Is64Bit], Constraints = "$swap = $rd" in {
- def CASXrr: F3_1_asi<3, 0b111110, 0b10000000,
+let Predicates = [Is64Bit], Constraints = "$swap = $rd", asi = 0b10000000 in {
+ def CASXrr: F3_1_asi<3, 0b111110,
(outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2,
I64Regs:$swap),
"casx [$rs1], $rs2, $rd",
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index d36f67b..670e9e9 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -247,7 +247,9 @@ multiclass fp_cond_alias<string cond, int condVal> {
defm : int_cond_alias<"a", 0b1000>;
defm : int_cond_alias<"n", 0b0000>;
defm : int_cond_alias<"ne", 0b1001>;
+defm : int_cond_alias<"nz", 0b1001>; // same as ne
defm : int_cond_alias<"e", 0b0001>;
+defm : int_cond_alias<"z", 0b0001>; // same as e
defm : int_cond_alias<"g", 0b1010>;
defm : int_cond_alias<"le", 0b0010>;
defm : int_cond_alias<"ge", 0b1011>;
@@ -255,7 +257,9 @@ defm : int_cond_alias<"l", 0b0011>;
defm : int_cond_alias<"gu", 0b1100>;
defm : int_cond_alias<"leu", 0b0100>;
defm : int_cond_alias<"cc", 0b1101>;
+defm : int_cond_alias<"geu", 0b1101>; // same as cc
defm : int_cond_alias<"cs", 0b0101>;
+defm : int_cond_alias<"lu", 0b0101>; // same as cs
defm : int_cond_alias<"pos", 0b1110>;
defm : int_cond_alias<"neg", 0b0110>;
defm : int_cond_alias<"vc", 0b1111>;
@@ -270,7 +274,9 @@ defm : fp_cond_alias<"l", 0b0100>;
defm : fp_cond_alias<"ul", 0b0011>;
defm : fp_cond_alias<"lg", 0b0010>;
defm : fp_cond_alias<"ne", 0b0001>;
+defm : fp_cond_alias<"nz", 0b0001>; // same as ne
defm : fp_cond_alias<"e", 0b1001>;
+defm : fp_cond_alias<"z", 0b1001>; // same as e
defm : fp_cond_alias<"ue", 0b1010>;
defm : fp_cond_alias<"ge", 0b1011>;
defm : fp_cond_alias<"uge", 0b1100>;
@@ -300,6 +306,11 @@ def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>;
// mov simm13, rd -> or %g0, simm13, rd
def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>;
+// set value, rd
+// (turns into a sequence of sethi+or, depending on the value)
+// def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
+def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">;
+
// restore -> restore %g0, %g0, %g0
def : InstAlias<"restore", (RESTORErr G0, G0, G0)>;
@@ -323,3 +334,4 @@ def : InstAlias<"fcmped $rs1, $rs2", (V9FCMPED FCC0, DFPRegs:$rs1,
def : InstAlias<"fcmpeq $rs1, $rs2", (V9FCMPEQ FCC0, QFPRegs:$rs1,
QFPRegs:$rs2)>,
Requires<[HasHardQuad]>;
+
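
The SET pseudo above cannot be a plain InstAlias because its expansion depends on the immediate's value, which is why it uses the new AsmPseudoInst class (defined in SparcInstrFormats.td below). A sketch of the expansion the asm parser has to perform, with an assumed helper name and assuming a 32-bit value (not the patch's code):

    // SP:: opcode and register enums come from the generated Sparc target
    // descriptions; isInt<> is from llvm/Support/MathExtras.h.
    static void expandSET(int64_t Imm, unsigned Rd,
                          SmallVectorImpl<MCInst> &Insts) {
      if (isInt<13>(Imm)) {
        // Small values fold into a single "or %g0, simm13, rd".
        Insts.push_back(
            MCInstBuilder(SP::ORri).addReg(Rd).addReg(SP::G0).addImm(Imm));
        return;
      }
      // sethi %hi(imm), rd
      Insts.push_back(
          MCInstBuilder(SP::SETHIi).addReg(Rd).addImm((Imm >> 10) & 0x3fffff));
      if (Imm & 0x3ff) // or rd, %lo(imm), rd -- only when low bits are set
        Insts.push_back(
            MCInstBuilder(SP::ORri).addReg(Rd).addReg(Rd).addImm(Imm & 0x3ff));
    }
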
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
index 3b5e238..74ccf55 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -113,8 +113,9 @@ class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
// Specific F3 classes: SparcV8 manual, page 44
//
-class F3_1_asi<bits<2> opVal, bits<6> op3val, bits<8> asi, dag outs, dag ins,
+class F3_1_asi<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi;
bits<5> rs2;
let op = opVal;
@@ -126,8 +127,10 @@ class F3_1_asi<bits<2> opVal, bits<6> op3val, bits<8> asi, dag outs, dag ins,
}
class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins, string asmstr,
- list<dag> pattern> : F3_1_asi<opVal, op3val, 0, outs, ins,
- asmstr, pattern>;
+ list<dag> pattern> : F3_1_asi<opVal, op3val, outs, ins,
+ asmstr, pattern> {
+ let asi = 0;
+}
class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
@@ -328,3 +331,11 @@ class TRAPSPri<bits<6> op3Val, dag outs, dag ins, string asmstr,
let Inst{10-8} = 0;
let Inst{7-0} = imm;
}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<dag outs, dag ins, string asm>
+ : InstSP<outs, ins, asm, []> {
+ let isPseudo = 1;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 8b2e6bc..4b70f16 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -33,9 +33,8 @@ using namespace llvm;
void SparcInstrInfo::anchor() {}
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
- : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(),
+ Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index fe93ed7..6e08418 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -22,6 +22,8 @@
namespace llvm {
+class SparcSubtarget;
+
/// SPII - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index c320239..b1f795b 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -22,38 +22,38 @@ include "SparcInstrFormats.td"
//===----------------------------------------------------------------------===//
// True when generating 32-bit code.
-def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+def Is32Bit : Predicate<"!Subtarget->is64Bit()">;
// True when generating 64-bit code. This also implies HasV9.
-def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+def Is64Bit : Predicate<"Subtarget->is64Bit()">;
// HasV9 - This predicate is true when the target processor supports V9
// instructions. Note that the machine may be running in 32-bit mode.
-def HasV9 : Predicate<"Subtarget.isV9()">,
+def HasV9 : Predicate<"Subtarget->isV9()">,
AssemblerPredicate<"FeatureV9">;
// HasNoV9 - This predicate is true when the target doesn't have V9
// instructions. Use of this is just a hack for the isel not having proper
// costs for V8 instructions that are more expensive than their V9 ones.
-def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+def HasNoV9 : Predicate<"!Subtarget->isV9()">;
// HasVIS - This is true when the target processor has VIS extensions.
-def HasVIS : Predicate<"Subtarget.isVIS()">,
+def HasVIS : Predicate<"Subtarget->isVIS()">,
AssemblerPredicate<"FeatureVIS">;
-def HasVIS2 : Predicate<"Subtarget.isVIS2()">,
+def HasVIS2 : Predicate<"Subtarget->isVIS2()">,
AssemblerPredicate<"FeatureVIS2">;
-def HasVIS3 : Predicate<"Subtarget.isVIS3()">,
+def HasVIS3 : Predicate<"Subtarget->isVIS3()">,
AssemblerPredicate<"FeatureVIS3">;
// HasHardQuad - This is true when the target processor supports quad floating
// point instructions.
-def HasHardQuad : Predicate<"Subtarget.hasHardQuad()">;
+def HasHardQuad : Predicate<"Subtarget->hasHardQuad()">;
// UseDeprecatedInsts - This predicate is true when the target processor is a
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
// to use when appropriate. In either of these cases, the instruction selector
// will pick deprecated instructions.
-def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+def UseDeprecatedInsts : Predicate<"Subtarget->useDeprecatedV8Instructions()">;
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
@@ -64,13 +64,14 @@ def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
def LO10 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023, SDLoc(N),
MVT::i32);
}]>;
def HI22 : SDNodeXForm<imm, [{
// Transformation function: shift the immediate value down into the low bits.
- return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, SDLoc(N),
+ MVT::i32);
}]>;
def SETHIimm : PatLeaf<(imm), [{
@@ -282,6 +283,17 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(set Ty:$dst, (OpNode ADDRri:$addr))]>;
}
+// LoadA multiclass - As above, but also define alternate address space variant
+multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
+ Load<OpcStr, Op3Val, OpNode, RC, Ty> {
+ // TODO: The LD*Arr instructions are currently asm only; hooking up
+ // CodeGen's address spaces to use these is a future task.
+ def Arr : F3_1_asi<3, LoadAOp3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
+ !strconcat(OpcStr, "a [$addr] $asi, $dst"),
+ []>;
+}
+
// Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
RegisterClass RC, ValueType Ty> {
@@ -295,6 +307,16 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(OpNode Ty:$rd, ADDRri:$addr)]>;
}
+multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
+ Store<OpcStr, Op3Val, OpNode, RC, Ty> {
+ // TODO: The ST*Arr instructions are currently asm only; hooking up
+ // CodeGen's address spaces to use these is a future task.
+ def Arr : F3_1_asi<3, StoreAOp3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
+ !strconcat(OpcStr, "a $rd, [$addr] $asi"),
+ []>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -416,11 +438,11 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
// Section B.1 - Load Integer Instructions, p. 90
let DecoderMethod = "DecodeLoadInt" in {
- defm LDSB : Load<"ldsb", 0b001001, sextloadi8, IntRegs, i32>;
- defm LDSH : Load<"ldsh", 0b001010, sextloadi16, IntRegs, i32>;
- defm LDUB : Load<"ldub", 0b000001, zextloadi8, IntRegs, i32>;
- defm LDUH : Load<"lduh", 0b000010, zextloadi16, IntRegs, i32>;
- defm LD : Load<"ld", 0b000000, load, IntRegs, i32>;
+ defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>;
+ defm LDSH : LoadA<"ldsh", 0b001010, 0b011010, sextloadi16, IntRegs, i32>;
+ defm LDUB : LoadA<"ldub", 0b000001, 0b010001, zextloadi8, IntRegs, i32>;
+ defm LDUH : LoadA<"lduh", 0b000010, 0b010010, zextloadi16, IntRegs, i32>;
+ defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
}
// Section B.2 - Load Floating-point Instructions, p. 92
@@ -434,9 +456,9 @@ let DecoderMethod = "DecodeLoadQFP" in
// Section B.4 - Store Integer Instructions, p. 95
let DecoderMethod = "DecodeStoreInt" in {
- defm STB : Store<"stb", 0b000101, truncstorei8, IntRegs, i32>;
- defm STH : Store<"sth", 0b000110, truncstorei16, IntRegs, i32>;
- defm ST : Store<"st", 0b000100, store, IntRegs, i32>;
+ defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>;
+ defm STH : StoreA<"sth", 0b000110, 0b010110, truncstorei16, IntRegs, i32>;
+ defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
}
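Comparing the op3 pairs passed to LoadA and StoreA above, each alternate-space opcode is simply the normal opcode with bit 4 of op3 set. A small C++ sketch (ours) verifying that observation:

#include <cassert>

int main() {
  // op3 pairs taken from the LoadA/StoreA defm lines above:
  const unsigned Pairs[][2] = {
      {0b001001, 0b011001}, {0b001010, 0b011010}, // ldsb/a, ldsh/a
      {0b000001, 0b010001}, {0b000010, 0b010010}, // ldub/a, lduh/a
      {0b000000, 0b010000},                       // ld/a
      {0b000101, 0b010101}, {0b000110, 0b010110}, // stb/a, sth/a
      {0b000100, 0b010100},                       // st/a
  };
  for (const auto &P : Pairs)
    assert((P[0] | 0b010000) == P[1]); // alternate form = op3 with bit 4 set
  return 0;
}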
// Section B.5 - Store Floating-point Instructions, p. 97
@@ -704,20 +726,67 @@ let Uses = [O6],
}
// Section B.28 - Read State Register Instructions
-let Uses = [Y], rs1 = 0, rs2 = 0 in
- def RDY : F3_1<2, 0b101000,
- (outs IntRegs:$dst), (ins),
- "rd %y, $dst", []>;
+let rs2 = 0 in
+ def RDASR : F3_1<2, 0b101000,
+ (outs IntRegs:$rd), (ins ASRRegs:$rs1),
+ "rd $rs1, $rd", []>;
+
+// PSR, WIM, and TBR don't exist on the SparcV9, only the V8.
+let Predicates = [HasNoV9] in {
+ let rs2 = 0, rs1 = 0, Uses=[PSR] in
+ def RDPSR : F3_1<2, 0b101001,
+ (outs IntRegs:$rd), (ins),
+ "rd %psr, $rd", []>;
+
+ let rs2 = 0, rs1 = 0, Uses=[WIM] in
+ def RDWIM : F3_1<2, 0b101010,
+ (outs IntRegs:$rd), (ins),
+ "rd %wim, $rd", []>;
+
+ let rs2 = 0, rs1 = 0, Uses=[TBR] in
+ def RDTBR : F3_1<2, 0b101011,
+ (outs IntRegs:$rd), (ins),
+ "rd %tbr, $rd", []>;
+}
// Section B.29 - Write State Register Instructions
-let Defs = [Y], rd = 0 in {
- def WRYrr : F3_1<2, 0b110000,
- (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
- "wr $rs1, $rs2, %y", []>;
- def WRYri : F3_2<2, 0b110000,
- (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
- "wr $rs1, $simm13, %y", []>;
+def WRASRrr : F3_1<2, 0b110000,
+ (outs ASRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wr $rs1, $rs2, $rd", []>;
+def WRASRri : F3_2<2, 0b110000,
+ (outs ASRRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wr $rs1, $simm13, $rd", []>;
+
+// PSR, WIM, and TBR don't exist on the SparcV9, only the V8.
+let Predicates = [HasNoV9] in {
+ let Defs = [PSR], rd=0 in {
+ def WRPSRrr : F3_1<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wr $rs1, $rs2, %psr", []>;
+ def WRPSRri : F3_2<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wr $rs1, $simm13, %psr", []>;
+ }
+
+ let Defs = [WIM], rd=0 in {
+ def WRWIMrr : F3_1<2, 0b110010,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wr $rs1, $rs2, %wim", []>;
+ def WRWIMri : F3_2<2, 0b110010,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wr $rs1, $simm13, %wim", []>;
+ }
+
+ let Defs = [TBR], rd=0 in {
+ def WRTBRrr : F3_1<2, 0b110011,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wr $rs1, $rs2, %tbr", []>;
+ def WRTBRri : F3_2<2, 0b110011,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wr $rs1, $simm13, %tbr", []>;
+ }
}
+
// Convert Integer to Floating-point Instructions, p. 141
def FITOS : F3_3u<2, 0b110100, 0b011000100,
(outs FPRegs:$rd), (ins FPRegs:$rs2),
@@ -1116,10 +1185,20 @@ let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
(outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val),
"swap [$addr], $dst",
[(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
+ def SWAPArr : F3_1_asi<3, 0b011111,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val),
+ "swapa [$addr] $asi, $dst",
+ [/*FIXME: pattern?*/]>;
}
-let Predicates = [HasV9], Constraints = "$swap = $rd" in
- def CASrr: F3_1_asi<3, 0b111100, 0b10000000,
+// TODO: Should add a CASArr variant. In fact, the CAS instruction,
+// unlike other instructions, only comes in a form which requires an
+// ASI be provided. The ASI value hardcoded here is ASI_PRIMARY, the
+// default unprivileged ASI for SparcV9. (Also of note: some modern
+// SparcV8 implementations provide CASA as an extension, but require
+// the use of SparcV8's default ASI, 0xA ("User Data") instead.)
+let Predicates = [HasV9], Constraints = "$swap = $rd", asi = 0b10000000 in
+ def CASrr: F3_1_asi<3, 0b111100,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2,
IntRegs:$swap),
"cas [$rs1], $rs2, $rd",
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMCInstLower.cpp b/contrib/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
index 9e94d2c..9388d59 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -63,7 +63,7 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI,
AP.OutContext);
const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym,
AP.OutContext);
- return MCOperand::CreateExpr(expr);
+ return MCOperand::createExpr(expr);
}
static MCOperand LowerOperand(const MachineInstr *MI,
@@ -74,10 +74,10 @@ static MCOperand LowerOperand(const MachineInstr *MI,
case MachineOperand::MO_Register:
if (MO.isImplicit())
break;
- return MCOperand::CreateReg(MO.getReg());
+ return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::createImm(MO.getImm());
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 3cca98f..9667bc0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -34,17 +34,16 @@ static cl::opt<bool>
ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
cl::desc("Reserve application registers (%g2-%g4)"));
-SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
- : SparcGenRegisterInfo(SP::O7), Subtarget(st) {
-}
+SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {}
const MCPhysReg*
SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}
-const uint32_t*
-SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+SparcRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_RegMask;
}
@@ -55,6 +54,7 @@ SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
// FIXME: G1 reserved for now for large imm generation by frame code.
Reserved.set(SP::G1);
@@ -89,6 +89,7 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const TargetRegisterClass*
SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
@@ -160,6 +161,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm() +
Subtarget.getStackPointerBias();
@@ -174,7 +176,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
@@ -186,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 63567b0..764a894 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -20,20 +20,13 @@
#include "SparcGenRegisterInfo.inc"
namespace llvm {
-
-class SparcSubtarget;
-class TargetInstrInfo;
-class Type;
-
struct SparcRegisterInfo : public SparcGenRegisterInfo {
- SparcSubtarget &Subtarget;
-
- SparcRegisterInfo(SparcSubtarget &st);
+ SparcRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
- const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index 2cadff1..e504da4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -56,6 +56,43 @@ foreach I = 0-3 in
// Y register
def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>;
+// Ancillary state registers (implementation defined)
+def ASR1 : SparcCtrlReg<1, "ASR1">;
+def ASR2 : SparcCtrlReg<2, "ASR2">;
+def ASR3 : SparcCtrlReg<3, "ASR3">;
+def ASR4 : SparcCtrlReg<4, "ASR4">;
+def ASR5 : SparcCtrlReg<5, "ASR5">;
+def ASR6 : SparcCtrlReg<6, "ASR6">;
+def ASR7 : SparcCtrlReg<7, "ASR7">;
+def ASR8 : SparcCtrlReg<8, "ASR8">;
+def ASR9 : SparcCtrlReg<9, "ASR9">;
+def ASR10 : SparcCtrlReg<10, "ASR10">;
+def ASR11 : SparcCtrlReg<11, "ASR11">;
+def ASR12 : SparcCtrlReg<12, "ASR12">;
+def ASR13 : SparcCtrlReg<13, "ASR13">;
+def ASR14 : SparcCtrlReg<14, "ASR14">;
+def ASR15 : SparcCtrlReg<15, "ASR15">;
+def ASR16 : SparcCtrlReg<16, "ASR16">;
+def ASR17 : SparcCtrlReg<17, "ASR17">;
+def ASR18 : SparcCtrlReg<18, "ASR18">;
+def ASR19 : SparcCtrlReg<19, "ASR19">;
+def ASR20 : SparcCtrlReg<20, "ASR20">;
+def ASR21 : SparcCtrlReg<21, "ASR21">;
+def ASR22 : SparcCtrlReg<22, "ASR22">;
+def ASR23 : SparcCtrlReg<23, "ASR23">;
+def ASR24 : SparcCtrlReg<24, "ASR24">;
+def ASR25 : SparcCtrlReg<25, "ASR25">;
+def ASR26 : SparcCtrlReg<26, "ASR26">;
+def ASR27 : SparcCtrlReg<27, "ASR27">;
+def ASR28 : SparcCtrlReg<28, "ASR28">;
+def ASR29 : SparcCtrlReg<29, "ASR29">;
+def ASR30 : SparcCtrlReg<30, "ASR30">;
+def ASR31 : SparcCtrlReg<31, "ASR31">;
+
+// Note that PSR, WIM, and TBR don't exist on the SparcV9, only the V8.
+def PSR : SparcCtrlReg<0, "PSR">;
+def WIM : SparcCtrlReg<0, "WIM">;
+def TBR : SparcCtrlReg<0, "TBR">;
// Integer registers
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
@@ -209,3 +246,7 @@ def QFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 15)>;
// Floating point control register classes.
def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>;
+
+// Ancillary state registers
+def ASRRegs : RegisterClass<"SP", [i32], 32,
+ (add Y, (sequence "ASR%u", 1, 31))>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
index a3a21d6..6818291 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -23,7 +23,7 @@ class SparcTargetMachine;
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
explicit SparcSelectionDAGInfo(const DataLayout &DL);
- ~SparcSelectionDAGInfo();
+ ~SparcSelectionDAGInfo() override;
};
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index eea0c8c..ce1105f 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -26,32 +26,6 @@ using namespace llvm;
void SparcSubtarget::anchor() { }
-static std::string computeDataLayout(const SparcSubtarget &ST) {
- // Sparc is big endian.
- std::string Ret = "E-m:e";
-
- // Some ABIs have 32bit pointers.
- if (!ST.is64Bit())
- Ret += "-p:32:32";
-
- // Alignments for 64 bit integers.
- Ret += "-i64:64";
-
- // On SparcV9 128 floats are aligned to 128 bits, on others only to 64.
- // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
- if (ST.is64Bit())
- Ret += "-n32:64";
- else
- Ret += "-f128:64-n32";
-
- if (ST.is64Bit())
- Ret += "-S128";
- else
- Ret += "-S64";
-
- return Ret;
-}
-
SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
IsV9 = false;
@@ -79,8 +53,8 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, TargetMachine &TM,
bool is64Bit)
: SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit),
- DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
- InstrInfo(*this), TLInfo(TM), TSInfo(DL), FrameLowering(*this) {}
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ TSInfo(*TM.getDataLayout()), FrameLowering(*this) {}
int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index d503b2b..e6cf460 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -37,7 +37,6 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool Is64Bit;
bool HasHardQuad;
bool UsePopc;
- const DataLayout DL; // Calculates type size & alignment
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
@@ -60,7 +59,6 @@ public:
const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const DataLayout *getDataLayout() const override { return &DL; }
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 6dccddc..d43cd9e 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -14,7 +14,7 @@
#include "SparcTargetObjectFile.h"
#include "Sparc.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -22,6 +22,34 @@ extern "C" void LLVMInitializeSparcTarget() {
// Register the target.
RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
+ RegisterTargetMachine<SparcelTargetMachine> Z(TheSparcelTarget);
+}
+
+static std::string computeDataLayout(const Triple &T, bool is64Bit) {
+ // Sparc is typically big endian, but some are little.
+ std::string Ret = T.getArch() == Triple::sparcel ? "e" : "E";
+ Ret += "-m:e";
+
+ // Some ABIs have 32-bit pointers.
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ // Alignments for 64 bit integers.
+ Ret += "-i64:64";
+
+ // On SparcV9, 128-bit floats are aligned to 128 bits; on others, only to 64.
+ // On SparcV9, registers can hold 64 or 32 bits; on others, only 32.
+ if (is64Bit)
+ Ret += "-n32:64";
+ else
+ Ret += "-f128:64-n32";
+
+ if (is64Bit)
+ Ret += "-S128";
+ else
+ Ret += "-S64";
+
+ return Ret;
}
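Tracing this function, the resulting layout strings (derived here by following the branches, not quoted from the patch) are:

  sparc   (big endian, 32-bit):    "E-m:e-p:32:32-i64:64-f128:64-n32-S64"
  sparcel (little endian, 32-bit): "e-m:e-p:32:32-i64:64-f128:64-n32-S64"
  sparcv9 (big endian, 64-bit):    "E-m:e-i64:64-n32:64-S128"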
/// SparcTargetMachine ctor - Create an ILP32 architecture model
@@ -30,11 +58,11 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- TLOF(make_unique<SparcELFTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, computeDataLayout(Triple(TT), is64bit), TT, CPU, FS,
+ Options, RM, CM, OL),
+ TLOF(make_unique<SparcELFTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
@@ -90,12 +118,18 @@ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
void SparcV9TargetMachine::anchor() { }
-SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
- StringRef TT, StringRef CPU,
- StringRef FS,
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+void SparcelTargetMachine::anchor() {}
+
+SparcelTargetMachine::SparcelTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index 096e7c8..fd05b8c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -30,7 +30,9 @@ public:
CodeGenOpt::Level OL, bool is64bit);
~SparcTargetMachine() override;
- const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const SparcSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
@@ -56,9 +58,18 @@ public:
class SparcV9TargetMachine : public SparcTargetMachine {
virtual void anchor();
public:
- SparcV9TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+ SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+class SparcelTargetMachine : public SparcTargetMachine {
+ virtual void anchor();
+
+public:
+ SparcelTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 4eea163..ab1c6be 100644
--- a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -14,10 +14,13 @@ using namespace llvm;
Target llvm::TheSparcTarget;
Target llvm::TheSparcV9Target;
+Target llvm::TheSparcelTarget;
extern "C" void LLVMInitializeSparcTargetInfo() {
- RegisterTarget<Triple::sparc, /*HasJIT=*/ true>
- X(TheSparcTarget, "sparc", "Sparc");
- RegisterTarget<Triple::sparcv9, /*HasJIT=*/ true>
- Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+ RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(TheSparcTarget, "sparc",
+ "Sparc");
+ RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y(TheSparcV9Target,
+ "sparcv9", "Sparc V9");
+ RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z(TheSparcelTarget,
+ "sparcel", "Sparc LE");
}
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index cb528db..b721def 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -39,13 +39,17 @@ enum RegisterKind {
ADDR64Reg,
FP32Reg,
FP64Reg,
- FP128Reg
+ FP128Reg,
+ VR32Reg,
+ VR64Reg,
+ VR128Reg
};
enum MemoryKind {
BDMem,
BDXMem,
- BDLMem
+ BDLMem,
+ BDVMem
};
class SystemZOperand : public MCParsedAsmOperand {
@@ -57,6 +61,7 @@ private:
KindReg,
KindAccessReg,
KindImm,
+ KindImmTLS,
KindMem
};
@@ -84,34 +89,42 @@ private:
};
// Base + Disp + Index, where Base and Index are LLVM registers or 0.
- // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
- // Length is the operand length for D(L,B)-style operands, otherwise
- // it is null.
+ // MemKind says what type of memory this is and RegKind says what type
+ // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+ // length for D(L,B)-style operands, otherwise it is null.
struct MemOp {
- unsigned Base : 8;
- unsigned Index : 8;
- unsigned RegKind : 8;
- unsigned Unused : 8;
+ unsigned Base : 12;
+ unsigned Index : 12;
+ unsigned MemKind : 4;
+ unsigned RegKind : 4;
const MCExpr *Disp;
const MCExpr *Length;
};
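Widening Base and Index from 8 to 12 bits leaves room for register numbers above 255 (needed once vector registers are modelled) while the four fields still fit one 32-bit word, since 12 + 12 + 4 + 4 = 32. A standalone sketch (ours) of the packing:

#include <cstdint>

// Same field widths as the MemOp struct above; on common ABIs the four
// bitfields pack into a single 32-bit word.
struct MemOpBits {
  uint32_t Base : 12;
  uint32_t Index : 12;
  uint32_t MemKind : 4;
  uint32_t RegKind : 4;
};
static_assert(sizeof(MemOpBits) == 4, "fields pack into one word");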
+ // Imm is an immediate operand, and Sym is an optional TLS symbol
+ // for use with a __tls_get_offset marker relocation.
+ struct ImmTLSOp {
+ const MCExpr *Imm;
+ const MCExpr *Sym;
+ };
+
union {
TokenOp Token;
RegOp Reg;
unsigned AccessReg;
const MCExpr *Imm;
+ ImmTLSOp ImmTLS;
MemOp Mem;
};
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
public:
@@ -149,10 +162,11 @@ public:
return Op;
}
static std::unique_ptr<SystemZOperand>
- createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp,
- unsigned Index, const MCExpr *Length, SMLoc StartLoc,
- SMLoc EndLoc) {
+ createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
+ const MCExpr *Disp, unsigned Index, const MCExpr *Length,
+ SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
+ Op->Mem.MemKind = MemKind;
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
@@ -160,6 +174,14 @@ public:
Op->Mem.Length = Length;
return Op;
}
+ static std::unique_ptr<SystemZOperand>
+ createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
+ Op->ImmTLS.Imm = Imm;
+ Op->ImmTLS.Sym = Sym;
+ return Op;
+ }
// Token operands
bool isToken() const override {
@@ -200,24 +222,40 @@ public:
return Imm;
}
+ // Immediate operands with optional TLS symbol.
+ bool isImmTLS() const {
+ return Kind == KindImmTLS;
+ }
+
// Memory operands.
bool isMem() const override {
return Kind == KindMem;
}
- bool isMem(RegisterKind RegKind, MemoryKind MemKind) const {
+ bool isMem(MemoryKind MemKind) const {
return (Kind == KindMem &&
- Mem.RegKind == RegKind &&
- (MemKind == BDXMem || !Mem.Index) &&
- (MemKind == BDLMem) == (Mem.Length != nullptr));
+ (Mem.MemKind == MemKind ||
+ // A BDMem can be treated as a BDXMem in which the index
+ // register field is 0.
+ (Mem.MemKind == BDMem && MemKind == BDXMem)));
+ }
+ bool isMem(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind) && Mem.RegKind == RegKind;
}
- bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
- return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
+ bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
}
- bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const {
- return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287);
+ bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
}
bool isMemDisp12Len8(RegisterKind RegKind) const {
- return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100);
+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100);
+ }
+ void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDVMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
}
// Override MCParsedAsmOperand.
@@ -229,12 +267,12 @@ public:
// to an instruction.
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addAccessRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
assert(Kind == KindAccessReg && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateImm(AccessReg));
+ Inst.addOperand(MCOperand::createImm(AccessReg));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
@@ -242,24 +280,31 @@ public:
}
void addBDAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands");
- assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
}
void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands");
- assert(Kind == KindMem && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDXMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
- Inst.addOperand(MCOperand::CreateReg(Mem.Index));
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
}
void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands");
- assert(Kind == KindMem && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDLMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
addExpr(Inst, Mem.Length);
}
+ void addImmTLSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands");
+ assert(Kind == KindImmTLS && "Invalid operand type");
+ addExpr(Inst, ImmTLS.Imm);
+ if (ImmTLS.Sym)
+ addExpr(Inst, ImmTLS.Sym);
+ }
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
@@ -273,17 +318,26 @@ public:
bool isFP32() const { return isReg(FP32Reg); }
bool isFP64() const { return isReg(FP64Reg); }
bool isFP128() const { return isReg(FP128Reg); }
- bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); }
- bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); }
- bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); }
- bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); }
- bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); }
- bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); }
+ bool isVR32() const { return isReg(VR32Reg); }
+ bool isVR64() const { return isReg(VR64Reg); }
+ bool isVF128() const { return false; }
+ bool isVR128() const { return isReg(VR128Reg); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+ bool isU1Imm() const { return isImm(0, 1); }
+ bool isU2Imm() const { return isImm(0, 3); }
+ bool isU3Imm() const { return isImm(0, 7); }
bool isU4Imm() const { return isImm(0, 15); }
bool isU6Imm() const { return isImm(0, 63); }
bool isU8Imm() const { return isImm(0, 255); }
bool isS8Imm() const { return isImm(-128, 127); }
+ bool isU12Imm() const { return isImm(0, 4095); }
bool isU16Imm() const { return isImm(0, 65535); }
bool isS16Imm() const { return isImm(-32768, 32767); }
bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
@@ -300,6 +354,7 @@ private:
enum RegisterGroup {
RegGR,
RegFP,
+ RegV,
RegAccess
};
struct Register {
@@ -318,12 +373,15 @@ private:
RegisterKind Kind);
bool parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, const MCExpr *&Length,
+ unsigned &Index, bool &IsVector, const MCExpr *&Length,
const unsigned *Regs, RegisterKind RegKind);
OperandMatchResultTy parseAddress(OperandVector &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind);
+ MemoryKind MemKind, const unsigned *Regs,
+ RegisterKind RegKind);
+
+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal, bool AllowTLS);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -382,26 +440,45 @@ public:
OperandMatchResultTy parseFP128(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
}
+ OperandMatchResultTy parseVR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+ }
+ OperandMatchResultTy parseVR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+ }
+ OperandMatchResultTy parseVF128(OperandVector &Operands) {
+ llvm_unreachable("Shouldn't be used as an operand");
+ }
+ OperandMatchResultTy parseVR128(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+ }
OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
+ return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
}
OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
+ return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
+ return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
+ return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseAccessReg(OperandVector &Operands);
- OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal);
OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
- return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
}
OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
- return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
+ }
+ OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true);
+ }
+ OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
} // end anonymous namespace
@@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegGR;
else if (Prefix == 'f' && Reg.Num < 16)
Reg.Group = RegFP;
+ else if (Prefix == 'v' && Reg.Num < 32)
+ Reg.Group = RegV;
else if (Prefix == 'a' && Reg.Num < 16)
Reg.Group = RegAccess;
else
@@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
// Regs maps asm register numbers to LLVM register numbers and RegKind
// says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, const MCExpr *&Length,
- const unsigned *Regs,
+ unsigned &Index, bool &IsVector,
+ const MCExpr *&Length, const unsigned *Regs,
RegisterKind RegKind) {
// Parse the displacement, which must always be present.
if (getParser().parseExpression(Disp))
@@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse the optional base and index.
Index = 0;
Base = 0;
+ IsVector = false;
Length = nullptr;
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
@@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
if (getLexer().is(AsmToken::Percent)) {
// Parse the first register and decide whether it's a base or an index.
Register Reg;
- if (parseRegister(Reg, RegGR, Regs, RegKind))
+ if (parseRegister(Reg))
return true;
- if (getLexer().is(AsmToken::Comma))
- Index = Reg.Num;
- else
- Base = Reg.Num;
+ if (Reg.Group == RegV) {
+ // A vector index register. The base register is optional.
+ IsVector = true;
+ Index = SystemZMC::VR128Regs[Reg.Num];
+ } else if (Reg.Group == RegGR) {
+ if (Reg.Num == 0)
+ return Error(Reg.StartLoc, "%r0 used in an address");
+ // If there are two registers, the first one is the index and the
+ // second is the base.
+ if (getLexer().is(AsmToken::Comma))
+ Index = Regs[Reg.Num];
+ else
+ Base = Regs[Reg.Num];
+ } else
+ return Error(Reg.StartLoc, "invalid address register");
} else {
// Parse the length.
if (getParser().parseExpression(Length))
@@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs,
- RegisterKind RegKind, MemoryKind MemKind) {
+SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
+ const unsigned *Regs, RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
+ bool IsVector;
const MCExpr *Disp;
const MCExpr *Length;
- if (parseAddress(Base, Disp, Index, Length, Regs, RegKind))
+ if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind))
return MatchOperand_ParseFail;
- if (Index && MemKind != BDXMem)
- {
- Error(StartLoc, "invalid use of indexed addressing");
- return MatchOperand_ParseFail;
- }
+ if (IsVector && MemKind != BDVMem) {
+ Error(StartLoc, "invalid use of vector addressing");
+ return MatchOperand_ParseFail;
+ }
- if (Length && MemKind != BDLMem)
- {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
+ if (!IsVector && MemKind == BDVMem) {
+ Error(StartLoc, "vector index required in address");
+ return MatchOperand_ParseFail;
+ }
- if (!Length && MemKind == BDLMem)
- {
- Error(StartLoc, "missing length in address");
- return MatchOperand_ParseFail;
- }
+ if (Index && MemKind != BDXMem && MemKind != BDVMem) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Length && MemKind != BDLMem) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+
+ if (!Length && MemKind == BDLMem) {
+ Error(StartLoc, "missing length in address");
+ return MatchOperand_ParseFail;
+ }
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
- Length, StartLoc, EndLoc));
+ Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
+ Index, Length, StartLoc,
+ EndLoc));
return MatchOperand_Success;
}
@@ -589,6 +689,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = SystemZMC::GR64Regs[Reg.Num];
else if (Reg.Group == RegFP)
RegNo = SystemZMC::FP64Regs[Reg.Num];
+ else if (Reg.Group == RegV)
+ RegNo = SystemZMC::VR128Regs[Reg.Num];
else
// FIXME: Access registers aren't modelled as LLVM registers yet.
return Error(Reg.StartLoc, "invalid operand for instruction");
@@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
// so we treat any plain expression as an immediate.
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
+ bool IsVector;
const MCExpr *Expr, *Length;
- if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg))
+ if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs,
+ ADDR64Reg))
return true;
SMLoc EndLoc =
@@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(OperandVector &Operands) {
SystemZAsmParser::OperandMatchResultTy
SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal) {
+ int64_t MaxVal, bool AllowTLS) {
MCContext &Ctx = getContext();
MCStreamer &Out = getStreamer();
const MCExpr *Expr;
@@ -759,16 +863,61 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
Error(StartLoc, "offset out of range");
return MatchOperand_ParseFail;
}
- MCSymbol *Sym = Ctx.CreateTempSymbol();
+ MCSymbol *Sym = Ctx.createTempSymbol();
Out.EmitLabel(Sym);
const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
Ctx);
Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx);
}
+ // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
+ const MCExpr *Sym = nullptr;
+ if (AllowTLS && getLexer().is(AsmToken::Colon)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ StringRef Name = Parser.getTok().getString();
+ if (Name == "tls_gdcall")
+ Kind = MCSymbolRefExpr::VK_TLSGD;
+ else if (Name == "tls_ldcall")
+ Kind = MCSymbolRefExpr::VK_TLSLDM;
+ else {
+ Error(Parser.getTok().getLoc(), "unknown TLS tag");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Identifier = Parser.getTok().getString();
+ Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier),
+ Kind, Ctx);
+ Parser.Lex();
+ }
+
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+
+ if (AllowTLS)
+ Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym,
+ StartLoc, EndLoc));
+ else
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+
return MatchOperand_Success;
}
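With AllowTLS set, this accepts syntax such as "brasl %r14, __tls_get_offset@PLT:tls_gdcall:x" (the variable name x is illustrative), attaching the TLS symbol as the optional second operand of the KindImmTLS operand.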
diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 23173bf..bf67b75 100644
--- a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~SystemZDisassembler() {}
+ ~SystemZDisassembler() override {}
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -47,74 +47,94 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned *Regs) {
- assert(RegNo < 16 && "Invalid register");
+ const unsigned *Regs, unsigned Size) {
+ assert(RegNo < Size && "Invalid register");
RegNo = Regs[RegNo];
if (RegNo == 0)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(RegNo));
return MCDisassembler::Success;
}
static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16);
}
static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16);
}
static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16);
}
static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16);
}
static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16);
}
static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16);
+}
+
+static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32);
+}
+
+static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32);
+}
+
+static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
}
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
template<unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
@@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm,
return decodeUImmOperand<4>(Inst, Imm);
}
+static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<1>(Inst, Imm);
+}
+
+static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<2>(Inst, Imm);
+}
+
+static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<3>(Inst, Imm);
+}
+
static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, const void *Decoder) {
return decodeUImmOperand<4>(Inst, Imm);
@@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
return decodeUImmOperand<8>(Inst, Imm);
}
+static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<12>(Inst, Imm);
+}
+
static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, const void *Decoder) {
return decodeUImmOperand<16>(Inst, Imm);
@@ -168,7 +208,7 @@ template<unsigned N>
static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address) {
assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address));
return MCDisassembler::Success;
}
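The stored immediate counts halfwords, so the decoded target is Address + 2 * SignExtend(Imm); with N = 16, for instance, an encoded field of 0xFFFF sign-extends to -1 and yields Address - 2.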
@@ -189,8 +229,8 @@ static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = Field >> 12;
uint64_t Disp = Field & 0xfff;
assert(Base < 16 && "Invalid BDAddr12");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
return MCDisassembler::Success;
}
@@ -199,8 +239,8 @@ static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = Field >> 20;
uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
assert(Base < 16 && "Invalid BDAddr20");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
return MCDisassembler::Success;
}
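The 20-bit displacement is stored split: DL (the low 12 bits) in field bits [19:8] and DH (the high 8 bits) in bits [7:0]. A small round-trip sketch (ours) of the swizzle used above:

#include <cassert>
#include <cstdint>

int main() {
  // Encode DL into bits [19:8] and DH into bits [7:0]; the base register
  // bits above bit 19 are omitted here for clarity.
  uint64_t Disp20 = 0xABCDE; // arbitrary 20-bit value
  uint64_t DL = Disp20 & 0xfff, DH = Disp20 >> 12;
  uint64_t Field = (DL << 8) | DH;
  uint64_t Decoded = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
  assert(Decoded == Disp20); // matches the decode expression above
  return 0;
}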
@@ -210,9 +250,9 @@ static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 12) & 0xf;
uint64_t Disp = Field & 0xfff;
assert(Index < 16 && "Invalid BDXAddr12");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
- Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
return MCDisassembler::Success;
}
@@ -222,9 +262,9 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 20) & 0xf;
uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
assert(Index < 16 && "Invalid BDXAddr20");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
- Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
return MCDisassembler::Success;
}
@@ -234,9 +274,21 @@ static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 12) & 0xf;
uint64_t Disp = Field & 0xfff;
assert(Length < 256 && "Invalid BDLAddr12Len8");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
- Inst.addOperand(MCOperand::CreateImm(Length + 1));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createImm(Length + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Index < 32 && "Invalid BDVAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index]));
return MCDisassembler::Success;
}
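The BDV field packs a 5-bit vector index in bits [20:16], a 4-bit base in [15:12], and a 12-bit displacement in [11:0]. A standalone sketch (ours) of that layout:

#include <cassert>
#include <cstdint>

int main() {
  // Field layout matching the shifts and masks in decodeBDVAddr12Operand.
  uint64_t Index = 17, Base = 13, Disp = 0x123;
  uint64_t Field = (Index << 16) | (Base << 12) | Disp;
  assert((Field >> 16) == Index);
  assert(((Field >> 12) & 0xf) == Base);
  assert((Field & 0xfff) == Disp);
  return 0;
}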
@@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
}
+static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
#include "SystemZGenDisassemblerTables.inc"
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index d2ba9b6..373ddfa 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -9,7 +9,10 @@
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -21,13 +24,17 @@ using namespace llvm;
void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
unsigned Index, raw_ostream &O) {
O << Disp;
- if (Base) {
+ if (Base || Index) {
O << '(';
- if (Index)
- O << '%' << getRegisterName(Index) << ',';
- O << '%' << getRegisterName(Base) << ')';
- } else
- assert(!Index && "Shouldn't have an index without a base");
+ if (Index) {
+ O << '%' << getRegisterName(Index);
+ if (Base)
+ O << ',';
+ }
+ if (Base)
+ O << '%' << getRegisterName(Base);
+ O << ')';
+ }
}
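Traced through, the reworked logic prints, for example, "16(%r1)" for a base only, "16(%v2)" for a vector index with no base, and "16(%v2,%r1)" when both are present (register names illustrative); a bare displacement prints with no parentheses at all.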
void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
@@ -42,7 +49,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
}
void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -51,60 +59,78 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << '%' << getRegisterName(RegNo);
}
-void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
+template<unsigned N>
+void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<4>(Value) && "Invalid u4imm argument");
+ assert(isUInt<N>(Value) && "Invalid uimm argument");
O << Value;
}
-void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
+template<unsigned N>
+void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<6>(Value) && "Invalid u6imm argument");
+ assert(isInt<N>(Value) && "Invalid simm argument");
O << Value;
}
+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<1>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<2>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<3>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<4>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<6>(MI, OpNum, O);
+}
+
void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<8>(Value) && "Invalid s8imm argument");
- O << Value;
+ printSImmOperand<8>(MI, OpNum, O);
}
void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<8>(Value) && "Invalid u8imm argument");
- O << Value;
+ printUImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<12>(MI, OpNum, O);
}
void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<16>(Value) && "Invalid s16imm argument");
- O << Value;
+ printSImmOperand<16>(MI, OpNum, O);
}
void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<16>(Value) && "Invalid u16imm argument");
- O << Value;
+ printUImmOperand<16>(MI, OpNum, O);
}
void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<32>(Value) && "Invalid s32imm argument");
- O << Value;
+ printSImmOperand<32>(MI, OpNum, O);
}
void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<32>(Value) && "Invalid u32imm argument");
- O << Value;
+ printUImmOperand<32>(MI, OpNum, O);
}
void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
@@ -124,6 +150,29 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
O << *MO.getExpr();
}
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ // Output the PC-relative operand.
+ printPCRelOperand(MI, OpNum, O);
+
+ // Output the TLS marker if present.
+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
+ const MCOperand &MO = MI->getOperand(OpNum + 1);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
+ switch (refExp.getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ O << ":tls_gdcall:";
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ O << ":tls_ldcall:";
+ break;
+ default:
+ llvm_unreachable("Unexpected symbol kind");
+ }
+ O << refExp.getSymbol().getName();
+ }
+}
+
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printOperand(MI->getOperand(OpNum), O);
@@ -153,6 +202,13 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
O << ')';
}
+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
raw_ostream &O) {
static const char *const CondNames[] = {
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 753903c..847b696 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
// Print various types of operand.
@@ -47,15 +48,21 @@ private:
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 6e7268d..1c3887a 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
switch (unsigned(Kind)) {
case SystemZ::FK_390_PC16DBL:
case SystemZ::FK_390_PC32DBL:
- case SystemZ::FK_390_PLT16DBL:
- case SystemZ::FK_390_PLT32DBL:
return (int64_t)Value / 2;
+
+ case SystemZ::FK_390_TLS_CALL:
+ return 0;
}
llvm_unreachable("Unknown fixup kind!");
@@ -61,7 +62,7 @@ public:
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createSystemZObjectWriter(OS, OSABI);
}
};
@@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
{ "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ { "FK_390_TLS_CALL", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
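// Worked example (values ours, not from the patch): PC16DBL and PC32DBL
// operands count 2-byte halfwords, so a byte offset of 0x1000 encodes as
// 0x800, while FK_390_TLS_CALL is a zero-width marker fixup whose only job
// is to produce an R_390_TLS_*CALL relocation:
//
//   extractBitsForFixup(MCFixupKind(SystemZ::FK_390_PC16DBL), 0x1000); // 0x800
//   extractBitsForFixup(MCFixupKind(SystemZ::FK_390_TLS_CALL), 42);    // 0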
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 27b4bd8..c9290c1 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -16,7 +16,9 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;
@@ -32,10 +34,10 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() {}
+ ~SystemZMCCodeEmitter() override {}
// Override MCCodeEmitter.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -70,37 +72,55 @@ private:
uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
// Operand OpNum of MI needs a PC-relative fixup of kind Kind at
// Offset bytes from the start of MI. Add the fixup to Fixups
// and return the in-place addend, which since we're a RELA target
- // is always 0.
+ // is always 0. If AllowTLS is true and optional operand OpNum + 1
+ // is present, also emit a TLS call fixup for it.
uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
- unsigned Kind, int64_t Offset) const;
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const;
uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, false);
}
uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, false);
+ }
+ uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, true);
+ }
+ uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, true);
}
};
} // end anonymous namespace
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &MCSTI,
MCContext &Ctx) {
return new SystemZMCCodeEmitter(MCII, Ctx);
}
void SystemZMCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
@@ -178,10 +198,22 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
return (Len << 16) | (Base << 12) | Disp;
}
+uint64_t SystemZMCCodeEmitter::
+getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
+ return (Index << 16) | (Base << 12) | Disp;
+}
+
uint64_t
SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
- unsigned Kind, int64_t Offset) const {
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const {
const MCOperand &MO = MI.getOperand(OpNum);
const MCExpr *Expr;
if (MO.isImm())
@@ -197,7 +229,14 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
}
}
- Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
+ Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind));
+
+ // Output the fixup for the TLS marker if present.
+ if (AllowTLS && OpNum + 1 < MI.getNumOperands()) {
+ const MCOperand &MOTLS = MI.getOperand(OpNum + 1);
+ Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(),
+ (MCFixupKind)SystemZ::FK_390_TLS_CALL));
+ }
return 0;
}
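// Worked example of the BDV layout (register numbers chosen for
// illustration, not from the patch): with Base = 5, Disp = 0x123 and vector
// Index = 17, getBDVAddr12Encoding returns
//
//   (17 << 16) | (5 << 12) | 0x123 == 0x115123
//
// The index field is 5 bits because vector registers run V0-V31; as we
// understand the format, the fifth bit is carried by the instruction's RXB
// field rather than the 4-bit index slot. The TLS path above adds a second
// fixup at offset 0 with kind FK_390_TLS_CALL, so one BRASL can carry both
// its PC32DBL target fixup and the TLS marker relocation.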
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 52a8d1d..229ab5d 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -18,8 +18,7 @@ enum FixupKind {
// These correspond directly to R_390_* relocations.
FK_390_PC16DBL = FirstTargetFixupKind,
FK_390_PC32DBL,
- FK_390_PLT16DBL,
- FK_390_PLT32DBL,
+ FK_390_TLS_CALL,
// Marker
LastTargetFixupKind,
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index c6a1816..ee1af02 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
- virtual ~SystemZObjectWriter();
+ ~SystemZObjectWriter() override;
protected:
// Override MCELFObjectTargetWriter.
@@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned Kind) {
case FK_Data_8: return ELF::R_390_PC64;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
- case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL;
- case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL;
}
llvm_unreachable("Unsupported PC-relative address");
}
@@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned Kind) {
llvm_unreachable("Unsupported absolute address");
}
+// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDOReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDO32;
+ case FK_Data_8: return ELF::R_390_TLS_LDO64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDMReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDM32;
+ case FK_Data_8: return ELF::R_390_TLS_LDM64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
+static unsigned getTLSGDReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_GD32;
+ case FK_Data_8: return ELF::R_390_TLS_GD64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
// Return the PLT relocation counterpart of MCFixupKind Kind.
static unsigned getPLTReloc(unsigned Kind) {
switch (Kind) {
@@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
return getTLSLEReloc(Kind);
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+ return ELF::R_390_TLS_IEENT;
+ llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
+
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
+ return getTLSLDOReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
+ return getTLSLDMReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
+ return getTLSGDReloc(Kind);
+
case MCSymbolRefExpr::VK_GOT:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_GOTENT;
@@ -108,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
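// Summary of the relocation selection above (symbol "x" is illustrative):
//
//   x@TLSGD   in 4/8-byte data       -> R_390_TLS_GD32 / R_390_TLS_GD64
//   x@TLSGD   on a FK_390_TLS_CALL   -> R_390_TLS_GDCALL
//   x@TLSLDM  on a FK_390_TLS_CALL   -> R_390_TLS_LDCALL
//   x@DTPOFF  in 4/8-byte data       -> R_390_TLS_LDO32 / R_390_TLS_LDO64
//   x@INDNTPOFF via FK_390_PC32DBL   -> R_390_TLS_IEENT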
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 6e82b6d..8c2075a 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -12,6 +12,7 @@
#include "SystemZMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
@@ -76,6 +77,39 @@ const unsigned SystemZMC::FP128Regs[16] = {
SystemZ::F12Q, SystemZ::F13Q, 0, 0
};
+const unsigned SystemZMC::VR32Regs[32] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S,
+ SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S,
+ SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S,
+ SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S
+};
+
+const unsigned SystemZMC::VR64Regs[32] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
+ SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D,
+ SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D,
+ SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D,
+ SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D
+};
+
+const unsigned SystemZMC::VR128Regs[32] = {
+ SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3,
+ SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7,
+ SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11,
+ SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15,
+ SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19,
+ SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23,
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31
+};
+
unsigned SystemZMC::getFirstReg(unsigned Reg) {
static unsigned Map[SystemZ::NUM_TARGET_REGS];
static bool Initialized = false;
@@ -85,10 +119,13 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
Map[GRH32Regs[I]] = I;
Map[GR64Regs[I]] = I;
Map[GR128Regs[I]] = I;
- Map[FP32Regs[I]] = I;
- Map[FP64Regs[I]] = I;
Map[FP128Regs[I]] = I;
}
+ for (unsigned I = 0; I < 32; ++I) {
+ Map[VR32Regs[I]] = I;
+ Map[VR64Regs[I]] = I;
+ Map[VR128Regs[I]] = I;
+ }
}
assert(Reg < SystemZ::NUM_TARGET_REGS);
return Map[Reg];
@@ -168,27 +205,18 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CM = CodeModel::Small;
else if (CM == CodeModel::JITDefault)
CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *
-createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
-}
-
extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
@@ -221,8 +249,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
createSystemZMCInstPrinter);
-
- // Register the MCObjectStreamer;
- TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
- createSystemZMCObjectStreamer);
}
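// Usage sketch (values follow from the tables above, not from the patch):
// getFirstReg collapses every view of a register to its architectural
// number, so F21D and F21S both map to 21, and the VR128 table recovers the
// full 128-bit view:
//
//   unsigned Num = SystemZMC::getFirstReg(SystemZ::F21D); // 21
//   unsigned Vec = SystemZMC::VR128Regs[Num];             // SystemZ::V21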
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 5eb6526..36ea750 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSystemZTarget;
@@ -48,6 +49,9 @@ extern const unsigned GR128Regs[16];
extern const unsigned FP32Regs[16];
extern const unsigned FP64Regs[16];
extern const unsigned FP128Regs[16];
+extern const unsigned VR32Regs[32];
+extern const unsigned VR64Regs[32];
+extern const unsigned VR128Regs[32];
// Return the 0-based number of the first architectural register that
// contains the given LLVM register. E.g. R1D -> 1.
@@ -67,18 +71,22 @@ inline unsigned getRegAsGR32(unsigned Reg) {
inline unsigned getRegAsGRH32(unsigned Reg) {
return GRH32Regs[getFirstReg(Reg)];
}
+
+// Return the given register as a VR128.
+inline unsigned getRegAsVR128(unsigned Reg) {
+ return VR128Regs[getFirstReg(Reg)];
+}
} // end namespace SystemZMC
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
index c8b95b2..cafe2c5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,25 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
+// Condition-code mask assignments for vector comparisons (and similar
+// operations).
+const unsigned CCMASK_VCMP_ALL = CCMASK_0;
+const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
+const unsigned CCMASK_VCMP_NONE = CCMASK_3;
+const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
@@ -75,6 +94,13 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
+// Number of bits in a vector register.
+const unsigned VectorBits = 128;
+
+// Number of bytes in a vector register (and consequently the number of
+// bytes in a general permute vector).
+const unsigned VectorBytes = VectorBits / 8;
+
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
@@ -111,6 +137,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
} // end namespace llvm
#endif
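// Worked checks (ours, not from the patch): the CCMASK_* values select bits
// of the 4-bit condition code, so CCMASK_TEND only admits CC 0 and CC 2,
// the TX and NOTX outcomes. For the immediate helpers, LLILL loads a 16-bit
// value into the low halfword:
//
//   SystemZ::isImmLL(0x0000ffffULL); // true, fits LLILL
//   SystemZ::isImmLL(0x00010000ULL); // false, this is LLILH territory
//   SystemZ::VectorBytes;            // 16, one general permute vector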
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
index 5f82903..d4d636d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -40,6 +40,7 @@ include "SystemZOperands.td"
include "SystemZPatterns.td"
include "SystemZInstrFormats.td"
include "SystemZInstrInfo.td"
+include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
def SystemZInstrInfo : InstrInfo {}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index f4f3ec7..a0d079f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
.addImm(MI->getOperand(5).getImm());
}
+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
+ StringRef Name = "__tls_get_offset";
+ return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_PLT,
+ Context);
+}
+
+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_None,
+ Context);
+}
+
+// MI loads the high part of a vector from memory. Return an instruction
+// that uses replicating vector load Opcode to do the same thing.
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory. Return an instruction
+// that uses elemental vector store Opcode to do the same thing.
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg())
+ .addImm(0);
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
@@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
break;
+ case SystemZ::TLS_GDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD));
+ break;
+
+ case SystemZ::TLS_LDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM));
+ break;
+
+ case SystemZ::GOT:
+ LoweredMI = MCInstBuilder(SystemZ::LARL)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(getGlobalOffsetTable(MF->getContext()));
+ break;
+
case SystemZ::IILF64:
LoweredMI = MCInstBuilder(SystemZ::IILF)
.addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
@@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
break;
+ case SystemZ::VLVGP32:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg()))
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
+ break;
+
+ case SystemZ::VLR32:
+ case SystemZ::VLR64:
+ LoweredMI = MCInstBuilder(SystemZ::VLR)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+ break;
+
+ case SystemZ::VL32:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+ break;
+
+ case SystemZ::VL64:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+ break;
+
+ case SystemZ::VST32:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+ break;
+
+ case SystemZ::VST64:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+ break;
+
+ case SystemZ::LFER:
+ LoweredMI = MCInstBuilder(SystemZ::VLGVF)
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()))
+ .addReg(0).addImm(0);
+ break;
+
+ case SystemZ::LEFR:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGF)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(0).addImm(0);
+ break;
+
#define LOWER_LOW(NAME) \
case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
@@ -152,7 +252,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#undef LOWER_HIGH
case SystemZ::Serialize:
- if (Subtarget->hasFastSerialization())
+ if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization())
LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
.addImm(14).addReg(SystemZ::R0D);
else
@@ -164,7 +264,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
Lower.lower(MI, LoweredMI);
break;
}
- EmitToStreamer(OutStreamer, LoweredMI);
+ EmitToStreamer(*OutStreamer, LoweredMI);
}
// Convert a SystemZ-specific constant pool modifier into the associated
@@ -172,6 +272,9 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
switch (Modifier) {
+ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
+ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM;
+ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF;
case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
}
llvm_unreachable("Invalid SystemCPModifier!");
@@ -185,10 +288,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
- OutStreamer.EmitValue(Expr, Size);
+ OutStreamer->EmitValue(Expr, Size);
}
bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
@@ -219,29 +321,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetELF()) {
- auto &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
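// Sketch of the lowered TLS call (symbol "x" is illustrative, not from the
// patch): a TLS_GDCALL pseudo becomes a single BRASL whose target is
// __tls_get_offset@PLT and whose extra operand is x@TLSGD, printed as
//
//   brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
//
// The branch target relocates through the PLT while the marker operand
// becomes an R_390_TLS_GDCALL relocation at the start of the instruction,
// which is what allows the linker to optimize the call for static TLS models.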
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 6467279..7f6e823 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -22,14 +22,9 @@ class Module;
class raw_ostream;
class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
-private:
- const SystemZSubtarget *Subtarget;
-
public:
- SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<SystemZSubtarget>();
- }
+ SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
// Override AsmPrinter.
const char *getPassName() const override {
@@ -43,7 +38,6 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
- void EmitEndOfAsmFile(Module &M) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 71605ac..bff0706 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -10,6 +10,9 @@
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
namespace llvm {
namespace SystemZ {
const unsigned NumArgGPRs = 5;
@@ -18,6 +21,64 @@ namespace SystemZ {
const unsigned NumArgFPRs = 4;
extern const unsigned ArgFPRs[NumArgFPRs];
} // end namespace SystemZ
+
+class SystemZCCState : public CCState {
+private:
+ /// Records whether the value was a fixed argument.
+ /// See ISD::OutputArg::IsFixed.
+ SmallVector<bool, 4> ArgIsFixed;
+
+ /// Records whether the value was widened from a short vector type.
+ SmallVector<bool, 4> ArgIsShortVector;
+
+ // Check whether ArgVT is a short vector type.
+ bool IsShortVectorType(EVT ArgVT) {
+ return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
+ }
+
+public:
+ SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CCState(CC, isVarArg, MF, locs, C) {}
+
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ // Formal arguments are always fixed.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsFixed.push_back(true);
+    // Record whether the argument was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
+
+ CCState::AnalyzeFormalArguments(Ins, Fn);
+ }
+
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Record whether the call operand was a fixed argument.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsFixed.push_back(Outs[i].IsFixed);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
+
+ CCState::AnalyzeCallOperands(Outs, Fn);
+ }
+
+ // This version of AnalyzeCallOperands in the base class is not usable
+ // since we must provide a means of accessing ISD::OutputArg::IsFixed.
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) = delete;
+
+ bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
+ bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
+};
+
} // end namespace llvm
#endif
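// Note on IsShortVectorType (our reading, not from the patch): "short"
// means a store size of at most 8 bytes, so v2i32 and v4i8 qualify while
// v4i32 does not. The flags must be captured per argument because type
// legalization widens short vectors to full 128-bit types before CCState
// runs, e.g.:
//
//   SystemZCCState CCInfo(CC, IsVarArg, MF, ArgLocs, Context);
//   CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
//   bool Short = CCInfo.IsShortVector(0); // queried by ValNo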
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index fb0d1d8..be8f00b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -12,6 +12,20 @@
class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
+
+// Match if this specific argument is a fixed (i.e. named) argument.
+class CCIfFixed<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
+
+// Match if this specific argument was widened from a short vector type.
+class CCIfShortVector<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
+
+
//===----------------------------------------------------------------------===//
// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
@@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[
// doesn't care about the ABI. All floating-point argument registers
// are call-clobbered, so we can use all of them here.
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+ // Similarly for vectors, with V24 being the ABI-compliant choice.
+ // Sub-128 vectors are returned in the same way, but they're widened
+ // to one of these types during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
// ABI-compliant code returns long double by reference, but that conversion
// is left to higher-level code. Perhaps we could add an f128 definition
@@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+ // are passed in the same way, but they're widened to one of these types
+ // during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
+ V25, V27, V29, V31]>>>>,
+
+ // However, sub-128 vectors which need to go on the stack occupy just a
+ // single 8-byte-aligned 8-byte stack slot. Pass as i64.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfShortVector<CCBitConvertToType<i64>>>>,
+
+ // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 8>>>,
+
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
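// Illustration (ours, not from the patch): for a fixed argument list of
// nine <4 x i32> values on a hasVector() target, the first eight land in
// V24, V26, V28, V30, V25, V27, V29, V31 and the ninth takes a 16-byte
// stack slot with 8-byte alignment. A widened <2 x i32> that misses the
// registers is instead bit-converted to i64 and occupies one 8-byte slot.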
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 19cec21..44ea1d2 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
unsigned SystemZConstantPoolValue::getRelocationInfo() const {
switch (Modifier) {
+ case SystemZCP::TLSGD:
+ case SystemZCP::TLSLDM:
+ case SystemZCP::DTPOFF:
+ // May require a dynamic relocation.
+ return 2;
case SystemZCP::NTPOFF:
// May require a relocation, but the relocations are always resolved
// by the static linker.
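// Gloss on getRelocationInfo (convention from LLVM's Constant interface):
// 0 means no relocation, 1 a relocation the static linker always resolves,
// 2 a possibly dynamic relocation. TLSGD, TLSLDM and DTPOFF entries can
// name symbols bound at load time, hence the 2 above, while NTPOFF remains
// statically resolvable.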
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 0bd8c20..e5f1bb1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -19,13 +19,17 @@ class GlobalValue;
namespace SystemZCP {
enum SystemZCPModifier {
+ TLSGD,
+ TLSLDM,
+ DTPOFF,
NTPOFF
};
} // end namespace SystemZCP
/// A SystemZ-specific constant pool value. At present, the only
-/// defined constant pool values are offsets of thread-local variables
-/// (written x@NTPOFF).
+/// defined constant pool values are module IDs or offsets of
+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF,
+/// or x@NTPOFF).
class SystemZConstantPoolValue : public MachineConstantPoolValue {
const GlobalValue *GV;
SystemZCP::SystemZCPModifier Modifier;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index ce99ee5..16f9adc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -47,7 +47,7 @@ struct Reference {
return *this;
}
- LLVM_EXPLICIT operator bool() const { return Def || Use; }
+ explicit operator bool() const { return Def || Use; }
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index eff4ae3..a636b35 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -309,8 +309,9 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFFrame = MF.getFrameInfo();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index cefa56f..60bad89 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -40,7 +40,7 @@ public:
override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5f84624..6399293 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -127,12 +127,11 @@ struct RxSBGOperands {
};
class SystemZDAGToDAGISel : public SelectionDAGISel {
- const SystemZTargetLowering &Lowering;
- const SystemZSubtarget &Subtarget;
+ const SystemZSubtarget *Subtarget;
// Used by SystemZOperands.td to create integer constants.
inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
}
const SystemZTargetMachine &getTargetMachine() const {
@@ -140,7 +139,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
const SystemZInstrInfo *getInstrInfo() const {
- return getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ return Subtarget->getInstrInfo();
}
// Try to fold more of the base or index of AM into AM, where IsBase
@@ -256,6 +255,13 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
Addr, Base, Disp, Index);
}
+ // Try to match Addr as an address with a base, 12-bit displacement
+ // and index, where the index is element Elem of a vector.
+ // Return true on success, storing the base, displacement and vector
+ // in Base, Disp and Index respectively.
+ bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
+ SDValue &Disp, SDValue &Index) const;
+
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op. Return true if so and
// set Op to that Y.
@@ -293,6 +299,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Try to use gather instruction Opcode to implement vector insertion N.
+ SDNode *tryGather(SDNode *N, unsigned Opcode);
+
+ // Try to use scatter instruction Opcode to implement store Store.
+ SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
+
// Return true if Load and Store are loads and stores of the same size
// and are guaranteed not to overlap. Such operations can be implemented
// using block (SS-format) instructions.
@@ -315,9 +327,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) {}
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
// Override MachineFunctionPass.
const char *getPassName() const override {
@@ -326,7 +341,7 @@ public:
// Override SelectionDAGISel.
SDNode *Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -594,7 +609,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
}
// Lower the displacement to a TargetConstant.
- Disp = CurDAG->getTargetConstant(AM.Disp, VT);
+ Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT);
}
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
@@ -643,6 +658,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
return true;
}
+bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
+ SDValue &Base,
+ SDValue &Disp,
+ SDValue &Index) const {
+ SDValue Regs[2];
+ if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
+ Regs[0].getNode() && Regs[1].getNode()) {
+ for (unsigned int I = 0; I < 2; ++I) {
+ Base = Regs[I];
+ Index = Regs[1 - I];
+ // We can't tell here whether the index vector has the right type
+ // for the access; the caller needs to do that instead.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND)
+ Index = Index.getOperand(0);
+ if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Index.getOperand(1) == Elem) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
uint64_t InsertMask) const {
// We're only interested in cases where the insertion is into some operand
@@ -862,6 +901,7 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
}
SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
@@ -887,7 +927,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
// Force the new mask into the DAG, since it may include known-one bits.
auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode());
if (MaskN->getZExtValue() != RISBG.Mask) {
- SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT);
+ SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT);
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
return SelectCode(N);
}
@@ -896,22 +936,25 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
unsigned Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
EVT OpcodeVT = MVT::i64;
- if (VT == MVT::i32 && Subtarget.hasHighWord()) {
+ if (VT == MVT::i32 && Subtarget->hasHighWord()) {
Opcode = SystemZ::RISBMux;
OpcodeVT = MVT::i32;
RISBG.Start &= 31;
RISBG.End &= 31;
}
SDValue Ops[5] = {
- getUNDEF(SDLoc(N), OpcodeVT),
- convertTo(SDLoc(N), OpcodeVT, RISBG.Input),
- CurDAG->getTargetConstant(RISBG.Start, MVT::i32),
- CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32),
- CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32)
+ getUNDEF(DL, OpcodeVT),
+ convertTo(DL, OpcodeVT, RISBG.Input),
+ CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops);
- return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+ N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
}
SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
@@ -943,19 +986,24 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
Opcode = SystemZ::RISBG;
-
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ }
+
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Ops[5] = {
- convertTo(SDLoc(N), MVT::i64, Op0),
- convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input),
- CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32),
- CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32),
- CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32)
+ convertTo(DL, MVT::i64, Op0),
+ convertTo(DL, MVT::i64, RxSBG[I].Input),
+ CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops);
- return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+ N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
}
SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
@@ -963,16 +1011,81 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
uint64_t LowerVal) {
EVT VT = Node->getValueType(0);
SDLoc DL(Node);
- SDValue Upper = CurDAG->getConstant(UpperVal, VT);
+ SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
if (Op0.getNode())
Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
Upper = SDValue(Select(Upper.getNode()), 0);
- SDValue Lower = CurDAG->getConstant(LowerVal, VT);
+ SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
return Or.getNode();
}
+SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
+ SDValue ElemV = N->getOperand(2);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return 0;
+
+ unsigned Elem = ElemN->getZExtValue();
+ EVT VT = N->getValueType(0);
+ if (Elem >= VT.getVectorNumElements())
+ return 0;
+
+ auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!Load || !Load->hasOneUse())
+ return 0;
+ if (Load->getMemoryVT().getSizeInBits() !=
+ Load->getValueType(0).getSizeInBits())
+ return 0;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return 0;
+
+ SDLoc DL(Load);
+ SDValue Ops[] = {
+ N->getOperand(0), Base, Disp, Index,
+ CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
+ };
+ SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
+ ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
+ return Res;
+}
+
+SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
+ SDValue Value = Store->getValue();
+ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return 0;
+ if (Store->getMemoryVT().getSizeInBits() !=
+ Value.getValueType().getSizeInBits())
+ return 0;
+
+ SDValue ElemV = Value.getOperand(1);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return 0;
+
+ SDValue Vec = Value.getOperand(0);
+ EVT VT = Vec.getValueType();
+ unsigned Elem = ElemN->getZExtValue();
+ if (Elem >= VT.getVectorNumElements())
+ return 0;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return 0;
+
+ SDLoc DL(Store);
+ SDValue Ops[] = {
+ Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
+ Store->getChain()
+ };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+}
+
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
LoadSDNode *Load) const {
// Check that the two memory operands have the same size.
@@ -1102,13 +1215,33 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
uint64_t ConstCCMask =
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
// Invert the condition.
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask,
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
CCMask.getValueType());
SDValue Op4 = Node->getOperand(4);
Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
}
break;
}
+
+ case ISD::INSERT_VECTOR_ELT: {
+ EVT VT = Node->getValueType(0);
+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+ if (ElemBitSize == 32)
+ ResNode = tryGather(Node, SystemZ::VGEF);
+ else if (ElemBitSize == 64)
+ ResNode = tryGather(Node, SystemZ::VGEG);
+ break;
+ }
+
+ case ISD::STORE: {
+ auto *Store = cast<StoreSDNode>(Node);
+ unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
+ if (ElemBitSize == 32)
+ ResNode = tryScatter(Store, SystemZ::VSCEF);
+ else if (ElemBitSize == 64)
+ ResNode = tryScatter(Store, SystemZ::VSCEG);
+ break;
+ }
}
// Select the default instruction
@@ -1127,18 +1260,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
bool SystemZDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "Unexpected constraint code");
- // Accept addresses with short displacements, which are compatible
- // with Q, R, S and T. But keep the index operand for future expansion.
- SDValue Base, Disp, Index;
- if (!selectBDXAddr(SystemZAddressingMode::FormBD,
- SystemZAddressingMode::Disp12Only,
- Op, Base, Disp, Index))
- return true;
- OutOps.push_back(Base);
- OutOps.push_back(Disp);
- OutOps.push_back(Index);
- return false;
+  switch (ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_S:
+ case InlineAsm::Constraint_T:
+ // Accept addresses with short displacements, which are compatible
+ // with Q, R, S and T. But keep the index operand for future expansion.
+ SDValue Base, Disp, Index;
+ if (selectBDXAddr(SystemZAddressingMode::FormBD,
+ SystemZAddressingMode::Disp12Only,
+ Op, Base, Disp, Index)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+ break;
+ }
+ return true;
}
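// Sketch of the gather pattern (IR names are ours, not from the patch):
// given
//
//   %elt = load i32, i32* %ptr    ; %ptr's index register is
//                                 ; extractelement <4 x i32> %idx, i32 1
//   %res = insertelement <4 x i32> %vec, i32 %elt, i32 1
//
// selectBDVAddr12Only spots that the address index is element 1 of %idx,
// and tryGather folds the load and insert into one VGEF, re-wiring the
// load's chain users to the new node via ReplaceUses.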
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index f7ac1ca..24b5a41 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
- : TargetLowering(tm),
- Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+ const SystemZSubtarget &STI)
+ : TargetLowering(tm), Subtarget(STI) {
MVT PtrVT = getPointerTy();
// Set up the register classes.
@@ -90,13 +91,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
+
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
setExceptionPointerRegister(SystemZ::R6D);
@@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Instructions are strings of 2-byte aligned 2-byte values.
setMinFunctionAlignment(2);
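// Rationale sketch (ours): z vector compares produce all-ones lane masks,
// e.g. a v4i32 "less than" of {1,2,3,4} and {2,2,4,4} yields
// {0xffffffff, 0, 0xffffffff, 0}, which is why the boolean-vector contents
// switch to ZeroOrNegativeOneBooleanContent above.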
@@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// Handle prefetches with PFD or PFDRL.
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ // Assume by default that all vector operations need to be expanded.
+ for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
+ if (getOperationAction(Opcode, VT) == Legal)
+ setOperationAction(Opcode, VT, Expand);
+
+ // Likewise all truncating stores and extending loads.
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+
+ if (isTypeLegal(VT)) {
+ // These operations are legal for anything that can be stored in a
+ // vector register, even if there is no native support for the format
+ // as such. In particular, we can do these for v4f32 even though there
+ // are no specific instructions for that format.
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+
+ // Likewise, except that we need to replace the nodes with something
+ // more specific.
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+ }
+
+ // Handle integer vector types.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ if (isTypeLegal(VT)) {
+ // These operations have direct equivalents.
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ if (VT != MVT::v2i64)
+ setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+
+ // Convert a GPR scalar to a vector by inserting it into element 0.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
+ // Use a series of unpacks for extensions.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
+ // Detect shifts by a scalar amount and convert them into
+ // V*_BY_SCALAR.
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+
+ // At present ROTL isn't matched by DAGCombiner. ROTR should be
+ // converted into ROTL.
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
+ // and inverting the result as necessary.
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ }
+
+ if (Subtarget.hasVector()) {
+ // There should be no need to check for float types other than v2f64
+ // since <2 x f32> isn't a legal type.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ }
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
}
}
+ // Handle floating-point vector types.
+ if (Subtarget.hasVector()) {
+ // Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ // Some insertions and extractions can be done directly but others
+ // need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ // These operations have direct equivalents.
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ }
+
// We have fused multiply-addition for f32 and f64 but not f128.
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
@@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// We have 64-bit FPR<->GPR moves, but need special handling for
// 32-bit forms.
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ if (!Subtarget.hasVector()) {
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ }
// VASTART and VACOPY need to deal with the SystemZ-specific varargs
// structure, but VAEND is a no-op.
@@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::FP_ROUND);
+
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
@@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // We can use CGFI or CLGFI.
+ return isInt<32>(Imm) || isUInt<32>(Imm);
+}
+
+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // We can use ALGFI or SLGFI.
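+ // E.g. Imm == -1 is legal because -Imm == 1 fits in 32 bits (SLGFI).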
+ return isUInt<32>(Imm) || isUInt<32>(-Imm);
+}
+
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
@@ -499,8 +652,10 @@ parseRegisterNumber(const std::string &Constraint,
return std::make_pair(0U, nullptr);
}
-std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+SystemZTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -557,7 +712,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
SystemZMC::FP64Regs);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
void SystemZTargetLowering::
@@ -570,35 +725,35 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<8>(C->getZExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'J': // Unsigned 12-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<12>(C->getZExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'K': // Signed 16-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<16>(C->getSExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'L': // Signed 20-bit displacement (on all targets we support)
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<20>(C->getSExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'M': // 0x7fffffff
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (C->getZExtValue() == 0x7fffffff)
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
}
@@ -623,6 +778,24 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
return true;
}
+// We do not yet support 128-bit single-element vector types. If the user
+// attempts to use such types as a function argument or return type, prefer
+// to error out instead of emitting code violating the ABI.
+static void VerifyVectorType(MVT VT, EVT ArgVT) {
+ if (ArgVT.isVector() && !VT.isVector())
+ report_fatal_error("Unsupported vector argument or return type");
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
+}
+
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
@@ -643,7 +816,15 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
else if (VA.getLocInfo() == CCValAssign::Indirect)
Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
MachinePointerInfo(), false, false, false, 0);
- else
+ else if (VA.getLocInfo() == CCValAssign::BCvt) {
+ // If this is a short vector argument loaded from the stack,
+ // extend from i64 to full vector size and then bitcast.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
+ Value, DAG.getUNDEF(MVT::i64));
+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
+ } else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
return Value;
}
@@ -660,6 +841,14 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::BCvt:
+ // If this is a short vector argument to be stored to the stack,
+ // bitcast to v2i64 and then extract first element.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
case CCValAssign::Full:
return Value;
default:
@@ -676,13 +865,17 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
- MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL = static_cast<const SystemZFrameLowering *>(
- DAG.getSubtarget().getFrameLowering());
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+
+ // Detect unsupported vector argument types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Ins);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -714,6 +907,14 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RC = &SystemZ::VR128BitRegClass;
+ break;
}
unsigned VReg = MRI.createVirtualRegister(RC);
@@ -732,7 +933,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
EVT PtrVT = getPointerTy();
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -818,9 +1020,15 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
+ // Detect unsupported vector argument and return types.
+ if (Subtarget.hasVector()) {
+ VerifyVectorTypes(Outs);
+ VerifyVectorTypes(Ins);
+ }
+
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -833,7 +1041,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Mark the start of the call.
if (!IsTailCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
DL);
// Copy argument values to their designated locations.
@@ -869,7 +1078,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, DL));
// Emit the store.
MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
@@ -917,9 +1126,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass[I].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -936,8 +1144,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Mark the end of the call, which is glued to the call itself.
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, PtrVT, true),
- DAG.getConstant(0, PtrVT, true),
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true),
Glue, DL);
Glue = Chain.getValue(1);
@@ -972,6 +1180,10 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
SDLoc DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ // Detect unsupported vector return types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Outs);
+
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
@@ -1015,6 +1227,207 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) argument. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Return true if Op is an intrinsic node without chain that returns the
+// CC value as its final argument. Provide the associated SystemZISD
+// opcode and the mask of valid CC values if so.
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpkshs:
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ Opcode = SystemZISD::PACKS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vpklshs:
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ Opcode = SystemZISD::PACKLS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vceqbs:
+ case Intrinsic::s390_vceqhs:
+ case Intrinsic::s390_vceqfs:
+ case Intrinsic::s390_vceqgs:
+ Opcode = SystemZISD::VICMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchbs:
+ case Intrinsic::s390_vchhs:
+ case Intrinsic::s390_vchfs:
+ case Intrinsic::s390_vchgs:
+ Opcode = SystemZISD::VICMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchlbs:
+ case Intrinsic::s390_vchlhs:
+ case Intrinsic::s390_vchlfs:
+ case Intrinsic::s390_vchlgs:
+ Opcode = SystemZISD::VICMPHLS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vtm:
+ Opcode = SystemZISD::VTM;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfaebs:
+ case Intrinsic::s390_vfaehs:
+ case Intrinsic::s390_vfaefs:
+ Opcode = SystemZISD::VFAE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfaezbs:
+ case Intrinsic::s390_vfaezhs:
+ case Intrinsic::s390_vfaezfs:
+ Opcode = SystemZISD::VFAEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeebs:
+ case Intrinsic::s390_vfeehs:
+ case Intrinsic::s390_vfeefs:
+ Opcode = SystemZISD::VFEE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeezbs:
+ case Intrinsic::s390_vfeezhs:
+ case Intrinsic::s390_vfeezfs:
+ Opcode = SystemZISD::VFEEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenebs:
+ case Intrinsic::s390_vfenehs:
+ case Intrinsic::s390_vfenefs:
+ Opcode = SystemZISD::VFENE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenezbs:
+ case Intrinsic::s390_vfenezhs:
+ case Intrinsic::s390_vfenezfs:
+ Opcode = SystemZISD::VFENEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vistrbs:
+ case Intrinsic::s390_vistrhs:
+ case Intrinsic::s390_vistrfs:
+ Opcode = SystemZISD::VISTR_CC;
+ CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
+ return true;
+
+ case Intrinsic::s390_vstrcbs:
+ case Intrinsic::s390_vstrchs:
+ case Intrinsic::s390_vstrcfs:
+ Opcode = SystemZISD::VSTRC_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vstrczbs:
+ case Intrinsic::s390_vstrczhs:
+ case Intrinsic::s390_vstrczfs:
+ Opcode = SystemZISD::VSTRCZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfcedbs:
+ Opcode = SystemZISD::VFCMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchdbs:
+ Opcode = SystemZISD::VFCMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchedbs:
+ Opcode = SystemZISD::VFCMPHES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vftcidb:
+ Opcode = SystemZISD::VFTCI;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain, using a glued value in place of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
+// Emit an intrinsic with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ for (unsigned I = 1; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ if (Op->getNumValues() == 1)
+ return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
+ assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
+ SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
+ return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1112,7 +1525,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
-static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
+static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.ICmpType == SystemZICMP::UnsignedOnly)
return;
@@ -1126,13 +1539,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
(Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
(Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
- C.Op1 = DAG.getConstant(0, C.Op1.getValueType());
+ C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
}
}
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
+static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// For us to make any changes, it must be a comparison between a single-use
// load and a constant.
if (!C.Op0.hasOneUse() ||
@@ -1197,7 +1610,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
Value != ConstOp1->getZExtValue())
- C.Op1 = DAG.getConstant(Value, MVT::i32);
+ C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
// Return true if Op is either an unextended load, or a load suitable
@@ -1293,7 +1706,7 @@ static unsigned reverseCCMask(unsigned CCMask) {
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
-static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
+static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
C.CCMask == SystemZ::CCMASK_CMP_NE) {
for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -1302,7 +1715,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
(N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
C.Op0 = SDValue(N, 0);
- C.Op1 = DAG.getConstant(0, N->getValueType(0));
+ C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
return;
}
}
@@ -1358,7 +1771,7 @@ static void adjustForLTGFR(Comparison &C) {
// If C compares the truncation of an extending load, try to compare
// the untruncated value instead. This exposes more opportunities to
// reuse CC.
-static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.Op0.getOpcode() == ISD::TRUNCATE &&
C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
C.Op1.getOpcode() == ISD::Constant &&
@@ -1370,7 +1783,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
(Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
C.Op0 = C.Op0.getOperand(0);
- C.Op1 = DAG.getConstant(0, C.Op0.getValueType());
+ C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
}
}
}
@@ -1489,7 +1902,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// Check that we have a comparison with a constant.
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
if (!ConstOp1)
@@ -1529,6 +1942,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
MaskVal = -(CmpVal & -CmpVal);
NewC.ICmpType = SystemZICMP::UnsignedOnly;
}
+ if (!MaskVal)
+ return;
// Check whether the combination of mask, comparison value and comparison
// type are suitable.
@@ -1565,14 +1980,62 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
if (Mask && Mask->getZExtValue() == MaskVal)
C.Op1 = SDValue(Mask, 0);
else
- C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType());
+ C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
C.CCValid = SystemZ::CCMASK_TM;
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
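+// For example, Cond == SETLE with CC == 1 gives CCMask = -1 << 2; masked
+// with CCValid this keeps bits 3 and 2, accepting CC values 0 and 1.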
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode Cond) {
+ ISD::CondCode Cond, SDLoc DL) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
+ isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1596,11 +2059,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
else
C.ICmpType = SystemZICMP::SignedOnly;
C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
- adjustZeroCmp(DAG, C);
- adjustSubwordCmp(DAG, C);
- adjustForSubtraction(DAG, C);
+ adjustZeroCmp(DAG, DL, C);
+ adjustSubwordCmp(DAG, DL, C);
+ adjustForSubtraction(DAG, DL, C);
adjustForLTGFR(C);
- adjustICmpTruncate(DAG, C);
+ adjustICmpTruncate(DAG, DL, C);
}
if (shouldSwapCmpOperands(C)) {
@@ -1608,20 +2071,34 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
C.CCMask = reverseCCMask(C.CCMask);
}
- adjustForTestUnderMask(DAG, C);
+ adjustForTestUnderMask(DAG, DL, C);
return C;
}
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
- DAG.getConstant(C.ICmpType, MVT::i32));
+ DAG.getConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
- DAG.getConstant(RegisterOnly, MVT::i32));
+ DAG.getConstant(RegisterOnly, DL, MVT::i32));
}
return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
}
@@ -1635,7 +2112,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
- Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
+ DAG.getConstant(32, DL, MVT::i64));
Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
@@ -1667,46 +2145,175 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
if (Conversion.XORValue)
Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.XORValue, MVT::i32));
+ DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
if (Conversion.AddValue)
Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.AddValue, MVT::i32));
+ DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
// The SHR/AND sequence should get optimized to an RISBG.
Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.Bit, MVT::i32));
+ DAG.getConstant(Conversion.Bit, DL, MVT::i32));
if (Conversion.Bit != 31)
Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
return Result;
}
+// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
+// be done directly. IsFP is true if CC is for a floating-point rather than
+// integer comparison.
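+// Note that the integer cases cover only equality and (signed or unsigned)
+// greater-than, matching VCEQ, VCH and VCHL; the remaining conditions are
+// reached by inverting or swapping the comparison in the callers below.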
+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+ switch (CC) {
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+
+ case ISD::SETUGT:
+ return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+
+ default:
+ return 0;
+ }
+}
+
+// Return the SystemZISD vector comparison operation for CC or its inverse,
+// or 0 if neither can be done directly. Indicate in Invert whether the
+// result is for the inverse of CC. IsFP is true if CC is for a
+// floating-point rather than integer comparison.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+ bool &Invert) {
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = false;
+ return Opcode;
+ }
+
+ CC = ISD::getSetCCInverse(CC, !IsFP);
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = true;
+ return Opcode;
+ }
+
+ return 0;
+}
+
+// Return a v2f64 that contains the extended form of elements Start and Start+1
+// of v4f32 value Op.
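+// E.g. Start == 0 gives the shuffle mask { 0, -1, 1, -1 }, placing the two
+// source elements in the even lanes that VEXTEND then widens to f64.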
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+ SDValue Op) {
+ int Mask[] = { Start, -1, Start + 1, -1 };
+ Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+ return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
+}
+
+// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
+// producing a result of type VT.
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+ EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
+ // There is no hardware support for v4f32, so extend the vector into
+ // two v2f64s and compare those.
+ if (CmpOp0.getValueType() == MVT::v4f32) {
+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+ SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
+ SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
+ return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+ }
+ return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+}
+
+// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
+// an integer mask of type VT.
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ ISD::CondCode CC, SDValue CmpOp0,
+ SDValue CmpOp1) {
+ bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+ bool Invert = false;
+ SDValue Cmp;
+ switch (CC) {
+ // Handle tests for order using (or (ogt y x) (oge x y)).
+ case ISD::SETUO:
+ Invert = true;
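+ // Fall through.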
+ case ISD::SETO: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ break;
+ }
+
+ // Handle <> tests using (or (ogt y x) (ogt x y)).
+ case ISD::SETUEQ:
+ Invert = true;
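+ // Fall through.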
+ case ISD::SETONE: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ break;
+ }
+
+ // Otherwise a single comparison is enough. It doesn't really
+ // matter whether we try the inversion or the swap first, since
+ // there are no cases where both work.
+ default:
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+ else {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+ else
+ llvm_unreachable("Unhandled comparison");
+ }
+ break;
+ }
+ if (Invert) {
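+ // BYTE_MASK with the all-ones mask (65535) produces a vector of 0xFF
+ // bytes; XORing with it inverts every lane of the comparison result.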
+ SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(65535, DL, MVT::i32));
+ Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
+ }
+ return Cmp;
+}
+
SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
SelectionDAG &DAG) const {
SDValue CmpOp0 = Op.getOperand(0);
SDValue CmpOp1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue Glue = emitCmp(DAG, DL, C);
return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc DL(Op);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
- DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
+ Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
}
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@@ -1727,7 +2334,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
if (IsNegative)
Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
- DAG.getConstant(0, Op.getValueType()), Op);
+ DAG.getConstant(0, DL, Op.getValueType()), Op);
return Op;
}
@@ -1740,7 +2347,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDLoc DL(Op);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
// Check for absolute and negative-absolute selections, including those
// where the comparison value is sign-extended (for LPGFR and LNGFR).
@@ -1775,18 +2382,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
if (!is32Bit(VT))
Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
// Sign-extend from the low bit.
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32);
+ SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
}
}
- SmallVector<SDValue, 5> Ops;
- Ops.push_back(TrueOp);
- Ops.push_back(FalseOp);
- Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32));
- Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32));
- Ops.push_back(Glue);
+ SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
@@ -1826,11 +2429,58 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
- DAG.getConstant(Offset, PtrVT));
+ DAG.getConstant(Offset, DL, PtrVT));
return Result;
}
+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG,
+ unsigned Opcode,
+ SDValue GOTOffset) const {
+ SDLoc DL(Node);
+ EVT PtrVT = getPointerTy();
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Glue;
+
+ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
+ Glue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
+ Glue = Chain.getValue(1);
+
+ // The first call operand is the chain and the second is the TLS symbol.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
+ Node->getValueType(0),
+ 0, 0));
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
+ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies.
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Copy the return value from %r2.
+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
+}
+
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
SDLoc DL(Node);
@@ -1838,33 +2488,94 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
EVT PtrVT = getPointerTy();
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
-
// The high part of the thread pointer is in access register 0.
SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
// The low part of the thread pointer is in access register 1.
SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
// Merge them into a single 64-bit address.
SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
- DAG.getConstant(32, PtrVT));
+ DAG.getConstant(32, DL, PtrVT));
SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
- // Get the offset of GA from the thread pointer.
- SystemZConstantPoolValue *CPV =
- SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+ // Get the offset of GA from the thread pointer, based on the TLS model.
+ SDValue Offset;
+ switch (model) {
+ case TLSModel::GeneralDynamic: {
+ // Load the GOT offset of the tls_index (module ID / per-symbol offset).
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ // Call __tls_get_offset to retrieve the offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
+ break;
+ }
+
+ case TLSModel::LocalDynamic: {
+ // Load the GOT offset of the module ID.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
- // Force the offset into the constant pool and load it from there.
- SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
- SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- CPAddr, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ // Call __tls_get_offset to retrieve the module base offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
+
+ // Note: The SystemZLDCleanupPass will remove redundant computations
+ // of the module base offset. Count total number of local-dynamic
+ // accesses to trigger execution of that pass.
+ SystemZMachineFunctionInfo* MFI =
+ DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Add the per-symbol offset.
+ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
+
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ DTPOffset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
+ break;
+ }
+
+ case TLSModel::InitialExec: {
+ // Load the offset from the GOT.
+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ SystemZII::MO_INDNTPOFF);
+ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ break;
+ }
+
+ case TLSModel::LocalExec: {
+ // Force the offset into the constant pool and load it from there.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ break;
+ }
+ }
// Add the base and offset together.
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
@@ -1916,6 +2627,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
EVT InVT = In.getValueType();
EVT ResVT = Op.getValueType();
+ // Convert loads directly. This is normally done by DAGCombiner,
+ // but we need this case for bitcasts that are created during lowering
+ // and which are then lowered themselves.
+ if (auto *LoadN = dyn_cast<LoadSDNode>(In))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
+
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
if (Subtarget.hasHighWord()) {
@@ -1926,22 +2644,22 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
} else {
In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
}
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
- return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
MVT::f64, SDValue(U64, 0), In);
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
if (Subtarget.hasHighWord())
return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
MVT::i32, Out64);
SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
}
llvm_unreachable("Unexpected bitcast combination");
@@ -1962,8 +2680,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
// The initial values of each field.
const unsigned NumFields = 4;
SDValue Fields[NumFields] = {
- DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
- DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
};
@@ -1975,7 +2693,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
SDValue FieldAddr = Addr;
if (Offset != 0)
FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, DL));
MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
MachinePointerInfo(SV, Offset),
false, false, 0);
@@ -1993,8 +2711,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
- return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
/*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ /*isTailCall*/false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -2049,7 +2768,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
// multiplication:
//
// (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
- SDValue C63 = DAG.getConstant(63, MVT::i64);
+ SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
SDValue LL = Op.getOperand(0);
SDValue RL = Op.getOperand(1);
SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
@@ -2187,6 +2906,81 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = Op.getOperand(0);
+
+ // Handle vector types via VPOPCT.
+ if (VT.isVector()) {
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
+ switch (VT.getVectorElementType().getSizeInBits()) {
+ case 8:
+ break;
+ case 16: {
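+ // Fold the two byte counts of each halfword into its high byte via a
+ // shifted add, then shift the sum back down to the low byte: counts
+ // <a, b> become <a+b, b> and finally <0, a+b>.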
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
+ SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
+ break;
+ }
+ case 32: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ case 64: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+ return Op;
+ }
+
+ // Get the known-zero mask for the operand.
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ unsigned NumSignificantBits = (~KnownZero).getActiveBits();
+ if (NumSignificantBits == 0)
+ return DAG.getConstant(0, DL, VT);
+
+ // Skip known-zero high parts of the operand.
+ int64_t OrigBitSize = VT.getSizeInBits();
+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
+ BitSize = std::min(BitSize, OrigBitSize);
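+ // E.g. with NumSignificantBits == 9, BitSize becomes 16 and the tree
+ // below combines only the two low byte counts.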
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // positions larger than BitSize remain zero throughout.
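+ // E.g. for a 32-bit value the loop runs with I == 16 and I == 8, after
+ // which the high byte of Op holds the total count.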
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
+ if (BitSize != OrigBitSize)
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(BitSize - 8, DL, VT));
+
+ return Op;
+}
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2233,23 +3027,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
Opcode = SystemZISD::ATOMIC_LOADW_ADD;
- Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
+ Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
}
// Get the address of the containing word.
SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, PtrVT));
+ DAG.getConstant(-4, DL, PtrVT));
// Get the number of bits that the word must be rotated left in order
// to bring the field to the top bits of a GR32.
SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, PtrVT));
+ DAG.getConstant(3, DL, PtrVT));
BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
// Get the complementing shift amount, for rotating a field in the top
// bits back to its proper position.
SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, WideVT), BitShift);
+ DAG.getConstant(0, DL, WideVT), BitShift);
// Extend the source operand to 32 bits and prepare it for the inner loop.
// ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
@@ -2258,23 +3052,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
// bits must be set, while for other opcodes they should be left clear.
if (Opcode != SystemZISD::ATOMIC_SWAPW)
Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
- DAG.getConstant(32 - BitSize, WideVT));
+ DAG.getConstant(32 - BitSize, DL, WideVT));
if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
Opcode == SystemZISD::ATOMIC_LOADW_NAND)
Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
- DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));
+ DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
// Construct the ATOMIC_LOADW_* node.
SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
- DAG.getConstant(BitSize, WideVT) };
+ DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
NarrowVT, MMO);
// Rotate the result of the final CS so that the field is in the lower
// bits of a GR32, then truncate it.
SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
- DAG.getConstant(BitSize, WideVT));
+ DAG.getConstant(BitSize, DL, WideVT));
SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
@@ -2300,10 +3094,10 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
// available or the negative value is in the range of A(G)FHI.
int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
- NegSrc2 = DAG.getConstant(Value, MemVT);
+ NegSrc2 = DAG.getConstant(Value, DL, MemVT);
} else if (Subtarget.hasInterlockedAccess1())
// Use LAA(G) if available.
- NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
+ NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
Src2);
if (NegSrc2.getNode())
@@ -2342,23 +3136,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
// Get the address of the containing word.
SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, PtrVT));
+ DAG.getConstant(-4, DL, PtrVT));
// Get the number of bits that the word must be rotated left in order
// to bring the field to the top bits of a GR32.
SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, PtrVT));
+ DAG.getConstant(3, DL, PtrVT));
BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
// Get the complementing shift amount, for rotating a field in the top
// bits back to its proper position.
SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, WideVT), BitShift);
+ DAG.getConstant(0, DL, WideVT), BitShift);
// Construct the ATOMIC_CMP_SWAPW node.
SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
- NegBitShift, DAG.getConstant(BitSize, WideVT) };
+ NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
VTList, Ops, NarrowVT, MMO);
return AtomicOp;
@@ -2387,19 +3181,1084 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
// Just preserve the chain.
return Op.getOperand(0);
+ SDLoc DL(Op);
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Ops[] = {
Op.getOperand(0),
- DAG.getConstant(Code, MVT::i32),
+ DAG.getConstant(Code, DL, MVT::i32),
Op.getOperand(1)
};
- return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final operand must be MVT::Glue.
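+// IPM places the CC in bits 29-28 of its result, so shifting right by
+// SystemZ::IPM_CC leaves the CC as an integer in the range 0-3.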
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDLoc DL(After);
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
+ SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ if (Op->getNumValues() == 1)
+ return CC;
+ assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
+ Glued, CC);
+ }
+
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpdi:
+ return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vperm:
+ return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vuphb:
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplhb:
+ case Intrinsic::s390_vuplhh:
+ case Intrinsic::s390_vuplhf:
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplb:
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf:
+ return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vupllb:
+ case Intrinsic::s390_vupllh:
+ case Intrinsic::s390_vupllf:
+ return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vsumb:
+ case Intrinsic::s390_vsumh:
+ case Intrinsic::s390_vsumgh:
+ case Intrinsic::s390_vsumgf:
+ case Intrinsic::s390_vsumqf:
+ case Intrinsic::s390_vsumqg:
+ return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ return SDValue();
+}
+
+namespace {
+// Says that SystemZISD operation Opcode can be used to perform the equivalent
+// of a VPERM with permute vector Bytes. If Opcode takes three operands,
+// Operand is the constant third operand, otherwise it is the number of
+// bytes in each element of the result.
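+// For example, the VMRHG entry below lists bytes 0-7 of operand 0 followed
+// by bytes 0-7 of operand 1 (numbered 16-23, since bytes of operand 1 are
+// numbered 16-31).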
+struct Permute {
+ unsigned Opcode;
+ unsigned Operand;
+ unsigned char Bytes[SystemZ::VectorBytes];
+};
+}
+
+static const Permute PermuteForms[] = {
+ // VMRHG
+ { SystemZISD::MERGE_HIGH, 8,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VMRHF
+ { SystemZISD::MERGE_HIGH, 4,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
+ // VMRHH
+ { SystemZISD::MERGE_HIGH, 2,
+ { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
+ // VMRHB
+ { SystemZISD::MERGE_HIGH, 1,
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
+ // VMRLG
+ { SystemZISD::MERGE_LOW, 8,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
+ // VMRLF
+ { SystemZISD::MERGE_LOW, 4,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ // VMRLH
+ { SystemZISD::MERGE_LOW, 2,
+ { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
+ // VMRLB
+ { SystemZISD::MERGE_LOW, 1,
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
+ // VPKG
+ { SystemZISD::PACK, 4,
+ { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
+ // VPKF
+ { SystemZISD::PACK, 2,
+ { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
+ // VPKH
+ { SystemZISD::PACK, 1,
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
+ // VPDI V1, V2, 4 (low half of V1, high half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 4,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VPDI V1, V2, 1 (high half of V1, low half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 1,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
+};
+
+// Called after matching a vector shuffle against a particular pattern.
+// Both the original shuffle and the pattern have two vector operands.
+// OpNos[0] is the operand of the original shuffle that should be used for
+// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
+// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
+// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
+// for operands 0 and 1 of the pattern.
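+// For example, OpNos of { -1, 0 } means that operand 1 of the pattern
+// must be shuffle operand 0 while operand 0 is unconstrained, so both
+// OpNo0 and OpNo1 become 0 and the pattern uses shuffle operand 0 twice.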
+static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
+ if (OpNos[0] < 0) {
+ if (OpNos[1] < 0)
+ return false;
+ OpNo0 = OpNo1 = OpNos[1];
+ } else if (OpNos[1] < 0) {
+ OpNo0 = OpNo1 = OpNos[0];
+ } else {
+ OpNo0 = OpNos[0];
+ OpNo1 = OpNos[1];
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if the VPERM can be implemented using P.
+// When returning true, set OpNo0 to the VPERM operand that should be
+// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
+//
+// For example, if swapping the VPERM operands allows P to match, OpNo0
+// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
+// operand, but rewriting it to use two duplicated operands allows it to
+// match P, then OpNo0 and OpNo1 will be the same.
+static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ int Elt = Bytes[I];
+ if (Elt >= 0) {
+ // Make sure that the two permute vectors use the same suboperand
+ // byte number. Only the operand numbers (the high bits) are
+ // allowed to differ.
+ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
+ return false;
+ int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ for (auto &P : PermuteForms)
+ if (matchPermute(Bytes, P, OpNo0, OpNo1))
+ return &P;
+ return nullptr;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. This permute is an operand of an outer permute.
+// See whether redistributing the -1 bytes gives a shuffle that can be
+// implemented using P. If so, set Transform to a VPERM-like permute vector
+// that, when applied to the result of P, gives the original permute in Bytes.
+static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ const Permute &P,
+ SmallVectorImpl<int> &Transform) {
+ unsigned To = 0;
+ for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
+ int Elt = Bytes[From];
+ if (Elt < 0)
+ // Byte number From of the result is undefined.
+ Transform[From] = -1;
+ else {
+ while (P.Bytes[To] != Elt) {
+ To += 1;
+ if (To == SystemZ::VectorBytes)
+ return false;
+ }
+ Transform[From] = To;
+ }
+ }
+ return true;
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ SmallVectorImpl<int> &Transform) {
+ for (auto &P : PermuteForms)
+ if (matchDoublePermute(Bytes, P, Transform))
+ return &P;
+ return nullptr;
+}
+
+// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// as if it had type vNi8.
+static void getVPermMask(ShuffleVectorSDNode *VSN,
+ SmallVectorImpl<int> &Bytes) {
+ EVT VT = VSN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Index = VSN->getMaskElt(I);
+ if (Index >= 0)
+ for (unsigned J = 0; J < BytesPerElement; ++J)
+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ }
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
+// the result come from a contiguous sequence of bytes from one input.
+// Set Base to the selector for the first byte if so.
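+// For example, with BytesPerElement = 4, bytes { 20, 21, 22, 23 } at
+// Start form word 1 of the second operand, so Base is set to 20.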
+static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
+ unsigned BytesPerElement, int &Base) {
+ Base = -1;
+ for (unsigned I = 0; I < BytesPerElement; ++I) {
+ if (Bytes[Start + I] >= 0) {
+ unsigned Elem = Bytes[Start + I];
+ if (Base < 0) {
+ Base = Elem - I;
+ // Make sure the bytes would come from one input operand.
+ if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
+ return false;
+ } else if (unsigned(Base) != Elem - I)
+ return false;
+ }
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if it can be performed using VSLDI.
+// When returning true, set StartIndex to the shift amount and OpNo0
+// and OpNo1 to the VPERM operands that should be used as the first
+// and second shift operand respectively.
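+// For example, the mask { 1, 2, ..., 15, 16 } selects bytes 1-15 of
+// operand 0 followed by byte 0 of operand 1, i.e. a double shift left
+// by one byte, so StartIndex is 1 with OpNo0 = 0 and OpNo1 = 1.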
+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &StartIndex, unsigned &OpNo0,
+ unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ int Shift = -1;
+ for (unsigned I = 0; I < 16; ++I) {
+ int Index = Bytes[I];
+ if (Index >= 0) {
+ int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
+ int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
+ if (Shift < 0)
+ Shift = ExpectedShift;
+ else if (Shift != ExpectedShift)
+ return false;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ StartIndex = Shift;
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// Create a node that performs P on operands Op0 and Op1, casting the
+// operands to the appropriate type. The type of the result is determined by P.
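+// For example, applying the VPKF form (PACK with Operand 2) bitcasts
+// both operands to v4i32 and produces a v8i16 result.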
+static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
+ const Permute &P, SDValue Op0, SDValue Op1) {
+ // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
+ // elements of a PACK are twice as wide as the outputs.
+ unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
+ P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
+ P.Operand);
+ // Cast both operands to the appropriate type.
+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
+ SystemZ::VectorBytes / InBytes);
+ Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
+ SDValue Op;
+ if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
+ SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
+ Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
+ } else if (P.Opcode == SystemZISD::PACK) {
+ MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
+ SystemZ::VectorBytes / P.Operand);
+ Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
+ } else {
+ Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
+ }
+ return Op;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
+// VSLDI or VPERM.
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
+ const SmallVectorImpl<int> &Bytes) {
+ for (unsigned I = 0; I < 2; ++I)
+ Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
+
+ // First see whether VSLDI can be used.
+ unsigned StartIndex, OpNo0, OpNo1;
+ if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
+ return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
+ Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
+
+ // Fall back on VPERM. Construct an SDNode for the permute vector.
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0)
+ IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
+ else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+}
+
+namespace {
+// Describes a general N-operand vector shuffle.
+struct GeneralShuffle {
+ GeneralShuffle(EVT vt) : VT(vt) {}
+ void addUndef();
+ void add(SDValue, unsigned);
+ SDValue getNode(SelectionDAG &, SDLoc);
+
+ // The operands of the shuffle.
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops;
+
+  // Bytes[I] is -1 if byte I of the result is undefined. Otherwise the
+ // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
+ // Bytes[I] / SystemZ::VectorBytes.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+
+ // The type of the shuffle result.
+ EVT VT;
+};
+}
+
+// Add an extra undefined element to the shuffle.
+void GeneralShuffle::addUndef() {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(-1);
+}
+
+// Add an extra element to the shuffle, taking it from element Elem of Op.
+// A null Op indicates a vector input whose value will be calculated later;
+// there is at most one such input per shuffle and it always has the same
+// type as the result.
+void GeneralShuffle::add(SDValue Op, unsigned Elem) {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+
+ // The source vector can have wider elements than the result,
+ // either through an explicit TRUNCATE or because of type legalization.
+ // We want the least significant part.
+ EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
+ unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
+ assert(FromBytesPerElement >= BytesPerElement &&
+ "Invalid EXTRACT_VECTOR_ELT");
+ unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
+ (FromBytesPerElement - BytesPerElement));
+
+ // Look through things like shuffles and bitcasts.
+ while (Op.getNode()) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
+ // See whether the bytes we need come from a contiguous part of one
+ // operand.
+ SmallVector<int, SystemZ::VectorBytes> OpBytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+ int NewByte;
+ if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
+ break;
+ if (NewByte < 0) {
+ addUndef();
+ return;
+ }
+ Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
+ Byte = unsigned(NewByte) % SystemZ::VectorBytes;
+ } else if (Op.getOpcode() == ISD::UNDEF) {
+ addUndef();
+ return;
+ } else
+ break;
+ }
+
+ // Make sure that the source of the extraction is in Ops.
+ unsigned OpNo = 0;
+ for (; OpNo < Ops.size(); ++OpNo)
+ if (Ops[OpNo] == Op)
+ break;
+ if (OpNo == Ops.size())
+ Ops.push_back(Op);
+
+ // Add the element to Bytes.
+ unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(Base + I);
+}
+
+// Return SDNodes for the completed shuffle.
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
+ assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
+
+ if (Ops.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Make sure that there are at least two shuffle operands.
+ if (Ops.size() == 1)
+ Ops.push_back(DAG.getUNDEF(MVT::v16i8));
+
+ // Create a tree of shuffles, deferring root node until after the loop.
+ // Try to redistribute the undefined elements of non-root nodes so that
+ // the non-root shuffles match something like a pack or merge, then adjust
+ // the parent node's permute vector to compensate for the new order.
+ // Among other things, this copes with vectors like <2 x i16> that were
+ // padded with undefined elements during type legalization.
+ //
+ // In the best case this redistribution will lead to the whole tree
+ // using packs and merges. It should rarely be a loss in other cases.
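+  //
+  // For example, with four operands the first pass combines Ops[0] with
+  // Ops[1] and Ops[2] with Ops[3] into two intermediate vectors, leaving
+  // a final two-operand shuffle for the code after the loop.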
+ unsigned Stride = 1;
+ for (; Stride * 2 < Ops.size(); Stride *= 2) {
+ for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
+ SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
+
+ // Create a mask for just these two operands.
+ SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
+ if (OpNo == I)
+ NewBytes[J] = Byte;
+ else if (OpNo == I + Stride)
+ NewBytes[J] = SystemZ::VectorBytes + Byte;
+ else
+ NewBytes[J] = -1;
+ }
+      // See if it would be better to reorganize NewBytes to avoid using VPERM.
+ SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
+ if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
+ Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
+ // Applying NewBytesMap to Ops[I] gets back to NewBytes.
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ if (NewBytes[J] >= 0) {
+ assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
+ "Invalid double permute");
+ Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
+ } else
+ assert(NewBytesMap[J] < 0 && "Invalid double permute");
+ }
+ } else {
+ // Just use NewBytes on the operands.
+ Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
+ if (NewBytes[J] >= 0)
+ Bytes[J] = I * SystemZ::VectorBytes + J;
+ }
+ }
+ }
+
+ // Now we just have 2 inputs. Put the second operand in Ops[1].
+ if (Stride > 1) {
+ Ops[1] = Ops[Stride];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= int(SystemZ::VectorBytes))
+ Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
+ }
+
+ // Look for an instruction that can do the permute without resorting
+ // to VPERM.
+ unsigned OpNo0, OpNo1;
+ SDValue Op;
+ if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
+ else
+ Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
+static bool isScalarToVector(SDValue Op) {
+ for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
+ if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+// Return a vector of type VT that contains Value in the first element.
+// The other elements don't matter.
+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SDValue Value) {
+ // If we have a constant, replicate it to all elements and let the
+ // BUILD_VECTOR lowering take care of it.
+ if (Value.getOpcode() == ISD::Constant ||
+ Value.getOpcode() == ISD::ConstantFP) {
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ }
+ if (Value.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+}
+
+// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
+// element 1. Used for cases in which replication is cheap.
+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SDValue Op0, SDValue Op1) {
+ if (Op0.getOpcode() == ISD::UNDEF) {
+ if (Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
+ }
+ if (Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
+ return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
+ buildScalarToVector(DAG, DL, VT, Op0),
+ buildScalarToVector(DAG, DL, VT, Op1));
+}
+
+// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
+// vector for them.
+static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
+ SDValue Op1) {
+ if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(MVT::v2i64);
+ // If one of the two inputs is undefined then replicate the other one,
+ // in order to avoid using another register unnecessarily.
+ if (Op0.getOpcode() == ISD::UNDEF)
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ else if (Op1.getOpcode() == ISD::UNDEF)
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ else {
+ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ }
+ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
+}
+
+// Try to represent constant BUILD_VECTOR node BVN using a
+// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
+// on success.
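+// For example, the v4i32 constant { 0xff000000, 0, 0, 0xffff } yields
+// Mask = 0x8003: bit 15 for the leading byte and bits 1:0 for the two
+// trailing bytes.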
+static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+ EVT ElemVT = BVN->getValueType(0).getVectorElementType();
+ unsigned BytesPerElement = ElemVT.getStoreSize();
+ for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (Op.getOpcode() != ISD::UNDEF) {
+ uint64_t Value;
+ if (Op.getOpcode() == ISD::Constant)
+        Value = cast<ConstantSDNode>(Op)->getZExtValue();
+ else if (Op.getOpcode() == ISD::ConstantFP)
+        Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
+ .getZExtValue());
+ else
+ return false;
+ for (unsigned J = 0; J < BytesPerElement; ++J) {
+ uint64_t Byte = (Value >> (J * 8)) & 0xff;
+ if (Byte == 0xff)
+ Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
+ else if (Byte != 0)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Try to load a vector constant in which BitsPerElement-bit value Value
+// is replicated to fill the vector. VT is the type of the resulting
+// constant, which may have elements of a different size from BitsPerElement.
+// Return the SDValue of the constant on success, otherwise return
+// an empty value.
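+// For example, the 32-bit value 0xffff8000 sign-extends to the 16-bit
+// immediate -32768 and can use VREPI, while 0x00ffff00 is a contiguous
+// run of ones and can use VGM instead.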
+static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
+ const SystemZInstrInfo *TII,
+ SDLoc DL, EVT VT, uint64_t Value,
+ unsigned BitsPerElement) {
+ // Signed 16-bit values can be replicated using VREPI.
+ int64_t SignedValue = SignExtend64(Value, BitsPerElement);
+ if (isInt<16>(SignedValue)) {
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
+ DAG.getConstant(SignedValue, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ // See whether rotating the constant left some N places gives a value that
+ // is one less than a power of 2 (i.e. all zeros followed by all ones).
+ // If so we can use VGM.
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value,
+ // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
+    // bit numbers for a BitsPerElement value, so that 0 denotes
+ // 1 << (BitsPerElement-1).
+ Start -= 64 - BitsPerElement;
+ End -= 64 - BitsPerElement;
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
+ DAG.getConstant(Start, DL, MVT::i32),
+ DAG.getConstant(End, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ return SDValue();
+}
+
+// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
+// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
+// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
+// would benefit from this representation and return it if so.
+static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
+ BuildVectorSDNode *BVN) {
+ EVT VT = BVN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+
+ // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
+ // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
+ // need a BUILD_VECTOR, add an additional placeholder operand for that
+ // BUILD_VECTOR and store its operands in ResidueOps.
+ GeneralShuffle GS(VT);
+ SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
+ bool FoundOne = false;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ GS.add(Op.getOperand(0), Elem);
+ FoundOne = true;
+ } else if (Op.getOpcode() == ISD::UNDEF) {
+ GS.addUndef();
+ } else {
+ GS.add(SDValue(), ResidueOps.size());
+ ResidueOps.push_back(Op);
+ }
+ }
+
+ // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
+ if (!FoundOne)
+ return SDValue();
+
+ // Create the BUILD_VECTOR for the remaining elements, if any.
+ if (!ResidueOps.empty()) {
+ while (ResidueOps.size() < NumElements)
+ ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
+ for (auto &Op : GS.Ops) {
+ if (!Op.getNode()) {
+ Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
+ break;
+ }
+ }
+ }
+ return GS.getNode(DAG, SDLoc(BVN));
+}
+
+// Combine GPR scalar values Elems into a vector of type VT.
+static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) {
+ // See whether there is a single replicated value.
+ SDValue Single;
+ unsigned int NumElements = Elems.size();
+ unsigned int Count = 0;
+ for (auto Elem : Elems) {
+ if (Elem.getOpcode() != ISD::UNDEF) {
+ if (!Single.getNode())
+ Single = Elem;
+ else if (Elem != Single) {
+ Single = SDValue();
+ break;
+ }
+ Count += 1;
+ }
+ }
+ // There are three cases here:
+ //
+ // - if the only defined element is a loaded one, the best sequence
+ // is a replicating load.
+ //
+ // - otherwise, if the only defined element is an i64 value, we will
+ // end up with the same VLVGP sequence regardless of whether we short-cut
+ // for replication or fall through to the later code.
+ //
+ // - otherwise, if the only defined element is an i32 or smaller value,
+ // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
+ // This is only a win if the single defined element is used more than once.
+ // In other cases we're better off using a single VLVGx.
+ if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
+
+ // The best way of building a v2i64 from two i64s is to use VLVGP.
+ if (VT == MVT::v2i64)
+ return joinDwords(DAG, DL, Elems[0], Elems[1]);
+
+ // Use a 64-bit merge high to combine two doubles.
+ if (VT == MVT::v2f64)
+ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+
+ // Build v4f32 values directly from the FPRs:
+ //
+  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
+ // V V VMRHF
+ // <ABxx> <CDxx>
+ // V VMRHG
+ // <ABCD>
+ if (VT == MVT::v4f32) {
+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
+ // Avoid unnecessary undefs by reusing the other operand.
+ if (Op01.getOpcode() == ISD::UNDEF)
+ Op01 = Op23;
+ else if (Op23.getOpcode() == ISD::UNDEF)
+ Op23 = Op01;
+ // Merging identical replications is a no-op.
+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
+ return Op01;
+ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
+ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
+ DL, MVT::v2i64, Op01, Op23);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Collect the constant terms.
+ SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
+ SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
+
+ unsigned NumConstants = 0;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Elem = Elems[I];
+ if (Elem.getOpcode() == ISD::Constant ||
+ Elem.getOpcode() == ISD::ConstantFP) {
+ NumConstants += 1;
+ Constants[I] = Elem;
+ Done[I] = true;
+ }
+ }
+ // If there was at least one constant, fill in the other elements of
+ // Constants with undefs to get a full vector constant and use that
+ // as the starting point.
+ SDValue Result;
+ if (NumConstants > 0) {
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Constants[I].getNode())
+ Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
+ } else {
+ // Otherwise try to use VLVGP to start the sequence in order to
+ // avoid a false dependency on any previous contents of the vector
+ // register. This only makes sense if one of the associated elements
+ // is defined.
+ unsigned I1 = NumElements / 2 - 1;
+ unsigned I2 = NumElements - 1;
+ bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
+ bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
+ if (Def1 || Def2) {
+ SDValue Elem1 = Elems[Def1 ? I1 : I2];
+ SDValue Elem2 = Elems[Def2 ? I2 : I1];
+ Result = DAG.getNode(ISD::BITCAST, DL, VT,
+ joinDwords(DAG, DL, Elem1, Elem2));
+ Done[I1] = true;
+ Done[I2] = true;
+ } else
+ Result = DAG.getUNDEF(VT);
+ }
+
+ // Use VLVGx to insert the other elements.
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
+ DAG.getConstant(I, DL, MVT::i32));
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (BVN->isConstant()) {
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ uint64_t Mask = 0;
+ if (tryBuildVectorByteMask(BVN, Mask)) {
+ SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(Mask, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Try using some form of replication.
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, true) &&
+ SplatBitSize <= 64) {
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower = (SplatUndefZ
+ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper = (SplatUndefZ
+ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ uint64_t Value = SplatBitsZ | Upper | Lower;
+ SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
+ SplatBitSize);
+ if (Op.getNode())
+ return Op;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ Value = SplatBitsZ | Middle;
+ Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
+ if (Op.getNode())
+ return Op;
+ }
+
+ // Fall back to loading it from memory.
+ return SDValue();
+ }
+
+ // See if we should use shuffles to construct the vector from other vectors.
+ SDValue Res = tryBuildVectorShuffle(DAG, BVN);
+ if (Res.getNode())
+ return Res;
+
+ // Detect SCALAR_TO_VECTOR conversions.
+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
+ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
+
+ // Otherwise use buildVector to build the vector up from GPRs.
+ unsigned NumElements = Op.getNumOperands();
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ Ops[I] = Op.getOperand(I);
+ return buildVector(DAG, DL, VT, Ops);
+}
+
+SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElements = VT.getVectorNumElements();
+
+ if (VSN->isSplat()) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ // See whether the value we're splatting is directly available as a scalar.
+ if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ Op0.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
+ // Otherwise keep it as a vector-to-vector operation.
+ return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+
+ GeneralShuffle GS(VT);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Elt = VSN->getMaskElt(I);
+ if (Elt < 0)
+ GS.addUndef();
+ else
+ GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements);
+ }
+ return GS.getNode(DAG, SDLoc(VSN));
+}
+
+SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ // Just insert the scalar into element 0 of an undefined vector.
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
+ Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
+}
+
+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle insertions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+
+  // Insertions at constant indices of a v2f64 can be done using VPDI.
+ // However, if the inserted value is a bitcast or a constant then it's
+ // better to use GPRs, as below.
+ if (VT == MVT::v2f64 &&
+ Op1.getOpcode() != ISD::BITCAST &&
+ Op1.getOpcode() != ISD::ConstantFP &&
+ Op2.getOpcode() == ISD::Constant) {
+    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
+ unsigned Mask = VT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and insert via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
+ DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle extractions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ EVT VecVT = Op0.getValueType();
+
+ // Extractions of constant indices can be done directly.
+ if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
+ uint64_t Index = CIndexN->getZExtValue();
+ unsigned Mask = VecVT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and extract via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const {
+ SDValue PackedOp = Op.getOperand(0);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
+ unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
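+  // Unpack one step at a time, doubling the element width each time.
+  // For example, widening i8 elements to i32 takes two unpacks:
+  // v16i8 -> v8i16 -> v4i32.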
+ do {
+ FromBits *= 2;
+    EVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
+                                 SystemZ::VectorBits / FromBits);
+    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), NewVT, PackedOp);
+ } while (FromBits != ToBits);
+ return PackedOp;
+}
+
+SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
+ unsigned ByScalar) const {
+ // Look for cases where a vector shift can use the *_BY_SCALAR form.
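+  // For example, (shl v4i32 X, splat(5)) can use a single VESLF-style
+  // shift by scalar amount 5 instead of a general vector shift.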
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+
+ // See whether the shift vector is a splat represented as BUILD_VECTOR.
+ if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ // Check for constant splats. Use ElemBitSize as the minimum element
+ // width and reject splats that need wider elements.
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElemBitSize, true) &&
+ SplatBitSize == ElemBitSize) {
+ SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
+ DL, MVT::i32);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ // Check for variable splats.
+ BitVector UndefElements;
+ SDValue Splat = BVN->getSplatValue(&UndefElements);
+ if (Splat) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+
+ // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
+ // and the shift amount is directly available in a GPR.
+ if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
+ if (VSN->isSplat()) {
+ SDValue VSNOp0 = VSN->getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ VSNOp0.getOperand(Index));
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+ }
+
+ // Otherwise just treat the current form as legal.
+ return Op;
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2437,6 +4296,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF:
+ return DAG.getNode(ISD::CTLZ, SDLoc(Op),
+ Op.getValueType(), Op.getOperand(0));
+ case ISD::CTTZ_ZERO_UNDEF:
+ return DAG.getNode(ISD::CTTZ, SDLoc(Op),
+ Op.getValueType(), Op.getOperand(0));
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
@@ -2471,6 +4338,30 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ case ISD::SHL:
+ return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
+ case ISD::SRL:
+ return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
+ case ISD::SRA:
+ return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2478,10 +4369,13 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
- switch (Opcode) {
+ switch ((SystemZISD::NodeType)Opcode) {
+ case SystemZISD::FIRST_NUMBER: break;
OPCODE(RET_FLAG);
OPCODE(CALL);
OPCODE(SIBCALL);
+ OPCODE(TLS_GDCALL);
+ OPCODE(TLS_LDCALL);
OPCODE(PCREL_WRAPPER);
OPCODE(PCREL_OFFSET);
OPCODE(IABS);
@@ -2492,7 +4386,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(EXTRACT_ACCESS);
+ OPCODE(POPCNT);
OPCODE(UMUL_LOHI64);
+ OPCODE(SDIVREM32);
OPCODE(SDIVREM64);
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
@@ -2506,11 +4402,60 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(XC_LOOP);
OPCODE(CLC);
OPCODE(CLC_LOOP);
- OPCODE(STRCMP);
OPCODE(STPCPY);
+ OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
+ OPCODE(BYTE_MASK);
+ OPCODE(ROTATE_MASK);
+ OPCODE(REPLICATE);
+ OPCODE(JOIN_DWORDS);
+ OPCODE(SPLAT);
+ OPCODE(MERGE_HIGH);
+ OPCODE(MERGE_LOW);
+ OPCODE(SHL_DOUBLE);
+ OPCODE(PERMUTE_DWORDS);
+ OPCODE(PERMUTE);
+ OPCODE(PACK);
+ OPCODE(PACKS_CC);
+ OPCODE(PACKLS_CC);
+ OPCODE(UNPACK_HIGH);
+ OPCODE(UNPACKL_HIGH);
+ OPCODE(UNPACK_LOW);
+ OPCODE(UNPACKL_LOW);
+ OPCODE(VSHL_BY_SCALAR);
+ OPCODE(VSRL_BY_SCALAR);
+ OPCODE(VSRA_BY_SCALAR);
+ OPCODE(VSUM);
+ OPCODE(VICMPE);
+ OPCODE(VICMPH);
+ OPCODE(VICMPHL);
+ OPCODE(VICMPES);
+ OPCODE(VICMPHS);
+ OPCODE(VICMPHLS);
+ OPCODE(VFCMPE);
+ OPCODE(VFCMPH);
+ OPCODE(VFCMPHE);
+ OPCODE(VFCMPES);
+ OPCODE(VFCMPHS);
+ OPCODE(VFCMPHES);
+ OPCODE(VFTCI);
+ OPCODE(VEXTEND);
+ OPCODE(VROUND);
+ OPCODE(VTM);
+ OPCODE(VFAE_CC);
+ OPCODE(VFAEZ_CC);
+ OPCODE(VFEE_CC);
+ OPCODE(VFEEZ_CC);
+ OPCODE(VFENE_CC);
+ OPCODE(VFENEZ_CC);
+ OPCODE(VISTR_CC);
+ OPCODE(VSTRC_CC);
+ OPCODE(VSTRCZ_CC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -2529,6 +4474,157 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#undef OPCODE
}
+// Return true if VT is a vector whose elements are a whole number of bytes
+// in width.
+static bool canTreatAsByteVector(EVT VT) {
+ return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
+}
+
+// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
+// producing a result of type ResVT. Op is a possibly bitcast version
+// of the input vector and Index is the index (based on type VecVT) that
+// should be extracted. Return the new extraction if a simplification
+// was possible or if Force is true.
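+// For example, if Op is (build_vector i64 A, B) viewed as v4i32 and
+// Index is 1, the extracted value is the low 32 bits of A and the
+// extraction can be rewritten as a truncation of A to i32.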
+SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
+ SDValue Op, unsigned Index,
+ DAGCombinerInfo &DCI,
+ bool Force) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // The number of bytes being extracted.
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+
+ for (;;) {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::BITCAST)
+ // Look through bitcasts.
+ Op = Op.getOperand(0);
+ else if (Opcode == ISD::VECTOR_SHUFFLE &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // Get a VPERM-like permute mask and see whether the bytes covered
+ // by the extracted element are a contiguous sequence from one
+ // source operand.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+ int First;
+ if (!getShuffleInput(Bytes, Index * BytesPerElement,
+ BytesPerElement, First))
+ break;
+ if (First < 0)
+ return DAG.getUNDEF(ResVT);
+ // Make sure the contiguous sequence starts at a multiple of the
+ // original element size.
+ unsigned Byte = unsigned(First) % Bytes.size();
+ if (Byte % BytesPerElement != 0)
+ break;
+ // We can get the extracted value directly from an input.
+ Index = Byte / BytesPerElement;
+ Op = Op.getOperand(unsigned(First) / Bytes.size());
+ Force = true;
+ } else if (Opcode == ISD::BUILD_VECTOR &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // We can only optimize this case if the BUILD_VECTOR elements are
+ // at least as wide as the extracted value.
+ EVT OpVT = Op.getValueType();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ if (OpBytesPerElement < BytesPerElement)
+ break;
+ // Make sure that the least-significant bit of the extracted value
+      // is the least-significant bit of an input.
+ unsigned End = (Index + 1) * BytesPerElement;
+ if (End % OpBytesPerElement != 0)
+ break;
+ // We're extracting the low part of one operand of the BUILD_VECTOR.
+ Op = Op.getOperand(End / OpBytesPerElement - 1);
+ if (!Op.getValueType().isInteger()) {
+ EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ if (VT != ResVT) {
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
+ }
+ return Op;
+ } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
+ canTreatAsByteVector(Op.getOperand(0).getValueType())) {
+ // Make sure that only the unextended bits are significant.
+ EVT ExtVT = Op.getValueType();
+ EVT OpVT = Op.getOperand(0).getValueType();
+ unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ unsigned Byte = Index * BytesPerElement;
+ unsigned SubByte = Byte % ExtBytesPerElement;
+ unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
+ if (SubByte < MinSubByte ||
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
+ // Get the byte offset of the unextended element
+ Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
+ // ...then add the byte offset relative to that element.
+ Byte += SubByte - MinSubByte;
+ if (Byte % BytesPerElement != 0)
+ break;
+ Op = Op.getOperand(0);
+ Index = Byte / BytesPerElement;
+ Force = true;
+ } else
+ break;
+ }
+ if (Force) {
+ if (Op.getValueType() != VecVT) {
+ Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+ return SDValue();
+}
+
+// Optimize vector operations in scalar value Op on the basis that Op
+// is truncated to TruncVT.
+SDValue
+SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
+ // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
+ // of type TruncVT.
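+  // For example, (trunc i8 (extract_vector_elt v4i32 X, 1)) becomes an
+  // extraction of byte element 7 of (bitcast X to v16i8), the last byte
+  // of the second i32 element.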
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ TruncVT.getSizeInBits() % 8 == 0) {
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ if (canTreatAsByteVector(VecVT)) {
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+ unsigned TruncBytes = TruncVT.getStoreSize();
+ if (BytesPerElement % TruncBytes == 0) {
+ // Calculate the value of Y' in the above description. We are
+ // splitting the original elements into Scale equal-sized pieces
+ // and for truncation purposes want the last (least-significant)
+ // of these pieces for IndexN. This is easiest to do by calculating
+ // the start index of the following element and then subtracting 1.
+ unsigned Scale = BytesPerElement / TruncBytes;
+ unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
+
+ // Defer the creation of the bitcast from X to combineExtract,
+ // which might be able to optimize the extraction.
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
+ VecVT.getStoreSize() / TruncBytes);
+ EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
+ return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2552,9 +4648,118 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
Inner.getOperand(0));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
- DAG.getConstant(NewShlAmt, ShiftVT));
+ DAG.getConstant(NewShlAmt, SDLoc(Inner),
+ ShiftVT));
return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
- DAG.getConstant(NewSraAmt, ShiftVT));
+ DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
+ }
+ }
+ }
+ }
+ if (Opcode == SystemZISD::MERGE_HIGH ||
+ Opcode == SystemZISD::MERGE_LOW) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
+ // for v4f32.
+ if (Op1 == N->getOperand(0))
+ return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ }
+ }
+ }
+ // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
+ // for the extraction to be done on a vMiN value, so that we can use VSTE.
+  // If X has wider elements, convert it to:
+ // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
+ if (Opcode == ISD::STORE) {
+ auto *SN = cast<StoreSDNode>(N);
+ EVT MemVT = SN->getMemoryVT();
+ if (MemVT.isInteger()) {
+ SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
+ SN->getValue(), DCI);
+ if (Value.getNode()) {
+ DCI.AddToWorklist(Value.getNode());
+
+ // Rewrite the store with the new form of stored value.
+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+ SN->getBasePtr(), SN->getMemoryVT(),
+ SN->getMemOperand());
+ }
+ }
+ }
+ // Try to simplify a vector extraction.
+ if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = Op0.getValueType();
+ return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+ IndexN->getZExtValue(), DCI, false);
+ }
+ }
+ // (join_dwords X, X) == (replicate X)
+ if (Opcode == SystemZISD::JOIN_DWORDS &&
+ N->getOperand(0) == N->getOperand(1))
+ return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
+ // (fround (extract_vector_elt X 0))
+ // (fround (extract_vector_elt X 1)) ->
+ // (extract_vector_elt (VROUND X) 0)
+  // (extract_vector_elt (VROUND X) 2)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ if (Opcode == ISD::FP_ROUND) {
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f32 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ SDValue OtherRound = SDValue(*U->use_begin(), 0);
+ if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+ OtherRound.getOperand(0) == SDValue(U, 0) &&
+ OtherRound.getValueType() == MVT::f32) {
+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
+ DCI.AddToWorklist(VRound.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+ VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+ VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
}
}
}
@@ -2614,8 +4819,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getSubtarget().getInstrInfo());
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2663,8 +4868,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
- const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getSubtarget().getInstrInfo());
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2733,7 +4938,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2853,7 +5058,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2965,7 +5170,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3082,7 +5287,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
bool ClearEven, unsigned SubReg) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3114,7 +5319,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3284,7 +5489,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3338,6 +5543,57 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
return DoneMBB;
}
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
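+  // Each GRSM bit covers an even/odd register pair, so GPRControlBit[15]
+  // (0x0100) marks r14/r15 as saved and GPRControlBit[11] (0x0400) marks
+  // r10/r11, the pair containing the frame pointer.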
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR/VR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ if (Subtarget.hasVector()) {
+ for (int I = 0; I < 32; I++) {
+ unsigned Reg = SystemZMC::VR128Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ } else {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3579,6 +5835,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 887c236..b001abc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -22,7 +22,7 @@
namespace llvm {
namespace SystemZISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
// Return with a flag operand. Operand 0 is the chain operand.
@@ -34,6 +34,11 @@ enum {
CALL,
SIBCALL,
+ // TLS calls. Like regular calls, except operand 1 is the TLS symbol.
+ // (The call target is implicitly __tls_get_offset.)
+ TLS_GDCALL,
+ TLS_LDCALL,
+
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
@@ -82,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -138,6 +146,135 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
+ // Create a vector constant by filling byte N of the result with bit
+ // 15-N of the single operand.
+ BYTE_MASK,
+
+ // Create a vector constant by replicating an element-sized RISBG-style mask.
+ // The first operand specifies the starting set bit and the second operand
+ // specifies the ending set bit. Both operands count from the MSB of the
+ // element.
+ ROTATE_MASK,
+
+ // Replicate a GPR scalar value into all elements of a vector.
+ REPLICATE,
+
+ // Create a vector from two i64 GPRs.
+ JOIN_DWORDS,
+
+ // Replicate one element of a vector into all elements. The first operand
+ // is the vector and the second is the index of the element to replicate.
+ SPLAT,
+
+ // Interleave elements from the high half of operand 0 and the high half
+ // of operand 1.
+ MERGE_HIGH,
+
+ // Likewise for the low halves.
+ MERGE_LOW,
+
+ // Concatenate the vectors in the first two operands, shift them left
+ // by the third operand, and take the first half of the result.
+ SHL_DOUBLE,
+
+ // Take one element of the first v2i64 operand and one element of the
+ // second v2i64 operand and concatenate them to form a v2i64 result.
+ // The third operand is a 4-bit value of the form 0A0B, where A and B
+ // are the element selectors for the first and second operands
+ // respectively.
+ PERMUTE_DWORDS,
+
+ // Perform a general vector permute on vector operands 0 and 1.
+ // Each byte of operand 2 controls the corresponding byte of the result,
+ // in the same way as a byte-level VECTOR_SHUFFLE mask.
+ PERMUTE,
+
+ // Pack vector operands 0 and 1 into a single vector with half-sized elements.
+ PACK,
+
+ // Likewise, but saturate the result and set CC. PACKS_CC does signed
+ // saturation and PACKLS_CC does unsigned saturation.
+ PACKS_CC,
+ PACKLS_CC,
+
+ // Unpack the first half of vector operand 0 into double-sized elements.
+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
+ UNPACK_HIGH,
+ UNPACKL_HIGH,
+
+ // Likewise for the second half.
+ UNPACK_LOW,
+ UNPACKL_LOW,
+
+ // Shift each element of vector operand 0 by the number of bits specified
+ // by scalar operand 1.
+ VSHL_BY_SCALAR,
+ VSRL_BY_SCALAR,
+ VSRA_BY_SCALAR,
+
+ // For each element of the output type, sum across all sub-elements of
+ // operand 0 belonging to the corresponding element, and add in the
+ // rightmost sub-element of the corresponding element of operand 1.
+ VSUM,
+
+ // Compare integer vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VICMPE is for equality, VICMPH for "signed greater than"
+ // and VICMPHL for "unsigned greater than".
+ VICMPE,
+ VICMPH,
+ VICMPHL,
+
+ // Likewise, but also set the condition codes on the result.
+ VICMPES,
+ VICMPHS,
+ VICMPHLS,
+
+ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
+ // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE,
+ VFCMPH,
+ VFCMPHE,
+
+ // Likewise, but also set the condition codes on the result.
+ VFCMPES,
+ VFCMPHS,
+ VFCMPHES,
+
+ // Test floating-point data class for vectors.
+ VFTCI,
+
+ // Extend the even f32 elements of vector operand 0 to produce a vector
+ // of f64 elements.
+ VEXTEND,
+
+ // Round the f64 elements of vector operand 0 to f32s and store them in the
+ // even elements of the result.
+ VROUND,
+
+ // AND the two vector operands together and set CC based on the result.
+ VTM,
+
+ // String operations that set CC as a side-effect.
+ VFAE_CC,
+ VFAEZ_CC,
+ VFEE_CC,
+ VFEEZ_CC,
+ VFENE_CC,
+ VFENEZ_CC,
+ VISTR_CC,
+ VSTRC_CC,
+ VSTRCZ_CC,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
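For the BYTE_MASK node above, a small sketch of the semantics may help. This is illustrative only, not part of the patch; the mask value is an arbitrary example.

    // Illustrative sketch of BYTE_MASK (VGBM) semantics: byte N of the
    // result is 0xFF when bit (15 - N) of the 16-bit operand is set.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint16_t Mask = 0x8001; // example operand
      for (int N = 0; N < 16; ++N)
        printf("%02x", (Mask >> (15 - N)) & 1 ? 0xFF : 0x00);
      printf("\n"); // prints ff0000000000000000000000000000ff
      return 0;
    }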
@@ -198,15 +335,40 @@ class SystemZTargetMachine;
class SystemZTargetLowering : public TargetLowering {
public:
- explicit SystemZTargetLowering(const TargetMachine &TM);
+ explicit SystemZTargetLowering(const TargetMachine &TM,
+ const SystemZSubtarget &STI);
// Override TargetLowering.
MVT getScalarShiftAmountTy(EVT LHSTy) const override {
return MVT::i32;
}
+ MVT getVectorIdxTy() const override {
+ // Only the lower 12 bits of an element index are used, so we don't
+ // want to clobber the upper 32 bits of a GPR unnecessarily.
+ return MVT::i32;
+ }
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ const override {
+ // Widen subvectors to the full width rather than promoting integer
+ // elements. This is better because:
+ //
+ // (a) it means that we can handle the ABI for passing and returning
+ // sub-128 vectors without having to handle them as legal types.
+ //
+ // (b) we don't have instructions to extend on load and truncate on store,
+ // so promoting the integers is less efficient.
+ //
+ // (c) there are no multiplication instructions for the widest integer
+ // type (v2i64).
+ if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
+ return TypeWidenVector;
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -215,8 +377,9 @@ public:
bool isTruncateFree(EVT, EVT) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const override;
TargetLowering::ConstraintWeight
@@ -226,6 +389,26 @@ public:
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode.size() == 1) {
+ switch(ConstraintCode[0]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ case 'R':
+ return InlineAsm::Constraint_R;
+ case 'S':
+ return InlineAsm::Constraint_S;
+ case 'T':
+ return InlineAsm::Constraint_T;
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
@@ -257,6 +440,9 @@ private:
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
+ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG, unsigned Opcode,
+ SDValue GOTOffset) const;
SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
SDValue lowerBlockAddress(BlockAddressSDNode *Node,
@@ -272,6 +458,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -282,6 +469,22 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const;
+ SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+
+ SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
+ unsigned Index, DAGCombinerInfo &DCI,
+ bool Force) const;
+ SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+ DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -319,6 +522,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 4a5582f..27fbd7d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
-defm : CompareZeroFP<LTEBRCompare, FP32>;
-defm : CompareZeroFP<LTDBRCompare, FP64>;
-defm : CompareZeroFP<LTXBRCompare, FP128>;
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
+let Predicates = [FeatureNoVector] in {
+ defm : CompareZeroFP<LTEBRCompare, FP32>;
+ defm : CompareZeroFP<LTDBRCompare, FP64>;
+ defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
// These instructions are split after register allocation, so we don't
// want a custom inserter.
let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
@@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
Requires<[FeatureFPExtension]>;
def : Pat<(f32 (fround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
def : Pat<(f64 (fround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
@@ -345,13 +353,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_h32), FP32:$src2)>;
+ FP32:$src1, subreg_r32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c..71eb998 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping {
// Formats are specified using operand field declarations of the form:
//
// bits<4> Rn : register input or output for operand n
+// bits<5> Vn : vector register input or output for operand n
// bits<m> In : immediate value of width m for operand n
// bits<4> BDn : address operand n, which has a base and a displacement
// bits<m> XBDn : address operand n, which has an index, a base and a
// displacement
+// bits<m> VBDn : address operand n, which has a vector index, a base and a
+// displacement
// bits<4> Xn : index register for address operand n
// bits<4> Mn : mode value for operand n
//
@@ -339,11 +342,13 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<20> XBD2;
+ bits<4> M3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-16} = XBD2;
- let Inst{15-8} = 0;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
let HasIndex = 1;
@@ -473,6 +478,393 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
+class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> I2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<8> I2;
+ bits<8> I3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-24} = I2;
+ let Inst{23-16} = I3;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V3;
+ bits<16> I2;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<8> I4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-16} = I4;
+ let Inst{15-12} = M5;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<12> I3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-20} = I3;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M4 value provided as explicit operand. These are passed as m4or.
+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m4or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<4> M3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M5;
+ let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3});
+ let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2});
+ let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1});
+ let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0});
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
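The bit-by-bit !if lines above implement a plain bitwise OR of the template parameter into the operand field. In C++ terms (illustrative only; the values are arbitrary examples):

    // Illustrative sketch: the encoded M4 field is the explicit operand
    // with the m4or template bits forced on, i.e. M4 | m4or.
    #include <cstdio>

    int main() {
      unsigned M4 = 0x4, m4or = 0x1; // example values
      printf("encoded M4 = 0x%x\n", M4 | m4or); // prints 0x5
      return 0;
    }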
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M5 value provided as explicit operand. These are passed as m5or.
+class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m5or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3});
+ let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2});
+ let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1});
+ let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-20} = M6;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M6 value provided as explicit operand. These are passed as m6or.
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m6or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M5;
+ let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3});
+ let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2});
+ let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1});
+ let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M6;
+ let Inst{23-20} = 0;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<4> R2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R2;
+ let Inst{31-28} = R3;
+ let Inst{27-12} = 0;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = 0;
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<21> VBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = VBD2{19-0};
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = VBD2{20};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<20> XBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = XBD2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
@@ -492,12 +884,6 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// form of the source register in the destination register and
// branches on the result.
//
-// Store:
-// One register or immediate input operand and one address input operand.
-// The instruction stores the first operand to the address.
-//
-// This category is used for both pure and truncating stores.
-//
// LoadMultiple:
// One address input operand and two explicit output operands.
// The instruction loads a range of registers from the address,
@@ -510,18 +896,35 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// with the explicit operands giving the first and last register
// to store. Other stored registers are added as implicit uses.
//
+// StoreLength:
+// One value operand, one length operand and one address operand.
+// The instruction stores the value operand to the address but
+// doesn't write more than the number of bytes specified by the
+// length operand.
+//
// Unary:
// One register output operand and one input operand.
//
+// Store:
+// One address operand and one other input operand. The instruction
+// stores to the address.
+//
// Binary:
// One register output operand and two input operands.
//
+// StoreBinary:
+// One address operand and two other input operands. The instruction
+// stores to the address.
+//
// Compare:
// Two input operands and an implicit CC output operand.
//
// Ternary:
// One register output operand and three input operands.
//
+// Quaternary:
+// One register output operand and four input operands.
+//
// LoadAndOp:
// One output operand and two input operands, one of which is an address.
// The instruction both reads from and writes to the address.
@@ -556,6 +959,12 @@ class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
let R2 = 0;
}
+class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
+ : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> {
+ let I2 = value;
+ let M3 = 0;
+}
+
class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2),
mnemonic##"\t$R1, $I2", []> {
@@ -571,6 +980,13 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
let mayLoad = 1;
}
+class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayLoad = 1;
+}
+
class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
: InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
@@ -619,12 +1035,39 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
}
+class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
+ let M3 = type;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreLengthVRSb<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
+ let M4 = 0;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
+class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayStore = 1;
+}
+
// StoreSI* instructions are used to store an integer to memory, but the
// addresses are more restricted than for normal stores. If we are in the
// situation of having to force either the address into a register or the
@@ -857,6 +1300,7 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let OpType = "mem";
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -883,6 +1327,46 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
}
+class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, Immediate imm, bits<4> type = 0>
+ : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
+ mnemonic#"\t$V1, $I2",
+ [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> {
+ let M3 = type;
+}
+
+class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
+ bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> {
+ let M3 = type;
+ let M4 = m4;
+ let M5 = m5;
+}
+
+multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> modifier = 0,
+ bits<4> modifier_cc = 1> {
+ def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>;
+ let Defs = [CC] in
+ def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0,
+ modifier_cc>;
+}
+
+class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
+ let M3 = type;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
@@ -1036,6 +1520,7 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let DisableEncoding = "$R1src";
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1094,6 +1579,148 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
}
+class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
+ mnemonic#"\t$V1, $I2, $I3",
+ [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
+ mnemonic#"\t$V1, $V3, $I2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ imm32zx16:$I2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5>
+ : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
+ mnemonic#"\t$V1, $V2, $I3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx12:$I3)))]> {
+ let M4 = type;
+ let M5 = m5;
+}
+
+class BinaryVRRa<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let M4 = 0;
+ let M5 = 0;
+}
+
+class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0,
+ bits<4> modifier = 0>
+ : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = modifier;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0, bits<4> modifier_cc = 1> {
+ def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>;
+ let Defs = [CC] in
+ def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ modifier_cc>;
+}
+
+class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
+ bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = m5;
+ let M6 = m6;
+}
+
+multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m5,
+ bits<4> modifier = 0, bits<4> modifier_cc = 1> {
+ def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>;
+ let Defs = [CC] in
+ def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ m5, modifier_cc>;
+}
+
+class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr>
+ : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3),
+ mnemonic#"\t$V1, $R2, $R3",
+ [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>;
+
+class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ shift12only:$BD2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ bits<5> bytes>
+ : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
+ let M4 = 0;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$R1, $V3, $BD2",
+ [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
+ let M4 = type;
+}
+
+class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes>
+ : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2,
+ imm32zx4:$M3)))]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryVRX<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr, bits<5> bytes,
+ Immediate index>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
@@ -1166,6 +1793,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let isCompare = 1;
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1235,6 +1863,17 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
}
+class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> {
+ let isCompare = 1;
+ let M3 = type;
+ let M4 = 0;
+ let M5 = 0;
+}
+
class TernaryRRD<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls>
: InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
@@ -1261,6 +1900,188 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let AccessBytes = bytes;
}
+class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
+ : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
+ mnemonic#"\t$V1, $I2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ imm:$I2, index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
+class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let M5 = type;
+}
+
+class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
+ : InstVRRa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M4, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx4:$M4,
+ imm32zx4:$M5)))],
+ m4or> {
+ let M3 = type;
+}
+
+class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ SDPatternOperator m5mask, bits<4> m5or>
+ : InstVRRb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ m5mask:$M5)))],
+ m5or> {
+ let M4 = type;
+}
+
+multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m5or> {
+ def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (m5or, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+ let Defs = [CC] in
+ def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add(!and (m5or, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+}
+
+class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2>
+ : InstVRRc<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $V3, $M4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx4:$M4)))]> {
+ let M5 = 0;
+ let M6 = 0;
+}
+
+class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = type;
+ let M6 = 0;
+}
+
+class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
+ : InstVRRe<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = m5;
+ let M6 = type;
+}
+
+class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
+ : InstVRSb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ cls:$R3,
+ shift12only:$BD2)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M4 = type;
+}
+
+class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
+ : InstVRX<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ bdxaddr12only:$XBD2,
+ index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M5 = type;
+}
+
+class QuaternaryVRRd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
+ bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr2.vt tr2.op:$V4),
+ m6mask:$M6)))],
+ m6or> {
+ let M5 = type;
+}
+
+multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m6or> {
+ def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (m6or, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+ let Defs = [CC] in
+ def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add (!and (m6or, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+}
+
class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
: InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
@@ -1330,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
// A floating-point load-and-test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
RegisterOperand cls> {
def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
- let isCodeGenOnly = 1 in
+ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
}
@@ -1577,6 +2401,26 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
+
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
@@ -1593,6 +2437,10 @@ class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
let Constraints = "$R1 = $R1src";
}
+// An alias of a BinaryVRRf, but with different register sizes.
+class BinaryAliasVRRf<RegisterOperand cls>
+ : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
+
// An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 8ff9553..9059885 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -423,7 +423,7 @@ static MachineInstr *getDef(unsigned Reg,
}
// Return true if MI is a shift of type Opcode by Imm bits.
-static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
return (MI->getOpcode() == Opcode &&
!MI->getOperand(2).getReg() &&
MI->getOperand(3).getImm() == Imm);
@@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
+ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR32;
+ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR64;
+ else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@@ -633,7 +639,7 @@ struct LogicOp {
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
- LLVM_EXPLICIT operator bool() const { return RegSize; }
+ explicit operator bool() const { return RegSize; }
unsigned RegSize, ImmLSB, ImmSize;
};
@@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Start, End;
if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
unsigned NewOpcode;
- if (And.RegSize == 64)
+ if (And.RegSize == 64) {
NewOpcode = SystemZ::RISBG;
- else {
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (STI.hasMiscellaneousExtensions())
+ NewOpcode = SystemZ::RISBGN;
+ } else {
NewOpcode = SystemZ::RISBMux;
Start &= 31;
End &= 31;
@@ -743,11 +752,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
}
-MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Opcode = MI->getOpcode();
@@ -862,9 +870,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
return nullptr;
}
@@ -1114,6 +1122,16 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
+ } else if (RC == &SystemZ::VR32BitRegClass) {
+ LoadOpcode = SystemZ::VL32;
+ StoreOpcode = SystemZ::VST32;
+ } else if (RC == &SystemZ::VR64BitRegClass) {
+ LoadOpcode = SystemZ::VL64;
+ StoreOpcode = SystemZ::VST64;
+ } else if (RC == &SystemZ::VF128BitRegClass ||
+ RC == &SystemZ::VR128BitRegClass) {
+ LoadOpcode = SystemZ::VL;
+ StoreOpcode = SystemZ::VST;
} else
llvm_unreachable("Unsupported regclass to load or store");
}
@@ -1147,17 +1165,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
- case SystemZ::L: return SystemZ::LT;
- case SystemZ::LY: return SystemZ::LT;
- case SystemZ::LG: return SystemZ::LTG;
- case SystemZ::LGF: return SystemZ::LTGF;
- case SystemZ::LR: return SystemZ::LTR;
- case SystemZ::LGFR: return SystemZ::LTGFR;
- case SystemZ::LGR: return SystemZ::LTGR;
- case SystemZ::LER: return SystemZ::LTEBR;
- case SystemZ::LDR: return SystemZ::LTDBR;
- case SystemZ::LXR: return SystemZ::LTXBR;
- default: return 0;
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
+ // On zEC12 we prefer to use RISBGN. But if there is a chance to
+ // actually use the condition code, we may turn it back into RISBG.
+ // Note that RISBG is not really a "load-and-test" instruction,
+ // but it sets the same condition code values, so it is OK to use here.
+ case SystemZ::RISBGN: return SystemZ::RISBG;
+ default: return 0;
}
}
@@ -1178,6 +1201,7 @@ static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
unsigned &Start, unsigned &End) const {
// Reject trivial all-zero masks.
+ Mask &= allOnes(BitSize);
if (Mask == 0)
return false;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index d2e3f54..b55810b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
// SystemZ MachineOperand target flags.
enum {
// Masks out the bits for the access model.
- MO_SYMBOL_MODIFIER = (1 << 0),
+ MO_SYMBOL_MODIFIER = (3 << 0),
// @GOT (aka @GOTENT)
- MO_GOT = (1 << 0)
+ MO_GOT = (1 << 0),
+
+ // @INDNTPOFF
+ MO_INDNTPOFF = (2 << 0)
};
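Widening MO_SYMBOL_MODIFIER from one bit to two lets MO_GOT and MO_INDNTPOFF coexist as distinct access models selected by masking. A sketch of the intended use (illustrative only; Flags is a hypothetical operand flag value):

    // Illustrative sketch: selecting the access model from operand flags.
    #include <cstdio>

    enum {
      MO_SYMBOL_MODIFIER = (3 << 0),
      MO_GOT = (1 << 0),
      MO_INDNTPOFF = (2 << 0)
    };

    int main() {
      unsigned Flags = MO_INDNTPOFF; // hypothetical flags on an operand
      switch (Flags & MO_SYMBOL_MODIFIER) {
      case MO_GOT:       printf("@GOT access\n"); break;
      case MO_INDNTPOFF: printf("@INDNTPOFF access\n"); break;
      default:           printf("no modifier\n"); break;
      }
      return 0;
    }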
// Classifies a branch.
enum BranchType {
@@ -183,11 +186,11 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 902d74d..820f30b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
+// TLS calls. These will be lowered into a call to __tls_get_offset,
+// with an extra relocation specifying the TLS symbol.
+let isCall = 1, Defs = [R14D, CC] in {
+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
+}
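For reference, on GNU targets the general-dynamic form typically assembles to something like "brasl %r14, __tls_get_offset@PLT:tls_gdcall:sym", where the :tls_gdcall: marker attaches the extra TLS relocation to the call. The exact assembler syntax shown here is illustrative, not taken from this patch.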
+
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
-def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
"bras\t$R1, $I2", []>;
-def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
"brasl\t$R1, $I2", []>;
def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
"basr\t$R1, $R2", []>;
@@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
[(set GR64:$R1, pcrel32:$I2)]>;
}
+// Load the Global Offset Table address. This will be lowered into a
+// larl $R1, _GLOBAL_OFFSET_TABLE_
+// instruction.
+def GOT : Alias<6, (outs GR64:$R1), (ins),
+ [(set GR64:$R1, (global_offset_table))]>;
+
//===----------------------------------------------------------------------===//
// Absolute and Negation
//===----------------------------------------------------------------------===//
@@ -1045,6 +1061,10 @@ let Defs = [CC] in {
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
let Predicates = [FeatureHighWord] in {
@@ -1342,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
@@ -1366,6 +1440,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
new file mode 100644
index 0000000..c101e43
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -0,0 +1,1097 @@
+//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Register move.
+ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+ def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+ def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
+
+ // Load GR from VR element.
+ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
+ def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
+ def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
+ def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
+
+ // Load VR element from GR.
+ def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
+ v128b, v128b, GR32, 0>;
+ def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
+ v128h, v128h, GR32, 1>;
+ def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert,
+ v128f, v128f, GR32, 2>;
+ def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert,
+ v128g, v128g, GR64, 3>;
+
+ // Load VR from GRs disjoint.
+ def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>;
+ def VLVGP32 : BinaryAliasVRRf<GR32>;
+}
+
+// Extractions always assign to the full GR64, even if the element would
+// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a
+// subreg of the result.
+class VectorExtractSubreg<ValueType type, Instruction insn>
+ : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)),
+ (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>;
+
+def : VectorExtractSubreg<v16i8, VLGVB>;
+def : VectorExtractSubreg<v8i16, VLGVH>;
+def : VectorExtractSubreg<v4i32, VLGVF>;
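+
+// As a sketch (register choices assumed, not mandated): extracting i32
+// element 2 of a v4i32 in %v24 becomes "vlgvf %r2, %v24, 2", with the
+// i32 result read from the low 32 bits of %r2.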
+
+//===----------------------------------------------------------------------===//
+// Immediate instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Load element immediate.
+ //
+ // We want these instructions to be used ahead of VLVG* where possible.
+ // However, VLVG* takes a variable BD-format index whereas VLEI takes
+ // a plain immediate index. This means that VLVG* has an extra "base"
+ // register operand and is 3 units more complex. Bumping the complexity
+ // of the VLEI* instructions by 4 means that they are strictly better
+ // than VLVG* in cases where both forms match.
+ let AddedComplexity = 4 in {
+ def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert,
+ v128b, v128b, imm32sx16trunc, imm32zx4>;
+ def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert,
+ v128h, v128h, imm32sx16trunc, imm32zx3>;
+ def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert,
+ v128f, v128f, imm32sx16, imm32zx2>;
+ def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
+ v128g, v128g, imm64sx16, imm32zx1>;
+ }
+
+ // Replicate immediate.
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+}
+
+//===----------------------------------------------------------------------===//
+// Loads
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Load.
+ def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
+
+ // Load to block boundary. The number of loaded bytes is only known
+ // at run time. The instruction is really polymorphic, but v128b matches
+ // the return type of the associated intrinsic.
+ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>;
+
+ // Load count to block boundary.
+ let Defs = [CC] in
+ def LCBB : InstRXE<0xE727, (outs GR32:$R1),
+ (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ "lcbb\t$R1, $XBD2, $M3",
+ [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
+ imm32zx4:$M3))]>;
+
+ // Load with length. The number of loaded bytes is only known at run time.
+ def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
+
+ // Load multiple.
+ def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
+
+ // Load and replicate.
+ def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>;
+ def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
+ def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
+ def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)),
+ (VLREPF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
+ (VLREPG bdxaddr12only:$addr)>;
+
+ // Use VLREP to load subvectors. These patterns use "12pair" because
+ // LEY and LDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
+ // Load logical element and zero.
+ def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
+ def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
+ def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
+ def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ (VLLEZF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ (VLLEZG bdxaddr12only:$addr)>;
+
+ // Load element.
+ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
+ def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
+ def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
+ def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
+ def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index),
+ (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
+ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Gather element.
+ def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
+ def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
+}
+
+// Use replicating loads if we're inserting a single element into an
+// undefined vector. This avoids a false dependency on the previous
+// register contents.
+multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
+ SDPatternOperator load, ValueType scalartype> {
+ def : Pat<(vectype (z_vector_insert
+ (undef), (scalartype (load bdxaddr12only:$addr)), 0)),
+ (vlrep bdxaddr12only:$addr)>;
+ def : Pat<(vectype (scalar_to_vector
+ (scalartype (load bdxaddr12only:$addr)))),
+ (vlrep bdxaddr12only:$addr)>;
+}
+defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
+defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
+defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
+defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
+defm : ReplicatePeephole<VLREPF, v4f32, load, f32>;
+defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
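+
+// For instance (sketch), "(v4f32 (scalar_to_vector (load $addr)))" now
+// selects "vlrepf" rather than a "vlef" into an undefined register, so no
+// stale value of the destination vector register is read.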
+
+//===----------------------------------------------------------------------===//
+// Stores
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Store.
+ def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
+
+ // Store with length. The number of stored bytes is only known at run time.
+ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
+
+ // Store multiple.
+ def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
+
+ // Store element.
+ def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
+ def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
+ def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
+ def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
+ def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr,
+ imm32zx2:$index),
+ (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
+ imm32zx1:$index),
+ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Use VSTE to store subvectors. These patterns use "12pair" because
+ // STEY and STDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
+ // Scatter element.
+ def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
+ def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
+}
+
+//===----------------------------------------------------------------------===//
+// Selects and permutes
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Merge high.
+ def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
+ def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
+ def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
+ def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>;
+ def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
+
+ // Merge low.
+ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
+ def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
+ def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
+ def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>;
+ def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
+
+ // Permute.
+ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
+
+ // Permute doubleword immediate.
+ def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
+
+ // Replicate.
+ def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
+ def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
+ def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
+ def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
+ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPF VR128:$vec, imm32zx16:$index)>;
+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPG VR128:$vec, imm32zx16:$index)>;
+
+ // Select.
+ def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
+}
+
+//===----------------------------------------------------------------------===//
+// Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Pack.
+ def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
+ def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
+ def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
+
+ // Pack saturate.
+ defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
+ v128b, v128h, 1>;
+ defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
+ v128h, v128f, 2>;
+ defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc,
+ v128f, v128g, 3>;
+
+ // Pack saturate logical.
+ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
+ v128b, v128h, 1>;
+ defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
+ v128h, v128f, 2>;
+ defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc,
+ v128f, v128g, 3>;
+
+ // Sign-extend to doubleword.
+ def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>;
+ def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
+ def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
+ def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>;
+ def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
+ def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
+
+ // Unpack high.
+ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
+ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
+ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
+
+ // Unpack logical high.
+ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
+ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
+ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
+
+ // Unpack low.
+ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
+ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
+ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
+
+ // Unpack logical low.
+ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
+ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
+ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instantiating generic operations for specific types.
+//===----------------------------------------------------------------------===//
+
+multiclass GenericVectorOps<ValueType type, ValueType inttype> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (type VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+ def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)),
+ (VSEL VR128:$y, VR128:$z, VR128:$x)>;
+ def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)),
+ (VSEL VR128:$z, VR128:$y, VR128:$x)>;
+ }
+}
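+
+// Note that the final pattern above folds away an explicit complement:
+// a vselect on (z_vnot $x) is handled by swapping the VSEL data operands
+// rather than materializing ~$x in a register.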
+
+defm : GenericVectorOps<v16i8, v16i8>;
+defm : GenericVectorOps<v8i16, v8i16>;
+defm : GenericVectorOps<v4i32, v4i32>;
+defm : GenericVectorOps<v2i64, v2i64>;
+defm : GenericVectorOps<v4f32, v4i32>;
+defm : GenericVectorOps<v2f64, v2i64>;
+
+//===----------------------------------------------------------------------===//
+// Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+
+ // Add compute carry.
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+
+ // Add with carry.
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+
+ // Add with carry compute carry.
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+
+ // And.
+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
+
+ // And with complement.
+ def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
+
+ // Average.
+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+
+ // Average logical.
+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+
+ // Checksum.
+ def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
+
+ // Count leading zeros.
+ def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
+ def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
+ def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
+ def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
+
+ // Count trailing zeros.
+ def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
+ def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
+ def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
+ def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
+
+ // Exclusive or.
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+
+ // Galois field multiply sum.
+ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
+ def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
+ def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
+ def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
+
+ // Galois field multiply sum and accumulate.
+ def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
+ def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
+ def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
+ def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
+
+ // Load complement.
+ def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
+ def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
+ def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
+ def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
+
+ // Load positive.
+ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
+ def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
+ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
+ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
+
+ // Maximum.
+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+
+ // Maximum logical.
+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+
+ // Minimum.
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+
+ // Minimum logical.
+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+
+ // Multiply and add low.
+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+
+ // Multiply and add high.
+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+
+ // Multiply and add logical high.
+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+
+ // Multiply and add even.
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+
+ // Multiply and add logical even.
+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+
+ // Multiply and add odd.
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+
+ // Multiply and add logical odd.
+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+
+ // Multiply high.
+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+
+ // Multiply logical high.
+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+
+ // Multiply low.
+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+
+ // Multiply even.
+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+
+ // Multiply logical even.
+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+
+ // Multiply odd.
+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+
+ // Multiply logical odd.
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+
+ // Nor.
+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+
+ // Or.
+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+
+ // Population count.
+ def VPOPCT : BinaryVRRa<"vpopct", 0xE750>;
+ def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
+
+ // Element rotate left logical (with vector shift amount).
+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
+ v128b, v128b, 0>;
+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
+ v128h, v128h, 1>;
+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
+ v128f, v128f, 2>;
+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
+ v128g, v128g, 3>;
+
+ // Element rotate left logical (with scalar shift amount).
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
+ def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
+ def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
+ def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+
+ // Element rotate and insert under mask.
+ def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>;
+ def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>;
+ def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>;
+ def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>;
+
+ // Element shift left (with vector shift amount).
+ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
+ def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
+ def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
+ def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
+
+ // Element shift left (with scalar shift amount).
+ def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
+ def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
+ def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
+ def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with vector shift amount).
+ def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
+ def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
+ def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
+ def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with scalar shift amount).
+ def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
+ def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
+ def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
+ def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right logical (with vector shift amount).
+ def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
+ def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
+ def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
+ def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
+
+ // Element shift right logical (with scalar shift amount).
+ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
+ def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
+ def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
+ def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
+
+ // Shift left.
+ def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>;
+
+ // Shift left by byte.
+ def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>;
+
+ // Shift left double by byte.
+ def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
+ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
+ (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
+
+ // Shift right arithmetic.
+ def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
+
+ // Shift right arithmetic by byte.
+ def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>;
+
+ // Shift right logical.
+ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>;
+
+ // Shift right logical by byte.
+ def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
+
+ // Subtract.
+ def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
+ def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
+ def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
+ def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
+ def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
+
+ // Subtract compute borrow indication.
+ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
+ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
+ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
+ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
+ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow indication.
+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow compute borrow indication.
+ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
+ v128q, v128q, 4>;
+
+ // Sum across doubleword.
+ def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
+ def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
+
+ // Sum across quadword.
+ def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
+ def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
+
+ // Sum across word.
+ def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
+ def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
+}
+
+// Instantiate the bitwise ops for type TYPE.
+multiclass BitwiseVectorOps<ValueType type> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
+ def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
+ (VNC VR128:$x, VR128:$y)>;
+ def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
+ def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$z),
+ (and VR128:$y, (z_vnot VR128:$z)))),
+ (VSEL VR128:$x, VR128:$y, VR128:$z)>;
+ def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
+ (VNO VR128:$x, VR128:$y)>;
+ def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
+ }
+}
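+
+// Note the (or (and $x, $z), (and $y, (z_vnot $z))) pattern above: it
+// recognizes the classic bitwise-select idiom and maps it to a single vsel.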
+
+defm : BitwiseVectorOps<v16i8>;
+defm : BitwiseVectorOps<v8i16>;
+defm : BitwiseVectorOps<v4i32>;
+defm : BitwiseVectorOps<v2i64>;
+
+// Instantiate additional patterns for absolute-related expressions on
+// type TYPE. LC is the negate instruction for TYPE and LP is the absolute
+// instruction.
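+//
+// As a worked example (for v4i32): "x >>a 31" is all-ones exactly in the
+// negative lanes, so "(mask & -x) | (~mask & x)" yields |x| per lane; the
+// patterns below match both that form and the equivalent vselect forms.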
+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc,
+ Instruction lp, int shift> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ (z_vneg VR128:$x)),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ VR128:$x))),
+ (lp VR128:$x)>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ VR128:$x),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ (z_vneg VR128:$x)))),
+ (lc (lp VR128:$x))>;
+ }
+}
+
+defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>;
+defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
+defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
+defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
+
+// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the
+// signed or unsigned "set if greater than" comparison instruction and
+// MIN and MAX are the associated minimum and maximum instructions.
+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
+ Instruction min, Instruction max> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
+ (max VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$x, VR128:$y)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$y, VR128:$x)),
+ (max VR128:$x, VR128:$y)>;
+ }
+}
+
+// Signed min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
+
+// Unsigned min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
+
+//===----------------------------------------------------------------------===//
+// Integer comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Element compare.
+ let Defs = [CC] in {
+ def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>;
+ def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
+ def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
+ def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>;
+ }
+
+ // Element compare logical.
+ let Defs = [CC] in {
+ def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>;
+ def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
+ def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
+ def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>;
+ }
+
+ // Compare equal.
+ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128b, v128b, 0>;
+ defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128h, v128h, 1>;
+ defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128f, v128f, 2>;
+ defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128g, v128g, 3>;
+
+ // Compare high.
+ defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
+ v128b, v128b, 0>;
+ defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
+ v128h, v128h, 1>;
+ defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs,
+ v128f, v128f, 2>;
+ defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
+ v128g, v128g, 3>;
+
+ // Compare high logical.
+ defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128b, v128b, 0>;
+ defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128h, v128h, 1>;
+ defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128f, v128f, 2>;
+ defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128g, v128g, 3>;
+
+ // Test under mask.
+ let Defs = [CC] in
+ def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// See comments in SystemZInstrFP.td for the suppression flags and
+// rounding modes.
+multiclass VectorRounding<Instruction insn, TypedReg tr> {
+ def : FPConversion<insn, frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, frnd, tr, tr, 4, 1>;
+}
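+
+// A hedged reading of the constants above: the first is the
+// inexact-suppression flag (4 = suppress IEEE inexact) and the second the
+// rounding mode (0 = current FPC mode, 1 = nearest with ties away from
+// zero, 5 = toward zero, 6 = toward +infinity, 7 = toward -infinity).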
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+
+ // Convert from fixed 64-bit.
+ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert from logical 64-bit.
+ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert to fixed 64-bit.
+ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+
+ // Convert to logical 64-bit.
+ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+
+ // Divide.
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+
+ // Load FP integer.
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ defm : VectorRounding<VFIDB, v128db>;
+ defm : VectorRounding<WFIDB, v64db>;
+
+ // Load lengthened.
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
+
+ // Load rounded.
+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
+ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
+
+ // Multiply.
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+
+ // Multiply and add.
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+
+ // Multiply and subtract.
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
+
+ // Load complement.
+ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
+
+ // Load negative.
+ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
+
+ // Load positive.
+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
+
+ // Square root.
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
+
+ // Subtract.
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
+
+ // Test data class immediate.
+ let Defs = [CC] in {
+ def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
+ def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Compare scalar.
+ let Defs = [CC] in
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+
+ // Compare and signal scalar.
+ let Defs = [CC] in
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+
+ // Compare equal.
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128g, v128db, 3, 0>;
+ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high.
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128g, v128db, 3, 0>;
+ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high or equal.
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128g, v128db, 3, 0>;
+ defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Replicating scalars
+//===----------------------------------------------------------------------===//
+
+// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
+// INDEX is the element index of the scalar's low part within the first
+// doubleword, i.e. (8 / the element size in bytes) - 1.
+class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
+ : Pat<(type (z_replicate GR32:$scalar)),
+ (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
+
+def : VectorReplicateScalar<v16i8, VREPB, 7>;
+def : VectorReplicateScalar<v8i16, VREPH, 3>;
+def : VectorReplicateScalar<v4i32, VREPF, 1>;
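+
+// Worked example (sketch): VLVGP32 leaves the GR32 in the low 32 bits of
+// each doubleword (the upper halves are don't-care), so for v8i16 the
+// scalar's low halfword is element 3 of the first doubleword;
+// "vreph ..., 3" then broadcasts it to every lane.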
+
+// i64 replications are just a single instruction.
+def : Pat<(v2i64 (z_replicate GR64:$scalar)),
+ (VLVGP GR64:$scalar, GR64:$scalar)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+// Moving 32-bit values between GPRs and FPRs can be done using VLVGF
+// and VLGVF.
+def LEFR : UnaryAliasVRS<VR32, GR32>;
+def LFER : UnaryAliasVRS<GR64, VR32>;
+def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
+def : Pat<(i32 (bitconvert (f32 VR32:$src))),
+ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
+
+// Floating-point values are stored in element 0 of the corresponding
+// vector register. Scalar to vector conversion is just a subreg and
+// scalar replication can just replicate element 0 of the vector register.
+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
+ SubRegIndex subreg> {
+ def : Pat<(vt (scalar_to_vector cls:$scalar)),
+ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
+ def : Pat<(vt (z_replicate cls:$scalar)),
+ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
+ subreg), 0)>;
+}
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+
+// Match v2f64 insertions. The AddedComplexity counters the 3 added by
+// TableGen for the base register operand in VLVG-based integer insertions
+// and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
+ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), VR128:$vec, 1)>;
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
+ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), 0)>;
+}
+
+// We extract floating-point element X by replicating (for elements other
+// than 0) and then taking a high subreg. The AddedComplexity counters the
+// 3 added by TableGen for the base register operand in VLGV-based integer
+// extractions and ensures that this version is strictly better.
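+// As a sketch (register choices assumed), extracting f32 element 2 becomes
+// "vrepf %v0, %v24, 2" followed by a subreg_r32 read of %v0, while element 0
+// is just a subreg_r32 read of the source register itself.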
+let AddedComplexity = 4 in {
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+}
+
+//===----------------------------------------------------------------------===//
+// String instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc,
+ v128b, v128b, 0, 0>;
+ defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc,
+ v128h, v128h, 1, 0>;
+ defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc,
+ v128f, v128f, 2, 0>;
+ defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc,
+ v128b, v128b, 0, 2>;
+ defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc,
+ v128h, v128h, 1, 2>;
+ defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc,
+ v128f, v128f, 2, 2>;
+
+ defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc,
+ v128b, v128b, 0, 0, 1>;
+ defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc,
+ v128h, v128h, 1, 0, 1>;
+ defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc,
+ v128f, v128f, 2, 0, 1>;
+ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc,
+ v128b, v128b, 0, 2, 3>;
+ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc,
+ v128h, v128h, 1, 2, 3>;
+ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc,
+ v128f, v128f, 2, 2, 3>;
+
+ defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc,
+ v128b, v128b, 0, 0, 1>;
+ defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc,
+ v128h, v128h, 1, 0, 1>;
+ defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc,
+ v128f, v128f, 2, 0, 1>;
+ defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
+ z_vfenez_cc, v128b, v128b, 0, 2, 3>;
+ defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
+ z_vfenez_cc, v128h, v128h, 1, 2, 3>;
+ defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
+ z_vfenez_cc, v128f, v128f, 2, 2, 3>;
+
+ defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc,
+ v128b, v128b, 0>;
+ defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc,
+ v128h, v128h, 1>;
+ defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc,
+ v128f, v128f, 2>;
+
+ defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+ z_vstrc_cc, v128b, v128b, 0, 0>;
+ defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
+ z_vstrc_cc, v128h, v128h, 1, 0>;
+ defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
+ z_vstrc_cc, v128f, v128f, 2, 0>;
+ defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
+ z_vstrcz_cc, v128b, v128b, 0, 2>;
+ defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
+ z_vstrcz_cc, v128h, v128h, 1, 2>;
+ defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
+ z_vstrcz_cc, v128f, v128f, 2, 2>;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
new file mode 100644
index 0000000..24165be
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -0,0 +1,143 @@
+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines multiple accesses to local-dynamic TLS variables so that
+// the TLS base address for the module is only fetched once per execution path
+// through the function.
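+//
+// Sketch of the intended effect (virtual register numbering assumed): the
+// base address returned in %r2 by the first __tls_get_offset call on a path
+// is copied into a virtual register, and each dominated TLS_LDCALL is then
+// rewritten as a plain copy from that register back into %r2.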
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SystemZLDCleanup : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZLDCleanup(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
+
+ const char *getPassName() const override {
+ return "SystemZ Local Dynamic TLS Access Clean-up";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
+ MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
+
+ const SystemZInstrInfo *TII;
+ MachineFunction *MF;
+};
+
+char SystemZLDCleanup::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
+ return new SystemZLDCleanup(TM);
+}
+
+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MF = &F;
+
+ SystemZMachineFunctionInfo *MFI = F.getInfo<SystemZMachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+}
+
+// Visit the dominator subtree rooted at Node in pre-order.
+// If TLSBaseAddrReg is nonzero, use it to replace any
+// TLS_LDCALL instructions. Otherwise, create the register
+// when the first such instruction is seen, and then use it
+// as we encounter more instructions.
+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
+ unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case SystemZ::TLS_LDCALL:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+
+ return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ // Insert a Copy from TLSBaseAddrReg to R2.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SystemZ::R2D)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_LDCALL instruction.
+ I->eraseFromParent();
+
+ return Copy;
+}
+
+// Create a virtual register in *TLSBaseAddrReg, and populate it by
+// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
+ unsigned *TLSBaseAddrReg) {
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ // Insert a copy from R2 to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(SystemZ::R2D);
+
+ return Copy;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
index df561e2..a1dceda 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -11,6 +11,7 @@
#include "SystemZAsmPrinter.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -22,6 +23,8 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
return MCSymbolRefExpr::VK_None;
case SystemZII::MO_GOT:
return MCSymbolRefExpr::VK_GOT;
+ case SystemZII::MO_INDNTPOFF:
+ return MCSymbolRefExpr::VK_INDNTPOFF;
}
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
@@ -77,14 +80,14 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO,
MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
case MachineOperand::MO_Register:
- return MCOperand::CreateReg(MO.getReg());
+ return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::createImm(MO.getImm());
default: {
MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
- return MCOperand::CreateExpr(getExpr(MO, Kind));
+ return MCOperand::createExpr(getExpr(MO, Kind));
}
}
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 92c2ce7..34fc36d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
bool ManipulatesSP;
+ unsigned NumLocalDynamics;
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
: LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
- VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
+ NumLocalDynamics(0) {}
// Get and set the first call-saved GPR that should be saved and restored
// by this function. This is 0 if no GPRs need to be saved or restored.
@@ -61,6 +63,10 @@ public:
// e.g. through STACKSAVE or STACKRESTORE.
bool getManipulatesSP() const { return ManipulatesSP; }
void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
+
+ // Count number of local-dynamic TLS symbols used.
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
index 7be81dc..9af90d4 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -16,6 +16,11 @@ class ImmediateAsmOperand<string name>
let Name = name;
let RenderMethod = "addImmOperands";
}
+class ImmediateTLSAsmOperand<string name>
+ : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmTLSOperands";
+}
// Constructs both a DAG pattern and instruction operand for an immediate
// of type VT. PRED returns true if a node is acceptable and XFORM returns
@@ -34,6 +39,11 @@ class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
let PredicateMethod = "isImm";
let ParserMethod = "parsePCRel"##size;
}
+class PCRelTLSAsmOperand<string size>
+ : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
+ let PredicateMethod = "isImmTLS";
+ let ParserMethod = "parsePCRelTLS"##size;
+}
// Constructs an operand for a PC-relative address with address type VT.
// ASMOP is the associated asm operand.
@@ -41,6 +51,10 @@ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
let PrintMethod = "printPCRelOperand";
let ParserMatchClass = asmop;
}
+class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelTLSOperand";
+ let ParserMatchClass = asmop;
+}
// Constructs both a DAG pattern and instruction operand for a PC-relative
// address with address size VT. SELF is the name of the operand and
@@ -64,6 +78,22 @@ class AddressAsmOperand<string format, string bitsize, string dispsize,
let RenderMethod = "add"##format##"Operands";
}
+// Constructs an instruction operand for an addressing mode. FORMAT,
+// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
+// AddressAsmOperand. OPERANDS is a list of individual operands
+// (base register, displacement, etc.).
+class AddressOperand<string bitsize, string dispsize, string length,
+ string format, dag operands>
+ : Operand<!cast<ValueType>("i"##bitsize)> {
+ let PrintMethod = "print"##format##"Operand";
+ let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ let DecoderMethod =
+ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ let MIOperandInfo = operands;
+ let ParserMatchClass =
+ !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
+}
+
// Constructs both a DAG pattern and instruction operand for an addressing mode.
// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands
@@ -79,15 +109,7 @@ class AddressingMode<string seltype, string bitsize, string dispsize,
: ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
"select"##seltype##dispsize##suffix##length,
[add, sub, or, frameindex, z_adjdynalloc]>,
- Operand<!cast<ValueType>("i"##bitsize)> {
- let PrintMethod = "print"##format##"Operand";
- let EncoderMethod = "get"##format##dispsize##length##"Encoding";
- let DecoderMethod =
- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
- let MIOperandInfo = operands;
- let ParserMatchClass =
- !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
-}
+ AddressOperand<bitsize, dispsize, length, format, operands>;
// An addressing mode with a base and displacement but no index.
class BDMode<string type, string bitsize, string dispsize, string suffix>
@@ -111,6 +133,13 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
!cast<Immediate>("disp"##dispsize##"imm"##bitsize),
!cast<Immediate>("imm"##bitsize))>;
+// An addressing mode with a base, displacement and a vector index.
+class BDVMode<string bitsize, string dispsize>
+ : AddressOperand<bitsize, dispsize, "", "BDVAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("VR128"))>;
+
//===----------------------------------------------------------------------===//
// Extracting immediate operands from nodes
// These all create MVT::i64 nodes to ensure the value is not sign-extended
@@ -120,82 +149,105 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
// Bits 0-15 (counting from the lsb).
def LL16 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 16-31 (counting from the lsb).
def LH16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 32-47 (counting from the lsb).
def HL16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 48-63 (counting from the lsb).
def HH16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Low 32 bits.
def LF32 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// High 32 bits.
def HF32 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() >> 32;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Truncate an immediate to an 8-bit signed quantity.
def SIMM8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to an 8-bit unsigned quantity.
def UIMM8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity and mask off the low bit.
+def UIMM8EVEN : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 12-bit unsigned quantity.
+def UIMM12 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 16-bit signed quantity.
def SIMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 16-bit unsigned quantity.
def UIMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 32-bit signed quantity.
def SIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 32-bit unsigned quantity.
def UIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Negate and then truncate an immediate to a 32-bit unsigned quantity.
def NEGIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
//===----------------------------------------------------------------------===//
// Immediate asm operands.
//===----------------------------------------------------------------------===//
+def U1Imm : ImmediateAsmOperand<"U1Imm">;
+def U2Imm : ImmediateAsmOperand<"U2Imm">;
+def U3Imm : ImmediateAsmOperand<"U3Imm">;
def U4Imm : ImmediateAsmOperand<"U4Imm">;
def U6Imm : ImmediateAsmOperand<"U6Imm">;
def S8Imm : ImmediateAsmOperand<"S8Imm">;
def U8Imm : ImmediateAsmOperand<"U8Imm">;
+def U12Imm : ImmediateAsmOperand<"U12Imm">;
def S16Imm : ImmediateAsmOperand<"S16Imm">;
def U16Imm : ImmediateAsmOperand<"U16Imm">;
def S32Imm : ImmediateAsmOperand<"S32Imm">;
@@ -226,10 +278,28 @@ def imm32lh16c : Immediate<i32, [{
}], LH16, "U16Imm">;
// Short immediates
+def imm32zx1 : Immediate<i32, [{
+ return isUInt<1>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U1Imm">;
+
+def imm32zx2 : Immediate<i32, [{
+ return isUInt<2>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U2Imm">;
+
+def imm32zx3 : Immediate<i32, [{
+ return isUInt<3>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U3Imm">;
+
def imm32zx4 : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U4Imm">;
+// Note: this enforces an even value during code generation only.
+// When used from the assembler, any 4-bit value is allowed.
+def imm32zx4even : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], UIMM8EVEN, "U4Imm">;
+
def imm32zx6 : Immediate<i32, [{
return isUInt<6>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U6Imm">;
@@ -244,6 +314,10 @@ def imm32zx8 : Immediate<i32, [{
def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+def imm32zx12 : Immediate<i32, [{
+ return isUInt<12>(N->getZExtValue());
+}], UIMM12, "U12Imm">;
+
def imm32sx16 : Immediate<i32, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
@@ -370,6 +444,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
// PC-relative asm operands.
def PCRel16 : PCRelAsmOperand<"16">;
def PCRel32 : PCRelAsmOperand<"32">;
+def PCRelTLS16 : PCRelTLSAsmOperand<"16">;
+def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
// PC-relative offsets of a basic block. The offset is sign-extended
// and multiplied by 2.
@@ -382,6 +458,20 @@ def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
let DecoderMethod = "decodePC32DBLOperand";
}
+// Variants of brtarget16/32 with an optional additional TLS symbol.
+// These are used to annotate calls to __tls_get_offset.
+def tlssym : Operand<i64> { }
+def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
+ let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
+ let EncoderMethod = "getPC16DBLTLSEncoding";
+ let DecoderMethod = "decodePC16DBLOperand";
+}
+def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
+ let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
+ let EncoderMethod = "getPC32DBLTLSEncoding";
+ let DecoderMethod = "decodePC32DBLOperand";
+}
+
// A PC-relative offset of a global value. The offset is sign-extended
// and multiplied by 2.
def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
@@ -408,6 +498,7 @@ def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
+def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
// DAG patterns and operands for addressing modes. Each mode has
// the form <type><range><group>[<len>] where:
@@ -420,6 +511,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
// laaddr : like bdxaddr, but used for Load Address operations
// dynalloc : base + displacement + index + ADJDYNALLOC
// bdladdr : base + displacement with a length field
+// bdvaddr : base + displacement with a vector index
//
// <range> is one of:
// 12 : the displacement is an unsigned 12-bit value
@@ -452,6 +544,7 @@ def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
+def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// Miscellaneous
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index c70e662..3c95a1e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
+def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
+ [SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZReplicate : SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>;
+def SDT_ZVecUnaryConv : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisVec<1>]>;
+def SDT_ZVecUnary : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>]>;
+def SDT_ZVecBinary : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_ZVecBinaryInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZVecBinaryConv : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZRotateMask : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZJoinDwords : SDTypeProfile<1, 2,
+ [SDTCisVT<0, v2i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZVecTernary : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisVT<4, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -90,6 +148,7 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
SDNPOutGlue]>;
+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
@@ -100,6 +159,12 @@ def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
SDT_ZWrapOffset, []>;
@@ -114,6 +179,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
@@ -123,6 +189,80 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
[SDNPHasChain, SDNPMayStore]>;
+// Defined because the index is an i32 rather than a pointer.
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
+ SDT_ZInsertVectorElt>;
+def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDT_ZExtractVectorElt>;
+def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
+def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
+def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
+def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
+def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
+def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
+def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
+def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
+def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
+ SDT_ZVecTernaryInt>;
+def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
+def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
+def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
+def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
+def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
+def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
+def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
+def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
+def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
+ [SDNPOutGlue]>;
+def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
+ [SDNPOutGlue]>;
+def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
+ SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
+def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
+ [SDNPOutGlue]>;
+
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
: SDNode<"SystemZISD::"##name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
@@ -172,6 +312,19 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
+def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
+def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
@@ -195,11 +348,21 @@ def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
+// Match extensions of an i32 to an i64, followed by an in-register sign
+// extension from a sub-i32 value.
+def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
+def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
+
// Register zero-extend operations. Sub-32-bit values are represented as i32s.
def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+// Match extensions of an i32 to an i64, followed by an AND of the low
+// i8 or i16 part.
+def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
+def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
+
// Typed floating-point loads.
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
@@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src),
def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+// Integer multiply-and-add
+def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (add (mul node:$src1, node:$src2), node:$src3)>;
+
+// Fused multiply-subtract, using the natural operand order.
+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, (fneg node:$src3))>;
+
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -383,3 +554,110 @@ class loadu<SDPatternOperator operator, SDPatternOperator load = load>
class storeu<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$value, node:$addr),
(store (operator node:$value), node:$addr)>;
+
+// Vector representation of all-zeros and all-ones.
+def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
+def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
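z_byte_mask models the vector generate-byte-mask operation: each of the 16 mask bits selects whether the corresponding result byte is 0xff or 0x00, so mask 0 yields all-zeros and 65535 (0xffff) yields all-ones. A scalar C++ illustration of that assumed expansion (the exact bit-to-byte order is irrelevant for these two masks):

#include <array>
#include <cstdint>

// Hypothetical scalar expansion of BYTE_MASK: bit I of the 16-bit mask
// (counting from the most significant bit) selects byte I of the result.
std::array<uint8_t, 16> expandByteMask(uint16_t Mask) {
  std::array<uint8_t, 16> Bytes;
  for (int I = 0; I < 16; ++I)
    Bytes[I] = (Mask & (1u << (15 - I))) ? 0xff : 0x00;
  return Bytes;
}
// expandByteMask(0) is all-zeros; expandByteMask(65535) is all-ones.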
+
+// Load a scalar and replicate it in all elements of a vector.
+class z_replicate_load<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$addr),
+ (z_replicate (scalartype (load node:$addr)))>;
+def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
+def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
+def z_replicate_loadi32 : z_replicate_load<i32, load>;
+def z_replicate_loadi64 : z_replicate_load<i64, load>;
+def z_replicate_loadf32 : z_replicate_load<f32, load>;
+def z_replicate_loadf64 : z_replicate_load<f64, load>;
+
+// Load a scalar and insert it into a single element of a vector.
+class z_vle<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (z_vector_insert node:$vec, (scalartype (load node:$addr)),
+ node:$index)>;
+def z_vlei8 : z_vle<i32, anyextloadi8>;
+def z_vlei16 : z_vle<i32, anyextloadi16>;
+def z_vlei32 : z_vle<i32, load>;
+def z_vlei64 : z_vle<i64, load>;
+def z_vlef32 : z_vle<f32, load>;
+def z_vlef64 : z_vle<f64, load>;
+
+// Load a scalar and insert it into the low element of the high i64 of a
+// zeroed vector.
+class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
+ : PatFrag<(ops node:$addr),
+ (z_vector_insert (z_vzero),
+ (scalartype (load node:$addr)), (i32 index))>;
+def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
+def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
+def z_vllezi32 : z_vllez<i32, load, 1>;
+def z_vllezi64 : PatFrag<(ops node:$addr),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+// We use high merges to form a v4f32 from four f32s. Propagating zero
+// into all elements but index 1 gives this expression.
+def z_vllezf32 : PatFrag<(ops node:$addr),
+ (bitconvert
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero))))>;
+def z_vllezf64 : PatFrag<(ops node:$addr),
+ (z_merge_high
+ (scalar_to_vector (f64 (load node:$addr))),
+ (z_vzero))>;
+
+// Store one element of a vector.
+class z_vste<ValueType scalartype, SDPatternOperator store>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (store (scalartype (z_vector_extract node:$vec, node:$index)),
+ node:$addr)>;
+def z_vstei8 : z_vste<i32, truncstorei8>;
+def z_vstei16 : z_vste<i32, truncstorei16>;
+def z_vstei32 : z_vste<i32, store>;
+def z_vstei64 : z_vste<i64, store>;
+def z_vstef32 : z_vste<f32, store>;
+def z_vstef64 : z_vste<f64, store>;
+
+// Arithmetic negation on vectors.
+def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+
+// Bitwise negation on vectors.
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+
+// Signed "integer greater than zero" on vectors.
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+
+// Signed "integer less than zero" on vectors.
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+
+// Integer absolute on vectors.
+class z_viabs<int shift>
+ : PatFrag<(ops node:$src),
+ (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
+ (z_vsra_by_scalar node:$src, (i32 shift)))>;
+def z_viabs8 : z_viabs<7>;
+def z_viabs16 : z_viabs<15>;
+def z_viabs32 : z_viabs<31>;
+def z_viabs64 : z_viabs<63>;
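The z_viabs fragments match the branch-free absolute-value identity abs(x) = (x + s) ^ s with s = x >> (N-1), where the arithmetic shift smears the sign bit across the element. A scalar sketch for 32-bit elements (shift 31):

#include <cstdint>

// Scalar form of the shift/add/xor pattern above. The arithmetic right
// shift yields 0 for non-negative X and -1 for negative X; adding and
// xoring that value negates X exactly when it was negative.
int32_t iabs32(int32_t X) {
  int32_t Sign = X >> 31; // 0 or -1
  return (X + Sign) ^ Sign;
}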
+
+// Sign-extend the i64 elements of a vector.
+class z_vse<int shift>
+ : PatFrag<(ops node:$src),
+ (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>;
+def z_vsei8 : z_vse<56>;
+def z_vsei16 : z_vse<48>;
+def z_vsei32 : z_vse<32>;
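Similarly, z_vse sign-extends a narrow value in the low bits of each i64 element by shifting it up to the sign bit and arithmetically shifting it back down. For an i8 payload (shift 56), the scalar equivalent is:

#include <cstdint>

// Scalar form of the z_vsei8 shift pair. The left shift is done on an
// unsigned value to avoid signed-overflow pitfalls; the arithmetic right
// shift then replicates the sign bit through the upper 56 bits.
int64_t signExtend8In64(int64_t X) {
  return (int64_t)((uint64_t)X << 56) >> 56;
}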
+
+// ...and again with the extensions being done on individual i64 scalars.
+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
+ : PatFrag<(ops node:$src),
+ (z_join_dwords
+ (operator (z_vector_extract node:$src, index1)),
+ (operator (z_vector_extract node:$src, index2)))>;
+def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>;
+def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
+def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
index e307f8a..16a7ed7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
// The sign of the zero makes no difference.
def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
}
+
+// Use INSN to perform binary operation OPERATOR of type VT
+// on registers of class CLS.
+class BinaryRRWithType<Instruction insn, RegisterOperand cls,
+ SDPatternOperator operator, ValueType vt>
+ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
+
+// Use INSN to perform conversion operation OPERATOR, with the input being
+// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions
+// and 0 to allow them. MODE is the rounding mode to use.
+class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
+ TypedReg tr2, bits<3> suppress, bits<4> mode>
+ : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
+ (insn tr2.op:$vec, suppress, mode)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
index e6b58f1..32fbe5a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
class SystemZFeature<string extname, string intname, string desc>
- : Predicate<"Subtarget.has"##intname##"()">,
+ : Predicate<"Subtarget->has"##intname##"()">,
AssemblerPredicate<"Feature"##intname, extname>,
SubtargetFeature<extname, "Has"##intname, "true", desc>;
class SystemZMissingFeature<string intname>
- : Predicate<"!Subtarget.has"##intname##"()">;
+ : Predicate<"!Subtarget->has"##intname##"()">;
def FeatureDistinctOps : SystemZFeature<
"distinct-ops", "DistinctOps",
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZFeature<
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
+def FeatureVector : SystemZFeature<
+ "vector", "Vector",
+ "Assume that the vectory facility is installed"
+>;
+def FeatureNoVector : SystemZMissingFeature<"Vector">;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
+def : Processor<"z13", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureTransactionalExecution, FeatureProcessorAssist,
+ FeatureVector]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 64f5eeb..7cabea9 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -28,7 +28,8 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_SystemZ_RegMask;
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 212fe91..a0db5a9 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -43,9 +43,9 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
- override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 47ac20d..85aa0a6 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -25,20 +25,24 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
+def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
+def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
}
-// Define a register class that contains values of type TYPE and an
+// Define a register class that contains values of types TYPES and an
// associated operand called NAME. SIZE is the size and alignment
// of the registers and REGLIST is the list of individual registers.
-multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> {
+multiclass SystemZRegClass<string name, list<ValueType> types, int size,
+ dag regList> {
def AsmOperand : AsmOperandClass {
let Name = name;
let ParserMethod = "parse"##name;
let RenderMethod = "addRegOperands";
}
- def Bit : RegisterClass<"SystemZ", [type], size, regList> {
+ def Bit : RegisterClass<"SystemZ", types, size, regList> {
let Size = size;
}
def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
@@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
/// together with R14 and R15 in one prolog instruction.
-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5),
- (sequence "R%uL", 15, 6))>;
-defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
- (sequence "R%uH", 15, 6))>;
-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))>;
+defm GR32 : SystemZRegClass<"GR32", [i32], 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", [i32], 32,
+ (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", [i64], 64,
+ (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
// Combine the low and high GR32s into a single class. This can only be
// used for virtual registers if the high-word facility is available.
-defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
+defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
(add (sequence "R%uL", 0, 5),
(sequence "R%uH", 0, 5),
R15L, R15H, R14L, R14H, R13L, R13H,
@@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
-defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
- R12Q, R10Q, R8Q, R6Q,
- R14Q)>;
+defm GR128 : SystemZRegClass<"GR128", [untyped], 128,
+ (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>;
// Base and index registers. Everything except R0, which in an address
// context evaluates as 0.
-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
-defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
+defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>;
+defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>;
// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
// of a GR128.
-defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
+defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>;
//===----------------------------------------------------------------------===//
// Floating-point registers
@@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>;
def F13Dwarf : DwarfMapping<30>;
def F15Dwarf : DwarfMapping<31>;
-// Lower 32 bits of one of the 16 64-bit floating-point registers
+def F16Dwarf : DwarfMapping<68>;
+def F18Dwarf : DwarfMapping<69>;
+def F20Dwarf : DwarfMapping<70>;
+def F22Dwarf : DwarfMapping<71>;
+
+def F17Dwarf : DwarfMapping<72>;
+def F19Dwarf : DwarfMapping<73>;
+def F21Dwarf : DwarfMapping<74>;
+def F23Dwarf : DwarfMapping<75>;
+
+def F24Dwarf : DwarfMapping<76>;
+def F26Dwarf : DwarfMapping<77>;
+def F28Dwarf : DwarfMapping<78>;
+def F30Dwarf : DwarfMapping<79>;
+
+def F25Dwarf : DwarfMapping<80>;
+def F27Dwarf : DwarfMapping<81>;
+def F29Dwarf : DwarfMapping<82>;
+def F31Dwarf : DwarfMapping<83>;
+
+// Upper 32 bits of one of the floating-point registers
class FPR32<bits<16> num, string n> : SystemZReg<n> {
let HWEncoding = num;
}
-// One of the 16 64-bit floating-point registers
-class FPR64<bits<16> num, string n, FPR32 low>
- : SystemZRegWithSubregs<n, [low]> {
+// One of the floating-point registers.
+class FPR64<bits<16> num, string n, FPR32 high>
+ : SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_h32];
+ let SubRegIndices = [subreg_r32];
}
// 8 pairs of FPR64s, with a one-register gap in between.
@@ -161,12 +187,17 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
let SubRegIndices = [subreg_l64, subreg_h64];
}
-// Floating-point registers
+// Floating-point registers. Registers 16-31 require the vector facility.
foreach I = 0-15 in {
def F#I#S : FPR32<I, "f"#I>;
def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
}
+foreach I = 16-31 in {
+ def F#I#S : FPR32<I, "v"#I>;
+ def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
@@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
// There's no store-multiple instruction for FPRs, so we're not fussy
// about the order in which call-saved registers are allocated.
-defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>;
-defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>;
-defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
- F8Q, F9Q, F12Q, F13Q)>;
+defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>;
+defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>;
+defm FP128 : SystemZRegClass<"FP128", [f128], 128,
+ (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>;
+
+//===----------------------------------------------------------------------===//
+// Vector registers
+//===----------------------------------------------------------------------===//
+
+// A full 128-bit vector register, with an FPR64 as its high part.
+class VR128<bits<16> num, string n, FPR64 high>
+ : SystemZRegWithSubregs<n, [high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_r64];
+}
+
+// Full vector registers.
+foreach I = 0-31 in {
+ def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
+
+// Class used to store 32-bit values in the first element of a vector
+// register. f32 scalars are used for the WLEDB and WLDEB instructions.
+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32,
+ (add (sequence "F%uS", 0, 7),
+ (sequence "F%uS", 16, 31),
+ (sequence "F%uS", 8, 15))>;
+
+// Class used to store 64-bit values in the upper half of a vector register.
+// The vector facility also includes scalar f64 instructions that operate
+// on the full vector register set.
+defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64,
+ (add (sequence "F%uD", 0, 7),
+ (sequence "F%uD", 16, 31),
+ (sequence "F%uD", 8, 15))>;
+
+// The subset of vector registers that can be used for floating-point
+// operations too.
+defm VF128 : SystemZRegClass<"VF128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "V%u", 0, 15)>;
+
+// All vector registers.
+defm VR128 : SystemZRegClass<"VR128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (add (sequence "V%u", 0, 7),
+ (sequence "V%u", 16, 31),
+ (sequence "V%u", 8, 15))>;
+
+// Attaches a ValueType to a register operand, to make the instruction
+// definitions easier.
+class TypedReg<ValueType vtin, RegisterOperand opin> {
+ ValueType vt = vtin;
+ RegisterOperand op = opin;
+}
+
+def v32eb : TypedReg<f32, VR32>;
+def v64g : TypedReg<i64, VR64>;
+def v64db : TypedReg<f64, VR64>;
+def v128b : TypedReg<v16i8, VR128>;
+def v128h : TypedReg<v8i16, VR128>;
+def v128f : TypedReg<v4i32, VR128>;
+def v128g : TypedReg<v2i64, VR128>;
+def v128q : TypedReg<v16i8, VR128>;
+def v128eb : TypedReg<v4f32, VR128>;
+def v128db : TypedReg<v2f64, VR128>;
+def v128any : TypedReg<untyped, VR128>;
//===----------------------------------------------------------------------===//
// Other registers
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index a3cba64..e7e0268 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -46,10 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
// number of straight-line MVCs as 6 * 256 - 1.
if (Size > 6 * 256)
return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, PtrVT),
- DAG.getConstant(Size / 256, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT));
}
SDValue SystemZSelectionDAGInfo::
@@ -78,7 +78,8 @@ static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
for (unsigned I = 1; I < Size; ++I)
StoreVal |= ByteVal << (I * 8);
return DAG.getStore(Chain, DL,
- DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)),
+ DAG.getConstant(StoreVal, DL,
+ MVT::getIntegerVT(Size * 8)),
Dst, DstPtrInfo, false, false, Align);
}
@@ -103,7 +104,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// we can move at most 2 halfwords.
uint64_t ByteVal = CByte->getZExtValue();
if (ByteVal == 0 || ByteVal == 255 ?
- Bytes <= 16 && CountPopulation_64(Bytes) <= 2 :
+ Bytes <= 16 && countPopulation(Bytes) <= 2 :
Bytes <= 4) {
unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
unsigned Size2 = Bytes - Size1;
@@ -112,7 +113,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (Size2 == 0)
return Chain1;
Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(Size1, PtrVT));
+ DAG.getConstant(Size1, DL, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
std::min(Align, Size1), DstPtrInfo);
@@ -126,7 +127,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (Bytes == 1)
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(1, PtrVT));
+ DAG.getConstant(1, DL, PtrVT));
SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
DstPtrInfo.getWithOffset(1),
false, false, 1);
@@ -146,7 +147,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
false, false, Align);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(1, PtrVT));
+ DAG.getConstant(1, DL, PtrVT));
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
Chain, DstPlus1, Dst, Bytes - 1);
}
@@ -169,10 +170,10 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// needs 2 branches, whereas a straight-line sequence would need 3 or more.
if (Size > 3 * 256)
return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, PtrVT),
- DAG.getConstant(Size / 256, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT));
}
// Convert the current CC value into an integer that is 0 if CC == 0,
@@ -182,9 +183,9 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
- DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
- DAG.getConstant(31, MVT::i32));
+ DAG.getConstant(31, DL, MVT::i32));
return ROTL;
}
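This sequence turns the two-bit condition code into a memcmp-style signed result: IPM leaves CC in bits 29-28, the SRL by SystemZ::IPM_CC (28) isolates it, and the ROTL by 31 (a rotate right by one) maps CC==1 to a negative value and CC==2 to a positive one. A scalar sketch of that mapping, for illustration only:

#include <cstdint>

// CC is the condition code (0-3) after the shift by SystemZ::IPM_CC.
int32_t ccToResult(uint32_t CC) {
  uint32_t Rotated = (CC << 31) | (CC >> 1); // ROTL by 31
  return (int32_t)Rotated;
}
// ccToResult(0) == 0 (equal), ccToResult(1) < 0 (less),
// ccToResult(2) > 0 (greater).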
@@ -213,7 +214,7 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
- DAG.getConstant(255, MVT::i32));
+ DAG.getConstant(255, DL, MVT::i32));
SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
Limit, Src, Char);
@@ -222,12 +223,10 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// Now select between End and null, depending on whether the character
// was found.
- SmallVector<SDValue, 5> Ops;
- Ops.push_back(End);
- Ops.push_back(DAG.getConstant(0, PtrVT));
- Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
- Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
- Ops.push_back(Glue);
+ SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
+ DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
+ DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
+ Glue};
VTs = DAG.getVTList(PtrVT, MVT::Glue);
End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
return std::make_pair(End, Chain);
@@ -240,7 +239,7 @@ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
}
@@ -251,7 +250,7 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
MachinePointerInfo Op2PtrInfo) const {
SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
Chain = Unused.getValue(1);
SDValue Glue = Chain.getValue(2);
return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
@@ -268,7 +267,7 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
EVT PtrVT = Src.getValueType();
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
- Limit, Src, DAG.getConstant(0, MVT::i32));
+ Limit, Src, DAG.getConstant(0, DL, MVT::i32));
Chain = End.getValue(1);
SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
return std::make_pair(Len, Chain);
@@ -278,7 +277,7 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src, MachinePointerInfo SrcPtrInfo) const {
EVT PtrVT = Src.getValueType();
- return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT));
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index ec7a8c4..d1a17c5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -15,6 +15,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@@ -36,6 +37,10 @@ public:
private:
bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
unsigned LLIxL, unsigned LLIxH);
+ bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
return false;
}
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+ SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode. If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rounding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MachineOperand Dest(MI.getOperand(0));
+ MachineOperand Src(MI.getOperand(1));
+ MachineOperand Suppress(MI.getOperand(2));
+ MachineOperand Mode(MI.getOperand(3));
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addOperand(Dest)
+ .addOperand(Mode)
+ .addOperand(Src)
+ .addOperand(Suppress);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SystemZ::IILF)
+ switch (MI.getOpcode()) {
+ case SystemZ::IILF:
Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
SystemZ::LLILH);
- else if (Opcode == SystemZ::IIHF)
+ break;
+
+ case SystemZ::IIHF:
Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
SystemZ::LLIHH);
+ break;
+
+ case SystemZ::WFADB:
+ Changed |= shortenOn001(MI, SystemZ::ADBR);
+ break;
+
+ case SystemZ::WFDDB:
+ Changed |= shortenOn001(MI, SystemZ::DDBR);
+ break;
+
+ case SystemZ::WFIDB:
+ Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
+ break;
+
+ case SystemZ::WLDEB:
+ Changed |= shortenOn01(MI, SystemZ::LDEBR);
+ break;
+
+ case SystemZ::WLEDB:
+ Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
+ break;
+
+ case SystemZ::WFMDB:
+ Changed |= shortenOn001(MI, SystemZ::MDBR);
+ break;
+
+ case SystemZ::WFLCDB:
+ Changed |= shortenOn01(MI, SystemZ::LCDBR);
+ break;
+
+ case SystemZ::WFLNDB:
+ Changed |= shortenOn01(MI, SystemZ::LNDBR);
+ break;
+
+ case SystemZ::WFLPDB:
+ Changed |= shortenOn01(MI, SystemZ::LPDBR);
+ break;
+
+ case SystemZ::WFSQDB:
+ Changed |= shortenOn01(MI, SystemZ::SQDBR);
+ break;
+
+ case SystemZ::WFSDB:
+ Changed |= shortenOn001(MI, SystemZ::SDBR);
+ break;
+
+ case SystemZ::WFCDB:
+ Changed |= shortenOn01(MI, SystemZ::CDBR);
+ break;
+
+ case SystemZ::VL32:
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ Changed |= shortenOn0(MI, SystemZ::LDE32);
+ break;
+
+ case SystemZ::VST32:
+ Changed |= shortenOn0(MI, SystemZ::STE);
+ break;
+
+ case SystemZ::VL64:
+ Changed |= shortenOn0(MI, SystemZ::LD);
+ break;
+
+ case SystemZ::VST64:
+ Changed |= shortenOn0(MI, SystemZ::STD);
+ break;
+ }
+
unsigned UsedLow = 0;
unsigned UsedHigh = 0;
for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index e160bc8..05aede3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,7 +10,6 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Host.h"
using namespace llvm;
@@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__linux__) && defined(__s390x__)
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
return *this;
@@ -43,14 +38,12 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
- TargetTriple(TT),
- // Make sure that global data has at least 16 bits of alignment by
- // default, so that we can refer to it using LARL. We don't have any
- // special requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- TSInfo(DL), FrameLowering() {}
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
+ HasVector(false),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index f881552..9a1f593 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -38,12 +38,16 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
+ bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
+ bool HasVector;
private:
Triple TargetTriple;
- const DataLayout DL;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
SystemZSelectionDAGInfo TSInfo;
@@ -59,7 +63,6 @@ public:
return &FrameLowering;
}
const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -88,12 +91,29 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
// Return true if the target has interlocked-access facility 1.
bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
+ // Return true if the target has the miscellaneous-extensions facility.
+ bool hasMiscellaneousExtensions() const {
+ return HasMiscellaneousExtensions;
+ }
+
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
+ // Return true if the target has the vector facility.
+ bool hasVector() const { return HasVector; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index a210074..a34cdaf 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "SystemZTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -20,12 +21,71 @@ extern "C" void LLVMInitializeSystemZTarget() {
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
}
+// Determine whether we use the vector ABI.
+static bool UsesVectorABI(StringRef CPU, StringRef FS) {
+ // We use the vector ABI whenever the vector facility is available.
+ // This is the case by default if CPU is z13 or later, and can be
+ // overridden via "[+-]vector" feature string elements.
+ bool VectorABI = true;
+ if (CPU.empty() || CPU == "generic" ||
+ CPU == "z10" || CPU == "z196" || CPU == "zEC12")
+ VectorABI = false;
+
+ SmallVector<StringRef, 3> Features;
+ FS.split(Features, ",", -1, false /* KeepEmpty */);
+ for (auto &Feature : Features) {
+ if (Feature == "vector" || Feature == "+vector")
+ VectorABI = true;
+ if (Feature == "-vector")
+ VectorABI = false;
+ }
+
+ return VectorABI;
+}
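
Because the loop walks the feature-string elements in order, a later "+vector" or "-vector" wins over an earlier one. A minimal sketch of the intended behavior, written as assertions (illustrative only: UsesVectorABI is file-static, so real tests would observe it indirectly through the resulting data layout):

    assert(UsesVectorABI("z13", "") == true);                 // z13 defaults to vector
    assert(UsesVectorABI("zEC12", "") == false);              // pre-z13 CPUs do not
    assert(UsesVectorABI("generic", "+vector") == true);      // feature overrides CPU
    assert(UsesVectorABI("z13", "+vector,-vector") == false); // last element wins
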
+
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ const Triple Triple(TT);
+ bool VectorABI = UsesVectorABI(CPU, FS);
+ std::string Ret = "";
+
+ // Big endian.
+ Ret += "E";
+
+ // Data mangling.
+ Ret += DataLayout::getManglingComponent(Triple);
+
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ Ret += "-i1:8:16-i8:8:16";
+
+ // 64-bit integers are naturally aligned.
+ Ret += "-i64:64";
+
+ // 128-bit floats are aligned only to 64 bits.
+ Ret += "-f128:64";
+
+ // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
+ if (VectorABI)
+ Ret += "-v128:64";
+
+ // We prefer 16 bits of alignment for all globals; see above.
+ Ret += "-a:8:16";
+
+ // Integer registers are 32 or 64 bits.
+ Ret += "-n32:64";
+
+ return Ret;
+}
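
For the usual SystemZ ELF triples, where getManglingComponent yields "-m:e", the concatenation above can only produce two strings; the non-vector one matches the layout string previously hard-coded in SystemZSubtarget:

    pre-z13 / "-vector": E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64
    z13 or "+vector":    E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64
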
+
SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS),
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -57,6 +117,10 @@ void SystemZPassConfig::addIRPasses() {
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
+
return false;
}
@@ -100,3 +164,9 @@ void SystemZPassConfig::addPreEmitPass() {
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
+
+TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
+ });
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 9fae5e4..5ded07c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -33,12 +33,13 @@ public:
CodeGenOpt::Level OL);
~SystemZTargetMachine() override;
- // Override TargetMachine.
- const SystemZSubtarget *getSubtargetImpl() const override {
+ const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
new file mode 100644
index 0000000..5a87df1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -0,0 +1,258 @@
+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a TargetTransformInfo analysis pass specific to the
+// SystemZ target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemztti"
+
+//===----------------------------------------------------------------------===//
+//
+// SystemZ cost model.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ // Constants loaded via lgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llihf:
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Basic;
+
+ return 2 * TTI::TCC_Basic;
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
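
Concretely, with TCC_Free == 0 and TCC_Basic == 1 (their values in TargetTransformInfo), 64-bit constants classify as sketched below. The harness is hypothetical (Ctx and TTI are assumed to exist, since building a SystemZTTIImpl requires a full TargetMachine):

    // Hypothetical setup: LLVMContext Ctx and SystemZTTIImpl TTI not shown.
    Type *I64 = Type::getInt64Ty(Ctx);
    assert(TTI.getIntImmCost(APInt(64, 0), I64) == 0);                     // free
    assert(TTI.getIntImmCost(APInt(64, -1, /*isSigned=*/true), I64) == 1); // lgfi
    assert(TTI.getIntImmCost(APInt(64, 0xffffffffULL), I64) == 1);         // llilf
    assert(TTI.getIntImmCost(APInt(64, 0xabcd00000000ULL), I64) == 1);     // llihf
    assert(TTI.getIntImmCost(APInt(64, 0x100000001ULL), I64) == 2);        // needs a pair
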
+
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::Store:
+ if (Idx == 0 && Imm.getBitWidth() <= 64) {
+ // Any 8-bit immediate store can be implemented via mvi.
+ if (BitSize == 8)
+ return TTI::TCC_Free;
+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::ICmp:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Comparisons against signed 32-bit immediates implemented via cgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Or their negation, by swapping addition vs. subtraction.
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Mul:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use msgfi to multiply by 32-bit signed immediates.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Masks supported by oilf/xilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Masks supported by oihf/xihf.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::And:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Any 32-bit AND operation can be implemented via nilf.
+ if (BitSize <= 32)
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilf.
+ if (isUInt<32>(~Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilh.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+ return TTI::TCC_Free;
+ // Some 64-bit AND operations can be implemented via risbg.
+ const SystemZInstrInfo *TII = ST->getInstrInfo();
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1)
+ return TTI::TCC_Free;
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
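
The And case is the subtle one: beyond the 32-bit and-immediate mask forms handled above, a mask whose set bits form one contiguous run, possibly wrapping around bit 63, is also free, because risbg can apply it without materializing the constant. The standalone sketch below models that contiguity test; it is an approximation for illustration, not the in-tree isRxSBGMask, which also covers the rotate-and-select variants:

    #include <cassert>
    #include <cstdint>

    static bool isMask64(uint64_t V) { return V && ((V & (V + 1)) == 0); }
    static bool isShiftedMask64(uint64_t V) { return V && isMask64(V | (V - 1)); }

    // One contiguous run of ones, possibly wrapping past bit 63: either the
    // ones are contiguous, or the zeros are (i.e. ~V is a shifted mask).
    static bool isWrappingOnesRun(uint64_t V) {
      return V != 0 && (isShiftedMask64(V) || isShiftedMask64(~V));
    }

    int main() {
      assert(isWrappingOnesRun(0x00ffffffffffff00ULL));  // bits 8..55: risbg-able
      assert(isWrappingOnesRun(0xff000000000000ffULL));  // wraps 56..7: still one run
      assert(!isWrappingOnesRun(0x0000ff00ff000000ULL)); // two runs: constant needed
    }
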
+
+unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // These get expanded to include a normal addition/subtraction.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // These get expanded to include a normal multiplication.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
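
The Idx thresholds mirror the intrinsic signatures: stackmap's first two operands (ID and shadow-byte count) and patchpoint's first four (ID, byte count, target, argument count) are always free, and any later operand is free as long as it fits a 64-bit immediate. For reference, the shapes from the LLVM intrinsic definitions:

    declare void @llvm.experimental.stackmap(i64 %id, i32 %numShadowBytes, ...)
    declare void @llvm.experimental.patchpoint.void(i64 %id, i32 %numBytes,
                                                    i8* %target, i32 %numArgs, ...)
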
+
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
+unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (!Vector)
+ // Discount the stack pointer. Also leave out %r0, since it can't
+ // be used in an address.
+ return 14;
+ if (ST->hasVector())
+ return 32;
+ return 0;
+}
+
+unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
+ if (!Vector)
+ return 64;
+ if (ST->hasVector())
+ return 128;
+ return 0;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
new file mode 100644
index 0000000..e9cabe9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -0,0 +1,78 @@
+//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
+ typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const SystemZSubtarget *ST;
+ const SystemZTargetLowering *TLI;
+
+ const SystemZSubtarget *getST() const { return ST; }
+ const SystemZTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ SystemZTTIImpl(const SystemZTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ SystemZTTIImpl(SystemZTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
index 4b51b3f..1b74e8c 100644
--- a/contrib/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -18,24 +18,24 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include <cstring>
using namespace llvm;
-inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
- return reinterpret_cast<TargetLibraryInfo*>(P);
+inline TargetLibraryInfoImpl *unwrap(LLVMTargetLibraryInfoRef P) {
+ return reinterpret_cast<TargetLibraryInfoImpl*>(P);
}
-inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
- TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) {
+ TargetLibraryInfoImpl *X = const_cast<TargetLibraryInfoImpl*>(P);
return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
}
void llvm::initializeTarget(PassRegistry &Registry) {
- initializeDataLayoutPassPass(Registry);
- initializeTargetLibraryInfoPass(Registry);
+ initializeTargetLibraryInfoWrapperPassPass(Registry);
+ initializeTargetTransformInfoWrapperPassPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
@@ -47,14 +47,11 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
}
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
- // The DataLayoutPass must now be in sync with the module. Unfortunatelly we
- // cannot enforce that from the C api.
- unwrap(PM)->add(new DataLayoutPass());
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
LLVMPassManagerRef PM) {
- unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI)));
+ unwrap(PM)->add(new TargetLibraryInfoWrapperPass(*unwrap(TLI)));
}
char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index f343473..a184b92 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,7 +43,7 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
- DL = TM.getSubtargetImpl()->getDataLayout();
+ DL = TM.getDataLayout();
InitMCObjectFileInfo(TM.getTargetTriple(),
TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
@@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
NameStr += DL->getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, Mang);
NameStr.append(Suffix.begin(), Suffix.end());
- return Ctx->GetOrCreateSymbol(NameStr.str());
+ return Ctx->getOrCreateSymbol(NameStr);
}
MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
@@ -200,12 +200,12 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
- C->getType())) {
+ switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
- default: return SectionKind::getMergeableConst();
+ default:
+ return SectionKind::getReadOnly();
}
case Constant::LocalRelocation:
@@ -255,12 +255,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
llvm_unreachable("Invalid relocation");
}
-/// SectionForGlobal - This method computes the appropriate section to emit
-/// the specified global variable or function definition. This should not
-/// be passed external (or available externally) globals.
-const MCSection *TargetLoweringObjectFile::
-SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+/// This method computes the appropriate section to emit the specified global
+/// variable or function definition. This should not be passed external (or
+/// available externally) globals.
+MCSection *
+TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
// Select section name.
if (GV->hasSection())
return getExplicitSectionGlobal(GV, Kind, Mang, TM);
@@ -270,10 +271,32 @@ SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
return SelectSectionForGlobal(GV, Kind, Mang, TM);
}
-/// getSectionForConstant - Given a mergable constant with the
-/// specified size and relocation information, return a section that it
-/// should be placed in.
-const MCSection *
+MCSection *TargetLoweringObjectFile::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ return getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr);
+}
+
+bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // In PIC mode, we need to emit the jump table to the same section as the
+ // function body itself, otherwise the label differences won't make sense.
+ // FIXME: Need a better predicate for this: what about custom entries?
+ if (UsesLabelDifference)
+ return true;
+
+ // We should also do this if the section name is null or the function is
+ // declared in a discardable section.
+ // FIXME: this isn't the right predicate, should be based on the MCSection
+ // for the function.
+ if (F.isWeakForLinker())
+ return true;
+
+ return false;
+}
+
+/// Given a mergable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *
TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
if (Kind.isReadOnly() && ReadOnlySection != nullptr)
@@ -307,7 +330,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
case dwarf::DW_EH_PE_pcrel: {
// Emit a label to the streamer for the current position. This gives us
// .-foo addressing.
- MCSymbol *PCSym = getContext().CreateTempSymbol();
+ MCSymbol *PCSym = getContext().createTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
return MCBinaryExpr::CreateSub(Sym, PC, getContext());
@@ -320,3 +343,9 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
// null return could mean 'no location' & we should just do that here.
return MCSymbolRefExpr::Create(Sym, *Ctx);
}
+
+void TargetLoweringObjectFile::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index b3ff001..2824250 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
@@ -21,9 +22,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -34,39 +37,40 @@ using namespace llvm;
// TargetMachine Class
//
-TargetMachine::TargetMachine(const Target &T,
+TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
- : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
- CodeGenInfo(nullptr), AsmInfo(nullptr),
- RequireStructuredCFG(false),
- Options(Options) {
-}
+ : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
+ TargetFS(FS), CodeGenInfo(nullptr), AsmInfo(nullptr), MRI(nullptr),
+ MII(nullptr), STI(nullptr), RequireStructuredCFG(false),
+ Options(Options) {}
TargetMachine::~TargetMachine() {
delete CodeGenInfo;
delete AsmInfo;
+ delete MRI;
+ delete MII;
+ delete STI;
}
/// \brief Reset the target options based on the function's attributes.
+// FIXME: This function needs to go away for a number of reasons:
+// a) global state on the TargetMachine is terrible in general,
+// b) there's no default state here to keep,
+// c) these target options should be passed only on the function
+// and not on the TargetMachine (via TargetOptions) at all.
void TargetMachine::resetTargetOptions(const Function &F) const {
#define RESET_OPTION(X, Y) \
do { \
- if (F.hasFnAttribute(Y)) \
- Options.X = (F.getAttributes() \
- .getAttribute(AttributeSet::FunctionIndex, Y) \
- .getValueAsString() == "true"); \
+ if (F.hasFnAttribute(Y)) \
+ Options.X = (F.getFnAttribute(Y).getValueAsString() == "true"); \
} while (0)
- RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
- RESET_OPTION(UseSoftFloat, "use-soft-float");
RESET_OPTION(DisableTailCalls, "disable-tail-calls");
-
- Options.MCOptions.SanitizeAddress = F.hasFnAttribute(Attribute::SanitizeAddress);
}
/// getRelocationModel - Returns the code generation relocation model. The
@@ -146,28 +150,9 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
CodeGenInfo->setOptLevel(Level);
}
-bool TargetMachine::getAsmVerbosityDefault() const {
- return Options.MCOptions.AsmVerbose;
-}
-
-void TargetMachine::setAsmVerbosityDefault(bool V) {
- Options.MCOptions.AsmVerbose = V;
-}
-
-bool TargetMachine::getFunctionSections() const {
- return Options.FunctionSections;
-}
-
-bool TargetMachine::getDataSections() const {
- return Options.DataSections;
-}
-
-void TargetMachine::setFunctionSections(bool V) {
- Options.FunctionSections = V;
-}
-
-void TargetMachine::setDataSections(bool V) {
- Options.DataSections = V;
+TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &) { return TargetTransformInfo(getDataLayout()); });
}
static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
@@ -193,17 +178,15 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
return;
}
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this);
- const TargetLoweringObjectFile &TLOF =
- getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
- const MCSection *TheSection = TLOF.SectionForGlobal(GV, GVKind, Mang, *this);
+ const TargetLoweringObjectFile *TLOF = getObjFileLowering();
+ const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this);
bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
- Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel);
+ TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this);
}
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
- SmallString<60> NameStr;
+ SmallString<128> NameStr;
getNameWithPrefix(NameStr, GV, Mang);
- const TargetLoweringObjectFile &TLOF =
- getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
- return TLOF.getContext().GetOrCreateSymbol(NameStr.str());
+ const TargetLoweringObjectFile *TLOF = getObjFileLowering();
+ return TLOF->getContext().getOrCreateSymbol(NameStr);
}
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index b3e07df..623b3e8 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -14,9 +14,10 @@
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
@@ -46,11 +47,11 @@ inline LLVMTargetRef wrap(const Target * P) {
}
LLVMTargetRef LLVMGetFirstTarget() {
- if(TargetRegistry::begin() == TargetRegistry::end()) {
+ if (TargetRegistry::targets().begin() == TargetRegistry::targets().end()) {
return nullptr;
}
- const Target* target = &*TargetRegistry::begin();
+ const Target *target = &*TargetRegistry::targets().begin();
return wrap(target);
}
LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
@@ -59,13 +60,10 @@ LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
StringRef NameRef = Name;
- for (TargetRegistry::iterator IT = TargetRegistry::begin(),
- IE = TargetRegistry::end(); IT != IE; ++IT) {
- if (IT->getName() == NameRef)
- return wrap(&*IT);
- }
-
- return nullptr;
+ auto I = std::find_if(
+ TargetRegistry::targets().begin(), TargetRegistry::targets().end(),
+ [&](const Target &T) { return T.getName() == NameRef; });
+ return I != TargetRegistry::targets().end() ? wrap(&*I) : nullptr;
}
LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
@@ -173,32 +171,33 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
}
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getSubtargetImpl()->getDataLayout());
+ return wrap(unwrap(T)->getDataLayout());
}
void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
LLVMBool VerboseAsm) {
- unwrap(T)->setAsmVerbosityDefault(VerboseAsm);
+ unwrap(T)->Options.MCOptions.AsmVerbose = VerboseAsm;
}
static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
- formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
+ raw_pwrite_stream &OS,
+ LLVMCodeGenFileType codegen,
+ char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
Module* Mod = unwrap(M);
- PassManager pass;
+ legacy::PassManager pass;
std::string error;
- const DataLayout *td = TM->getSubtargetImpl()->getDataLayout();
+ const DataLayout *td = TM->getDataLayout();
if (!td) {
error = "No DataLayout in TargetMachine";
*ErrorMessage = strdup(error.c_str());
return true;
}
- Mod->setDataLayout(td);
- pass.add(new DataLayoutPass());
+ Mod->setDataLayout(*td);
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -229,8 +228,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
*ErrorMessage = strdup(EC.message().c_str());
return true;
}
- formatted_raw_ostream destf(dest);
- bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+ bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage);
dest.flush();
return Result;
}
@@ -238,15 +236,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
LLVMMemoryBufferRef *OutMemBuf) {
- std::string CodeString;
- raw_string_ostream OStream(CodeString);
- formatted_raw_ostream Out(OStream);
- bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+ SmallString<0> CodeString;
+ raw_svector_ostream OStream(CodeString);
+ bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage);
OStream.flush();
- std::string &Data = OStream.str();
- *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
- Data.length(), "");
+ StringRef Data = OStream.str();
+ *OutMemBuf =
+ LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
return Result;
}
@@ -255,5 +252,6 @@ char *LLVMGetDefaultTargetTriple(void) {
}
void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM) {
- unwrap(T)->addAnalysisPasses(*unwrap(PM));
+ unwrap(PM)->add(
+ createTargetTransformInfoWrapperPass(unwrap(T)->getTargetIRAnalysis()));
}
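
With TargetMachine::addAnalysisPasses gone, the C API route is to ask the target machine for its TargetIRAnalysis and install the wrapper pass, which then builds a per-function TTI on demand. A hedged usage sketch (TM and M come from the usual LLVMCreateTargetMachine and module-creation calls, not shown):

    LLVMPassManagerRef PM = LLVMCreatePassManager();
    LLVMAddAnalysisPasses(TM, PM); /* installs the TTI wrapper pass */
    LLVMRunPassManager(PM, M);
    LLVMDisposePassManager(PM);
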
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
index 10597a8..b2bb59e 100644
--- a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
@@ -23,22 +23,6 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
-// Temporary option to compare overall performance change when moving from the
-// SD scheduler to the MachineScheduler pass pipeline. This is convenient for
-// benchmarking during the transition from SD to MI scheduling. Once armv7 makes
-// the switch, it should go away. The normal way to enable/disable the
-// MachineScheduling pass itself is by using -enable-misched. For targets that
-// already use MI sched (via MySubTarget::enableMachineScheduler())
-// -misched-bench=false negates the subtarget hook.
-static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
- cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
-
-bool TargetSubtargetInfo::useMachineScheduler() const {
- if (BenchMachineSched.getNumOccurrences())
- return BenchMachineSched;
- return enableMachineScheduler();
-}
-
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
}
@@ -47,6 +31,10 @@ bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
+bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
+ return enableMachineScheduler();
+}
+
bool TargetSubtargetInfo::enableRALocalReassignment(
CodeGenOpt::Level OptLevel) const {
return true;
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 543af8e..a21f8c7 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineValueType.h"
-#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -246,7 +245,7 @@ protected:
assert(VT == MVT::i32 || VT == MVT::i64);
MCInst Inst;
Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(getX86SubSuperRegister(Reg, VT)));
+ Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, VT)));
Op.addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
@@ -262,13 +261,13 @@ protected:
MCContext &Ctx, int64_t *Residue);
bool is64BitMode() const {
- return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
- return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
- return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode16Bit];
}
unsigned getPointerWidth() {
@@ -614,7 +613,7 @@ private:
Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -643,7 +642,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
+ Inst.addOperand(MCOperand::createReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
@@ -654,7 +653,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
EmitInstruction(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -726,10 +725,10 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
EmitInstruction(Out, Inst);
}
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -743,7 +742,7 @@ void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
StoreFlags(Out);
// No need to test when ECX equals zero.
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(
Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
@@ -884,7 +883,7 @@ private:
RegCtx.AddressReg(MVT::i64)));
}
const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
@@ -914,7 +913,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
+ Inst.addOperand(MCOperand::createReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
@@ -925,7 +924,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
EmitInstruction(
Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -997,11 +996,11 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
EmitInstruction(Out, Inst);
}
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
@@ -1015,7 +1014,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
StoreFlags(Out);
// No need to test when RCX equals zero.
- MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ MCSymbol *DoneSym = Ctx.createTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(
Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
@@ -1073,9 +1072,9 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
const bool hasCompilerRTSupport = T.isOSLinux();
if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
MCOptions.SanitizeAddress) {
- if ((STI.getFeatureBits() & X86::Mode32Bit) != 0)
+ if (STI.getFeatureBits()[X86::Mode32Bit] != 0)
return new X86AddressSanitizer32(STI);
- if ((STI.getFeatureBits() & X86::Mode64Bit) != 0)
+ if (STI.getFeatureBits()[X86::Mode64Bit] != 0)
return new X86AddressSanitizer64(STI);
}
return new X86AsmInstrumentation(STI);
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d743aa6..3047fd1 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,6 +11,7 @@
#include "X86AsmInstrumentation.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
+#include "X86ISelLowering.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -664,6 +665,7 @@ private:
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
std::unique_ptr<X86Operand>
ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
+ std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
SMLoc Start,
@@ -727,23 +729,24 @@ private:
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode16Bit];
}
- void SwitchMode(uint64_t mode) {
- uint64_t oldMode = STI.getFeatureBits() &
- (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
- unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
+ void SwitchMode(unsigned mode) {
+ FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
+ FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
+ unsigned FB = ComputeAvailableFeatures(
+ STI.ToggleFeature(OldMode.flip(mode)));
setAvailableFeatures(FB);
- assert(mode == (STI.getFeatureBits() &
- (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
+
+ assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
}
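
The subtlety in the new SwitchMode is that ToggleFeature XORs a whole bitset: OldMode.flip(mode) carries both the current mode bit and the requested one, so a single toggle clears the former and sets the latter. A standalone sketch with std::bitset standing in for FeatureBitset (bit positions are hypothetical):

    #include <bitset>
    #include <cassert>

    enum { Mode16Bit, Mode32Bit, Mode64Bit, NumFeatures = 8 };

    int main() {
      std::bitset<NumFeatures> Features, AllModes;
      AllModes.set(Mode16Bit).set(Mode32Bit).set(Mode64Bit);
      Features.set(Mode32Bit);             // currently assembling 32-bit code

      unsigned NewMode = Mode16Bit;        // e.g. after a .code16 directive
      std::bitset<NumFeatures> OldMode = Features & AllModes;
      Features ^= OldMode.flip(NewMode);   // one XOR swaps the mode bits

      assert(Features.test(Mode16Bit) && !Features.test(Mode32Bit));
    }
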
unsigned getPointerWidth() {
@@ -1189,7 +1192,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
StringRef IDVal = getTok().getString();
if (IDVal == "f" || IDVal == "b") {
MCSymbol *Sym =
- getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
+ getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
MCSymbolRefExpr::Create(Sym, Variant, getContext());
@@ -1349,7 +1352,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
}
// Create the symbol reference.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
return false;
@@ -1407,6 +1410,43 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
+// ParseRoundingModeOp - Parse an AVX-512 rounding-mode operand.
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ // Eat "{" and mark the current place.
+ const SMLoc consumedToken = consumeToken();
+ if (Tok.getIdentifier().startswith("r")) {
+ int rndMode = StringSwitch<int>(Tok.getIdentifier())
+ .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
+ .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
+ .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
+ .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
+ .Default(-1);
+ if (-1 == rndMode)
+ return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
+ Parser.Lex(); // Eat "r*" of r*-sae
+ if (!getLexer().is(AsmToken::Minus))
+ return ErrorOperand(Tok.getLoc(), "Expected - at this point");
+ Parser.Lex(); // Eat "-"
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ const MCExpr *RndModeOp =
+ MCConstantExpr::Create(rndMode, Parser.getContext());
+ return X86Operand::CreateImm(RndModeOp, Start, End);
+ }
+ if (Tok.getIdentifier().equals("sae")) {
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ return X86Operand::CreateToken("{sae}", consumedToken);
+ }
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+}
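
The accepted grammar is small: four rounding tokens, each becoming an X86::STATIC_ROUNDING immediate operand, plus the bare sae form, which stays a literal token:

    {rn-sae} -> TO_NEAREST_INT      {ru-sae} -> TO_POS_INF
    {rd-sae} -> TO_NEG_INF          {rz-sae} -> TO_ZERO
    {sae}    -> token "{sae}" (suppress exceptions, default rounding)

So an Intel-syntax line such as "vaddpd zmm1, zmm2, zmm3, {rz-sae}" (illustrative; any AVX-512 instruction accepting the clause works the same way) parses with an extra immediate operand carrying the rounding mode.
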
/// ParseIntelMemOperand - Parse intel style memory operand.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
SMLoc Start,
@@ -1656,6 +1696,11 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(Imm, Start, Size);
}
+ // rounding mode token
+ if (STI.getFeatureBits()[X86::FeatureAVX512] &&
+ getLexer().is(AsmToken::LCurly))
+ return ParseRoundingModeOp(Start, End);
+
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
@@ -1708,13 +1753,19 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
+ case AsmToken::LCurly: {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ if (STI.getFeatureBits()[X86::FeatureAVX512])
+ return ParseRoundingModeOp(Start, End);
+ return ErrorOperand(Start, "unknown token in expression");
+ }
}
}
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
MCAsmParser &Parser = getParser();
- if(STI.getFeatureBits() & X86::FeatureAVX512) {
+ if (STI.getFeatureBits()[X86::FeatureAVX512]) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
@@ -1964,14 +2015,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = PatchedName.substr(0, Name.size()-1);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
- const MCExpr *ExtraImmOp = nullptr;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
- unsigned SSECCIdx = IsVCMP ? 4 : 3;
- unsigned SSEComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ unsigned CCIdx = IsVCMP ? 4 : 3;
+ unsigned ComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(CCIdx, PatchedName.size() - 2))
.Case("eq", 0x00)
.Case("lt", 0x01)
.Case("le", 0x02)
@@ -2006,27 +2056,75 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
- ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
- getParser().getContext());
- if (PatchedName.endswith("ss")) {
- PatchedName = IsVCMP ? "vcmpss" : "cmpss";
- } else if (PatchedName.endswith("sd")) {
- PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
- } else if (PatchedName.endswith("ps")) {
- PatchedName = IsVCMP ? "vcmpps" : "cmpps";
- } else {
- assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
- PatchedName = IsVCMP ? "vcmppd" : "cmppd";
- }
+ if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
+
+ Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
+ NameLoc));
+
+ const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+
+ PatchedName = PatchedName.substr(PatchedName.size() - 2);
+ }
+ }
+
+ // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
+ if (PatchedName.startswith("vpcmp") &&
+ (PatchedName.endswith("b") || PatchedName.endswith("w") ||
+ PatchedName.endswith("d") || PatchedName.endswith("q"))) {
+ unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+ unsigned ComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(5, PatchedName.size() - CCIdx))
+ .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
+ .Case("lt", 0x1)
+ .Case("le", 0x2)
+ //.Case("false", 0x3) // Not a documented alias.
+ .Case("neq", 0x4)
+ .Case("nlt", 0x5)
+ .Case("nle", 0x6)
+ //.Case("true", 0x7) // Not a documented alias.
+ .Default(~0U);
+ if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
+ Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
+
+ const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+
+ PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
+ }
+ }
+
+ // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
+ if (PatchedName.startswith("vpcom") &&
+ (PatchedName.endswith("b") || PatchedName.endswith("w") ||
+ PatchedName.endswith("d") || PatchedName.endswith("q"))) {
+ unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+ unsigned ComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(5, PatchedName.size() - CCIdx))
+ .Case("lt", 0x0)
+ .Case("le", 0x1)
+ .Case("gt", 0x2)
+ .Case("ge", 0x3)
+ .Case("eq", 0x4)
+ .Case("neq", 0x5)
+ .Case("false", 0x6)
+ .Case("true", 0x7)
+ .Default(~0U);
+ if (ComparisonCode != ~0U) {
+ Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
+
+ const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
+ getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+
+ PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
}
}
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
- if (ExtraImmOp && !isParsingIntelSyntax())
- Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
-
// Determine whether this is an instruction prefix.
bool isPrefix =
Name == "lock" || Name == "rep" ||
@@ -2072,9 +2170,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(isPrefix && getLexer().is(AsmToken::Slash)))
Parser.Lex();
- if (ExtraImmOp && isParsingIntelSyntax())
- Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
-
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -2239,8 +2334,8 @@ static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
MCInst TmpInst;
TmpInst.setOpcode(Opcode);
if (!isCmp)
- TmpInst.addOperand(MCOperand::CreateReg(Reg));
- TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::createReg(Reg));
+ TmpInst.addOperand(MCOperand::createReg(Reg));
TmpInst.addOperand(Inst.getOperand(0));
Inst = TmpInst;
return true;
@@ -2485,7 +2580,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
- Op.setTokenValue(Tmp.str());
+ Op.setTokenValue(Tmp);
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
@@ -2668,7 +2763,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
}
// If we haven't matched anything yet, this is not a basic integer or FPU
- // operation. There shouldn't be any ambiguity in our mneumonic table, so try
+ // operation. There shouldn't be any ambiguity in our mnemonic table, so try
// matching with the unsized operand.
if (Match.empty()) {
Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index 72aeeaa..7610806 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -34,6 +34,11 @@ inline bool isImmSExti64i32Value(uint64_t Value) {
(0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
+inline bool isImmUnsignedi8Value(uint64_t Value) {
+ return ((Value <= 0x00000000000000FFULL) ||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
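
The predicate accepts exactly the values an 8-bit immediate can produce when zero-extended (0..255) or sign-extended to 64 bits (-128..-1); the comparison against 0xFFFFFFFFFFFFFFFFULL is vacuously true for a uint64_t and is kept only for symmetry with the helpers above. Boundary checks as a standalone sketch:

    #include <cassert>
    #include <cstdint>

    // Mirror of the predicate above, with the vacuous upper bound dropped.
    static bool isImmUnsignedi8Value(uint64_t Value) {
      return Value <= 0xFFULL || Value >= 0xFFFFFFFFFFFFFF80ULL;
    }

    int main() {
      assert(isImmUnsignedi8Value(0) && isImmUnsignedi8Value(255));
      assert(!isImmUnsignedi8Value(256));            // needs 9 bits
      assert(isImmUnsignedi8Value((uint64_t)-1));    // 0xFF...FF
      assert(isImmUnsignedi8Value((uint64_t)-128));  // lowest sign-extended value
      assert(!isImmUnsignedi8Value((uint64_t)-129)); // out of range
    }
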
+
} // End of namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h b/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 6675d4d..b3066ef 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -12,8 +12,11 @@
#include "X86AsmParserCommon.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/ADT/STLExtras.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
namespace llvm {
@@ -187,6 +190,13 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
+ bool isImmUnsignedi8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ return isImmUnsignedi8Value(CE->getValue());
+ }
+
bool isOffsetOf() const override {
return OffsetOfLoc.getPointer();
}
@@ -253,15 +263,14 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isAVX512RC() const {
+ return isImm();
+ }
bool isAbsMem16() const {
return isAbsMem() && Mem.ModeSize == 16;
}
- bool isAbsMem32() const {
- return isAbsMem() && Mem.ModeSize != 16;
- }
-
bool isSrcIdx() const {
return !getMemIndexReg() && getMemScale() == 1 &&
(getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
@@ -351,14 +360,14 @@ struct X86Operand : public MCParsedAsmOperand {
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
static unsigned getGR32FromGR64(unsigned RegNo) {
@@ -389,9 +398,12 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned RegNo = getReg();
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
RegNo = getGR32FromGR64(RegNo);
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(RegNo));
+ }
+ void addAVX512RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
}
-
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
@@ -399,40 +411,40 @@ struct X86Operand : public MCParsedAsmOperand {
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateImm(getMemScale()));
- Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
+ Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::createImm(getMemScale()));
+ Inst.addOperand(MCOperand::createReg(getMemIndexReg()));
addExpr(Inst, getMemDisp());
- Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ Inst.addOperand(MCOperand::createReg(getMemSegReg()));
}
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ Inst.addOperand(MCOperand::createExpr(getMemDisp()));
}
void addSrcIdxOperands(MCInst &Inst, unsigned N) const {
assert((N == 2) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::createReg(getMemSegReg()));
}
void addDstIdxOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
+ Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
}
void addMemOffsOperands(MCInst &Inst, unsigned N) const {
assert((N == 2) && "Invalid number of operands!");
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
- Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
+ Inst.addOperand(MCOperand::createExpr(getMemDisp()));
+ Inst.addOperand(MCOperand::createReg(getMemSegReg()));
}
static std::unique_ptr<X86Operand> CreateToken(StringRef Str, SMLoc Loc) {
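
The operand order in addMemOperands above is load-bearing: X86 memory operands are always the fixed 5-tuple base, scale, index, displacement, segment. A minimal sketch using the renamed lowercase factories (the free-function name is illustrative, not from the patch):

    // Sketch: the fixed X86 memory 5-tuple, in the order addMemOperands
    // emits it, using the post-rename MCOperand factory names.
    #include "llvm/MC/MCInst.h"

    void emitMem5Tuple(llvm::MCInst &Inst, unsigned Base, int64_t Scale,
                       unsigned Index, int64_t Disp, unsigned Seg) {
      using llvm::MCOperand;
      Inst.addOperand(MCOperand::createReg(Base));  // AddrBaseReg
      Inst.addOperand(MCOperand::createImm(Scale)); // AddrScaleAmt (1/2/4/8)
      Inst.addOperand(MCOperand::createReg(Index)); // AddrIndexReg
      Inst.addOperand(MCOperand::createImm(Disp));  // AddrDisp
      Inst.addOperand(MCOperand::createReg(Seg));   // AddrSegmentReg
    }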
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 1c56182..3469d19 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -80,20 +80,19 @@ X86GenericDisassembler::X86GenericDisassembler(
MCContext &Ctx,
std::unique_ptr<const MCInstrInfo> MII)
: MCDisassembler(STI, Ctx), MII(std::move(MII)) {
- switch (STI.getFeatureBits() &
- (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) {
- case X86::Mode16Bit:
+ const FeatureBitset &FB = STI.getFeatureBits();
+ if (FB[X86::Mode16Bit]) {
fMode = MODE_16BIT;
- break;
- case X86::Mode32Bit:
+ return;
+ } else if (FB[X86::Mode32Bit]) {
fMode = MODE_32BIT;
- break;
- case X86::Mode64Bit:
+ return;
+ } else if (FB[X86::Mode64Bit]) {
fMode = MODE_64BIT;
- break;
- default:
- llvm_unreachable("Invalid CPU mode");
+ return;
}
+
+ llvm_unreachable("Invalid CPU mode");
}
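
This constructor change is the FeatureBitset migration in miniature: feature bits are no longer an integer to mask and switch on, but a bitset tested by feature index. The same selection as a free function (a sketch; exactly one mode bit is assumed set, as the llvm_unreachable enforces):

    // Sketch: FeatureBitset is indexed per feature instead of masked;
    // the three mode features are mutually exclusive.
    #include "llvm/MC/SubtargetFeature.h"
    #include "llvm/Support/ErrorHandling.h"

    enum SketchMode { MODE_16BIT, MODE_32BIT, MODE_64BIT };

    SketchMode selectMode(const llvm::FeatureBitset &FB, unsigned Mode16,
                          unsigned Mode32, unsigned Mode64) {
      if (FB[Mode16]) return MODE_16BIT;
      if (FB[Mode32]) return MODE_32BIT;
      if (FB[Mode64]) return MODE_64BIT;
      llvm_unreachable("Invalid CPU mode");
    }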
struct Region {
@@ -180,7 +179,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
#undef ENTRY
uint8_t llvmRegnum = llvmRegnums[reg];
- mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+ mcInst.addOperand(MCOperand::createReg(llvmRegnum));
}
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
@@ -248,11 +247,11 @@ static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
assert(insn.mode == MODE_16BIT);
baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI;
}
- MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
+ MCOperand baseReg = MCOperand::createReg(baseRegNo);
mcInst.addOperand(baseReg);
MCOperand segmentReg;
- segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+ segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
mcInst.addOperand(segmentReg);
return false;
}
@@ -273,7 +272,7 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
assert(insn.mode == MODE_16BIT);
baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI;
}
- MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
+ MCOperand baseReg = MCOperand::createReg(baseRegNo);
mcInst.addOperand(baseReg);
return false;
}
@@ -320,24 +319,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
// By default sign-extend all X86 immediates based on their encoding.
else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
type == TYPE_IMM64 || type == TYPE_IMMv) {
- uint32_t Opcode = mcInst.getOpcode();
switch (operand.encoding) {
default:
break;
case ENCODING_IB:
- // Special case those X86 instructions that use the imm8 as a set of
- // bits, bit count, etc. and are not sign-extend.
- if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
- Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
- Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
- Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
- Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
- Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
- Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
- Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
- Opcode != X86::VINSERTPSrr)
- if(immediate & 0x80)
- immediate |= ~(0xffull);
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
break;
case ENCODING_IW:
if(immediate & 0x8000)
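
The deleted opcode blacklist existed only to skip sign extension for a handful of instructions that treat imm8 as a bitfield; those are presumably routed through the unsigned-i8 operand class added elsewhere in this patch, so every remaining ENCODING_IB immediate now takes the same extension. The bit trick, spelled out as a standalone sketch:

    // Sketch: uniform imm8 sign extension. If bit 7 of the low byte is
    // set, set all higher bits so the 64-bit value equals the signed i8.
    #include <cstdint>

    uint64_t signExtendImm8(uint64_t Imm) {
      if (Imm & 0x80)
        Imm |= ~0xffULL; // e.g. 0xf0 becomes 0xfffffffffffffff0 (-16)
      return Imm;
    }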
@@ -356,14 +343,30 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
unsigned NewOpc;
switch (mcInst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
- case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break;
- case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break;
- case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break;
- case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break;
- case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break;
- case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break;
- case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break;
- case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break;
+ case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break;
+ case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break;
+ case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break;
+ case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break;
+ case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break;
+ case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break;
+ case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break;
+ case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break;
+ case X86::VPCOMBri: NewOpc = X86::VPCOMBri_alt; break;
+ case X86::VPCOMBmi: NewOpc = X86::VPCOMBmi_alt; break;
+ case X86::VPCOMWri: NewOpc = X86::VPCOMWri_alt; break;
+ case X86::VPCOMWmi: NewOpc = X86::VPCOMWmi_alt; break;
+ case X86::VPCOMDri: NewOpc = X86::VPCOMDri_alt; break;
+ case X86::VPCOMDmi: NewOpc = X86::VPCOMDmi_alt; break;
+ case X86::VPCOMQri: NewOpc = X86::VPCOMQri_alt; break;
+ case X86::VPCOMQmi: NewOpc = X86::VPCOMQmi_alt; break;
+ case X86::VPCOMUBri: NewOpc = X86::VPCOMUBri_alt; break;
+ case X86::VPCOMUBmi: NewOpc = X86::VPCOMUBmi_alt; break;
+ case X86::VPCOMUWri: NewOpc = X86::VPCOMUWri_alt; break;
+ case X86::VPCOMUWmi: NewOpc = X86::VPCOMUWmi_alt; break;
+ case X86::VPCOMUDri: NewOpc = X86::VPCOMUDri_alt; break;
+ case X86::VPCOMUDmi: NewOpc = X86::VPCOMUDmi_alt; break;
+ case X86::VPCOMUQri: NewOpc = X86::VPCOMUQri_alt; break;
+ case X86::VPCOMUQmi: NewOpc = X86::VPCOMUQmi_alt; break;
}
// Switch opcode to the one that doesn't get special printing.
mcInst.setOpcode(NewOpc);
@@ -374,26 +377,157 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
unsigned NewOpc;
switch (mcInst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
- case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
- case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
- case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
- case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
- case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
- case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
- case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
- case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
- case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
- case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
- case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
- case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
- case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
- case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
- case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
- case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
- case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
- case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
- case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
- case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
+ case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
+ case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
+ case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
+ case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
+ case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
+ case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
+ case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
+ case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
+ case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
+ case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
+ case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
+ case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
+ case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
+ case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
+ case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
+ case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
+ case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
+ case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
+ case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
+ case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
+ case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ }
+ // Switch opcode to the one that doesn't get special printing.
+ mcInst.setOpcode(NewOpc);
+ }
+ } else if (type == TYPE_AVX512ICC) {
+ if (immediate >= 8 || ((immediate & 0x3) == 3)) {
+ unsigned NewOpc;
+ switch (mcInst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPBZ128rmi_alt; break;
+ case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPBZ128rmik_alt; break;
+ case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPBZ128rri_alt; break;
+ case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPBZ128rrik_alt; break;
+ case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPBZ256rmi_alt; break;
+ case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPBZ256rmik_alt; break;
+ case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPBZ256rri_alt; break;
+ case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPBZ256rrik_alt; break;
+ case X86::VPCMPBZrmi: NewOpc = X86::VPCMPBZrmi_alt; break;
+ case X86::VPCMPBZrmik: NewOpc = X86::VPCMPBZrmik_alt; break;
+ case X86::VPCMPBZrri: NewOpc = X86::VPCMPBZrri_alt; break;
+ case X86::VPCMPBZrrik: NewOpc = X86::VPCMPBZrrik_alt; break;
+ case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPDZ128rmi_alt; break;
+ case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPDZ128rmib_alt; break;
+ case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPDZ128rmibk_alt; break;
+ case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPDZ128rmik_alt; break;
+ case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPDZ128rri_alt; break;
+ case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPDZ128rrik_alt; break;
+ case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPDZ256rmi_alt; break;
+ case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPDZ256rmib_alt; break;
+ case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPDZ256rmibk_alt; break;
+ case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPDZ256rmik_alt; break;
+ case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPDZ256rri_alt; break;
+ case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPDZ256rrik_alt; break;
+ case X86::VPCMPDZrmi: NewOpc = X86::VPCMPDZrmi_alt; break;
+ case X86::VPCMPDZrmib: NewOpc = X86::VPCMPDZrmib_alt; break;
+ case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPDZrmibk_alt; break;
+ case X86::VPCMPDZrmik: NewOpc = X86::VPCMPDZrmik_alt; break;
+ case X86::VPCMPDZrri: NewOpc = X86::VPCMPDZrri_alt; break;
+ case X86::VPCMPDZrrik: NewOpc = X86::VPCMPDZrrik_alt; break;
+ case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPQZ128rmi_alt; break;
+ case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPQZ128rmib_alt; break;
+ case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPQZ128rmibk_alt; break;
+ case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPQZ128rmik_alt; break;
+ case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPQZ128rri_alt; break;
+ case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPQZ128rrik_alt; break;
+ case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPQZ256rmi_alt; break;
+ case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPQZ256rmib_alt; break;
+ case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPQZ256rmibk_alt; break;
+ case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPQZ256rmik_alt; break;
+ case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPQZ256rri_alt; break;
+ case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPQZ256rrik_alt; break;
+ case X86::VPCMPQZrmi: NewOpc = X86::VPCMPQZrmi_alt; break;
+ case X86::VPCMPQZrmib: NewOpc = X86::VPCMPQZrmib_alt; break;
+ case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPQZrmibk_alt; break;
+ case X86::VPCMPQZrmik: NewOpc = X86::VPCMPQZrmik_alt; break;
+ case X86::VPCMPQZrri: NewOpc = X86::VPCMPQZrri_alt; break;
+ case X86::VPCMPQZrrik: NewOpc = X86::VPCMPQZrrik_alt; break;
+ case X86::VPCMPUBZ128rmi: NewOpc = X86::VPCMPUBZ128rmi_alt; break;
+ case X86::VPCMPUBZ128rmik: NewOpc = X86::VPCMPUBZ128rmik_alt; break;
+ case X86::VPCMPUBZ128rri: NewOpc = X86::VPCMPUBZ128rri_alt; break;
+ case X86::VPCMPUBZ128rrik: NewOpc = X86::VPCMPUBZ128rrik_alt; break;
+ case X86::VPCMPUBZ256rmi: NewOpc = X86::VPCMPUBZ256rmi_alt; break;
+ case X86::VPCMPUBZ256rmik: NewOpc = X86::VPCMPUBZ256rmik_alt; break;
+ case X86::VPCMPUBZ256rri: NewOpc = X86::VPCMPUBZ256rri_alt; break;
+ case X86::VPCMPUBZ256rrik: NewOpc = X86::VPCMPUBZ256rrik_alt; break;
+ case X86::VPCMPUBZrmi: NewOpc = X86::VPCMPUBZrmi_alt; break;
+ case X86::VPCMPUBZrmik: NewOpc = X86::VPCMPUBZrmik_alt; break;
+ case X86::VPCMPUBZrri: NewOpc = X86::VPCMPUBZrri_alt; break;
+ case X86::VPCMPUBZrrik: NewOpc = X86::VPCMPUBZrrik_alt; break;
+ case X86::VPCMPUDZ128rmi: NewOpc = X86::VPCMPUDZ128rmi_alt; break;
+ case X86::VPCMPUDZ128rmib: NewOpc = X86::VPCMPUDZ128rmib_alt; break;
+ case X86::VPCMPUDZ128rmibk: NewOpc = X86::VPCMPUDZ128rmibk_alt; break;
+ case X86::VPCMPUDZ128rmik: NewOpc = X86::VPCMPUDZ128rmik_alt; break;
+ case X86::VPCMPUDZ128rri: NewOpc = X86::VPCMPUDZ128rri_alt; break;
+ case X86::VPCMPUDZ128rrik: NewOpc = X86::VPCMPUDZ128rrik_alt; break;
+ case X86::VPCMPUDZ256rmi: NewOpc = X86::VPCMPUDZ256rmi_alt; break;
+ case X86::VPCMPUDZ256rmib: NewOpc = X86::VPCMPUDZ256rmib_alt; break;
+ case X86::VPCMPUDZ256rmibk: NewOpc = X86::VPCMPUDZ256rmibk_alt; break;
+ case X86::VPCMPUDZ256rmik: NewOpc = X86::VPCMPUDZ256rmik_alt; break;
+ case X86::VPCMPUDZ256rri: NewOpc = X86::VPCMPUDZ256rri_alt; break;
+ case X86::VPCMPUDZ256rrik: NewOpc = X86::VPCMPUDZ256rrik_alt; break;
+ case X86::VPCMPUDZrmi: NewOpc = X86::VPCMPUDZrmi_alt; break;
+ case X86::VPCMPUDZrmib: NewOpc = X86::VPCMPUDZrmib_alt; break;
+ case X86::VPCMPUDZrmibk: NewOpc = X86::VPCMPUDZrmibk_alt; break;
+ case X86::VPCMPUDZrmik: NewOpc = X86::VPCMPUDZrmik_alt; break;
+ case X86::VPCMPUDZrri: NewOpc = X86::VPCMPUDZrri_alt; break;
+ case X86::VPCMPUDZrrik: NewOpc = X86::VPCMPUDZrrik_alt; break;
+ case X86::VPCMPUQZ128rmi: NewOpc = X86::VPCMPUQZ128rmi_alt; break;
+ case X86::VPCMPUQZ128rmib: NewOpc = X86::VPCMPUQZ128rmib_alt; break;
+ case X86::VPCMPUQZ128rmibk: NewOpc = X86::VPCMPUQZ128rmibk_alt; break;
+ case X86::VPCMPUQZ128rmik: NewOpc = X86::VPCMPUQZ128rmik_alt; break;
+ case X86::VPCMPUQZ128rri: NewOpc = X86::VPCMPUQZ128rri_alt; break;
+ case X86::VPCMPUQZ128rrik: NewOpc = X86::VPCMPUQZ128rrik_alt; break;
+ case X86::VPCMPUQZ256rmi: NewOpc = X86::VPCMPUQZ256rmi_alt; break;
+ case X86::VPCMPUQZ256rmib: NewOpc = X86::VPCMPUQZ256rmib_alt; break;
+ case X86::VPCMPUQZ256rmibk: NewOpc = X86::VPCMPUQZ256rmibk_alt; break;
+ case X86::VPCMPUQZ256rmik: NewOpc = X86::VPCMPUQZ256rmik_alt; break;
+ case X86::VPCMPUQZ256rri: NewOpc = X86::VPCMPUQZ256rri_alt; break;
+ case X86::VPCMPUQZ256rrik: NewOpc = X86::VPCMPUQZ256rrik_alt; break;
+ case X86::VPCMPUQZrmi: NewOpc = X86::VPCMPUQZrmi_alt; break;
+ case X86::VPCMPUQZrmib: NewOpc = X86::VPCMPUQZrmib_alt; break;
+ case X86::VPCMPUQZrmibk: NewOpc = X86::VPCMPUQZrmibk_alt; break;
+ case X86::VPCMPUQZrmik: NewOpc = X86::VPCMPUQZrmik_alt; break;
+ case X86::VPCMPUQZrri: NewOpc = X86::VPCMPUQZrri_alt; break;
+ case X86::VPCMPUQZrrik: NewOpc = X86::VPCMPUQZrrik_alt; break;
+ case X86::VPCMPUWZ128rmi: NewOpc = X86::VPCMPUWZ128rmi_alt; break;
+ case X86::VPCMPUWZ128rmik: NewOpc = X86::VPCMPUWZ128rmik_alt; break;
+ case X86::VPCMPUWZ128rri: NewOpc = X86::VPCMPUWZ128rri_alt; break;
+ case X86::VPCMPUWZ128rrik: NewOpc = X86::VPCMPUWZ128rrik_alt; break;
+ case X86::VPCMPUWZ256rmi: NewOpc = X86::VPCMPUWZ256rmi_alt; break;
+ case X86::VPCMPUWZ256rmik: NewOpc = X86::VPCMPUWZ256rmik_alt; break;
+ case X86::VPCMPUWZ256rri: NewOpc = X86::VPCMPUWZ256rri_alt; break;
+ case X86::VPCMPUWZ256rrik: NewOpc = X86::VPCMPUWZ256rrik_alt; break;
+ case X86::VPCMPUWZrmi: NewOpc = X86::VPCMPUWZrmi_alt; break;
+ case X86::VPCMPUWZrmik: NewOpc = X86::VPCMPUWZrmik_alt; break;
+ case X86::VPCMPUWZrri: NewOpc = X86::VPCMPUWZrri_alt; break;
+ case X86::VPCMPUWZrrik: NewOpc = X86::VPCMPUWZrrik_alt; break;
+ case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPWZ128rmi_alt; break;
+ case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPWZ128rmik_alt; break;
+ case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPWZ128rri_alt; break;
+ case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPWZ128rrik_alt; break;
+ case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPWZ256rmi_alt; break;
+ case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPWZ256rmik_alt; break;
+ case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPWZ256rri_alt; break;
+ case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPWZ256rrik_alt; break;
+ case X86::VPCMPWZrmi: NewOpc = X86::VPCMPWZrmi_alt; break;
+ case X86::VPCMPWZrmik: NewOpc = X86::VPCMPWZrmik_alt; break;
+ case X86::VPCMPWZrri: NewOpc = X86::VPCMPWZrri_alt; break;
+ case X86::VPCMPWZrrik: NewOpc = X86::VPCMPWZrrik_alt; break;
}
// Switch opcode to the one that doesn't get special printing.
mcInst.setOpcode(NewOpc);
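
The guard at the top of this block, immediate >= 8 || ((immediate & 0x3) == 3), selects exactly the immediates with no predicate alias to print: 8 and above are outside the defined range, and, reading the AVX-512 integer-compare predicate table, values 3 and 7 are the constant "false"/"true" comparisons, which keep the explicit-immediate _alt form. A sketch of the guard:

    // Sketch: AVX-512 integer-compare predicates 0..7 read eq, lt, le,
    // false, neq, nlt, nle, true; _alt opcodes print the raw immediate.
    bool needsAltOpcode(uint64_t Imm) {
      return Imm >= 8 ||       // no such predicate
             (Imm & 0x3) == 3; // 3 = "false", 7 = "true": no alias mnemonic
    }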
@@ -404,13 +538,13 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM32:
case TYPE_XMM64:
case TYPE_XMM128:
- mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
+ mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
return;
case TYPE_XMM256:
- mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
+ mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
return;
case TYPE_XMM512:
- mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4)));
+ mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
return;
case TYPE_REL8:
isBranch = true;
@@ -433,12 +567,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
insn.immediateOffset, insn.immediateSize,
mcInst, Dis))
- mcInst.addOperand(MCOperand::CreateImm(immediate));
+ mcInst.addOperand(MCOperand::createImm(immediate));
if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
MCOperand segmentReg;
- segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+ segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
mcInst.addOperand(segmentReg);
}
}
@@ -471,7 +605,7 @@ static bool translateRMRegister(MCInst &mcInst,
return true;
#define ENTRY(x) \
case EA_REG_##x: \
- mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ mcInst.addOperand(MCOperand::createReg(X86::x)); break;
ALL_REGS
#undef ENTRY
}
@@ -516,12 +650,12 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
return true;
#define ENTRY(x) \
case SIB_BASE_##x: \
- baseReg = MCOperand::CreateReg(X86::x); break;
+ baseReg = MCOperand::createReg(X86::x); break;
ALL_SIB_BASES
#undef ENTRY
}
} else {
- baseReg = MCOperand::CreateReg(0);
+ baseReg = MCOperand::createReg(0);
}
// Check whether we are handling VSIB addressing mode for GATHER.
@@ -571,7 +705,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
return true;
#define ENTRY(x) \
case SIB_INDEX_##x: \
- indexReg = MCOperand::CreateReg(X86::x); break;
+ indexReg = MCOperand::createReg(X86::x); break;
EA_BASES_32BIT
EA_BASES_64BIT
REGS_XMM
@@ -580,10 +714,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
#undef ENTRY
}
} else {
- indexReg = MCOperand::CreateReg(0);
+ indexReg = MCOperand::createReg(0);
}
- scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ scaleAmount = MCOperand::createImm(insn.sibScale);
} else {
switch (insn.eaBase) {
case EA_BASE_NONE:
@@ -597,31 +731,31 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
tryAddingPcLoadReferenceComment(insn.startLocation +
insn.displacementOffset,
insn.displacement + pcrel, Dis);
- baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ baseReg = MCOperand::createReg(X86::RIP); // Section 2.2.1.6
}
else
- baseReg = MCOperand::CreateReg(0);
+ baseReg = MCOperand::createReg(0);
- indexReg = MCOperand::CreateReg(0);
+ indexReg = MCOperand::createReg(0);
break;
case EA_BASE_BX_SI:
- baseReg = MCOperand::CreateReg(X86::BX);
- indexReg = MCOperand::CreateReg(X86::SI);
+ baseReg = MCOperand::createReg(X86::BX);
+ indexReg = MCOperand::createReg(X86::SI);
break;
case EA_BASE_BX_DI:
- baseReg = MCOperand::CreateReg(X86::BX);
- indexReg = MCOperand::CreateReg(X86::DI);
+ baseReg = MCOperand::createReg(X86::BX);
+ indexReg = MCOperand::createReg(X86::DI);
break;
case EA_BASE_BP_SI:
- baseReg = MCOperand::CreateReg(X86::BP);
- indexReg = MCOperand::CreateReg(X86::SI);
+ baseReg = MCOperand::createReg(X86::BP);
+ indexReg = MCOperand::createReg(X86::SI);
break;
case EA_BASE_BP_DI:
- baseReg = MCOperand::CreateReg(X86::BP);
- indexReg = MCOperand::CreateReg(X86::DI);
+ baseReg = MCOperand::createReg(X86::BP);
+ indexReg = MCOperand::createReg(X86::DI);
break;
default:
- indexReg = MCOperand::CreateReg(0);
+ indexReg = MCOperand::createReg(0);
switch (insn.eaBase) {
default:
debug("Unexpected eaBase");
@@ -632,7 +766,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
// placeholders to keep the compiler happy.
#define ENTRY(x) \
case EA_BASE_##x: \
- baseReg = MCOperand::CreateReg(X86::x); break;
+ baseReg = MCOperand::createReg(X86::x); break;
ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
@@ -644,12 +778,12 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
}
}
- scaleAmount = MCOperand::CreateImm(1);
+ scaleAmount = MCOperand::createImm(1);
}
- displacement = MCOperand::CreateImm(insn.displacement);
+ displacement = MCOperand::createImm(insn.displacement);
- segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+ segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
@@ -721,7 +855,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
/// @param stackPos - The stack position to translate.
static void translateFPRegister(MCInst &mcInst,
uint8_t stackPos) {
- mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+ mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
}
/// translateMaskRegister - Translates a 3-bit mask register number to
@@ -737,7 +871,7 @@ static bool translateMaskRegister(MCInst &mcInst,
return true;
}
- mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum));
+ mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
return false;
}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 619a0d4..d990bf3 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -310,11 +310,8 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
- if (insn->prefixPresent[prefix] == 1 &&
- insn->prefixLocations[prefix] == location)
- return true;
- else
- return false;
+ return insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location;
}
/*
@@ -1369,16 +1366,17 @@ static int readModRM(struct InternalInstruction* insn) {
switch (mod) {
case 0x0:
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
- switch (rm) {
- case 0x14:
- case 0x4:
- case 0xc: /* in case REXW.b is set */
+ // In determining whether RIP-relative mode is used (rm=5),
+ // or whether a SIB byte is present (rm=4),
+ // the extension bits (REX.b and EVEX.x) are ignored.
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
if (readSIB(insn) || readDisplacement(insn))
return -1;
break;
- case 0x5:
+ case 0x5: // RIP-relative
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_32;
if (readDisplacement(insn))
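
Masking rm down to its architectural three bits is what the new comment promises: REX.b and EVEX.x widen register numbers, but the SIB-present (rm=4) and RIP-relative (rm=5, mod=0) decisions must ignore them. A standalone sketch of the collapse the old 0x4/0xc/0x14 case labels did by hand:

    // Sketch: extension bits above bit 2 never change the addressing form.
    #include <cassert>
    #include <cstdint>

    uint8_t architecturalRM(uint8_t RM) { return RM & 7; }

    int main() {
      assert(architecturalRM(0x04) == 4); // SIB, no extension bits
      assert(architecturalRM(0x0c) == 4); // SIB with REX.b set
      assert(architecturalRM(0x14) == 4); // SIB with EVEX.x set
      assert(architecturalRM(0x05) == 5); // RIP-relative when mod == 0
      return 0;
    }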
@@ -1394,10 +1392,8 @@ static int readModRM(struct InternalInstruction* insn) {
/* FALLTHROUGH */
case 0x2:
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
- switch (rm) {
- case 0x14:
- case 0x4:
- case 0xc: /* in case REXW.b is set */
+ switch (rm & 7) {
+ case 0x4: // SIB byte is present
insn->eaBase = EA_BASE_sib;
if (readSIB(insn) || readDisplacement(insn))
return -1;
@@ -1458,6 +1454,8 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_VK1: \
case TYPE_VK8: \
case TYPE_VK16: \
+ if (index > 7) \
+ *valid = 0; \
return prefix##_K0 + index; \
case TYPE_MM64: \
return prefix##_MM0 + (index & 0x7); \
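
The two added lines bound-check the opmask class: AVX-512 defines exactly eight mask registers (k0-k7), so an index pushed past 7 by extension bits is an invalid encoding rather than something to wrap. A sketch of the shape the macro expands to (names illustrative, not from the patch):

    // Sketch: reject mask-register indices above 7 instead of wrapping.
    unsigned lookupMaskReg(unsigned K0, unsigned Index, int *Valid) {
      if (Index > 7)
        *Valid = 0;      // k8+ does not exist; flag the operand invalid
      return K0 + Index; // caller checks *Valid before trusting the result
    }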
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 1f8f9da..9e65050 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -406,6 +406,8 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
+ ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
+ ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
@@ -483,18 +485,6 @@ struct OperandSpecifier {
uint8_t type;
};
-// Indicates where the opcode modifier (if any) is to be found. Extended
-// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
-#define MODIFIER_TYPES \
- ENUM_ENTRY(MODIFIER_NONE)
-
-#define ENUM_ENTRY(n) n,
-enum ModifierType {
- MODIFIER_TYPES
- MODIFIER_max
-};
-#undef ENUM_ENTRY
-
static const unsigned X86_MAX_OPERANDS = 6;
/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
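
Both the deleted MODIFIER_TYPES block and the surviving operand-type list use the same X-macro idiom: the list is one macro built from ENUM_ENTRY invocations, and each consumer defines ENUM_ENTRY, expands the list, then undefines it. A minimal sketch of the pattern:

    // Sketch: the ENUM_ENTRY X-macro pattern used by these tables.
    #define SKETCH_TYPES          \
      ENUM_ENTRY(TYPE_A, "first") \
      ENUM_ENTRY(TYPE_B, "second")

    #define ENUM_ENTRY(n, d) n,
    enum SketchType { SKETCH_TYPES SKETCH_TYPE_max };
    #undef ENUM_ENTRY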
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index db69485..af4399a 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -33,15 +33,12 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-void X86ATTInstPrinter::printRegName(raw_ostream &OS,
- unsigned RegNo) const {
- OS << markup("<reg:")
- << '%' << getRegisterName(RegNo)
- << markup(">");
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -51,7 +48,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
if (TSFlags & X86II::LOCK)
- OS << "\tlock\n";
+ OS << "\tlock\t";
// Output CALLpcrel32 as "callq" in 64-bit mode.
// In Intel annotation it's always emitted as "call".
@@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// InstrInfo.td as soon as Requires clause is supported properly
// for InstAlias.
if (MI->getOpcode() == X86::CALLpcrel32 &&
- (getAvailableFeatures() & X86::Mode64Bit) != 0) {
+ (STI.getFeatureBits()[X86::Mode64Bit])) {
OS << "\tcallq\t";
printPCRelImm(MI, 0, OS);
}
@@ -72,7 +69,9 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printAnnotation(OS, Annot);
}
-static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
+void X86ATTInstPrinter::printSSEAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
switch (Imm) {
default: llvm_unreachable("Invalid ssecc/avxcc argument!");
case 0: O << "eq"; break;
@@ -110,22 +109,24 @@ static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
}
}
-void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+void X86ATTInstPrinter::printXOPCC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm();
- assert((Imm & 0x7) == Imm); // Ensure valid immediate.
- printSSEAVXCC(Imm, O);
-}
-
-void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm();
- assert((Imm & 0x1f) == Imm); // Ensure valid immediate.
- printSSEAVXCC(Imm, O);
+ switch (Imm) {
+ default: llvm_unreachable("Invalid xopcc argument!");
+ case 0: O << "lt"; break;
+ case 1: O << "le"; break;
+ case 2: O << "gt"; break;
+ case 3: O << "ge"; break;
+ case 4: O << "eq"; break;
+ case 5: O << "neq"; break;
+ case 6: O << "false"; break;
+ case 7: O << "true"; break;
+ }
}
void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
switch (Imm) {
case 0: O << "{rn-sae}"; break;
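
Two lookup tables make these printer changes concrete: printXOPCC exists because XOP's VPCOM predicate ordering differs from the SSE/AVX one above it, and printRoundingControl masks with 0x3 because EVEX embedded rounding control is a two-bit field (the cases after {rn-sae} are cropped here; the mapping shown is the architectural one):

    // Sketch: XOP VPCOM predicates, index = immediate & 7, matching the
    // printXOPCC switch above.
    static const char *const XOPCCNames[8] = {
        "lt", "le", "gt", "ge", "eq", "neq", "false", "true"};

    // Sketch: 2-bit EVEX rounding control: nearest, down, up, toward zero,
    // each printed with the suppress-all-exceptions suffix.
    static const char *const RoundingModes[4] = {
        "{rn-sae}", "{rd-sae}", "{ru-sae}", "{rz-sae}"};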
@@ -165,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Op.getReg());
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
- O << markup("<imm:")
- << '$' << formatImm((int64_t)Op.getImm())
+ O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
// If there are no instruction-specific comments, add a comment clarifying
@@ -178,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:")
- << '$' << *Op.getExpr()
- << markup(">");
+ O << markup("<imm:") << '$' << *Op.getExpr() << markup(">");
}
}
void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
- const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
+ const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
+ const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
+ const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+X86::AddrSegmentReg, O);
+ printOperand(MI, Op + X86::AddrSegmentReg, O);
O << ':';
}
@@ -211,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (IndexReg.getReg() || BaseReg.getReg()) {
O << '(';
if (BaseReg.getReg())
- printOperand(MI, Op+X86::AddrBaseReg, O);
+ printOperand(MI, Op + X86::AddrBaseReg, O);
if (IndexReg.getReg()) {
O << ',';
- printOperand(MI, Op+X86::AddrIndexReg, O);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+ printOperand(MI, Op + X86::AddrIndexReg, O);
+ unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
if (ScaleVal != 1) {
- O << ','
- << markup("<imm:")
- << ScaleVal // never printed in hex.
+ O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
<< markup(">");
}
}
@@ -232,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -263,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &DispSpec = MI->getOperand(Op);
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -282,3 +278,9 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << markup(">");
}
+
+void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
+ << markup(">");
+}
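
printU8Imm masks to the low byte so an immediate that was stored sign-extended still prints in its unsigned 8-bit form, matching the new TYPE_UIMM8 operand class. A standalone sketch:

    // Sketch: $-1 would be misleading for a bitfield immediate; print the
    // low byte as 0..255 instead.
    #include <cstdint>
    #include <cstdio>

    void printU8(int64_t Imm) {
      std::printf("$%u\n", static_cast<unsigned>(Imm & 0xff));
    }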
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 649dc84..62b6b73 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -24,14 +24,12 @@ class MCOperand;
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
- }
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -45,13 +43,14 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printXOPCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
@@ -141,7 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-
}
#endif
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index a4c0ca8..3cad9fa 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -21,6 +21,92 @@
using namespace llvm;
+/// \brief Extracts the src/dst types for a given zero extension instruction.
+/// \note While the number of elements in the DstVT type is correct, the
+/// number in the SrcVT type is expanded to fill the src xmm register, and the
+/// upper elements may not be included in the dst xmm/ymm register.
+static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown zero extension instruction");
+ // i8 zero extension
+ case X86::PMOVZXBWrm:
+ case X86::PMOVZXBWrr:
+ case X86::VPMOVZXBWrm:
+ case X86::VPMOVZXBWrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v8i16;
+ break;
+ case X86::VPMOVZXBWYrm:
+ case X86::VPMOVZXBWYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v16i16;
+ break;
+ case X86::PMOVZXBDrm:
+ case X86::PMOVZXBDrr:
+ case X86::VPMOVZXBDrm:
+ case X86::VPMOVZXBDrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v4i32;
+ break;
+ case X86::VPMOVZXBDYrm:
+ case X86::VPMOVZXBDYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v8i32;
+ break;
+ case X86::PMOVZXBQrm:
+ case X86::PMOVZXBQrr:
+ case X86::VPMOVZXBQrm:
+ case X86::VPMOVZXBQrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXBQYrm:
+ case X86::VPMOVZXBQYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v4i64;
+ break;
+ // i16 zero extension
+ case X86::PMOVZXWDrm:
+ case X86::PMOVZXWDrr:
+ case X86::VPMOVZXWDrm:
+ case X86::VPMOVZXWDrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v4i32;
+ break;
+ case X86::VPMOVZXWDYrm:
+ case X86::VPMOVZXWDYrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v8i32;
+ break;
+ case X86::PMOVZXWQrm:
+ case X86::PMOVZXWQrr:
+ case X86::VPMOVZXWQrm:
+ case X86::VPMOVZXWQrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXWQYrm:
+ case X86::VPMOVZXWQYrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v4i64;
+ break;
+ // i32 zero extension
+ case X86::PMOVZXDQrm:
+ case X86::PMOVZXDQrr:
+ case X86::VPMOVZXDQrm:
+ case X86::VPMOVZXDQrr:
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXDQYrm:
+ case X86::VPMOVZXDQYrr:
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v4i64;
+ break;
+ }
+}
+
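
The \note above is the subtle part: the table always reports a full 128-bit source type even though only the low elements reach the destination. For PMOVZXBW, for instance, only the low 8 of the 16 source bytes are widened (a behavioral sketch):

    // Sketch of PMOVZXBW semantics: a v16i8 source is reported, but only
    // the low 8 bytes are zero-extended into the v8i16 destination.
    #include <cstdint>

    void pmovzxbw(const uint8_t Src[16], uint16_t Dst[8]) {
      for (int i = 0; i < 8; ++i)
        Dst[i] = Src[i]; // zero extension is implicit; Src[8..15] ignored
    }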
//===----------------------------------------------------------------------===//
// Top Level Entrypoint
//===----------------------------------------------------------------------===//
@@ -45,9 +131,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::BLENDPDrmi:
case X86::VBLENDPDrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -56,9 +142,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VBLENDPDYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v4f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -70,9 +156,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::BLENDPSrmi:
case X86::VBLENDPSrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -81,9 +167,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VBLENDPSYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v8f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -95,9 +181,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PBLENDWrmi:
case X86::VPBLENDWrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -106,9 +192,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPBLENDWYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v16i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -118,9 +204,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPBLENDDrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -130,9 +216,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPBLENDDYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeBLENDMask(MVT::v8i32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -146,8 +232,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VINSERTPSrm:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
- DecodeINSERTPSMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
+ DecodeINSERTPSMask(MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
@@ -203,22 +289,40 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
break;
+ case X86::VMOVDDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVDDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
+ break;
+
+ case X86::MOVDDUPrr:
+ case X86::VMOVDDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVDDUPrm:
+ case X86::VMOVDDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
+ break;
+
case X86::PSLLDQri:
case X86::VPSLLDQri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSLLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPSLLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSLLDQMask(MVT::v32i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
@@ -226,18 +330,18 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPSRLDQri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSRLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPSRLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSRLDQMask(MVT::v32i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
@@ -249,9 +353,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPALIGNR128rm:
Src2Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePALIGNRMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPALIGNR256rr:
@@ -260,9 +364,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPALIGNR256rm:
Src2Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePALIGNRMask(MVT::v32i8,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
@@ -273,9 +377,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSHUFDmi:
case X86::VPSHUFDmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPSHUFDYri:
@@ -283,13 +387,12 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::VPSHUFDYmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v8i32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
-
case X86::PSHUFHWri:
case X86::VPSHUFHWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -297,9 +400,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSHUFHWmi:
case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFHWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPSHUFHWYri:
@@ -307,9 +410,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::VPSHUFHWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFHWMask(MVT::v16i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
@@ -319,9 +422,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSHUFLWmi:
case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFLWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::VPSHUFLWYri:
@@ -329,9 +432,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::VPSHUFLWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFLWMask(MVT::v16i16,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
@@ -519,9 +622,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::SHUFPDrmi:
case X86::VSHUFPDrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -530,9 +633,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(MVT::v4f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -544,9 +647,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::SHUFPSrmi:
case X86::VSHUFPSrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -555,9 +658,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSYrmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(MVT::v8f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -671,9 +774,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -681,9 +784,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSYmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v8f32,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -691,9 +794,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -701,9 +804,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDYmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(MVT::v4f64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -714,9 +817,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPERM2F128rm:
case X86::VPERM2I128rm:
// For instruction comment purposes, assume the 256-bit vector is v4i64.
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeVPERM2X128Mask(MVT::v4i64,
- MI->getOperand(MI->getNumOperands()-1).getImm(),
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -727,11 +830,97 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::VPERMQYmi:
case X86::VPERMPDYmi:
- if(MI->getOperand(MI->getNumOperands()-1).isImm())
- DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
+ DecodeVPERMMask(MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
+ case X86::MOVSDrr:
+ case X86::VMOVSDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSDrm:
+ case X86::VMOVSDrm:
+ DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::MOVSSrr:
+ case X86::VMOVSSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSSrm:
+ case X86::VMOVSSrm:
+ DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::MOVPQI2QIrr:
+ case X86::MOVZPQILo2PQIrr:
+ case X86::VMOVPQI2QIrr:
+ case X86::VMOVZPQILo2PQIrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVQI2PQIrm:
+ case X86::MOVZQI2PQIrm:
+ case X86::MOVZPQILo2PQIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVZQI2PQIrm:
+ case X86::VMOVZPQILo2PQIrm:
+ DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::MOVDI2PDIrm:
+ case X86::VMOVDI2PDIrm:
+ DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PMOVZXBWrr:
+ case X86::PMOVZXBDrr:
+ case X86::PMOVZXBQrr:
+ case X86::PMOVZXWDrr:
+ case X86::PMOVZXWQrr:
+ case X86::PMOVZXDQrr:
+ case X86::VPMOVZXBWrr:
+ case X86::VPMOVZXBDrr:
+ case X86::VPMOVZXBQrr:
+ case X86::VPMOVZXWDrr:
+ case X86::VPMOVZXWQrr:
+ case X86::VPMOVZXDQrr:
+ case X86::VPMOVZXBWYrr:
+ case X86::VPMOVZXBDYrr:
+ case X86::VPMOVZXBQYrr:
+ case X86::VPMOVZXWDYrr:
+ case X86::VPMOVZXWQYrr:
+ case X86::VPMOVZXDQYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PMOVZXBWrm:
+ case X86::PMOVZXBDrm:
+ case X86::PMOVZXBQrm:
+ case X86::PMOVZXWDrm:
+ case X86::PMOVZXWQrm:
+ case X86::PMOVZXDQrm:
+ case X86::VPMOVZXBWrm:
+ case X86::VPMOVZXBDrm:
+ case X86::VPMOVZXBQrm:
+ case X86::VPMOVZXWDrm:
+ case X86::VPMOVZXWQrm:
+ case X86::VPMOVZXDQrm:
+ case X86::VPMOVZXBWYrm:
+ case X86::VPMOVZXBDYrm:
+ case X86::VPMOVZXBQYrm:
+ case X86::VPMOVZXWDYrm:
+ case X86::VPMOVZXWQYrm:
+ case X86::VPMOVZXDQYrm: {
+ MVT SrcVT, DstVT;
+ getZeroExtensionTypes(MI, SrcVT, DstVT);
+ DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ } break;
}
// The only comments we decode are shuffles, so give up if we were unable to
@@ -747,7 +936,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
if (Src1Name == Src2Name) {
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= (int)e) // From second mask.
+ ShuffleMask[i] >= (int)e) // From second mask.
ShuffleMask[i] -= e;
}
}
@@ -782,7 +971,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
++i;
}
OS << ']';
- --i; // For loop increments element #.
+ --i; // For loop increments element #.
}
//MI->print(OS, 0);
OS << "\n";
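The whitespace-only hunks above leave the decode logic intact; the substantive additions teach EmitAnyX86InstComments about scalar moves, zero-extending moves, and the PMOVZX family, and the final hunk folds a two-source shuffle mask onto one source. In a two-input mask, indices in [0, e) select lanes of the first source and indices in [e, 2e) lanes of the second, so when both source names match, the second-half indices can be remapped down. A minimal standalone sketch of that folding (not the LLVM code itself; names are illustrative):

#include <cstdio>
#include <vector>

static void foldMaskForIdenticalSources(std::vector<int> &Mask) {
  const int E = static_cast<int>(Mask.size());
  for (int &Idx : Mask)
    if (Idx >= E) // Negative entries are sentinels (undef/zero) and stay put.
      Idx -= E;   // "Second source, lane i" becomes "first source, lane i".
}

int main() {
  std::vector<int> Mask = {0, 5, 2, 7}; // lanes 1 and 3 came from the second source
  foldMaskForIdenticalSources(Mask);
  for (int Idx : Mask)
    std::printf("%d ", Idx); // prints "0 1 2 3"
}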
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 449445d..4d92daf 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -50,7 +51,9 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
-static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
+void X86IntelInstPrinter::printSSEAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
switch (Imm) {
default: llvm_unreachable("Invalid avxcc argument!");
case 0: O << "eq"; break;
@@ -88,22 +91,24 @@ static void printSSEAVXCC(int64_t Imm, raw_ostream &O) {
}
}
-void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm();
- assert((Imm & 0x7) == Imm); // Ensure valid immediate.
- printSSEAVXCC(Imm, O);
-}
-
-void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
+void X86IntelInstPrinter::printXOPCC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm();
- assert((Imm & 0x1f) == Imm); // Ensure valid immediate.
- printSSEAVXCC(Imm, O);
+ switch (Imm) {
+ default: llvm_unreachable("Invalid xopcc argument!");
+ case 0: O << "lt"; break;
+ case 1: O << "le"; break;
+ case 2: O << "gt"; break;
+ case 3: O << "ge"; break;
+ case 4: O << "eq"; break;
+ case 5: O << "neq"; break;
+ case 6: O << "false"; break;
+ case 7: O << "true"; break;
+ }
}
void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ raw_ostream &O) {
int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
switch (Imm) {
case 0: O << "{rn-sae}"; break;
@@ -245,3 +250,8 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
O << ']';
}
+
+void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ O << formatImm(MI->getOperand(Op).getImm() & 0xff);
+}
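The Intel printer changes mirror the ATT side: printSSEAVXCC now reads its immediate from the MCInst directly, printXOPCC maps the XOP comparison immediate to a mnemonic, and printU8Imm masks an immediate to its low byte before printing. A self-contained sketch of the immediate-to-mnemonic mapping (xopCCName is a hypothetical helper, not the LLVM entry point, and it masks the immediate where the real code rejects out-of-range values with llvm_unreachable):

#include <cstdint>
#include <cstdio>

static const char *xopCCName(int64_t Imm) {
  switch (Imm & 0x7) { // 3-bit condition-code field
  case 0: return "lt";    case 1: return "le";
  case 2: return "gt";    case 3: return "ge";
  case 4: return "eq";    case 5: return "neq";
  case 6: return "false"; default: return "true";
  }
}

int main() {
  for (int64_t Imm = 0; Imm < 8; ++Imm)
    std::printf("imm %d -> %s\n", (int)Imm, xopCCName(Imm));
}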
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 641423d..6e371da 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -28,7 +28,8 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -36,13 +37,14 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printXOPCC(const MCInst *MI, unsigned Op, raw_ostream &O);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 164b419..6d4284d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -14,8 +14,10 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -76,8 +78,8 @@ class X86AsmBackend : public MCAsmBackend {
bool HasNopl;
const uint64_t MaxNopLength;
public:
- X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) {
+ X86AsmBackend(const Target &T, StringRef CPU)
+ : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
@@ -351,8 +353,8 @@ namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
- ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU)
- : X86AsmBackend(T, CPU), OSABI(_OSABI) {}
+ ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : X86AsmBackend(T, CPU), OSABI(OSABI) {}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
@@ -360,7 +362,7 @@ public:
ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
}
};
@@ -370,7 +372,7 @@ public:
ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
ELF::EM_X86_64);
}
@@ -381,7 +383,7 @@ public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64);
}
};
@@ -395,7 +397,7 @@ public:
, Is64Bit(is64Bit) {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86WinCOFFObjectWriter(OS, Is64Bit);
}
};
@@ -752,7 +754,7 @@ public:
StringRef CPU)
: DarwinX86AsmBackend(T, MRI, CPU, false) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
MachO::CPU_TYPE_I386,
MachO::CPU_SUBTYPE_I386_ALL);
@@ -772,24 +774,11 @@ public:
StringRef CPU, MachO::CPUSubTypeX86 st)
: DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
MachO::CPU_TYPE_X86_64, Subtype);
}
- bool doesSectionRequireSymbols(const MCSection &Section) const override {
- // Temporary labels in the string literals sections require symbols. The
- // issue is that the x86_64 relocation format does not allow symbol +
- // offset, and so the linker does not have enough information to resolve the
- // access to the appropriate atom unless an external relocation is used. For
- // non-cstring sections, we expect the compiler to use a non-temporary label
- // for anything that could have an addend pointing outside the symbol.
- //
- // See <rdar://problem/4765733>.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- return SMO.getType() == MachO::S_CSTRING_LITERALS;
- }
-
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
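Besides the raw_pwrite_stream migration, the backend hunks rename the constructor parameters _CPU and _OSABI: in C++, any identifier beginning with an underscore followed by an uppercase letter is reserved to the implementation, and a mem-initializer list may legally reuse the parameter's name for the member it initializes. A sketch with a hypothetical class (not X86AsmBackend itself):

#include <string>

class Backend {
  std::string CPU;
  unsigned MaxNopLength;

public:
  explicit Backend(const std::string &CPU)
      // In CPU(CPU), the first CPU names the member and the second the
      // parameter; inside the initializer expressions, plain CPU finds
      // the parameter, so MaxNopLength sees the argument value.
      : CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {}
};

int main() { Backend B("slm"); (void)B; }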
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 28be15b..85b0006 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -213,7 +213,11 @@ namespace X86II {
/// the offset from beginning of section.
///
/// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL
+ MO_SECREL,
+
+ /// MO_NOPREFIX - On a symbol operand this indicates that the symbol should
+ /// not be mangled with a prefix.
+ MO_NOPREFIX,
};
enum : uint64_t {
@@ -302,19 +306,21 @@ namespace X86II {
//// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C0 = 32, MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35,
- MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39,
- MRM_CB = 40, MRM_CF = 41, MRM_D0 = 42, MRM_D1 = 43,
- MRM_D4 = 44, MRM_D5 = 45, MRM_D6 = 46, MRM_D7 = 47,
- MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50, MRM_DB = 51,
- MRM_DC = 52, MRM_DD = 53, MRM_DE = 54, MRM_DF = 55,
- MRM_E0 = 56, MRM_E1 = 57, MRM_E2 = 58, MRM_E3 = 59,
- MRM_E4 = 60, MRM_E5 = 61, MRM_E8 = 62, MRM_E9 = 63,
- MRM_EA = 64, MRM_EB = 65, MRM_EC = 66, MRM_ED = 67,
- MRM_EE = 68, MRM_F0 = 69, MRM_F1 = 70, MRM_F2 = 71,
- MRM_F3 = 72, MRM_F4 = 73, MRM_F5 = 74, MRM_F6 = 75,
- MRM_F7 = 76, MRM_F8 = 77, MRM_F9 = 78, MRM_FA = 79,
- MRM_FB = 80, MRM_FC = 81, MRM_FD = 82, MRM_FE = 83,
- MRM_FF = 84,
+ MRM_C4 = 36, MRM_C5 = 37, MRM_C6 = 38, MRM_C7 = 39,
+ MRM_C8 = 40, MRM_C9 = 41, MRM_CA = 42, MRM_CB = 43,
+ MRM_CC = 44, MRM_CD = 45, MRM_CE = 46, MRM_CF = 47,
+ MRM_D0 = 48, MRM_D1 = 49, MRM_D2 = 50, MRM_D3 = 51,
+ MRM_D4 = 52, MRM_D5 = 53, MRM_D6 = 54, MRM_D7 = 55,
+ MRM_D8 = 56, MRM_D9 = 57, MRM_DA = 58, MRM_DB = 59,
+ MRM_DC = 60, MRM_DD = 61, MRM_DE = 62, MRM_DF = 63,
+ MRM_E0 = 64, MRM_E1 = 65, MRM_E2 = 66, MRM_E3 = 67,
+ MRM_E4 = 68, MRM_E5 = 69, MRM_E6 = 70, MRM_E7 = 71,
+ MRM_E8 = 72, MRM_E9 = 73, MRM_EA = 74, MRM_EB = 75,
+ MRM_EC = 76, MRM_ED = 77, MRM_EE = 78, MRM_EF = 79,
+ MRM_F0 = 80, MRM_F1 = 81, MRM_F2 = 82, MRM_F3 = 83,
+ MRM_F4 = 84, MRM_F5 = 85, MRM_F6 = 86, MRM_F7 = 87,
+ MRM_F8 = 88, MRM_F9 = 89, MRM_FA = 90, MRM_FB = 91,
+ MRM_FC = 92, MRM_FD = 93, MRM_FE = 94, MRM_FF = 95,
FormMask = 127,
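The renumbering makes MRM_C0 through MRM_FF a dense range in the same order as the mod/rm bytes 0xC0 through 0xFF, so an encoder can recover the byte with plain arithmetic instead of enumerating every case (the matching emitter change appears further below). A sketch with stand-in enum values (the real constants live in X86II):

#include <cassert>
#include <cstdint>

enum MRMForm : unsigned { MRM_C0 = 32, MRM_C1, MRM_C2 /* ... densely to MRM_FF ... */ };

static uint8_t mrmByte(unsigned Form) {
  // Valid only because the enum values track 0xC0..0xFF contiguously; the
  // old numbering had gaps (no MRM_C5, MRM_CC, ...) and needed a switch.
  return static_cast<uint8_t>(0xC0 + Form - MRM_C0);
}

int main() { assert(mrmByte(MRM_C2) == 0xC2); }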
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index e8b0b4c..4508883 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -22,7 +22,8 @@ namespace {
public:
X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
- virtual ~X86ELFObjectWriter();
+ ~X86ELFObjectWriter() override;
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -38,236 +39,218 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
-unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- // determine the type of the relocation
-
- MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- unsigned Type;
- if (getEMachine() == ELF::EM_X86_64) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
- case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
- case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
- case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
- case FK_Data_1: Type = ELF::R_X86_64_PC8; break;
+static X86_64RelType getType64(unsigned Kind,
+ MCSymbolRefExpr::VariantKind &Modifier,
+ bool &IsPCRel) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case X86::reloc_global_offset_table8:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_64;
+ case FK_Data_8:
+ return RT64_64;
+ case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_None && !IsPCRel)
+ return RT64_32S;
+ return RT64_32;
+ case X86::reloc_global_offset_table:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_32;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return RT64_32;
+ case FK_Data_2:
+ return RT64_16;
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return RT64_8;
+ }
+}
- case FK_PCRel_8:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC64;
- break;
- case X86::reloc_signed_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_riprel_4byte:
- case FK_PCRel_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_X86_64_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_X86_64_GOTTPOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_X86_64_TLSGD;
- break;
- case MCSymbolRefExpr::VK_TLSLD:
- Type = ELF::R_X86_64_TLSLD;
- break;
- }
- break;
- case FK_PCRel_2:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC16;
- break;
- case FK_PCRel_1:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC8;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case X86::reloc_global_offset_table8:
- Type = ELF::R_X86_64_GOTPC64;
- break;
- case X86::reloc_global_offset_table:
- Type = ELF::R_X86_64_GOTPC32;
- break;
- case FK_Data_8:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_64;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT64;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_X86_64_GOTOFF64;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF64;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF64;
- break;
- }
- break;
- case X86::reloc_signed_4byte:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_32S;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF32;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF32;
- break;
- }
- break;
- case FK_Data_4:
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
+static unsigned getRelocType64(MCSymbolRefExpr::VariantKind Modifier,
+ X86_64RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
+ case RT64_32S:
+ return ELF::R_X86_64_32S;
+ case RT64_16:
+ return IsPCRel ? ELF::R_X86_64_PC16 : ELF::R_X86_64_16;
+ case RT64_8:
+ return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8;
}
- } else if (getEMachine() == ELF::EM_386) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
-
- case FK_PCRel_1:
- case FK_Data_1:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC8;
- break;
- }
- break;
-
- case FK_PCRel_2:
- case FK_Data_2:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC16;
- break;
- }
- break;
+ case MCSymbolRefExpr::VK_GOT:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_GOTPC64 : ELF::R_X86_64_GOT64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_GOTPC32 : ELF::R_X86_64_GOT32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT64_64);
+ assert(!IsPCRel);
+ return ELF::R_X86_64_GOTOFF64;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_TPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_TPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_DTPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_DTPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_SIZE:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_SIZE64;
+ case RT64_32:
+ return ELF::R_X86_64_SIZE32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSGD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTTPOFF;
+ case MCSymbolRefExpr::VK_TLSLD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSLD;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_PLT32;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTPCREL;
+ }
+}
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+enum X86_32RelType { RT32_32, RT32_16, RT32_8 };
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
+static X86_32RelType getType32(X86_64RelType T) {
+ switch (T) {
+ case RT64_64:
+ llvm_unreachable("Unimplemented");
+ case RT64_32:
+ case RT64_32S:
+ return RT32_32;
+ case RT64_16:
+ return RT32_16;
+ case RT64_8:
+ return RT32_8;
+ }
+ llvm_unreachable("unexpected relocation type!");
+}
- // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
- // instead?
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_386_GOT32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_386_GOTOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_386_TLS_GD;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_386_TLS_LE_32;
- break;
- case MCSymbolRefExpr::VK_INDNTPOFF:
- Type = ELF::R_386_TLS_IE;
- break;
- case MCSymbolRefExpr::VK_NTPOFF:
- Type = ELF::R_386_TLS_LE;
- break;
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- Type = ELF::R_386_TLS_GOTIE;
- break;
- case MCSymbolRefExpr::VK_TLSLDM:
- Type = ELF::R_386_TLS_LDM;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_386_TLS_LDO_32;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_386_TLS_IE_32;
- break;
- }
- break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
+static unsigned getRelocType32(MCSymbolRefExpr::VariantKind Modifier,
+ X86_32RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT32_32:
+ return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
+ case RT32_16:
+ return IsPCRel ? ELF::R_386_PC16 : ELF::R_386_16;
+ case RT32_8:
+ return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8;
}
- } else
- llvm_unreachable("Unsupported ELF machine type.");
+ case MCSymbolRefExpr::VK_GOT:
+ assert(Type == RT32_32);
+ return IsPCRel ? ELF::R_386_GOTPC : ELF::R_386_GOT32;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_GOTOFF;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE_32;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDO_32;
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE_32;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT32_32);
+ return ELF::R_386_PLT32;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GOTIE;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDM;
+ }
+}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+ X86_64RelType Type = getType64(Fixup.getKind(), Modifier, IsPCRel);
+ if (getEMachine() == ELF::EM_X86_64)
+ return getRelocType64(Modifier, Type, IsPCRel);
- return Type;
+ assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type.");
+ return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
-MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS,
+ bool IsELF64, uint8_t OSABI,
uint16_t EMachine) {
MCELFObjectTargetWriter *MOTW =
new X86ELFObjectWriter(IsELF64, OSABI, EMachine);
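The rewrite above replaces one deeply nested switch with two small total mappings: a fixup kind is first classified by width (getType64), and the (modifier, width, pcrel) triple is then translated to a relocation number (getRelocType64, or getType32 plus getRelocType32 for EM_386). A toy version showing the shape, with stand-in numbers where the real code returns ELF::R_X86_64_* constants:

#include <cassert>

enum Width { W64, W32, W16, W8 };
enum Modifier { None, PLT, GOTPCREL };

static unsigned relocType64(Modifier M, Width W, bool IsPCRel) {
  switch (M) {
  case None:
    switch (W) {
    case W64: return IsPCRel ? 24 : 1;  // PC64 / 64
    case W32: return IsPCRel ? 2 : 10;  // PC32 / 32
    case W16: return IsPCRel ? 13 : 12; // PC16 / 16
    case W8:  return IsPCRel ? 15 : 14; // PC8  / 8
    }
    return 0; // unreachable for valid Width
  case PLT:      return 4; // PLT32
  case GOTPCREL: return 9; // GOTPCREL
  }
  return 0;
}

int main() { assert(relocType64(None, W32, /*IsPCRel=*/true) == 2); }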
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index b679316..a39def9 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -34,9 +34,9 @@ public:
uint64_t SymSize; SymI->getSize(SymSize);
int64_t Addend; getELFRelocationAddend(Rel, Addend);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 5795514..bda35f2 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -132,11 +132,10 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
PrivateLabelPrefix = ".L";
PointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
-
- // Use MSVC-compatible EH data.
- ExceptionsType = ExceptionHandling::MSVC;
}
+ ExceptionsType = ExceptionHandling::WinEH;
+
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
@@ -155,7 +154,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
PrivateLabelPrefix = ".L";
PointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
- ExceptionsType = ExceptionHandling::ItaniumWinEH;
+ ExceptionsType = ExceptionHandling::WinEH;
} else {
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 5d92524..8aed7a4 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
- X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
+ void operator=(const X86MCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
public:
@@ -39,18 +39,18 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~X86MCCodeEmitter() {}
+ ~X86MCCodeEmitter() override {}
bool is64BitMode(const MCSubtargetInfo &STI) const {
- return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode(const MCSubtargetInfo &STI) const {
- return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode(const MCSubtargetInfo &STI) const {
- return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
+ return STI.getFeatureBits()[X86::Mode16Bit];
}
/// Is16BitMemOperand - Return true if the specified instruction has
@@ -149,7 +149,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -168,10 +168,8 @@ public:
} // end anonymous namespace
-
MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new X86MCCodeEmitter(MCII, Ctx);
}
@@ -357,7 +355,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind, Loc));
+ Fixups.push_back(MCFixup::create(CurByte, Expr, FixupKind, Loc));
EmitConstant(0, Size, CurByte, OS);
}
@@ -1154,7 +1152,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
void X86MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
@@ -1426,83 +1424,31 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
+ case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
+ case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
+ case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE:
case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
- case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
- case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
- case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
- case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
- case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
- case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
- case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
- case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
- case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
- case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
- case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0:
+ case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3:
+ case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6:
+ case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9:
+ case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
+ case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF:
+ case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2:
+ case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5:
+ case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB:
+ case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE:
+ case X86II::MRM_FF:
EmitByte(BaseOpcode, CurByte, OS);
- unsigned char MRM;
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Invalid Form");
- case X86II::MRM_C0: MRM = 0xC0; break;
- case X86II::MRM_C1: MRM = 0xC1; break;
- case X86II::MRM_C2: MRM = 0xC2; break;
- case X86II::MRM_C3: MRM = 0xC3; break;
- case X86II::MRM_C4: MRM = 0xC4; break;
- case X86II::MRM_C8: MRM = 0xC8; break;
- case X86II::MRM_C9: MRM = 0xC9; break;
- case X86II::MRM_CA: MRM = 0xCA; break;
- case X86II::MRM_CB: MRM = 0xCB; break;
- case X86II::MRM_CF: MRM = 0xCF; break;
- case X86II::MRM_D0: MRM = 0xD0; break;
- case X86II::MRM_D1: MRM = 0xD1; break;
- case X86II::MRM_D4: MRM = 0xD4; break;
- case X86II::MRM_D5: MRM = 0xD5; break;
- case X86II::MRM_D6: MRM = 0xD6; break;
- case X86II::MRM_D7: MRM = 0xD7; break;
- case X86II::MRM_D8: MRM = 0xD8; break;
- case X86II::MRM_D9: MRM = 0xD9; break;
- case X86II::MRM_DA: MRM = 0xDA; break;
- case X86II::MRM_DB: MRM = 0xDB; break;
- case X86II::MRM_DC: MRM = 0xDC; break;
- case X86II::MRM_DD: MRM = 0xDD; break;
- case X86II::MRM_DE: MRM = 0xDE; break;
- case X86II::MRM_DF: MRM = 0xDF; break;
- case X86II::MRM_E0: MRM = 0xE0; break;
- case X86II::MRM_E1: MRM = 0xE1; break;
- case X86II::MRM_E2: MRM = 0xE2; break;
- case X86II::MRM_E3: MRM = 0xE3; break;
- case X86II::MRM_E4: MRM = 0xE4; break;
- case X86II::MRM_E5: MRM = 0xE5; break;
- case X86II::MRM_E8: MRM = 0xE8; break;
- case X86II::MRM_E9: MRM = 0xE9; break;
- case X86II::MRM_EA: MRM = 0xEA; break;
- case X86II::MRM_EB: MRM = 0xEB; break;
- case X86II::MRM_EC: MRM = 0xEC; break;
- case X86II::MRM_ED: MRM = 0xED; break;
- case X86II::MRM_EE: MRM = 0xEE; break;
- case X86II::MRM_F0: MRM = 0xF0; break;
- case X86II::MRM_F1: MRM = 0xF1; break;
- case X86II::MRM_F2: MRM = 0xF2; break;
- case X86II::MRM_F3: MRM = 0xF3; break;
- case X86II::MRM_F4: MRM = 0xF4; break;
- case X86II::MRM_F5: MRM = 0xF5; break;
- case X86II::MRM_F6: MRM = 0xF6; break;
- case X86II::MRM_F7: MRM = 0xF7; break;
- case X86II::MRM_F8: MRM = 0xF8; break;
- case X86II::MRM_F9: MRM = 0xF9; break;
- case X86II::MRM_FA: MRM = 0xFA; break;
- case X86II::MRM_FB: MRM = 0xFB; break;
- case X86II::MRM_FC: MRM = 0xFC; break;
- case X86II::MRM_FD: MRM = 0xFD; break;
- case X86II::MRM_FE: MRM = 0xFE; break;
- case X86II::MRM_FF: MRM = 0xFF; break;
- }
- EmitByte(MRM, CurByte, OS);
+ uint64_t Form = TSFlags & X86II::FormMask;
+ EmitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS);
break;
}
@@ -1529,7 +1475,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
RegNum |= Val;
}
}
- EmitImmediate(MCOperand::CreateImm(RegNum), MI.getLoc(), 1, FK_Data_1,
+ EmitImmediate(MCOperand::createImm(RegNum), MI.getLoc(), 1, FK_Data_1,
CurByte, OS, Fixups);
} else {
EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
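Two independent changes sit in this file. The mode checks switch from masking a 64-bit integer, (getFeatureBits() & X86::Mode64Bit), to indexing a FeatureBitset, getFeatureBits()[X86::Mode64Bit], which removes the 64-feature ceiling, and the 52-case MRM switch collapses into the arithmetic enabled by the dense renumbering above. A rough analogue of the bitset change using std::bitset (FeatureBitset is an LLVM type with similar indexing; the enum here is illustrative):

#include <bitset>
#include <cassert>

enum Feature { Mode16Bit, Mode32Bit, Mode64Bit, NumFeatures };
using FeatureBits = std::bitset<NumFeatures>;

static bool is64BitMode(const FeatureBits &FB) {
  return FB[Mode64Bit]; // index by feature number rather than by bit mask
}

int main() {
  FeatureBits FB;
  FB.set(Mode64Bit);
  assert(is64BitMode(FB));
}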
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 5a9181d..8e3c721 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -55,148 +55,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
return FS;
}
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
-/// 4 values in the specified arguments. If we can't run cpuid on the host,
-/// return true.
-bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- // __cpuidex was added in MSVC++ 9.0 SP1
- #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- mov ecx,subleaf
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
- unsigned &Model) {
- Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
-}
-
unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
@@ -222,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = X86_MC::ParseX86Triple(TT);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
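The Twine-based concatenation builds the joined feature string lazily and copies it out once. Twine only holds references to its operands, so the result must be materialized with .str() inside the full expression, before the temporaries expire; the right-hand side is fully evaluated before the assignment back into ArchFS, so reusing the same variable is safe. A small example (compiles against LLVM's ADT headers):

#include "llvm/ADT/Twine.h"
#include <cassert>
#include <string>

int main() {
  std::string ArchFS = "+64bit-mode";
  std::string FS = "+sse4.2";
  ArchFS = (llvm::Twine(ArchFS) + "," + FS).str(); // .str() copies the result out
  assert(ArchFS == "+64bit-mode,+sse4.2");
}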
@@ -345,36 +203,17 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
// 64-bit JIT places everything in the same buffer except external funcs.
CM = is64Bit ? CodeModel::Large : CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO:
- return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
- return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
- case Triple::ELF:
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- }
-}
-
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+static MCInstPrinter *createX86MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MII, MRI, STI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
return nullptr;
@@ -397,61 +236,42 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeX86TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
- RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
-
- // Register the MC codegen info.
- RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
- RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- X86_MC::createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- X86_MC::createX86MCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
- createX86MCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
- createX86MCInstrAnalysis);
-
- // Register the code emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
- createX86MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
- createX86MCCodeEmitter);
+ for (Target *T : {&TheX86_32Target, &TheX86_64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn Y(*T, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createX86MCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterCOFFStreamer(*T, createX86WinCOFFStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createX86MCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createX86MCRelocationInfo);
+ }
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
createX86_32AsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
- createMCStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
- createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
- createX86MCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheX86_32Target,
- createX86MCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheX86_64Target,
- createX86MCRelocationInfo);
}
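The registration rewrite replaces two near-identical call sequences with one loop over both targets; iterating a braced list of pointers keeps each registration textually single-sourced. A stripped-down sketch with hypothetical registry functions:

#include <cstdio>

struct Target { const char *Name; };
static Target X86_32{"x86"}, X86_64{"x86-64"};

static void registerInstrInfo(Target &T) { std::printf("instr-info: %s\n", T.Name); }
static void registerRegInfo(Target &T) { std::printf("reg-info: %s\n", T.Name); }

int main() {
  // Ranged-for over a std::initializer_list<Target *> covering both targets.
  for (Target *T : {&X86_32, &X86_64}) {
    registerInstrInfo(*T);
    registerRegInfo(*T);
  }
}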
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index d8320b9..dcdae1d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -31,10 +31,11 @@ class Target;
class Triple;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheX86_32Target, TheX86_64Target;
-/// DWARFFlavour - Flavour of dwarf regnumbers
+/// Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
enum {
@@ -42,7 +43,7 @@ namespace DWARFFlavour {
};
}
-/// N86 namespace - Native X86 register numbers
+/// Native X86 register numbers
///
namespace N86 {
enum {
@@ -53,32 +54,18 @@ namespace N86 {
namespace X86_MC {
std::string ParseX86Triple(StringRef TT);
- /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
- /// the specified arguments. If we can't run cpuid on the host, return true.
- bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
- /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
- /// the 4 values in the specified arguments. If we can't run cpuid on the
- /// host, return true.
- bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
-
- void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
-
unsigned getDwarfRegFlavour(Triple TT, bool isEH);
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
- /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create an X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+ /// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -86,32 +73,30 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code
-/// streamer which will generate PE/COFF format object files.
+/// Construct an X86 Windows COFF machine code streamer which will generate
+/// PE/COFF format object files.
///
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll);
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
-MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
- uint16_t EMachine);
-/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
-MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64,
+ uint8_t OSABI, uint16_t EMachine);
+/// Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info.
+/// Construct X86-64 Mach-O relocation info.
MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
-/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info.
+/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
} // End llvm namespace
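The header now takes raw_pwrite_stream wherever an object writer is created. What that subclass adds over a forward-only raw_ostream is pwrite: patching bytes at an earlier offset after they have been emitted, which object writers need to back-fill headers and section sizes. The idea in miniature, with a hypothetical stand-in class rather than the LLVM stream hierarchy:

#include <cassert>
#include <cstddef>
#include <string>

class PWriteStream {
  std::string Buf;

public:
  void write(const char *P, std::size_t N) { Buf.append(P, N); }
  // Positional write into already-emitted bytes; this is the capability
  // raw_pwrite_stream adds.
  void pwrite(const char *P, std::size_t N, std::size_t Off) {
    assert(Off + N <= Buf.size());
    Buf.replace(Off, N, P, N);
  }
  const std::string &str() const { return Buf; }
};

int main() {
  PWriteStream OS;
  OS.write("size=????", 9); // placeholder emitted first
  OS.pwrite("0042", 4, 5);  // back-patched once the value is known
  assert(OS.str() == "size=0042");
}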
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 3b81d53..6cf5af7 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -36,9 +36,9 @@ public:
any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
bool isPCRel = Obj->getAnyRelocationPCRel(RE);
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
@@ -92,8 +92,8 @@ public:
StringRef RSymName;
RSymI->getName(RSymName);
- MCSymbol *RSym = Ctx.GetOrCreateSymbol(RSymName);
- if (RSym->isVariable() == false)
+ MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
+ if (!RSym->isVariable())
RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx));
const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx);
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 67b0c89..9da3e1f 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -47,23 +48,21 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter {
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue);
- void RecordX86_64Relocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
+ void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue);
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
public:
X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) override {
+ void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override {
if (Writer->is64Bit())
RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
@@ -97,13 +96,10 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}
-void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+void X86MachObjectWriter::RecordX86_64Relocation(
+ MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -117,6 +113,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbol *RelSymbol = nullptr;
Value = Target.getConstant();
@@ -132,7 +129,6 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
Type = MachO::X86_64_RELOC_UNSIGNED;
- Index = 0;
// FIXME: I believe this is broken, I don't think the linker can understand
// it. I think it would require a local relocation, but I'm not sure if that
@@ -145,15 +141,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
if (A->isTemporary())
- A = &A->AliasedSymbol();
+ A = &Writer->findAliasedSymbol(*A);
const MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+ const MCSymbol *A_Base = Asm.getAtom(*A);
const MCSymbol *B = &Target.getSymB()->getSymbol();
if (B->isTemporary())
- B = &B->AliasedSymbol();
+ B = &Writer->findAliasedSymbol(*B);
const MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+ const MCSymbol *B_Base = Asm.getAtom(*B);
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
@@ -183,73 +179,64 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// non-relocatable expression.
if (A->isUndefined() || B->isUndefined()) {
StringRef Name = A->isUndefined() ? A->getName() : B->getName();
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
"unsupported relocation with subtraction expression, symbol '" +
Name + "' can not be undefined in a subtraction expression");
}
- Value += Writer->getSymbolAddress(&A_SD, Layout) -
- (!A_Base ? 0 : Writer->getSymbolAddress(A_Base, Layout));
- Value -= Writer->getSymbolAddress(&B_SD, Layout) -
- (!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+ Value += Writer->getSymbolAddress(*A, Layout) -
+ (!A_Base ? 0 : Writer->getSymbolAddress(*A_Base, Layout));
+ Value -= Writer->getSymbolAddress(*B, Layout) -
+ (!B_Base ? 0 : Writer->getSymbolAddress(*B_Base, Layout));
- if (A_Base) {
- Index = A_Base->getIndex();
- IsExtern = 1;
- } else {
+ if (!A_Base)
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
Type = MachO::X86_64_RELOC_UNSIGNED;
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
-
- if (B_Base) {
- Index = B_Base->getIndex();
- IsExtern = 1;
- } else {
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
+
+ if (B_Base)
+ RelSymbol = B_Base;
+ else
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ if (Symbol->isTemporary() && Value) {
+ const MCSection &Sec = Symbol->getSection();
+ if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec))
+ Asm.addLocalUsedInReloc(*Symbol);
+ }
const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
+ RelSymbol = Asm.getAtom(*Symbol);
// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully
// understand x86_64 relocation entries, and expects to find values that
// have already been fixed up.
if (Symbol->isInSection()) {
- const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
- Fragment->getParent()->getSection());
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO &>(*Fragment->getParent());
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
- Base = nullptr;
+ RelSymbol = nullptr;
}
// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
// non-local symbol).
- if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
-
+ if (RelSymbol) {
// Add the local offset, if needed.
- if (Base != &SD)
- Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ if (RelSymbol != Symbol)
+ Value += Layout.getSymbolOffset(*Symbol) -
+ Layout.getSymbolOffset(*RelSymbol);
} else if (Symbol->isInSection() && !Symbol->isVariable()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- Value += Writer->getSymbolAddress(&SD, Layout);
+ Value += Writer->getSymbolAddress(*Symbol, Layout);
if (IsPCRel)
Value -= FixupAddress + (1 << Log2Size);
@@ -347,12 +334,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
+ (IsExtern << 27) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
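Throughout these Mach-O hunks, r_word1 packs five fields of a relocation_info entry into one 32-bit word: symbolnum in bits 0-23, pcrel in bit 24, length in bits 25-26, extern in bit 27, and type in bits 28-31 (the new code drops the explicit extern term when the symbol travels separately as RelSymbol). A sketch of the packing and how a reader would unpack it:

#include <cassert>
#include <cstdint>

static uint32_t packWord1(uint32_t Index, bool IsPCRel, unsigned Log2Size,
                          bool IsExtern, unsigned Type) {
  return (Index << 0) | (uint32_t(IsPCRel) << 24) | (Log2Size << 25) |
         (uint32_t(IsExtern) << 27) | (Type << 28);
}

int main() {
  uint32_t W = packWord1(/*Index=*/3, /*IsPCRel=*/true, /*Log2Size=*/2,
                         /*IsExtern=*/true, /*Type=*/1);
  assert((W & 0xffffff) == 3);    // symbolnum
  assert(((W >> 24) & 1) == 1);   // pcrel
  assert(((W >> 25) & 3) == 2);   // length
  assert(((W >> 27) & 1) == 1);   // extern
  assert(((W >> 28) & 0xf) == 1); // type
}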
bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
@@ -377,8 +361,9 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression",
false);
- uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
- uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
FixedValue += SecAddr;
uint32_t Value2 = 0;
@@ -397,7 +382,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// pedantic compatibility with 'as'.
Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF :
(unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
- Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -409,7 +394,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().FatalError(Fixup.getLoc(),
+ Asm.getContext().reportFatalError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") + Buffer +
") into 24 bits of scattered "
@@ -424,7 +409,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value2;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
// relocation offset field, so we fall back to using a non-scattered
@@ -446,7 +431,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
(IsPCRel << 30) |
MachO::R_SCATTERED);
MRE.r_word1 = Value;
- Writer->addRelocation(Fragment->getParent(), MRE);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
return true;
}
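
The fatal error above exists because a scattered entry has only 24 bits for r_address; the rest of r_word0 holds the type, length, pcrel, and scattered bits. A standalone sketch of that layout, with an illustrative helper name and field widths from struct scattered_relocation_info in <mach-o/reloc.h>:

  #include <cstdint>

  constexpr uint32_t kScattered = 0x80000000; // value of MachO::R_SCATTERED

  // Packs r_address:24, r_type:4, r_length:2, r_pcrel:1, r_scattered:1,
  // matching the MRE.r_word0 expressions in the hunks above. Address must
  // already fit in 24 bits; larger offsets take the fatal-error or
  // non-scattered fallback paths shown there.
  static uint32_t packScatteredWord0(uint32_t Address, uint32_t Type,
                                     uint32_t Log2Size, bool IsPCRel) {
    return Address | (Type << 24) | (Log2Size << 28) |
           (uint32_t(IsPCRel) << 30) | kScattered;
  }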
@@ -465,10 +450,6 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = 0;
- // Get the symbol data.
- const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
- unsigned Index = SD_A->getIndex();
-
// We're only going to have a second symbol in pic mode and it'll be a
// subtraction from the picbase. For 32-bit pic the addend is the difference
// between the picbase and the next address. For 32-bit static the addend is
@@ -477,11 +458,11 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// If this is a subtraction then we're pcrel.
uint32_t FixupAddress =
Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
- const MCSymbolData *SD_B =
- &Asm.getSymbolData(Target.getSymB()->getSymbol());
IsPCRel = 1;
- FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
- Target.getConstant());
+ FixedValue =
+ FixupAddress -
+ Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) +
+ Target.getConstant();
FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
@@ -490,12 +471,10 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = Value;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (1 << 27) | // r_extern
- (MachO::GENERIC_RELOC_TLV << 28)); // r_type
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28);
+ Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(),
+ MRE);
}
void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
@@ -526,9 +505,9 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
}
// Get the symbol data, if any.
- const MCSymbolData *SD = nullptr;
+ const MCSymbol *A = nullptr;
if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ A = &Target.getSymA()->getSymbol();
// If this is an internal relocation with an offset, it also needs a scattered
// relocation entry.
@@ -538,16 +517,16 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// Try to record the scattered relocation if needed. Fall back to non
// scattered if necessary (see comments in RecordScatteredRelocation()
// for details).
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
- RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue))
+ if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue))
return;
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned Index = 0;
- unsigned IsExtern = 0;
unsigned Type = 0;
+ const MCSymbol *RelSymbol = nullptr;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
@@ -557,30 +536,28 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
Type = MachO::GENERIC_RELOC_VANILLA;
} else {
// Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
+ if (A->isVariable()) {
int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
- Res, Layout, Writer->getSectionAddressMap())) {
+ if (A->getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
FixedValue = Res;
return;
}
}
// Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
+ if (Writer->doesSymbolRequireExternRelocation(*A)) {
+ RelSymbol = A;
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->getSymbol().isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
} else {
// The index is the section ordinal (1-based).
- const MCSectionData &SymSD = Asm.getSectionData(
- SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&SymSD);
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
}
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
@@ -591,17 +568,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// struct relocation_info (8 bytes)
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
- MRE.r_word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Writer->addRelocation(Fragment->getParent(), MRE);
+ MRE.r_word1 =
+ (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
CPUType,
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 40af822..bd1bc99 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -25,10 +25,11 @@ namespace {
class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
X86WinCOFFObjectWriter(bool Is64Bit);
- virtual ~X86WinCOFFObjectWriter();
+ ~X86WinCOFFObjectWriter() override;
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection) const override;
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
};
}
@@ -40,7 +41,8 @@ X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
- bool IsCrossSection) const {
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind();
MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
@@ -88,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
llvm_unreachable("Unsupported COFF machine type.");
}
-MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 5f1596c..92f42b6 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer {
Win64EH::UnwindEmitter EHStreamer;
public:
X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, *CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, *CE, OS) {}
void EmitWinEHHandlerData() override;
void EmitWindowsUnwindTables() override;
@@ -48,13 +48,11 @@ void X86WinCOFFStreamer::FinishImpl() {
}
}
-namespace llvm {
-MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
- bool RelaxAll) {
+MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *CE, bool RelaxAll) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
}
-}
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index e4c58d4..ef3318b 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -35,7 +35,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned CountS = (Imm >> 6) & 3;
// CountS selects which input element to use.
- unsigned InVal = 4+CountS;
+ unsigned InVal = 4 + CountS;
// CountD specifies which element of destination to update.
ShuffleMask[CountD] = InVal;
// ZMask zaps values, potentially overriding the CountD elt.
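As a worked instance of this decoding: Imm = 0x60 yields CountS = 1, CountD = 2, and an all-zero ZMask, so starting from the identity mask the function produces <0, 1, 5, 3>: destination element 2 is replaced by element 1 of the second input (InVal = 4 + 1 = 5).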
@@ -47,20 +47,20 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// <3,1> or <6,7,2,3>
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = NElts/2; i != NElts; ++i)
- ShuffleMask.push_back(NElts+i);
+ for (unsigned i = NElts / 2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts + i);
- for (unsigned i = NElts/2; i != NElts; ++i)
+ for (unsigned i = NElts / 2; i != NElts; ++i)
ShuffleMask.push_back(i);
}
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i)
+ for (unsigned i = 0; i != NElts / 2; ++i)
ShuffleMask.push_back(i);
- for (unsigned i = 0; i != NElts/2; ++i)
- ShuffleMask.push_back(NElts+i);
+ for (unsigned i = 0; i != NElts / 2; ++i)
+ ShuffleMask.push_back(NElts + i);
}
void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
@@ -79,6 +79,20 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ unsigned NumLaneSubElts = 64 / ScalarSizeInBits;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
+ for (unsigned s = 0; s != NumLaneSubElts; s++)
+ ShuffleMask.push_back(l + s);
+}
+
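
DecodeMOVDDUPMask above duplicates the low 64-bit element of every 128-bit lane. Re-running its loop outside LLVM with concrete values for a 256-bit vector of four f64 elements makes the pattern visible:

  #include <cstdio>
  #include <vector>

  int main() {
    // Values for MVT::v4f64: 256 bits total, 64-bit scalars.
    unsigned NumElts = 4;
    unsigned NumLanes = 256 / 128;             // 2 lanes
    unsigned NumLaneElts = NumElts / NumLanes; // 2 elements per lane
    unsigned NumLaneSubElts = 64 / 64;         // 1 sub-element per 64-bit chunk
    std::vector<int> Mask;
    for (unsigned l = 0; l < NumElts; l += NumLaneElts)
      for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
        for (unsigned s = 0; s != NumLaneSubElts; s++)
          Mask.push_back(l + s);
    for (int M : Mask)
      printf("%d ", M); // prints "0 0 2 2": each lane repeats its low element
    printf("\n");
    return 0;
  }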
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned VectorSizeInBits = VT.getSizeInBits();
unsigned NumElts = VectorSizeInBits / 8;
@@ -189,8 +203,8 @@ void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
// each half of a lane comes from different source
- for (unsigned s = 0; s != NumElts*2; s += NumElts) {
- for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
NewImm /= NumLaneElts;
}
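
Tracing the SHUFP loop with concrete numbers shows how the immediate is consumed two bits per output element, first for the lane's low half (source 1) and then for its high half (source 2). A standalone run for a single-lane v4f32 shuffle with Imm = 0x1B:

  #include <cstdio>

  int main() {
    unsigned NumElts = 4, NumLaneElts = 4, NewImm = 0x1B;
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned s = 0; s != NumElts * 2; s += NumElts) // the two sources
        for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
          printf("%u ", NewImm % NumLaneElts + s + l);
          NewImm /= NumLaneElts;
        }
    printf("\n"); // prints "3 2 5 4"
    return 0;
  }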
@@ -212,9 +226,9 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
}
}
}
@@ -232,9 +246,9 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
}
}
}
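
The two UNPCK loops differ only in which half of each lane they start from: for a single-lane v4i32, the low form interleaves elements 0 and 1 of the two sources and the high form elements 2 and 3. A minimal standalone check:

  #include <cstdio>

  int main() {
    unsigned NumElts = 4, NumLaneElts = 4;
    printf("UNPCKL:");
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i)
        printf(" %u %u", i, i + NumElts);   // 0 4 1 5
    printf("\nUNPCKH:");
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i)
        printf(" %u %u", i, i + NumElts);   // 2 6 3 7
    printf("\n");
    return 0;
  }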
@@ -244,11 +258,11 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
if (Imm & 0x88)
return; // Not a shuffle
- unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements() / 2;
for (unsigned l = 0; l != 2; ++l) {
- unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
- for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+ unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
ShuffleMask.push_back(i);
}
}
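
Each nibble of the VPERM2X128 immediate selects one 128-bit half from the concatenated sources (values 0 and 1 pick halves of the first source, 2 and 3 halves of the second); bit 3 of a nibble, caught by the 0x88 test above, zeroes that half instead. For a v4i64 type (HalfSize = 2) and Imm = 0x21:

  #include <cstdio>

  int main() {
    unsigned HalfSize = 2, Imm = 0x21;
    for (unsigned l = 0; l != 2; ++l) {
      unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
      for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
        printf("%u ", i);
    }
    printf("\n"); // prints "2 3 4 5": high half of src1, low half of src2
    return 0;
  }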
@@ -341,7 +355,7 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back((Imm >> (2*i)) & 3);
+ ShuffleMask.push_back((Imm >> (2 * i)) & 3);
}
}
@@ -385,4 +399,36 @@ void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+ unsigned NumDstElts = DstVT.getVectorNumElements();
+ unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+ unsigned DstScalarBits = DstVT.getScalarSizeInBits();
+ unsigned Scale = DstScalarBits / SrcScalarBits;
+ assert(SrcScalarBits < DstScalarBits &&
+ "Expected zero extension mask to increase scalar size");
+ assert(SrcVT.getVectorNumElements() >= NumDstElts &&
+ "Too many zero extension lanes");
+
+ for (unsigned i = 0; i != NumDstElts; i++) {
+ Mask.push_back(i);
+ for (unsigned j = 1; j != Scale; j++)
+ Mask.push_back(SM_SentinelZero);
+ }
+}
+
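
DecodeZeroExtendMask interleaves each selected source element with Scale - 1 zero sentinels. A standalone run for a PMOVZXWD-style v8i16 to v4i32 extension (Scale = 32 / 16 = 2), using -2 as a stand-in for this tree's SM_SentinelZero value:

  #include <cstdio>

  int main() {
    const int Z = -2; // stand-in for SM_SentinelZero
    unsigned NumDstElts = 4, Scale = 2;
    for (unsigned i = 0; i != NumDstElts; i++) {
      printf("%d ", int(i));
      for (unsigned j = 1; j != Scale; j++)
        printf("%d ", Z);
    }
    printf("\n"); // prints "0 -2 1 -2 2 -2 3 -2"
    return 0;
  }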
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ShuffleMask.push_back(0);
+ for (unsigned i = 1; i < NumElts; i++)
+ ShuffleMask.push_back(SM_SentinelZero);
+}
+
+void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
+ // The first element comes from the first element of the second source.
+ // Remaining elements: Load zero-extends / Move copies from the first source.
+ unsigned NumElts = VT.getVectorNumElements();
+ Mask.push_back(NumElts);
+ for (unsigned i = 1; i < NumElts; i++)
+ Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
+}
} // llvm namespace
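
A quick expansion of DecodeScalarMoveMask for v4f32 shows the asymmetry the comment describes: both forms take element 0 from the second source (mask value NumElts = 4), but the load form zeroes the tail while the register form keeps it from the first source. Again using -2 as a stand-in for SM_SentinelZero:

  #include <cstdio>

  int main() {
    const int Z = -2; // stand-in for SM_SentinelZero
    unsigned NumElts = 4;
    for (int IsLoad = 0; IsLoad <= 1; ++IsLoad) {
      printf("%s %u", IsLoad ? "load:" : "move:", NumElts);
      for (unsigned i = 1; i < NumElts; i++)
        printf(" %d", IsLoad ? Z : int(i));
      printf("\n"); // move: 4 1 2 3   load: 4 -2 -2 -2
    }
    return 0;
  }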
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 6ba3c64..14b6943 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -40,6 +40,8 @@ void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
@@ -88,6 +90,16 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a zero extension instruction as a shuffle mask.
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a move lower and zero upper instruction as a shuffle mask.
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a scalar float move instruction as a shuffle mask.
+void DecodeScalarMoveMask(MVT VT, bool IsLoad,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 219b64d..8403ae6 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -55,9 +55,6 @@ FunctionPass *createX86IssueVZeroUpperPass();
///
FunctionPass *createEmitX86CodeToMemory();
-/// \brief Creates an X86-specific Target Transformation Info pass.
-ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
-
/// createX86PadShortFunctions - Return a pass that pads short functions
/// with NOOPs. This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
@@ -72,6 +69,17 @@ FunctionPass *createX86FixupLEAs();
/// esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
+/// createX86WinEHStatePass - Return an IR pass that inserts EH registration
+/// stack objects and explicit EH state updates. This pass must run after EH
+/// preparation, which does Windows-specific but architecture-neutral
+/// preparation.
+FunctionPass *createX86WinEHStatePass();
+
+/// Return a Machine IR pass that expands X86-specific pseudo
+/// instructions into a sequence of actual instructions. This pass
+/// must run after prologue/epilogue insertion and before lowering
+/// the MachineInstr to MC.
+FunctionPass *createX86ExpandPseudoPass();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 30b3b28..c70e2e9 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -164,14 +164,10 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
-def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
- "Support SGX instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
-def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
- "Support SMAP instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
@@ -196,6 +192,9 @@ def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
"Use RSQRT* to optimize square root calculations">;
def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
"true", "Use RCP* to optimize division calculations">;
+def FeatureSoftFloat
+ : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features.">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -277,17 +276,15 @@ def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
-class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures>
- : ProcessorModel<Name, SandyBridgeModel, !listconcat([
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT
- ],
- AdditionalFeatures)>;
-def : NehalemProc<"nehalem", []>;
-def : NehalemProc<"corei7", [FeatureAES]>;
+class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureFastUAMem,
+ FeaturePOPCNT
+ ]>;
+def : NehalemProc<"nehalem">;
+def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
@@ -372,7 +369,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureHLE,
FeatureADX,
FeatureRDSEED,
- FeatureSMAP,
FeatureSlowIncDec
]>;
def : BroadwellProc<"broadwell">;
@@ -395,7 +391,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureSGX]>;
+ FeatureSlowIncDec]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d06e94f..f97557e 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -47,23 +47,22 @@ using namespace llvm;
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<X86Subtarget>();
+
SMShadowTracker.startFunction(MF);
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
- OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
- : COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer.EndCOFFSymbolDef();
+ OutStreamer->EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
@@ -98,7 +97,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym =
- P.OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+ P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
@@ -505,26 +504,27 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO())
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ Triple TT(TM.getTargetTriple());
- if (Subtarget->isTargetCOFF()) {
+ if (TT.isOSBinFormatMachO())
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+
+ if (TT.isOSBinFormatCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
- if (!Subtarget->is64Bit()) {
- MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer.BeginCOFFSymbolDef(S);
- OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer.EndCOFFSymbolDef();
+ if (TT.getArch() == Triple::x86) {
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer->BeginCOFFSymbolDef(S);
+ OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->EndCOFFSymbolDef();
// According to the PE-COFF spec, the LSB of this value marks the object
// for "registered SEH". This means that all SEH handler entry points
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- S->setAbsolute();
- OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
- OutStreamer.EmitAssignment(
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->EmitAssignment(
S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
}
}
@@ -558,14 +558,13 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
- SectionKind Kind =
- CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+ SectionKind Kind = CPE.getSectionKind(TM.getDataLayout());
const Constant *C = CPE.Val.ConstVal;
if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
getObjFileLowering().getSectionForConstant(Kind, C))) {
if (MCSymbol *Sym = S->getCOMDATSymbol()) {
if (Sym->isUndefined())
- OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
return Sym;
}
}
@@ -579,31 +578,34 @@ void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
SmallString<128> Directive;
raw_svector_ostream OS(Directive);
StringRef Name = Sym->getName();
+ Triple TT(TM.getTargetTriple());
- if (Subtarget->isTargetKnownWindowsMSVC())
+ if (TT.isKnownWindowsMSVCEnvironment())
OS << " /EXPORT:";
else
OS << " -export:";
- if ((Subtarget->isTargetWindowsGNU() || Subtarget->isTargetWindowsCygwin()) &&
+ if ((TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) &&
(Name[0] == getDataLayout().getGlobalPrefix()))
Name = Name.drop_front();
OS << Name;
if (IsData) {
- if (Subtarget->isTargetKnownWindowsMSVC())
+ if (TT.isKnownWindowsMSVCEnvironment())
OS << ",DATA";
else
OS << ",data";
}
OS.flush();
- OutStreamer.EmitBytes(Directive);
+ OutStreamer->EmitBytes(Directive);
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetMachO()) {
+ Triple TT(TM.getTargetTriple());
+
+ if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
@@ -613,58 +615,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
- OutContext.getMachOSection("__IMPORT", "__jump_table",
- MachO::S_SYMBOL_STUBS |
- MachO::S_ATTR_SELF_MODIFYING_CODE |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- 5, SectionKind::getMetadata());
- OutStreamer.SwitchSection(TheSection);
+ MCSection *TheSection = OutContext.getMachOSection(
+ "__IMPORT", "__jump_table",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_SELF_MODIFYING_CODE |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 5, SectionKind::getMetadata());
+ OutStreamer->SwitchSection(TheSection);
for (const auto &Stub : Stubs) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stub.first);
+ OutStreamer->EmitLabel(Stub.first);
// .indirect_symbol _foo
- OutStreamer.EmitSymbolAttribute(Stub.second.getPointer(),
- MCSA_IndirectSymbol);
+ OutStreamer->EmitSymbolAttribute(Stub.second.getPointer(),
+ MCSA_IndirectSymbol);
// hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
- OutStreamer.EmitBytes(StringRef(HltInsts, 5));
+ OutStreamer->EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
// Output stubs for external and common global variables.
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
- OutContext.getMachOSection("__IMPORT", "__pointers",
- MachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
- OutStreamer.SwitchSection(TheSection);
+ MCSection *TheSection = OutContext.getMachOSection(
+ "__IMPORT", "__pointers", MachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer->SwitchSection(TheSection);
for (auto &Stub : Stubs)
- emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
- OutContext.getMachOSection("__IMPORT", "__pointers",
- MachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
- OutStreamer.SwitchSection(TheSection);
+ MCSection *TheSection = OutContext.getMachOSection(
+ "__IMPORT", "__pointers", MachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer->SwitchSection(TheSection);
for (auto &Stub : Stubs)
- emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+ emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
SM.serializeToStackMapSection();
@@ -674,16 +673,17 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- if (Subtarget->isTargetKnownWindowsMSVC() && MMI->usesVAFloatArgument()) {
- StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
- MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
- OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ if (TT.isKnownWindowsMSVCEnvironment() && MMI->usesVAFloatArgument()) {
+ StringRef SymbolName =
+ (TT.getArch() == Triple::x86_64) ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
}
- if (Subtarget->isTargetCOFF()) {
+ if (TT.isOSBinFormatCOFF()) {
// Necessary for dllexport support
std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
@@ -710,7 +710,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
const TargetLoweringObjectFileCOFF &TLOFCOFF =
static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
- OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection());
for (auto & Symbol : DLLExportedGlobals)
GenerateExportDirective(Symbol, /*IsData=*/true);
@@ -719,28 +719,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (const auto &Stub : Stubs) {
- OutStreamer.EmitLabel(Stub.first);
- OutStreamer.EmitSymbolValue(Stub.second.getPointer(),
- TD->getPointerSize());
- }
- Stubs.clear();
- }
-
+ if (TT.isOSBinFormatELF())
SM.serializeToStackMapSection();
- }
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index 748b948..3beeb17 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -57,6 +57,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI);
private:
TargetMachine &TM;
+ const MachineFunction *MF;
std::unique_ptr<MCCodeEmitter> CodeEmitter;
bool InShadow;
@@ -80,15 +81,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void InsertStackMapShadows(MachineFunction &MF);
void LowerSTACKMAP(const MachineInstr &MI);
- void LowerPATCHPOINT(const MachineInstr &MI);
+ void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
public:
- explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), SM(*this), SMShadowTracker(TM) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
- }
+ explicit X86AsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this), SMShadowTracker(TM) {}
const char *getPassName() const override {
return "X86 Assembly / Object Emitter";
@@ -103,7 +104,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void EmitInstruction(const MachineInstr *MI) override;
void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override {
- SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
}
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index fae489e..4412125 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines a pass that optimizes call sequences on x86.
-// Currently, it converts movs of function parameters onto the stack into
+// Currently, it converts movs of function parameters onto the stack into
// pushes. This is beneficial for two main reasons:
// 1) The push instruction encoding is much smaller than an esp-relative mov
// 2) It is possible to push memory arguments directly. So, if the
@@ -37,9 +37,10 @@ using namespace llvm;
#define DEBUG_TYPE "x86-cf-opt"
-cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt",
- cl::desc("Avoid optimizing x86 call frames for size"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ NoX86CFOpt("no-x86-call-frame-opt",
+ cl::desc("Avoid optimizing x86 call frames for size"),
+ cl::init(false), cl::Hidden);
namespace {
class X86CallFrameOptimization : public MachineFunctionPass {
@@ -49,17 +50,47 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- bool shouldPerformTransformation(MachineFunction &MF);
+ // Information we know about a particular call site
+ struct CallContext {
+ CallContext()
+ : Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
+ MovVector(4, nullptr), NoStackParams(false), UsePush(false) {}
- bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
+ // Actual call instruction
+ MachineInstr *Call;
+
+ // A copy of the stack pointer
+ MachineInstr *SPCopy;
+
+ // The total displacement of all passed parameters
+ int64_t ExpectedDist;
+
+ // The sequence of movs used to pass the parameters
+ SmallVector<MachineInstr *, 4> MovVector;
+
+ // True if this call site has no stack parameters
+ bool NoStackParams;
+
+ // True if this call site can use push instructions
+ bool UsePush;
+ };
+
+ typedef DenseMap<MachineInstr *, CallContext> ContextMap;
+
+ bool isLegal(MachineFunction &MF);
+
+ bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
+
+ void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, CallContext &Context);
+
+ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock::iterator I,
+ const CallContext &Context);
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
unsigned Reg);
- const char *getPassName() const override {
- return "X86 Optimize Call Frame";
- }
+ const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
const TargetFrameLowering *TFL;
@@ -74,8 +105,10 @@ FunctionPass *llvm::createX86CallFrameOptimization() {
return new X86CallFrameOptimization();
}
-// This checks whether the transformation is legal and profitable
-bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) {
+// This checks whether the transformation is legal.
+// Also returns false in cases where it's potentially legal, but
+// we don't even want to try.
+bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
if (NoX86CFOpt.getValue())
return false;
@@ -84,7 +117,7 @@ bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF)
// No point in running this in 64-bit mode, since some arguments are
// passed in-register in all common calling conventions, so the pattern
// we're looking for will never match.
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.is64Bit())
return false;
@@ -95,8 +128,8 @@ bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF)
// This is bad, and breaks SP adjustment.
// So, check that all of the frames in the function are closed inside
// the same block, and, for good measure, that there are no nested frames.
- int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
@@ -104,8 +137,7 @@ bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF)
if (InsideFrameSequence)
return false;
InsideFrameSequence = true;
- }
- else if (MI.getOpcode() == FrameDestroyOpcode) {
+ } else if (MI.getOpcode() == FrameDestroyOpcode) {
if (!InsideFrameSequence)
return false;
InsideFrameSequence = false;
@@ -116,99 +148,141 @@ bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF)
return false;
}
- // Now that we know the transformation is legal, check if it is
- // profitable.
- // TODO: Add a heuristic that actually looks at the function,
- // and enable this for more cases.
+ return true;
+}
- // This transformation is always a win when we expected to have
- // a reserved call frame. Under other circumstances, it may be either
+// Check whether this transformation is profitable for a particular
+// function - in terms of code size.
+bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
+ ContextMap &CallSeqMap) {
+ // This transformation is always a win when we do not expect to have
+ // a reserved call frame. Under other circumstances, it may be either
// a win or a loss, and requires a heuristic.
- // For now, enable it only for the relatively clear win cases.
bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
if (CannotReserveFrame)
return true;
- // For now, don't even try to evaluate the profitability when
- // not optimizing for size.
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ // Don't do this when not optimizing for size.
bool OptForSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (!OptForSize)
return false;
- // Stack re-alignment can make this unprofitable even in terms of size.
- // As mentioned above, a better heuristic is needed. For now, don't do this
- // when the required alignment is above 8. (4 would be the safe choice, but
- // some experimentation showed 8 is generally good).
- if (TFL->getStackAlignment() > 8)
- return false;
- return true;
+ unsigned StackAlign = TFL->getStackAlignment();
+
+ int64_t Advantage = 0;
+ for (auto CC : CallSeqMap) {
+ // Call sites where no parameters are passed on the stack
+ // do not affect the cost, since no stack adjustment is
+ // needed.
+ if (CC.second.NoStackParams)
+ continue;
+
+ if (!CC.second.UsePush) {
+ // If we don't use pushes for a particular call site,
+ // we pay for not having a reserved call frame with an
+ // additional sub/add esp pair. The cost is ~3 bytes per instruction,
+ // depending on the size of the constant.
+ // TODO: Callee-pop functions should have a smaller penalty, because
+ // an add is needed even with a reserved call frame.
+ Advantage -= 6;
+ } else {
+ // We can use pushes. First, account for the fixed costs.
+ // We'll need an add after the call.
+ Advantage -= 3;
+ // If we have to realign the stack, we'll also need a sub beforehand.
+ if (CC.second.ExpectedDist % StackAlign)
+ Advantage -= 3;
+ // Now, for each push, we save ~3 bytes. For small constants, we actually
+ // save more (up to 5 bytes), but 3 should be a good approximation.
+ Advantage += (CC.second.ExpectedDist / 4) * 3;
+ }
+ }
+
+ return (Advantage >= 0);
}
+
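To make the bookkeeping above concrete, assuming 4-byte stack alignment: a call site that passes three 32-bit arguments (ExpectedDist = 12) scores -3 for the post-call add and +(12 / 4) * 3 = +9 for the converted movs, a net +6. Under 16-byte alignment the same site pays an extra -3 for the realigning sub (12 % 16 != 0), netting +3, while a single-argument site nets -3 - 3 + 3 = -3 and only survives if other call sites in the function make up the difference.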
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TFL = MF.getSubtarget().getFrameLowering();
MRI = &MF.getRegInfo();
- if (!shouldPerformTransformation(MF))
+ if (!isLegal(MF))
return false;
- int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+ unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
bool Changed = false;
+ ContextMap CallSeqMap;
+
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
- if (I->getOpcode() == FrameSetupOpcode)
- Changed |= adjustCallSequence(MF, *BB, I);
+ if (I->getOpcode() == FrameSetupOpcode) {
+ CallContext &Context = CallSeqMap[I];
+ collectCallInfo(MF, *BB, I, Context);
+ }
+
+ if (!isProfitable(MF, CallSeqMap))
+ return false;
+
+ for (auto CC : CallSeqMap)
+ if (CC.second.UsePush)
+ Changed |= adjustCallSequence(MF, CC.first, CC.second);
return Changed;
}
-bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
-
+void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ CallContext &Context) {
// Check that this particular call sequence is amenable to the
// transformation.
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
- int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
MachineBasicBlock::iterator FrameSetup = I++;
+ // How much do we adjust the stack? This puts an upper bound on
+ // the number of parameters actually passed on it.
+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
+ // A zero adjustment means no stack parameters
+ if (!MaxAdjust) {
+ Context.NoStackParams = true;
+ return;
+ }
+
// For globals in PIC mode, we can have some LEAs here.
// Ignore them, they don't bother us.
// TODO: Extend this to something that covers more cases.
while (I->getOpcode() == X86::LEA32r)
++I;
-
+
// We expect a copy instruction here.
// TODO: The copy instruction is a lowering artifact.
// We should also support a copy-less version, where the stack
// pointer is used directly.
if (!I->isCopy() || !I->getOperand(0).isReg())
- return false;
- MachineBasicBlock::iterator SPCopy = I++;
- StackPtr = SPCopy->getOperand(0).getReg();
+ return;
+ Context.SPCopy = I++;
+ StackPtr = Context.SPCopy->getOperand(0).getReg();
// Scan the call setup sequence for the pattern we're looking for.
// We only handle a simple case - a sequence of MOV32mi or MOV32mr
// instructions, that push a sequence of 32-bit values onto the stack, with
// no gaps between them.
- SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
- unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
if (MaxAdjust > 4)
- MovVector.resize(MaxAdjust, nullptr);
+ Context.MovVector.resize(MaxAdjust, nullptr);
do {
int Opcode = I->getOpcode();
@@ -230,77 +304,86 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
(I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
(I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
!I->getOperand(X86::AddrDisp).isImm())
- return false;
+ return;
int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
- assert(StackDisp >= 0 && "Negative stack displacement when passing parameters");
+ assert(StackDisp >= 0 &&
+ "Negative stack displacement when passing parameters");
// We really don't want to consider the unaligned case.
if (StackDisp % 4)
- return false;
+ return;
StackDisp /= 4;
- assert((size_t)StackDisp < MovVector.size() &&
- "Function call has more parameters than the stack is adjusted for.");
+ assert((size_t)StackDisp < Context.MovVector.size() &&
+ "Function call has more parameters than the stack is adjusted for.");
// If the same stack slot is being filled twice, something's fishy.
- if (MovVector[StackDisp] != nullptr)
- return false;
- MovVector[StackDisp] = I;
+ if (Context.MovVector[StackDisp] != nullptr)
+ return;
+ Context.MovVector[StackDisp] = I;
++I;
} while (I != MBB.end());
// We now expect the end of the sequence - a call and a stack adjust.
if (I == MBB.end())
- return false;
+ return;
// For PCrel calls, we expect an additional COPY of the basereg.
// If we find one, skip it.
if (I->isCopy()) {
if (I->getOperand(1).getReg() ==
- MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
+ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
++I;
else
- return false;
+ return;
}
if (!I->isCall())
- return false;
- MachineBasicBlock::iterator Call = I;
+ return;
+
+ Context.Call = I;
if ((++I)->getOpcode() != FrameDestroyOpcode)
- return false;
+ return;
// Now, go through the vector, and see that we don't have any gaps,
// but only a series of 32-bit MOVs.
-
- int64_t ExpectedDist = 0;
- auto MMI = MovVector.begin(), MME = MovVector.end();
- for (; MMI != MME; ++MMI, ExpectedDist += 4)
+ auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
+ for (; MMI != MME; ++MMI, Context.ExpectedDist += 4)
if (*MMI == nullptr)
break;
-
+
// If the call had no parameters, do nothing
- if (!ExpectedDist)
- return false;
+ if (MMI == Context.MovVector.begin())
+ return;
- // We are either at the last parameter, or a gap.
+ // We are either at the last parameter, or a gap.
// Make sure it's not a gap
for (; MMI != MME; ++MMI)
if (*MMI != nullptr)
- return false;
+ return;
+
+ Context.UsePush = true;
+ return;
+}
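The shape collectCallInfo accepts is, schematically, a frame-setup pseudo, a copy of %esp, and a gap-free run of 32-bit stores, e.g. movl $42, (%esp); movl %eax, 4(%esp); movl $1, 8(%esp); calll _f. adjustCallSequence below rewrites the stores from the highest displacement down, so the sequence becomes pushl $1; pushl %eax; pushl $42; calll _f, with the lowest slot ending up at the final %esp. The assembly here is only illustrative: the pass actually runs on MachineInstrs before prologue/epilogue insertion.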
+bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
+ MachineBasicBlock::iterator I,
+ const CallContext &Context) {
// Ok, we can in fact do the transformation for this call.
// Do not remove the FrameSetup instruction, but adjust the parameters.
// PEI will end up finalizing the handling of this.
- FrameSetup->getOperand(1).setImm(ExpectedDist);
+ MachineBasicBlock::iterator FrameSetup = I;
+ MachineBasicBlock &MBB = *(I->getParent());
+ FrameSetup->getOperand(1).setImm(Context.ExpectedDist);
DebugLoc DL = I->getDebugLoc();
// Now, iterate through the vector in reverse order, and replace the movs
- // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
+ // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
// replace uses.
- for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
- MachineBasicBlock::iterator MOV = *MovVector[Idx];
+ for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
+ MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
if (MOV->getOpcode() == X86::MOV32mi) {
unsigned PushOpcode = X86::PUSHi32;
@@ -313,20 +396,21 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val))
PushOpcode = X86::PUSH32i8;
}
- BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+ BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
} else {
unsigned int Reg = PushOp.getReg();
// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
- const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
MachineInstr *DefMov = nullptr;
if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
- MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));
+ MachineInstr *Push =
+ BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -334,7 +418,9 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
DefMov->eraseFromParent();
} else {
- BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr();
+ BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+ .addReg(Reg)
+ .getInstr();
}
}
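
The isInt<8> check above is a pure size play: push imm8 (opcode 6A ib) is a 2-byte instruction, while push imm32 (68 id) takes 5 bytes. A standalone stand-in for the predicate:

  #include <cstdint>

  // Equivalent of llvm::isInt<8>(Val): true when Val survives a round trip
  // through a signed 8-bit immediate.
  static bool fitsInt8(int64_t Val) {
    return Val >= INT8_MIN && Val <= INT8_MAX;
  }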
@@ -343,8 +429,8 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// The stack-pointer copy is no longer used in the call sequences.
// There should not be any other users, but we can't commit to that, so:
- if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
- SPCopy->eraseFromParent();
+ if (MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
+ Context.SPCopy->eraseFromParent();
// Once we've done this, we need to make sure PEI doesn't assume a reserved
// frame.
@@ -381,17 +467,11 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
DefMI->getParent() != FrameSetup->getParent())
return nullptr;
- // Be careful with movs that load from a stack slot, since it may get
- // resolved incorrectly.
- // TODO: Again, we already have the infrastructure, so this should work.
- if (!DefMI->getOperand(1).isReg())
- return nullptr;
-
// Now, make sure everything else up until the ADJCALLSTACK is a sequence
// of MOVs. To be less conservative would require duplicating a lot of the
// logic from PeepholeOptimizer.
// FIXME: A possibly better approach would be to teach the PeepholeOptimizer
- // to be smarter about folding into pushes.
+ // to be smarter about folding into pushes.
for (auto I = DefMI; I != FrameSetup; ++I)
if (I->getOpcode() != X86::MOV32rm)
return nullptr;
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index 75a2ec0..8f88888 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -34,11 +34,22 @@ def RetCC_X86Common : CallingConv<[
//
// For code that doesn't care about the ABI, we allow returning more than two
// integer values in registers.
+ CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>,
CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
CCIfType<[i64], CCAssignToReg<[RAX, RDX, RCX]>>,
+ // Boolean vectors of AVX-512 are returned in SIMD registers.
+ // The call from AVX to AVX-512 function should work,
+ // since the boolean types in AVX/AVX2 are promoted by default.
+ CCIfType<[v2i1], CCPromoteToType<v2i64>>,
+ CCIfType<[v4i1], CCPromoteToType<v4i32>>,
+ CCIfType<[v8i1], CCPromoteToType<v8i16>>,
+ CCIfType<[v16i1], CCPromoteToType<v16i8>>,
+ CCIfType<[v32i1], CCPromoteToType<v32i8>>,
+ CCIfType<[v64i1], CCPromoteToType<v64i8>>,
+
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
@@ -241,8 +252,8 @@ def CC_X86_64_C : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<8, 8>>,
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
@@ -258,6 +269,16 @@ def CC_X86_64_C : CallingConv<[
CCIfSubtarget<"hasSSE2()",
CCPromoteToType<v2i64>>>>,
+ // Boolean vectors of AVX-512 are passed in SIMD registers.
+ // The call from AVX to AVX-512 function should work,
+ // since the boolean types in AVX/AVX2 are promoted by default.
+ CCIfType<[v2i1], CCPromoteToType<v2i64>>,
+ CCIfType<[v4i1], CCPromoteToType<v4i32>>,
+ CCIfType<[v8i1], CCPromoteToType<v8i16>>,
+ CCIfType<[v16i1], CCPromoteToType<v16i8>>,
+ CCIfType<[v32i1], CCPromoteToType<v32i8>>,
+ CCIfType<[v64i1], CCPromoteToType<v64i8>>,
+
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
@@ -303,8 +324,8 @@ def CC_X86_Win64_C : CallingConv<[
// FIXME: Handle byval stuff.
// FIXME: Handle varargs.
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
@@ -424,9 +445,61 @@ def CC_X86_64_AnyReg : CallingConv<[
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
+/// CC_X86_32_Vector_Common - In all X86-32 calling conventions, extra vector
+/// values are spilled on the stack.
+def CC_X86_32_Vector_Common : CallingConv<[
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // 512-bit AVX-512 vectors get 64-byte stack slots that are 64-byte aligned.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToStack<64, 64>>
+]>;
+
+// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
+// vector registers
+def CC_X86_32_Vector_Standard : CallingConv<[
+ // SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
+
+ // AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasFp256()",
+ CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
+
+ // AVX 512-bit vector arguments are passed in ZMM registers.
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
+
+ CCDelegateTo<CC_X86_32_Vector_Common>
+]>;
+
+// CC_X86_32_Vector_Darwin - The first 4 vector arguments are passed in
+// vector registers.
+def CC_X86_32_Vector_Darwin : CallingConv<[
+ // SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasFp256()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // AVX 512-bit vector arguments are passed in ZMM registers.
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
+
+ CCDelegateTo<CC_X86_32_Vector_Common>
+]>;
+
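The practical difference between the two new conventions: Darwin keeps the historical behaviour of placing the first four non-vararg vector arguments in XMM0 through XMM3 (or the YMM/ZMM equivalents), while the standard path stops at three registers; in both cases anything further falls through to CC_X86_32_Vector_Common and gets a size-aligned stack slot.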
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
-/// values are spilled on the stack, and the first 4 vector values go in XMM
-/// regs.
+/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
@@ -452,29 +525,30 @@ def CC_X86_32_Common : CallingConv<[
// Long doubles get slots whose size depends on the subtarget.
CCIfType<[f80], CCAssignToStack<0, 4>>,
- // The first 4 SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
-
- // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasFp256()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
-
- // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToStack<32, 32>>,
+ // Boolean vectors of AVX-512 are passed in SIMD registers.
+  // A call from an AVX function into an AVX-512 function should work,
+ // since the boolean types in AVX/AVX2 are promoted by default.
+ CCIfType<[v2i1], CCPromoteToType<v2i64>>,
+ CCIfType<[v4i1], CCPromoteToType<v4i32>>,
+ CCIfType<[v8i1], CCPromoteToType<v8i16>>,
+ CCIfType<[v16i1], CCPromoteToType<v16i8>>,
+ CCIfType<[v32i1], CCPromoteToType<v32i8>>,
+ CCIfType<[v64i1], CCPromoteToType<v64i8>>,
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
- CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+  // Darwin passes vectors in a form that differs from the i386 psABI.
+ CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_X86_32_Vector_Darwin>>,
+
+  // Otherwise, fall back to the 'normal' X86-32 calling convention.
+ CCDelegateTo<CC_X86_32_Vector_Standard>
+]>;
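+
+// Note that each vNi1 promotion above keeps the element count and widens
+// the elements just enough to fill one SIMD register; for illustration:
+//   v8i1  -> v8i16:  8 x 16 = 128 bits (one XMM)
+//   v32i1 -> v32i8: 32 x  8 = 256 bits (one YMM)
+//   v64i1 -> v64i8: 64 x  8 = 512 bits (one ZMM)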
def CC_X86_32_C : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in ECX.
CCIfNest<CCAssignToReg<[ECX]>>,
@@ -488,8 +562,8 @@ def CC_X86_32_C : CallingConv<[
]>;
def CC_X86_32_FastCall : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in EAX.
CCIfNest<CCAssignToReg<[EAX]>>,
@@ -534,15 +608,15 @@ def CC_X86_32_ThisCall_Common : CallingConv<[
]>;
def CC_X86_32_ThisCall_Mingw : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCDelegateTo<CC_X86_32_ThisCall_Common>
]>;
def CC_X86_32_ThisCall_Win : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// Pass sret arguments indirectly through stack.
CCIfSRet<CCAssignToStack<4, 4>>,
@@ -561,8 +635,8 @@ def CC_X86_32_FastCC : CallingConv<[
// puts arguments in registers.
CCIfByVal<CCPassByVal<4, 4>>,
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in EAX.
CCIfNest<CCAssignToReg<[EAX]>>,
@@ -626,6 +700,9 @@ def CC_Intel_OCL_BI : CallingConv<[
CCIfType<[v16f32, v8f64, v16i32, v8i64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>,
+ // Pass masks in mask registers
+ CCIfType<[v16i1, v8i1], CCAssignToReg<[K1]>>,
+
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
CCDelegateTo<CC_X86_32_C>
diff --git a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
new file mode 100644
index 0000000..1b00997
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -0,0 +1,189 @@
+//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, other late
+// optimizations, or simply the encoding of the instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/GlobalValue.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-pseudo"
+
+namespace {
+class X86ExpandPseudo : public MachineFunctionPass {
+public:
+ static char ID;
+ X86ExpandPseudo() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const X86Subtarget *STI;
+ const X86InstrInfo *TII;
+ const X86RegisterInfo *TRI;
+ const X86FrameLowering *X86FL;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "X86 pseudo instruction expansion pass";
+ }
+
+private:
+ bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+ bool ExpandMBB(MachineBasicBlock &MBB);
+};
+char X86ExpandPseudo::ID = 0;
+} // End anonymous namespace.
+
+/// If \p MBBI is a pseudo instruction, this method expands
+/// it to the corresponding (sequence of) actual instruction(s).
+/// \returns true if \p MBBI has been expanded.
+bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ switch (Opcode) {
+ default:
+ return false;
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64: {
+ bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+
+ if (StackAdj) {
+ bool Is64Bit = STI->is64Bit();
+      // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
+ const bool Uses64BitFramePtr =
+ STI->isTarget64BitLP64() || STI->isTargetNaCl64();
+ // Check if we should use LEA for SP.
+ bool UseLEAForSP = STI->useLeaForSP() &&
+ X86FL->canUseLEAForSPInEpilogue(*MBB.getParent());
+ unsigned StackPtr = TRI->getStackRegister();
+ // Check for possible merge with preceding ADD instruction.
+ StackAdj += X86FrameLowering::mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ X86FrameLowering::emitSPUpdate(MBB, MBBI, StackPtr, StackAdj, Is64Bit,
+ Uses64BitFramePtr, UseLEAForSP, *TII,
+ *TRI);
+ }
+
+ // Jump to label or value in register.
+ bool IsWin64 = STI->isTargetWin64();
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
+ unsigned Op = (Opcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd
+ : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
+ unsigned Op = (Opcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm
+ : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (Opcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ NewMI->copyImplicitOps(*MBBI->getParent()->getParent(), MBBI);
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+
+ return true;
+ }
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ const bool Uses64BitFramePtr =
+ STI->isTarget64BitLP64() || STI->isTargetNaCl64();
+ unsigned StackPtr = TRI->getStackRegister();
+ BuildMI(MBB, MBBI, DL,
+ TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(DestAddr.getReg());
+ // The EH_RETURN pseudo is really removed during the MC Lowering.
+ return true;
+ }
+ }
+ llvm_unreachable("Previous switch has a fallthrough?");
+}
+
+/// Expand all pseudo instructions contained in \p MBB.
+/// \returns true if any expansion occurred for \p MBB.
+bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // MBBI may be invalidated by the expansion.
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
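+
+// For reference, the loop above is the usual "capture the successor before
+// mutating" idiom, since ExpandMI may erase the instruction that MBBI
+// points at. A minimal stand-alone equivalent over a std::list:
+//
+//   for (auto I = L.begin(), E = L.end(); I != E;) {
+//     auto Next = std::next(I); // still valid after L.erase(I)
+//     if (shouldExpand(*I))     // hypothetical predicate
+//       L.erase(I);
+//     I = Next;
+//   }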
+
+bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ X86FL = STI->getFrameLowering();
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= ExpandMBB(MBB);
+ return Modified;
+}
+
+/// Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createX86ExpandPseudoPass() {
+ return new X86ExpandPseudo();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 688a544..9af0aeb 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -58,8 +59,8 @@ class X86FastISel final : public FastISel {
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+ : FastISel(funcInfo, libInfo) {
+ Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
}
@@ -80,15 +81,15 @@ public:
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
- bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
- unsigned &ResultReg);
+ bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg, unsigned Alignment = 1);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
@@ -123,11 +124,15 @@ private:
bool X86SelectTrunc(const Instruction *I);
+ bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
+ const TargetRegisterClass *RC);
+
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
+ bool X86SelectSIToFP(const Instruction *I);
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
+ return Subtarget->getInstrInfo();
}
const X86TargetMachine *getTargetMachine() const {
return static_cast<const X86TargetMachine *>(&TM);
@@ -160,6 +165,9 @@ private:
bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
const Value *Cond);
+
+ const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM);
};
} // end anonymous namespace.
@@ -237,6 +245,20 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
+/// \brief Adds a complex addressing mode to the given machine instr builder.
+/// Note: this will constrain the index register. If it's not possible to
+/// constrain the given index register, then a new one will be created. The
+/// IndexReg field of the addressing mode will be updated to match in this case.
+const MachineInstrBuilder &
+X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM) {
+ // First constrain the index register. It needs to be a GR64_NOSP.
+ AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
+ MIB->getNumOperands() +
+ X86::AddrIndexReg);
+ return ::addFullAddress(MIB, AM);
+}
+
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
@@ -321,8 +343,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- MachineMemOperand *MMO, unsigned &ResultReg) {
+bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+ MachineMemOperand *MMO, unsigned &ResultReg,
+ unsigned Alignment) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -367,6 +390,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
case MVT::f80:
// No f80 support yet.
return false;
+ case MVT::v4f32:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v2f64:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ RC = &X86::VR128RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -383,7 +430,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
@@ -444,7 +491,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
@@ -1063,8 +1110,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86SelectAddress(Ptr, AM))
return false;
+ unsigned Alignment = LI->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
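+  // (Equivalently, for illustration:
+  //    Alignment = Alignment ? Alignment : ABIAlignment;
+  //  so a load with no explicit alignment gets the ABI default.)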
+
unsigned ResultReg = 0;
- if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
+ Alignment))
return false;
updateValueMap(I, ResultReg);
@@ -1089,27 +1142,37 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
}
}
-/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
-/// of the comparison, return an opcode that works for the compare (e.g.
-/// CMP32ri) otherwise return 0.
+/// If we have a comparison with RHS as the RHS of the comparison, return an
+/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
- default: return 0;
- case MVT::i8: return X86::CMP8ri;
- case MVT::i16: return X86::CMP16ri;
- case MVT::i32: return X86::CMP32ri;
+ default:
+ return 0;
+ case MVT::i8:
+ return X86::CMP8ri;
+ case MVT::i16:
+ if (isInt<8>(Val))
+ return X86::CMP16ri8;
+ return X86::CMP16ri;
+ case MVT::i32:
+ if (isInt<8>(Val))
+ return X86::CMP32ri8;
+ return X86::CMP32ri;
case MVT::i64:
+ if (isInt<8>(Val))
+ return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
}
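
// For reference, isInt<8>(Val) above is the "fits in a sign-extended 8-bit
// immediate" test that enables the shorter CMPri8 encodings; a
// free-standing equivalent:
//
//   bool fitsImm8(int64_t V) { return V >= -128 && V <= 127; }
//   // fitsImm8(100) -> true  => CMP32ri8 (1-byte immediate)
//   // fitsImm8(300) -> false => CMP32ri  (4-byte immediate)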
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
- EVT VT) {
+ EVT VT, DebugLoc CurDbgLoc) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
@@ -1122,7 +1185,7 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
// CMPri, otherwise use CMPrr.
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
.addReg(Op0Reg)
.addImm(Op1C->getSExtValue());
return true;
@@ -1134,7 +1197,7 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
.addReg(Op0Reg)
.addReg(Op1Reg);
@@ -1202,7 +1265,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
ResultReg = createResultReg(&X86::GR8RegClass);
if (SETFOpc) {
- if (!X86FastEmitCompare(LHS, RHS, VT))
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
@@ -1227,7 +1290,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
std::swap(LHS, RHS);
// Emit a compare of LHS/RHS.
- if (!X86FastEmitCompare(LHS, RHS, VT))
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
@@ -1353,7 +1416,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
@@ -1740,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
if (SETFOpc) {
@@ -1805,11 +1868,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return true;
}
-/// \brief Emit SSE instructions to lower the select.
+/// \brief Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
-/// SSE instructions are available.
+/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Optimize conditions coming from a compare if both instructions are in the
// same basic block (values defined in other basic blocks may not have
@@ -1845,19 +1908,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- static unsigned OpcTable[2][2][4] = {
- { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
- { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
- { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static unsigned OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
};
- bool HasAVX = Subtarget->hasAVX();
unsigned *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
- case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
}
const Value *LHS = I->getOperand(1);
@@ -1879,14 +1940,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
- CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned ResultReg;
+
+ if (Subtarget->hasAVX()) {
+    // If we have AVX, create one blendv instead of three logic instructions.
+    // Blendv was introduced with SSE 4.1, but the two-register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ unsigned BlendOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ } else {
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ }
updateValueMap(I, ResultReg);
return true;
}
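
// VBLENDVPS/VBLENDVPD select per element on the sign bit of the mask
// operand, which is why one blend can replace the CMP/AND/ANDN/OR triple.
// A one-lane scalar sketch of the assumed semantics:
//
//   double blendLane(double IfFalse, double IfTrue, uint64_t MaskBits) {
//     return (MaskBits >> 63) ? IfTrue : IfFalse; // sign bit selects
//   }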
@@ -1924,7 +2004,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
unsigned CondReg = getRegForValue(Cond);
@@ -2001,41 +2081,84 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
return false;
}
+bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+ // The target-independent selection algorithm in FastISel already knows how
+  // to select a SINT_TO_FP when the target has SSE but not AVX.
+ // Early exit if the subtarget doesn't have AVX.
+ if (!Subtarget->hasAVX())
+ return false;
+
+ if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ return false;
+
+ // Select integer to float/double conversion.
+ unsigned OpReg = getRegForValue(I->getOperand(0));
+ if (OpReg == 0)
+ return false;
+
+ const TargetRegisterClass *RC = nullptr;
+ unsigned Opcode;
+
+ if (I->getType()->isDoubleTy()) {
+ // sitofp int -> double
+ Opcode = X86::VCVTSI2SDrr;
+ RC = &X86::FR64RegClass;
+ } else if (I->getType()->isFloatTy()) {
+ // sitofp int -> float
+ Opcode = X86::VCVTSI2SSrr;
+ RC = &X86::FR32RegClass;
+ } else
+ return false;
+
+ unsigned ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ unsigned ResultReg =
+ fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
+ updateValueMap(I, ResultReg);
+ return true;
+}
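+
+// (Background: under AVX, VCVTSI2SS/VCVTSI2SD take an extra source operand
+// whose upper lanes pass through to the result. Feeding them the
+// IMPLICIT_DEF above supplies that input without a false dependence on the
+// register's previous contents; roughly
+//   Tmp = IMPLICIT_DEF; Result = VCVTSI2SS Tmp, OpReg; )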
+
+// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
+bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
+ unsigned TargetOpc,
+ const TargetRegisterClass *RC) {
+ assert((I->getOpcode() == Instruction::FPExt ||
+ I->getOpcode() == Instruction::FPTrunc) &&
+ "Instruction must be an FPExt or FPTrunc!");
+
+ unsigned OpReg = getRegForValue(I->getOperand(0));
+ if (OpReg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
+ ResultReg);
+ if (Subtarget->hasAVX())
+ MIB.addReg(OpReg);
+ MIB.addReg(OpReg);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
- // fpext from float to double.
- if (X86ScalarSSEf64 &&
- I->getType()->isDoubleTy()) {
- const Value *V = I->getOperand(0);
- if (V->getType()->isFloatTy()) {
- unsigned OpReg = getRegForValue(V);
- if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(&X86::FR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::CVTSS2SDrr), ResultReg)
- .addReg(OpReg);
- updateValueMap(I, ResultReg);
- return true;
- }
+ if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
+ I->getOperand(0)->getType()->isFloatTy()) {
+ // fpext from float to double.
+ unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
+ return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
}
return false;
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (X86ScalarSSEf64) {
- if (I->getType()->isFloatTy()) {
- const Value *V = I->getOperand(0);
- if (V->getType()->isDoubleTy()) {
- unsigned OpReg = getRegForValue(V);
- if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(&X86::FR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::CVTSD2SSrr), ResultReg)
- .addReg(OpReg);
- updateValueMap(I, ResultReg);
- return true;
- }
- }
+ if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
+ I->getOperand(0)->getType()->isDoubleTy()) {
+ // fptrunc from double to float.
+ unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
+ return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
}
return false;
@@ -2062,6 +2185,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return true;
}
+ bool KillInputReg = false;
if (!Subtarget->is64Bit()) {
// If we're on x86-32; we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
@@ -2071,11 +2195,12 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
+ KillInputReg = true;
}
// Issue an extract_subreg.
unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
- InputReg, /*Kill=*/true,
+ InputReg, KillInputReg,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -2127,7 +2252,73 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {
default: return false;
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16: {
+ if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
+ return false;
+
+ const Value *Op = II->getArgOperand(0);
+ unsigned InputReg = getRegForValue(Op);
+ if (InputReg == 0)
+ return false;
+
+ // F16C only allows converting from float to half and from half to float.
+ bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
+ if (IsFloatToHalf) {
+ if (!Op->getType()->isFloatTy())
+ return false;
+ } else {
+ if (!II->getType()->isFloatTy())
+ return false;
+ }
+
+ unsigned ResultReg = 0;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
+ if (IsFloatToHalf) {
+ // 'InputReg' is implicitly promoted from register class FR32 to
+      // register class VR128 by the method 'constrainOperandRegClass', which is
+ // directly called by 'fastEmitInst_ri'.
+ // Instruction VCVTPS2PHrr takes an extra immediate operand which is
+ // used to provide rounding control.
+ InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
+
+ // Move the lower 32-bits of ResultReg to another register of class GR32.
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::VMOVPDI2DIrr), ResultReg)
+ .addReg(InputReg, RegState::Kill);
+
+ // The result value is in the lower 16-bits of ResultReg.
+ unsigned RegIdx = X86::sub_16bit;
+ ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
+ } else {
+ assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
+ // Explicitly sign-extend the input to 32-bit.
+ InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
+ /*Kill=*/false);
+
+ // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
+ InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
+ InputReg, /*Kill=*/true);
+
+ InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
+
+ // The result value is in the lower 32-bits of ResultReg.
+ // Emit an explicit copy from register class VR128 to register class FR32.
+ ResultReg = createResultReg(&X86::FR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(InputReg, RegState::Kill);
+ }
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::frameaddress: {
+ MachineFunction *MF = FuncInfo.MF;
+ if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
+ return false;
+
Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
@@ -2145,12 +2336,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// This needs to be set before we call getPtrSizedFrameRegister, otherwise
// we get the wrong frame register.
- MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*(FuncInfo.MF));
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -2247,6 +2437,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
.addImm(0)
.addMetadata(DI->getVariable())
@@ -2738,8 +2930,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
.addImm(NumBytes).addImm(0);
// Walk the register/memloc assignments, inserting copies/loads.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign const &VA = ArgLocs[i];
const Value *ArgVal = OutVals[VA.getValNo()];
@@ -2870,7 +3061,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
@@ -2934,7 +3125,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask operand representing the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
@@ -3044,6 +3235,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return X86SelectFPExt(I);
case Instruction::FPTrunc:
return X86SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return X86SelectSIToFP(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
@@ -3189,8 +3382,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ TM.getDataLayout()->getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
@@ -3224,7 +3417,10 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
ResultReg)
.addGlobalAddress(GV);
} else {
- unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
}
@@ -3266,7 +3462,10 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3337,6 +3536,23 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!Result)
return false;
+ // The index register could be in the wrong register class. Unfortunately,
+  // foldMemoryOperandImpl could have commuted the instruction, so it's not
+  // enough to just look at OpNo + the offset to the index reg. We actually
+  // need to scan the instruction to find the index reg and see if it's the
+  // correct reg class.
+ for (MIOperands MO(Result); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || MO->getReg() != AM.IndexReg)
+ continue;
+ // Found the index reg, now try to rewrite it.
+ unsigned OpNo = MO.getOperandNo();
+ unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
+ MO->getReg(), OpNo);
+ if (IndexReg == MO->getReg())
+ continue;
+ MO->setReg(IndexReg);
+ }
+
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 8e033a0..b39c5ab 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -88,7 +88,6 @@ public:
private:
MachineFunction *MF;
- const TargetMachine *TM;
const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
@@ -150,13 +149,11 @@ FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TM = &Func.getTarget();
- const X86Subtarget &ST = TM->getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
if (!ST.LEAusesAG() && !ST.slowLEA())
return false;
- TII =
- static_cast<const X86InstrInfo *>(TM->getSubtargetImpl()->getInstrInfo());
+ TII = ST.getInstrInfo();
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -219,7 +216,7 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return CurInst;
}
InstrDistance += TII->getInstrLatency(
- TM->getSubtargetImpl()->getInstrItineraryData(), CurInst);
+ MF->getSubtarget().getInstrItineraryData(), CurInst);
Found = getPreviousInstr(CurInst, MFI);
}
return nullptr;
@@ -333,7 +330,7 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
MachineFunction::iterator MFI) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
- if (TM->getSubtarget<X86Subtarget>().isSLM())
+ if (MF.getSubtarget<X86Subtarget>().isSLM())
processInstructionForSLM(I, MFI);
else
processInstruction(I, MFI);
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 6189109..3b0bd03 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,10 +32,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
@@ -300,7 +300,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// function. If it is all integer, there is nothing for us to do!
bool FPIsUsed = false;
- assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
for (unsigned i = 0; i <= 6; ++i)
if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
FPIsUsed = true;
@@ -438,7 +438,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
dbgs() << "Inserted instructions:\n\t";
- Start->print(dbgs(), &MF.getTarget());
+ Start->print(dbgs());
while (++Start != std::next(I)) {}
}
dumpStack();
@@ -898,7 +898,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Now we should have the correct registers live.
DEBUG(dumpStack());
- assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+ assert(StackTop == countPopulation(Mask) && "Live count mismatch");
}
/// shuffleStackTop - emit fxch instructions before I to shuffle the top
@@ -943,7 +943,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
}
}
- unsigned N = CountTrailingOnes_32(STReturns);
+ unsigned N = countTrailingOnes(STReturns);
// FP registers used for function return must be consecutive starting at
// FP0.
@@ -1420,14 +1420,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
if (STUses && !isMask_32(STUses))
MI->emitError("fixed input regs must be last on the x87 stack");
- unsigned NumSTUses = CountTrailingOnes_32(STUses);
+ unsigned NumSTUses = countTrailingOnes(STUses);
// Defs must be contiguous from the stack top. ST0-STn.
if (STDefs && !isMask_32(STDefs)) {
MI->emitError("output regs must be last on the x87 stack");
STDefs = NextPowerOf2(STDefs) - 1;
}
- unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+ unsigned NumSTDefs = countTrailingOnes(STDefs);
// So must the clobbered stack slots. ST0-STm, m >= n.
if (STClobbers && !isMask_32(STDefs | STClobbers))
@@ -1437,7 +1437,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
unsigned STPopped = STUses & (STDefs | STClobbers);
if (STPopped && !isMask_32(STPopped))
MI->emitError("implicitly popped regs must be last on the x87 stack");
- unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+ unsigned NumSTPopped = countTrailingOnes(STPopped);
DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
<< NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 66f54e9..db58d9c 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -109,6 +109,14 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
}
}
+static unsigned getSUBrrOpcode(unsigned isLP64) {
+ return isLP64 ? X86::SUB64rr : X86::SUB32rr;
+}
+
+static unsigned getADDrrOpcode(unsigned isLP64) {
+ return isLP64 ? X86::ADD64rr : X86::ADD32rr;
+}
+
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
@@ -182,14 +190,27 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
return 0;
}
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes,
- bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
- const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
+void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64BitTarget, bool Is64BitStackPtr,
+ bool UseLEA, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
@@ -204,7 +225,33 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
DebugLoc DL = MBB.findDebugLoc(MBBI);
while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (Offset > Chunk) {
+ // Rather than emit a long series of instructions for large offsets,
+      // load the offset into a register and do one sub/add.
+ unsigned Reg = 0;
+
+ if (isSub && !isEAXLiveIn(*MBB.getParent()))
+ Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
+ else
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+
+ if (Reg) {
+ Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
+ .addImm(Offset);
+ Opc = isSub
+ ? getSUBrrOpcode(Is64BitTarget)
+ : getADDrrOpcode(Is64BitTarget);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addReg(Reg);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset = 0;
+ continue;
+ }
+ }
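+
+    // (Net effect, for illustration: a very large allocation becomes
+    //    mov rax, <Offset>
+    //    sub rsp, rax
+    //  rather than a long run of Chunk-sized subs, whenever a scratch
+    //  register is available.)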
+
+ uint64_t ThisVal = std::min(Offset, Chunk);
if (ThisVal == (Is64BitTarget ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
@@ -266,45 +313,10 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower
-/// iterator.
-static
-void mergeSPUpdatesDown(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = nullptr) {
- // FIXME: THIS ISN'T RUN!!!
- return;
-
- if (MBBI == MBB.end()) return;
-
- MachineBasicBlock::iterator NI = std::next(MBBI);
- if (NI == MBB.end()) return;
-
- unsigned Opc = NI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- }
-}
-
-/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and
-/// the stack adjustment is returned as a positive value for ADD/LEA and a
-/// negative for SUB.
-static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
- bool doMergeWithPrevious) {
+int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
@@ -333,19 +345,6 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
-static bool isEAXLiveIn(MachineFunction &MF) {
- for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
- unsigned Reg = II->first;
-
- if (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL)
- return true;
- }
-
- return false;
-}
-
void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -396,12 +395,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
bool Is64Bit = STI.is64Bit();
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned CallOp;
if (Is64Bit)
@@ -453,6 +450,35 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
}
}
+static unsigned calculateSetFPREG(uint64_t SPAdjust) {
+  // The Win64 ABI has a less restrictive limit of 240; 128 works equally well
+  // and might require smaller successive adjustments.
+ const uint64_t Win64MaxSEHOffset = 128;
+ uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
+ // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
+ return SEHFrameOffset & -16;
+}
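+
+// (`x & -16` rounds down to a 16-byte boundary, since -16 in two's
+// complement is ...11110000. Worked examples, for illustration:
+//    calculateSetFPREG(40)  == min(40, 128)  & -16 == 32
+//    calculateSetFPREG(250) == min(250, 128) & -16 == 128 )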
+
+// If we're forcing a stack realignment, we can't rely on just the frame
+// info; we need to know the ABI stack alignment as well in case there is a
+// call out. Otherwise just make sure we have some alignment - we'll go with
+// the minimum SlotSize.
+static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+ return MaxAlign;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -537,52 +563,44 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- for 32-bit code, substitute %e?? registers for %r??
*/
-void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+void X86FrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isTargetWin64();
+ bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
// Not necessarily synonymous with IsWin64.
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
bool NeedsDwarfCFI =
!IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
bool UseLEA = STI.useLeaForSP();
- unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
- getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
+ const unsigned MachineFramePtr =
+ STI.isTarget64BitILP32()
+ ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
+ : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum SlotSize.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else if (MaxAlign < SlotSize)
- MaxAlign = SlotSize;
- }
-
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta && IsWinEH)
+ report_fatal_error("Can't handle guaranteed tail call under win64 yet");
+
if (TailCallReturnAddrDelta < 0)
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
@@ -602,14 +620,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
- if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoRedZone) &&
+ if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !usesTheStack(MF) && // Don't push and pop.
- !MF.shouldSplitStack()) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !usesTheStack(MF) && // Don't push and pop.
+ !MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -653,14 +670,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If required, include space for extra hidden slot for stashing base pointer.
if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;
- if (RegInfo->needsStackRealignment(MF)) {
- // Callee-saved registers are pushed on stack before the stack
- // is realigned.
- FrameSize -= X86FI->getCalleeSavedFrameSize();
- NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
- } else {
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
- }
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Callee-saved registers are pushed on stack before the stack is realigned.
+ if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);
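+
+  // (RoundUpToAlignment(N, A) == ((N + A - 1) / A) * A for power-of-two A;
+  //  e.g. 40 bytes of frame with MaxAlign == 16 round up to 48.)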
// Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
@@ -696,11 +711,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
- // Update EBP with the new base value.
- BuildMI(MBB, MBBI, DL,
- TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (!IsWinEH) {
+ // Update EBP with the new base value.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+ FramePtr)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -749,15 +767,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
- if (RegInfo->needsStackRealignment(MF)) {
+  // Don't do this for Win64; it needs to realign the stack after the prologue.
+ if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
uint64_t Val = -MaxAlign;
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
@@ -768,10 +787,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// the callee has more arguments then the caller.
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
// Adjust stack pointer: ESP -= numbytes.
// Windows and cygwin/mingw require a prologue helper routine when allocating
@@ -782,7 +797,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= StackProbeSize && UseStackProbe) {
+ uint64_t AlignedNumBytes = NumBytes;
+ if (IsWinEH && RegInfo->needsStackRealignment(MF))
+ AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
+ if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@@ -800,9 +818,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (Is64Bit) {
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (isUInt<32>(NumBytes)) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else if (isInt<32>(NumBytes)) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
} else {
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
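
The three-way split above picks the shortest mov encoding that can
materialize NumBytes in RAX for the __chkstk call. A sketch of the selection
using the same MathExtras predicates (the helper and enum are illustrative,
not part of the patch):

    #include "llvm/Support/MathExtras.h"

    // MOV32ri writes EAX, which implicitly zero-extends into RAX; MOV64ri32
    // sign-extends a 32-bit immediate; MOV64ri carries a full 64-bit
    // immediate and is the 10-byte fallback.
    enum class MovKind { Mov32ZeroExt, Mov64SignExt32, Mov64Full };
    MovKind pickMovForImmediate(uint64_t NumBytes) {
      if (llvm::isUInt<32>(NumBytes))
        return MovKind::Mov32ZeroExt;   // X86::MOV32ri
      if (llvm::isInt<32>(NumBytes))
        return MovKind::Mov64SignExt32; // X86::MOV64ri32
      return MovKind::Mov64Full;        // X86::MOV64ri
    }
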
@@ -835,68 +863,66 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
UseLEA, TII, *RegInfo);
}
+ if (NeedsWinEH && NumBytes)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+
int SEHFrameOffset = 0;
- if (NeedsWinEH) {
- if (HasFP) {
- // We need to set frame base offset low enough such that all saved
- // register offsets would be positive relative to it, but we can't
- // just use NumBytes, because .seh_setframe offset must be <=240.
- // So we pretend to have only allocated enough space to spill the
- // non-volatile registers.
- // We don't care about the rest of stack allocation, because unwinder
- // will restore SP to (BP - SEHFrameOffset)
- for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
- int offset = MFI->getObjectOffset(Info.getFrameIdx());
- SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
- }
- SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant
-
- // This only needs to account for XMM spill slots, GPR slots
- // are covered by the .seh_pushreg's emitted above.
- unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
- if (Size) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
- .addImm(Size)
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ if (IsWinEH && HasFP) {
+ SEHFrameOffset = calculateSetFPREG(NumBytes);
+ if (SEHFrameOffset)
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
+ StackPtr, false, SEHFrameOffset);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
+ if (NeedsWinEH)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
.setMIFlag(MachineInstr::FrameSetup);
- } else {
- // SP will be the base register for restoring XMMs
- if (NumBytes) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
}
- // Skip the rest of register spilling code
- while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
+ const MachineInstr *FrameInstr = &*MBBI;
++MBBI;
- // Emit SEH info for non-GPRs
- if (NeedsWinEH) {
- for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
- unsigned Reg = Info.getReg();
- if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
- continue;
- assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
-
- int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
- Offset += SEHFrameOffset;
-
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
- .addImm(Reg)
- .addImm(Offset)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinEH) {
+ int FI;
+ if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
+ if (X86::FR64RegClass.contains(Reg)) {
+ int Offset = getFrameIndexOffset(MF, FI);
+ Offset += SEHFrameOffset;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+ .addImm(Reg)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
}
+ }
+ if (NeedsWinEH)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
+
+ // Realign stack after we spilled callee-saved registers (so that we'll be
+ // able to calculate their offsets from the frame pointer).
+ // Win64 requires aligning the stack after the prologue.
+ if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
+ assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ uint64_t Val = -MaxAlign;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
}
// If we need a base pointer, set it up here. It's whatever the value
@@ -938,79 +964,92 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+bool X86FrameLowering::canUseLEAForSPInEpilogue(
+ const MachineFunction &MF) const {
+ // We can't use LEA instructions for adjusting the stack pointer if this is a
+ // leaf function in the Win64 ABI. Only ADD instructions may be used to
+ // deallocate the stack.
+ // This means that we can use LEA for SP in two situations:
+ // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
+ // 2. We *have* a frame pointer which means we are permitted to use LEA.
+ return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
+}
+
+/// Check whether or not the terminators of \p MBB need to read EFLAGS.
+static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
+ for (const MachineInstr &MI : MBB.terminators()) {
+ bool BreakNext = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS)
+ continue;
+
+      // This terminator reads EFLAGS that are not defined
+      // by a previous terminator.
+ if (!MO.isDef())
+ return true;
+ BreakNext = true;
+ }
+ if (BreakNext)
+ break;
+ }
+ return false;
+}
+
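
A self-contained analogue of the scan above, over a toy operand model
(illustrative only): a use of EFLAGS seen before any terminator defines it
means an epilogue ADD inserted ahead of the terminators would clobber a live
flags value.

    #include <vector>

    struct Operand { bool IsEFLAGS; bool IsDef; };
    using Terminator = std::vector<Operand>;

    bool terminatorsReadLiveFlags(const std::vector<Terminator> &Terms) {
      for (const Terminator &Term : Terms) {
        bool DefsFlags = false;
        for (const Operand &Op : Term) {
          if (!Op.IsEFLAGS)
            continue;
          if (!Op.IsDef)
            return true; // reads EFLAGS produced outside the terminators
          DefsFlags = true;
        }
        if (DefsFlags)
          break; // later terminators read flags defined by this one
      }
      return false;
    }
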
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI != MBB.end() && "Returning block has no instructions");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc DL = MBBI->getDebugLoc();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
  // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
const bool Is64BitILP32 = STI.isTarget64BitILP32();
- bool UseLEA = STI.useLeaForSP();
- unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- unsigned MachineFramePtr = Is64BitILP32 ?
- getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
+ unsigned MachineFramePtr =
+ Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
+ : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
-
- switch (RetOpcode) {
- default:
- llvm_unreachable("Can only insert epilog into returning blocks");
- case X86::RETQ:
- case X86::RETL:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64:
- break; // These are ok
- }
+ bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
+  // If we can use LEA for SP but we shouldn't, check that none
+  // of the terminators uses EFLAGS. Otherwise we will insert
+  // an ADD that will redefine EFLAGS and break the condition.
+  // Alternatively, we could move the ADD, but this may not be possible
+  // and is an optimization anyway.
+ if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
+ UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
+  // If this assert fires, it means we do not do the right thing
+  // in canUseAsEpilogue.
+ assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
+ "We shouldn't have allowed this insertion point");
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
- uint64_t MaxAlign = MFI->getMaxAlignment();
+ uint64_t MaxAlign = calculateMaxStackAlign(MF);
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else
- MaxAlign = MaxAlign ? MaxAlign : 4;
- }
-
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF)) {
- // Callee-saved registers were pushed on stack before the stack
- // was realigned.
- FrameSize -= CSSize;
- NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
- } else {
- NumBytes = FrameSize - CSSize;
- }
+ NumBytes = FrameSize - CSSize;
+
+ // Callee-saved registers were pushed on stack before the stack was
+ // realigned.
+ if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
+ NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
// Pop EBP.
BuildMI(MBB, MBBI, DL,
@@ -1018,6 +1057,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else {
NumBytes = StackSize - CSSize;
}
+ uint64_t SEHStackAllocAmt = NumBytes;
// Skip the callee-saved pop instructions.
while (MBBI != MBB.begin()) {
@@ -1032,7 +1072,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineBasicBlock::iterator FirstCSPop = MBBI;
- DL = MBBI->getDebugLoc();
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
@@ -1045,10 +1086,20 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
- if (CSSize != 0) {
+ unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
+ uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
+
+ // There are only two legal forms of epilogue:
+ // - add SEHAllocationSize, %rsp
+ // - lea SEHAllocationSize(%FramePtr), %rsp
+ //
+ // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
+ // However, we may use this sequence if we have a frame pointer because the
+ // effects of the prologue can safely be undone.
+ if (LEAAmount != 0) {
unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
+ FramePtr, false, LEAAmount);
--MBBI;
} else {
unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
@@ -1058,8 +1109,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
- TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
+ UseLEAForSP, TII, *RegInfo);
--MBBI;
}
@@ -1072,101 +1123,66 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsWinEH)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
- // We're returning from function via eh_return.
- if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &DestAddr = MBBI->getOperand(0);
- assert(DestAddr.isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, DL,
- TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(DestAddr.getReg());
- } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
- RetOpcode == X86::TCRETURNmi ||
- RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
- RetOpcode == X86::TCRETURNmi64) {
- bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
-
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
- assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
-
- // Incoporate the retaddr area.
- Offset = StackAdj-MaxTCDelta;
- assert(Offset >= 0 && "Offset should never be negative");
-
- if (Offset) {
- // Check for possible merge with preceding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
- UseLEA, TII, *RegInfo);
- }
-
- // Jump to label or value in register.
- if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
- } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
- ? X86::TAILJMPm : X86::TAILJMPm64));
- for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
- } else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- } else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- NewMI->copyImplicitOps(MF, MBBI);
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
- RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
- (X86FI->getTCReturnAddrDelta() < 0)) {
- // Add the return addr area delta back since we are not tail calling.
- int delta = -1*X86FI->getTCReturnAddrDelta();
- MBBI = MBB.getLastNonDebugInstr();
+ // Add the return addr area delta back since we are not tail calling.
+ int Offset = -1 * X86FI->getTCReturnAddrDelta();
+ assert(Offset >= 0 && "TCDelta should never be positive");
+ if (Offset) {
+ MBBI = MBB.getFirstTerminator();
// Check for possible merge with preceding ADD instruction.
- delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
- *RegInfo);
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
+ UseLEAForSP, TII, *RegInfo);
}
}
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Offset will hold the offset from the stack pointer at function entry to the
+ // object.
+ // We need to factor in additional offsets applied during the prologue to the
+ // frame, base, and stack pointer depending on which is used.
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI->getStackSize();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ bool HasFP = hasFP(MF);
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ int64_t FPDelta = 0;
+
+ if (IsWinEH) {
+ assert(!MFI->hasCalls() || (StackSize % 16) == 8);
+
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+    // If required, include space for the extra hidden slot used to stash the
+    // base pointer.
+ if (X86FI->getRestoreBasePointer())
+ FrameSize += SlotSize;
+ uint64_t NumBytes = FrameSize - CSSize;
+
+ uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
+ if (FI && FI == X86FI->getFAIndex())
+ return -SEHFrameOffset;
+
+  // FPDelta is the offset between the "traditional" FP location (the saved
+  // old base pointer followed by the return address) and the location
+  // required by the restricted Win64 prologue.
+  // Add FPDelta to all offsets below that go through the frame pointer.
+ FPDelta = FrameSize - SEHFrameOffset;
+ assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
+ "FPDelta isn't aligned per the Win64 ABI!");
+ }
+
if (RegInfo->hasBasePointer(MF)) {
- assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
+ assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
+ return Offset + SlotSize + FPDelta;
} else {
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
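
A worked example of the Win64 bias (the numbers are hypothetical): with
StackSize = 104 and SlotSize = 8, FrameSize = 96; with CSSize = 32,
NumBytes = 64. If calculateSetFPREG() then places the frame register 64
bytes above the final SP (the .seh_setframe offset must be <= 240), FPDelta
is 96 - 64 = 32, and every frame-pointer-relative offset computed here is
shifted by those 32 bytes, keeping the 16-byte alignment the assertion
demands.
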
@@ -1174,33 +1190,32 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
} else if (RegInfo->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
+ return Offset + SlotSize + FPDelta;
} else {
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
// FIXME: Support tail calls
} else {
- if (!hasFP(MF))
+ if (!HasFP)
return Offset + StackSize;
// Skip the saved EBP.
- Offset += RegInfo->getSlotSize();
+ Offset += SlotSize;
// Skip the RETADDR move area
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
Offset -= TailCallReturnAddrDelta;
}
- return Offset;
+ return Offset + FPDelta;
}
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
@@ -1221,7 +1236,7 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
{
#ifndef NDEBUG
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
// Note: LLVM arranges the stack as:
// Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
// > "Stack Slots" (<--SP)
@@ -1275,11 +1290,11 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
return Offset + StackSize;
}
// Simplified from getFrameIndexReference keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
-
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
FrameReg = RegInfo->getStackRegister();
@@ -1291,7 +1306,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
std::vector<CalleeSavedInfo> &CSI) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1358,8 +1373,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
@@ -1379,8 +1394,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
// It can be done by spilling XMMs to stack frame.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg))
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
@@ -1406,8 +1420,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
// Reload XMMs from stack frame.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
@@ -1438,7 +1452,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ MF.getSubtarget<X86Subtarget>().getRegisterInfo();
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1515,13 +1529,12 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr
// limit.
static const uint64_t kSplitStackAvailable = 256;
-void
-X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
- MachineBasicBlock &prologueMBB = MF.front();
+void X86FrameLowering::adjustForSegmentedStacks(
+ MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
uint64_t StackSize;
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
const bool IsLP64 = STI.isTarget64BitLP64();
unsigned TlsReg, TlsOffset;
@@ -1559,8 +1572,9 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be the last (terminating) instruction.
- for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
- e = prologueMBB.livein_end(); i != e; i++) {
+ for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
+ e = PrologueMBB.livein_end();
+ i != e; i++) {
allocMBB->addLiveIn(*i);
checkMBB->addLiveIn(*i);
}
@@ -1674,7 +1688,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
  // On 32-bit we first push the argument size and then the frame size. On
  // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
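
A scalar sketch of the test this block encodes (per the comment on the JA
above; the names are illustrative):

    #include <cstddef>
    #include <cstdint>

    // The JA to the prologue is taken when the current stack pointer is at
    // or above the stacklet limit plus the space this frame needs; otherwise
    // control falls through into allocMBB to call morestack.
    bool stackletHasRoom(uintptr_t SP, uintptr_t StackletLimit,
                         size_t Required) {
      return SP >= StackletLimit + Required;
    }
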
@@ -1741,10 +1755,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
else
BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
- allocMBB->addSuccessor(&prologueMBB);
+ allocMBB->addSuccessor(&PrologueMBB);
checkMBB->addSuccessor(allocMBB);
- checkMBB->addSuccessor(&prologueMBB);
+ checkMBB->addSuccessor(&PrologueMBB);
#ifdef XDEBUG
MF.verify();
@@ -1766,13 +1780,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// call inc_stack # doubles the stack space
/// temp0 = sp - MaxStack
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
-void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+void X86FrameLowering::adjustForHiPEPrologue(
+ MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
- ->getSlotSize();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+ const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
const bool Is64Bit = STI.is64Bit();
const bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL;
@@ -1837,12 +1850,12 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
  // If the stack frame needed is larger than the guaranteed amount, then
  // runtime checks and calls to the "inc_stack_0" BIF should be inserted in
  // the assembly prologue.
if (MaxStack > Guaranteed) {
- MachineBasicBlock &prologueMBB = MF.front();
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
- E = prologueMBB.livein_end(); I != E; I++) {
+ for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
+ E = PrologueMBB.livein_end();
+ I != E; I++) {
stackCheckMBB->addLiveIn(*I);
incStackMBB->addLiveIn(*I);
}
@@ -1878,7 +1891,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
    // SPLimitOffset is in a fixed heap location (pointed to by BP).
addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
.addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
// Create new MBB for IncStack:
BuildMI(incStackMBB, DL, TII.get(CALLop)).
@@ -1889,9 +1902,9 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
.addReg(ScratchReg), PReg, false, SPLimitOffset);
BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
- stackCheckMBB->addSuccessor(&prologueMBB, 99);
+ stackCheckMBB->addSuccessor(&PrologueMBB, 99);
stackCheckMBB->addSuccessor(incStackMBB, 1);
- incStackMBB->addSuccessor(&prologueMBB, 99);
+ incStackMBB->addSuccessor(&PrologueMBB, 99);
incStackMBB->addSuccessor(incStackMBB, 1);
}
#ifdef XDEBUG
@@ -1902,14 +1915,13 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
unsigned StackPtr = RegInfo.getStackRegister();
bool reserveCallFrame = hasReservedCallFrame(MF);
- int Opcode = I->getOpcode();
+ unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
@@ -1926,11 +1938,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+ unsigned StackAlign = getStackAlignment();
+ Amount = RoundUpToAlignment(Amount, StackAlign);
MachineInstr *New = nullptr;
@@ -1983,3 +1992,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
}
+bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ assert(MBB.getParent() && "Block is not attached to a function!");
+
+ if (canUseLEAForSPInEpilogue(*MBB.getParent()))
+ return true;
+
+  // If we cannot use LEA to adjust SP, we may need to use ADD, which
+  // clobbers EFLAGS. Check that none of the terminators reads EFLAGS;
+  // if one does, conservatively assume it is not safe to insert the
+  // epilogue here.
+ return !terminatorsNeedFlagsAsInput(MBB);
+}
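
A sketch of how a shrink-wrapping style caller would consult the new hook
(the wrapper is hypothetical; only canUseAsEpilogue() comes from this patch):

    #include "X86FrameLowering.h"

    // A candidate restore block is usable only if the epilogue, inserted
    // before its first terminator, is safe there: either LEA can restore SP,
    // or no terminator reads EFLAGS that an ADD would clobber.
    static bool isValidRestoreBlock(const llvm::X86FrameLowering &TFL,
                                    const llvm::MachineBasicBlock &MBB) {
      return TFL.canUseAsEpilogue(MBB);
    }
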
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 448a365..5d03b4d 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -18,10 +18,6 @@
namespace llvm {
-class MCSymbol;
-class X86TargetMachine;
-class X86Subtarget;
-
class X86FrameLowering : public TargetFrameLowering {
public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
@@ -39,12 +35,14 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void adjustForSegmentedStacks(MachineFunction &MF) const override;
+ void adjustForSegmentedStacks(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
- void adjustForHiPEPrologue(MachineFunction &MF) const override;
+ void adjustForHiPEPrologue(MachineFunction &MF,
+ MachineBasicBlock &PrologueMBB) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
@@ -81,6 +79,33 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+  /// Check the instruction before/after the passed instruction. If
+  /// it is an ADD/SUB/LEA instruction, it is deleted and the stack
+  /// adjustment is returned as a positive value for ADD/LEA and a
+  /// negative one for SUB.
+ static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, bool doMergeWithPrevious);
+
+ /// Emit a series of instructions to increment / decrement the stack
+ /// pointer by a constant value.
+ static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
+ int64_t NumBytes, bool Is64BitTarget,
+ bool Is64BitStackPtr, bool UseLEA,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI);
+
+ /// Check that LEA can be used on SP in an epilogue sequence for \p MF.
+ bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
+
+  /// Check whether or not the given \p MBB can be used as an epilogue
+  /// for the target.
+ /// The epilogue will be inserted before the first terminator of that block.
+ /// This method is used by the shrink-wrapping pass to decide if
+ /// \p MBB will be correctly handled by the target.
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
private:
/// convertArgMovsToPushes - This method tries to convert a call sequence
/// that uses sub and mov instructions to put the argument onto the stack
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7fe8e8b..de59109 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -156,9 +156,7 @@ namespace {
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel),
- Subtarget(&tm.getSubtarget<X86Subtarget>()),
- OptForSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -166,7 +164,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Subtarget = &MF.getSubtarget<X86Subtarget>();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -206,6 +204,9 @@ namespace {
bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
+ bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
bool SelectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -230,19 +231,20 @@ namespace {
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+ void EmitSpecialCodeForMain();
- inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
- SDValue &Scale, SDValue &Index,
- SDValue &Disp, SDValue &Segment) {
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
TLI->getPointerTy())
: AM.Base_Reg;
- Scale = getI8Imm(AM.Scale);
+ Scale = getI8Imm(AM.Scale, DL);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
@@ -263,7 +265,7 @@ namespace {
Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
AM.SymbolFlags);
else
- Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+ Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
if (AM.Segment.getNode())
Segment = AM.Segment;
@@ -273,14 +275,14 @@ namespace {
/// getI8Imm - Return a target constant with the specified value, of type
/// i8.
- inline SDValue getI8Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i8);
+ inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -298,7 +300,7 @@ namespace {
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ return Subtarget->getInstrInfo();
}
/// \brief Address-mode matching performs shift-of-and to and-of-shift
@@ -395,17 +397,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
Ops.clear();
Ops.push_back(NewChain);
}
- for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
- Ops.push_back(OrigChain.getOperand(i));
+ Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
- unsigned NumOps = Call.getNode()->getNumOperands();
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
- for (unsigned i = 1, e = NumOps; i != e; ++i)
- Ops.push_back(Call.getOperand(i));
+ Ops.append(Call->op_begin() + 1, Call->op_end());
CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}
@@ -453,8 +452,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -571,14 +569,18 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
-void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
- MachineFrameInfo *MFI) {
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
+void X86DAGToDAGISel::EmitSpecialCodeForMain() {
if (Subtarget->isTargetCygMing()) {
- unsigned CallOp =
- Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- BuildMI(BB, DebugLoc(),
- TII->get(CallOp)).addExternalSymbol("__main");
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(*CurDAG);
+ CLI.setChain(CurDAG->getRoot())
+ .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
+ CurDAG->getExternalSymbol("__main", TLI->getPointerTy()),
+ std::move(Args), 0);
+ const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ CurDAG->setRoot(Result.second);
}
}
@@ -586,7 +588,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
// If this is main, emit special code for main.
if (const Function *Fn = MF->getFunction())
if (Fn->hasExternalLinkage() && Fn->getName() == "main")
- EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
+ EmitSpecialCodeForMain();
}
static bool isDispSafeForFrameIndex(int64_t Val) {
@@ -601,6 +603,9 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
+ // Cannot combine ExternalSymbol displacements with integer offsets.
+ if (Offset != 0 && AM.ES)
+ return true;
int64_t Val = AM.Disp + Offset;
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit()) {
@@ -803,11 +808,11 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
- SDValue Eight = DAG.getConstant(8, MVT::i8);
- SDValue NewMask = DAG.getConstant(0xff, VT);
+ SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
+ SDValue NewMask = DAG.getConstant(0xff, DL, VT);
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
- SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
// Insert the new nodes into the topological ordering. We must do this in
@@ -851,7 +856,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
- SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
+ SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -918,7 +923,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
// We also need to ensure that mask is a continuous run of bits.
- if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+ if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
@@ -957,9 +962,9 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
X = NewX;
}
SDLoc DL(N);
- SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
- SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
// Insert the new nodes into the topological ordering. We must do this in
@@ -1006,6 +1011,17 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
switch (N.getOpcode()) {
default: break;
+ case ISD::FRAME_ALLOC_RECOVER: {
+ if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
+ if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
+ if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
+ // Use the symbol and don't prefix it.
+ AM.ES = ESNode->getSymbol();
+ AM.SymbolFlags = X86II::MO_NOPREFIX;
+ return false;
+ }
+ break;
+ }
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (!FoldOffsetIntoAddress(Val, AM))
@@ -1191,7 +1207,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
// Ok, the transformation is legal and appears profitable. Go for it.
- SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
AM.IndexReg = Neg;
AM.Scale = 1;
@@ -1309,6 +1325,42 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
+bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+
+ MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
+ if (!Mgs)
+ return false;
+ X86ISelAddressMode AM;
+ unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+
+ SDLoc DL(N);
+ Base = Mgs->getBasePtr();
+ Index = Mgs->getIndex();
+ unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
+ Scale = getI8Imm(ScalarSize/8, DL);
+
+  // If Base is 0, the whole address is in the index and the Scale is 1.
+ if (isa<ConstantSDNode>(Base)) {
+ assert(dyn_cast<ConstantSDNode>(Base)->isNullValue() &&
+ "Unexpected base in gather/scatter");
+ Scale = getI8Imm(1, DL);
+ Base = CurDAG->getRegister(0, MVT::i32);
+ }
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ return true;
+}
+
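
For orientation, the operands SelectVectorAddr() produces feed the usual x86
scaled-index form; per lane, the gather/scatter hardware computes the
following (hypothetical scalar restatement):

    #include <cstdint>

    // EA = SegmentBase + Base + Index[lane] * Scale + Disp, where Scale is
    // the element size in bytes (or 1 when the whole address is carried in
    // the index), and SegmentBase comes from GS/FS for address spaces
    // 256/257.
    uint64_t gatherLaneAddress(uint64_t SegmentBase, uint64_t Base,
                               int64_t LaneIndex, uint8_t Scale,
                               int32_t Disp) {
      return SegmentBase + Base +
             static_cast<uint64_t>(LaneIndex) * Scale + Disp;
    }
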
/// SelectAddr - returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
@@ -1350,7 +1402,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (!AM.IndexReg.getNode())
AM.IndexReg = CurDAG->getRegister(0, VT);
- getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1406,7 +1458,7 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
if ((uint32_t)ImmVal != (uint64_t)ImmVal)
return false;
- Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
return true;
}
@@ -1442,9 +1494,9 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
// Base could already be %rip, particularly in the x32 ABI.
Base = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64),
+ CurDAG->getTargetConstant(0, DL, MVT::i64),
Base,
- CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
0);
}
@@ -1456,9 +1508,10 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
"Expect to be extending 32-bit registers for use in LEA");
Index = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64),
+ CurDAG->getTargetConstant(0, DL, MVT::i64),
Index,
- CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ CurDAG->getTargetConstant(X86::sub_32bit, DL,
+ MVT::i32)),
0);
}
@@ -1524,7 +1577,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
if (Complexity <= 2)
return false;
- getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1548,7 +1601,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1718,7 +1771,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
// an immediate operand to sub. However, it still fits in 32 bits for the
// add (since it is not negated) so we can return target-constant.
if (CNVal == INT32_MIN)
- return CurDAG->getTargetConstant(CNVal, NVT);
+ return CurDAG->getTargetConstant(CNVal, dl, NVT);
// For atomic-load-add, we could do some optimizations.
if (Op == ADD) {
// Translate to INC/DEC if ADD by 1 or -1.
@@ -1733,7 +1786,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
CNVal = -CNVal;
}
}
- return CurDAG->getTargetConstant(CNVal, NVT);
+ return CurDAG->getTargetConstant(CNVal, dl, NVT);
}
// If the value operand is single-used, try to optimize it.
@@ -2046,12 +2099,14 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
MVT::Other);
+ SDLoc DL(Node);
+
// Memory Operands: Base, Scale, Index, Disp, Segment
- SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
SDValue Segment = CurDAG->getRegister(0, MVT::i32);
- const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
Disp, Segment, VMask, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops);
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
// Node has 2 outputs: VDst and MVT::Other.
// ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
// We replace VDst of Node with VDst of ResNode, and Other of Node with Other
@@ -2180,7 +2235,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
- unsigned ShlOp, Op;
+ unsigned ShlOp, AddOp, Op;
MVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2201,6 +2256,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32:
assert(CstVT == MVT::i8);
ShlOp = X86::SHL32ri;
+ AddOp = X86::ADD32rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2212,6 +2268,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i64:
assert(CstVT == MVT::i8 || CstVT == MVT::i32);
ShlOp = X86::SHL64ri;
+ AddOp = X86::ADD64rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2223,10 +2280,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Emit the smaller op and the shift.
- SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ if (ShlVal == 1)
+ return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
+ SDValue(New, 0));
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
- getI8Imm(ShlVal));
+ getI8Imm(ShlVal, dl));
}
case X86ISD::UMUL8:
case X86ISD::SMUL8: {
@@ -2390,7 +2450,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Shift AX down 8 bits.
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ CurDAG->getTargetConstant(8, dl, MVT::i8)),
+ 0);
// Then truncate it down to i8.
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
@@ -2510,7 +2571,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
- CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
+ CurDAG->getTargetConstant(X86::sub_16bit, dl,
+ MVT::i32)),
0);
break;
case MVT::i32:
@@ -2519,8 +2581,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
- CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
+ CurDAG->getTargetConstant(X86::sub_32bit, dl,
+ MVT::i32)),
0);
break;
default:
@@ -2572,8 +2635,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Result =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), Result,
- CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ CurDAG->getTargetConstant(0, dl, MVT::i64), Result,
+ CurDAG->getTargetConstant(X86::sub_32bit, dl,
+ MVT::i32)),
0);
}
} else {
@@ -2612,26 +2676,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- HasNoSignedComparisonUses(Node)) {
- // Look for (X86cmp (truncate $op, i1), 0) and try to convert to a
- // smaller encoding
- if (Opcode == X86ISD::CMP && N0.getValueType() == MVT::i1 &&
- X86::isZeroNode(N1)) {
- SDValue Reg = N0.getOperand(0);
- SDValue Imm = CurDAG->getTargetConstant(1, MVT::i8);
-
- // Emit testb
- if (Reg.getScalarValueSizeInBits() > 8)
- Reg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg);
- // Emit a testb.
- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
- Reg, Imm);
- ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
- return nullptr;
- }
-
+ HasNoSignedComparisonUses(Node))
N0 = N0.getOperand(0);
- }
+
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
// Look past the truncate if CMP is the only use of it.
@@ -2647,7 +2694,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
(!(C->getZExtValue() & 0x80) ||
HasNoSignedComparisonUses(Node))) {
- SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
// On x86-32, only the ABCD registers have 8-bit subregisters.
@@ -2658,7 +2705,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
}
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
Reg.getValueType(), Reg, RC), 0);
}
@@ -2683,7 +2730,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
HasNoSignedComparisonUses(Node))) {
// Shift the immediate right by 8 bits.
SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
- MVT::i8);
+ dl, MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
// Put the value in an ABCD register.
@@ -2694,7 +2741,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
}
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
Reg.getValueType(), Reg, RC), 0);
@@ -2719,7 +2766,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
N0.getValueType() != MVT::i16 &&
(!(C->getZExtValue() & 0x8000) ||
HasNoSignedComparisonUses(Node))) {
- SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
+ MVT::i16);
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 16-bit subregister.
@@ -2741,7 +2789,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
N0.getValueType() == MVT::i64 &&
(!(C->getZExtValue() & 0x80000000) ||
HasNoSignedComparisonUses(Node))) {
- SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
+ MVT::i32);
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 32-bit subregister.
@@ -2824,14 +2873,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
bool X86DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, Op2, Op3, Op4;
- switch (ConstraintCode) {
- case 'o': // offsetable ??
- case 'v': // not offsetable ??
- default: return true;
- case 'm': // memory
+ switch (ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ // FIXME: It seems strange that 'i' is needed here since it's supposed to
+ // be an immediate and not a memory constraint.
+ // Fallthrough.
+ case InlineAsm::Constraint_o: // offsetable ??
+ case InlineAsm::Constraint_v: // not offsetable ??
+ case InlineAsm::Constraint_m: // memory
+ case InlineAsm::Constraint_X:
if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 85978d8..3ba8115 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +32,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -67,18 +67,6 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
-static cl::opt<bool> ExperimentalVectorShuffleLowering(
- "x86-experimental-vector-shuffle-lowering", cl::init(true),
- cl::desc("Enable an experimental vector shuffle lowering code path."),
- cl::Hidden);
-
-static cl::opt<bool> ExperimentalVectorShuffleLegality(
- "x86-experimental-vector-shuffle-legality", cl::init(false),
- cl::desc("Enable experimental shuffle legality based on the experimental "
- "shuffle lowering. Should only be used with the experimental "
- "shuffle lowering."),
- cl::Hidden);
-
static cl::opt<int> ReciprocalEstimateRefinementSteps(
"x86-recip-refinement-steps", cl::init(1),
cl::desc("Specify the number of Newton-Raphson iterations applied to the "
@@ -89,147 +77,13 @@ static cl::opt<int> ReciprocalEstimateRefinementSteps(
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl,
- unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- unsigned Factor = VT.getSizeInBits()/vectorWidth;
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
- VT.getVectorNumElements()/Factor);
-
- // Extract from UNDEF is UNDEF.
- if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(ResultVT);
-
- // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
- unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
-
- // If the input is a buildvector just emit a smaller one.
- if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
-}
-
-/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
-/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
-/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
-/// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering EXTRACT_VECTOR_ELT operations easier.
-static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert((Vec.getValueType().is256BitVector() ||
- Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
-}
-
-/// Generate a DAG to grab 256-bits from a 512-bit vector.
-static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
-}
-
-static SDValue InsertSubVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl, unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- // Inserting UNDEF is Result
- if (Vec.getOpcode() == ISD::UNDEF)
- return Result;
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- EVT ResultVT = Result.getValueType();
-
- // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
-}
-
-/// Generate a DAG to put 128-bits into a vector > 128 bits. This
-/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
-/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
-/// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG,SDLoc dl) {
- assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
-}
-
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
-}
-
-/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
-/// instructions. This is used because creating CONCAT_VECTOR nodes of
-/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
-/// large BUILD_VECTORS.
-static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
-static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
-}
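
The deleted Extract/InsertSubVector helpers above all relied on the same index normalization: round an element index down to the first element of the 128- or 256-bit chunk containing it. A standalone recomputation of that arithmetic, with assumed inputs rather than LLVM types:

    #include <cassert>

    // NormalizedIdxVal from the deleted helpers: the first element of the
    // VectorWidth-bit chunk that contains element IdxVal.
    unsigned normalizeIdx(unsigned IdxVal, unsigned EltBits,
                          unsigned VectorWidth) {
      unsigned ElemsPerChunk = VectorWidth / EltBits;
      return ((IdxVal * EltBits) / VectorWidth) * ElemsPerChunk;
    }

    int main() {
      // v8i32 (32-bit elements), 128-bit chunks hold 4 elements:
      assert(normalizeIdx(0, 32, 128) == 0);
      assert(normalizeIdx(3, 32, 128) == 0); // still inside the low chunk
      assert(normalizeIdx(5, 32, 128) == 4); // rounded to the high chunk
      // v16i16, 256-bit chunks hold 16 elements:
      assert(normalizeIdx(9, 16, 256) == 0);
    }
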
-
-// FIXME: This should stop caching the target machine as soon as
-// we can remove resetOperationActions et al.
-X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
+ const X86Subtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
TD = getDataLayout();
- resetOperationActions();
-}
-
-void X86TargetLowering::resetOperationActions() {
- const TargetMachine &TM = getTargetMachine();
- static bool FirstTimeThrough = true;
-
- // If none of the target options have changed, then we don't need to reset the
- // operation actions.
- if (!FirstTimeThrough && TO == TM.Options) return;
-
- if (!FirstTimeThrough) {
- // Reinitialize the actions.
- initActions();
- FirstTimeThrough = false;
- }
-
- TO = TM.Options;
-
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -247,8 +101,7 @@ void X86TargetLowering::resetOperationActions() {
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
- const X86RegisterInfo *RegInfo =
- TM.getSubtarget<X86Subtarget>().getRegisterInfo();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides on Atom when compiling with O2.
@@ -330,7 +183,7 @@ void X86TargetLowering::resetOperationActions() {
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- } else if (!TM.Options.UseSoftFloat) {
+ } else if (!Subtarget->useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -344,7 +197,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!TM.Options.UseSoftFloat) {
+ if (!Subtarget->useSoftFloat()) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -387,7 +240,7 @@ void X86TargetLowering::resetOperationActions() {
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!TM.Options.UseSoftFloat) {
+ } else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -515,7 +368,7 @@ void X86TargetLowering::resetOperationActions() {
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
- if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
+ if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
@@ -660,7 +513,11 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
- if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
+ // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
+ setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
+ setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
+
+ if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -694,7 +551,7 @@ void X86TargetLowering::resetOperationActions() {
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -729,7 +586,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
- } else if (!TM.Options.UseSoftFloat) {
+ } else if (!Subtarget->useSoftFloat()) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
@@ -763,7 +620,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!TM.Options.UseSoftFloat) {
+ if (!Subtarget->useSoftFloat()) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -893,49 +750,35 @@ void X86TargetLowering::resetOperationActions() {
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
+
+ // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
+ // split/scalarized right now.
+ if (VT.getVectorElementType() == MVT::f16)
+ setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
// MMX-sized vectors (other than x86mmx) are expected to be expanded
// into smaller operations.
- setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
- setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
- setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
- setOperationAction(ISD::AND, MVT::v8i8, Expand);
- setOperationAction(ISD::AND, MVT::v4i16, Expand);
- setOperationAction(ISD::AND, MVT::v2i32, Expand);
- setOperationAction(ISD::AND, MVT::v1i64, Expand);
- setOperationAction(ISD::OR, MVT::v8i8, Expand);
- setOperationAction(ISD::OR, MVT::v4i16, Expand);
- setOperationAction(ISD::OR, MVT::v2i32, Expand);
- setOperationAction(ISD::OR, MVT::v1i64, Expand);
- setOperationAction(ISD::XOR, MVT::v8i8, Expand);
- setOperationAction(ISD::XOR, MVT::v4i16, Expand);
- setOperationAction(ISD::XOR, MVT::v2i32, Expand);
- setOperationAction(ISD::XOR, MVT::v1i64, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::MULHS, MMXTy, Expand);
+ setOperationAction(ISD::AND, MMXTy, Expand);
+ setOperationAction(ISD::OR, MMXTy, Expand);
+ setOperationAction(ISD::XOR, MMXTy, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
+ setOperationAction(ISD::SELECT, MMXTy, Expand);
+ setOperationAction(ISD::BITCAST, MMXTy, Expand);
+ }
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
- setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
- setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
- setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
- setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
-
- if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
+
+ if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -948,12 +791,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
@@ -967,6 +811,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
@@ -1016,6 +861,7 @@ void X86TargetLowering::resetOperationActions() {
continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
@@ -1039,6 +885,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
@@ -1094,39 +942,20 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
- setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
+ for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ }
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
- setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
- // There is no BLENDI for byte vectors. We don't need to custom lower
- // some vselects for now.
+ // We directly match byte blends in the backend as they match the VSELECT
+ // condition form.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
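
PBLENDVB selects each byte lane by the top bit of the corresponding mask byte, which is exactly VSELECT's condition form once i1 masks are sign-extended — that is why v16i8 VSELECT can stay Legal here. A standalone byte-blend model, not the intrinsic itself:

    #include <cstdint>
    #include <iostream>

    static void pblendvb(const uint8_t Mask[16], const uint8_t A[16],
                         const uint8_t B[16], uint8_t Out[16]) {
      for (int i = 0; i < 16; ++i)
        Out[i] = (Mask[i] & 0x80) ? B[i] : A[i]; // top mask bit picks the lane
    }

    int main() {
      uint8_t A[16], B[16], M[16], Out[16];
      for (int i = 0; i < 16; ++i) {
        A[i] = 0x11; B[i] = 0x22;
        M[i] = (i % 2) ? 0xFF : 0x00; // sign-extended i1: all-ones or zero
      }
      pblendvb(M, A, B, Out);
      for (int i = 0; i < 16; ++i)
        std::cout << std::hex << (int)Out[i] << ' '; // 11 22 11 22 ...
      std::cout << '\n';
    }
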
@@ -1137,6 +966,21 @@ void X86TargetLowering::resetOperationActions() {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
+ // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
// i8 and i16 vectors are custom because the source register and source
// memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
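
Each load-extend action marked Legal above corresponds to a PMOVSX/PMOVZX form that loads the narrow vector and widens every lane in one instruction. A standalone scalar model of what a v8i16 SEXTLOAD/ZEXTLOAD from v8i8 memory computes:

    #include <cstdint>
    #include <iostream>

    void sextload_v8i16_v8i8(const int8_t *Mem, int16_t *Out) {
      for (int i = 0; i < 8; ++i) Out[i] = Mem[i];  // sign-extends each lane
    }
    void zextload_v8i16_v8i8(const uint8_t *Mem, uint16_t *Out) {
      for (int i = 0; i < 8; ++i) Out[i] = Mem[i];  // zero-extends each lane
    }

    int main() {
      int8_t Mem[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
      int16_t S[8]; uint16_t Z[8];
      sextload_v8i16_v8i8(Mem, S);
      zextload_v8i16_v8i8(reinterpret_cast<uint8_t *>(Mem), Z);
      std::cout << S[0] << ' ' << Z[0] << '\n'; // -1 vs 255
    }
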
@@ -1160,6 +1004,10 @@ void X86TargetLowering::resetOperationActions() {
}
if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
+
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
@@ -1180,7 +1028,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
@@ -1252,11 +1100,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
- setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
-
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
@@ -1293,16 +1136,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
- // Don't lower v32i8 because there is no 128-bit byte mul
+ setOperationAction(ISD::MUL, MVT::v32i8, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
- setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
-
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1316,6 +1156,21 @@ void X86TargetLowering::resetOperationActions() {
// Custom CTPOP always performs better on natively supported v8i32
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
+
+ // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1330,7 +1185,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
- // Don't lower v32i8 because there is no 128-bit byte mul
+ setOperationAction(ISD::MUL, MVT::v32i8, Custom);
}
// In the customized shift lowering, the legal cases in AVX2 will be
@@ -1360,6 +1215,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
@@ -1367,6 +1223,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
+ if (Subtarget->hasInt256())
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
@@ -1388,7 +1248,7 @@ void X86TargetLowering::resetOperationActions() {
}
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
@@ -1401,11 +1261,27 @@ void X86TargetLowering::resetOperationActions() {
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
+ setOperationAction(ISD::SUB, MVT::i1, Custom);
+ setOperationAction(ISD::ADD, MVT::i1, Custom);
+ setOperationAction(ISD::MUL, MVT::i1, Custom);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
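
Marking i1 SUB/ADD/MUL as Custom above is cheap because arithmetic in Z/2Z collapses to the mask-register bitwise operations that are already Legal here: add and sub are XOR, mul is AND. A standalone exhaustive check of those identities:

    #include <cassert>

    int main() {
      for (unsigned a = 0; a <= 1; ++a)
        for (unsigned b = 0; b <= 1; ++b) {
          assert(((a + b) & 1u) == (a ^ b)); // ADD i1 == XOR
          assert(((a - b) & 1u) == (a ^ b)); // SUB i1 == XOR
          assert(((a * b) & 1u) == (a & b)); // MUL i1 == AND
        }
    }
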
@@ -1450,28 +1326,49 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ if (Subtarget->hasDQI()) {
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ }
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
+ if (Subtarget->hasDQI()) {
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
+ }
+ setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
@@ -1488,6 +1385,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
@@ -1517,10 +1416,23 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
}
-
+ if (Subtarget->hasDQI()) {
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i64, Legal);
+ }
// Custom lower several nodes.
for (MVT VT : MVT::vector_valuetypes()) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize == 1) {
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ }
+ if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
// Extract subvector is special because the value type
// (result) is 256/128-bit but the source is 512-bit wide.
if (VT.is128BitVector() || VT.is256BitVector()) {
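
For element types of at least 32 bits in vectors up to 512 bits, MGATHER and MSCATTER get Custom lowering above. Semantically a masked gather performs one indexed load per active lane, with inactive lanes keeping the pass-through value; a standalone scalar model:

    #include <cstdint>
    #include <iostream>

    void gather_v8i32(const int32_t *Base, const int32_t Index[8],
                      const bool Mask[8], int32_t PassThru[8]) {
      for (int i = 0; i < 8; ++i)
        if (Mask[i]) PassThru[i] = Base[Index[i]]; // inactive lanes unchanged
    }

    int main() {
      int32_t Table[16];
      for (int i = 0; i < 16; ++i) Table[i] = 100 + i;
      int32_t Idx[8] = {0, 2, 4, 6, 8, 10, 12, 14};
      bool Mask[8] = {true, false, true, false, true, false, true, false};
      int32_t Dst[8] = {0};
      gather_v8i32(Table, Idx, Mask, Dst);
      for (int32_t V : Dst) std::cout << V << ' '; // 100 0 104 0 108 0 112 0
      std::cout << '\n';
    }
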
@@ -1533,7 +1445,7 @@ void X86TargetLowering::resetOperationActions() {
if (!VT.is512BitVector())
continue;
- if ( EltSize >= 32) {
+ if (EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -1557,7 +1469,7 @@ void X86TargetLowering::resetOperationActions() {
}
}// has AVX-512
- if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
@@ -1573,6 +1485,24 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1590,13 +1520,20 @@ void X86TargetLowering::resetOperationActions() {
}
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -1604,13 +1541,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::AND, MVT::v4i32, Legal);
setOperationAction(ISD::OR, MVT::v4i32, Legal);
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i64, Custom);
}
- // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
- // of this type with custom code.
- for (MVT VT : MVT::vector_valuetypes())
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
-
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1667,6 +1601,7 @@ void X86TargetLowering::resetOperationActions() {
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
@@ -1687,16 +1622,14 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
- if (Subtarget->is64Bit())
- setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// On Darwin, -Os means optimize for size without hurting performance,
// do not reduce the limit.
@@ -1837,8 +1770,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
if ((!IsMemset || ZeroMemset) &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) {
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -1898,6 +1830,10 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
+bool X86TargetLowering::useSoftFloat() const {
+ return Subtarget->useSoftFloat();
+}
+
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
@@ -1933,14 +1869,14 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
-// FIXME: Why this routine is here? Move to RegInfo!
-std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(MVT VT) const{
+std::pair<const TargetRegisterClass *, uint8_t>
+X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
- return TargetLowering::findRepresentativeClass(VT);
+ return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
@@ -2023,7 +1959,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
MVT::i16));
// Copy the result values into the output registers.
@@ -2038,8 +1974,12 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
- else if (VA.getLocInfo() == CCValAssign::AExt)
- ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else if (VA.getLocInfo() == CCValAssign::AExt) {
+ if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
+ ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ else
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ }
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
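
The AExt special case above matters because vector masks encode a true lane as all ones: sign-extending an i1 produces exactly that encoding, while any- or zero-extension would not. A standalone illustration of the difference:

    #include <cstdio>

    int main() {
      int TrueBit = 1;             // an i1 lane holding "true"
      int SExt = -(TrueBit & 1);   // sign-extend: all bits set (-1)
      int ZExt = (TrueBit & 1);    // zero-extend: just 1
      std::printf("sext=%08x zext=%08x\n", (unsigned)SExt, (unsigned)ZExt);
    }
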
@@ -2094,19 +2034,17 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABIs require that for returning structs by value we copy
+ // All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
- // Win32 requires us to put the sret argument to %eax as well.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
- MachineFunction &MF = DAG.getMachineFunction();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
- "SRetReturnReg should have been set in LowerFormalArguments().");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ //
+ // Checking Function.hasStructRetAttr() here is insufficient because the IR
+ // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
+ // false, then an sret argument may be implicitly inserted in the SelDAG. In
+ // either case FuncInfo->setSRetReturnReg() will have been called.
+ if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
@@ -2200,7 +2138,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- EVT CopyVT = VA.getValVT();
+ EVT CopyVT = VA.getLocVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -2210,18 +2148,24 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
+ bool RoundAfterCopy = false;
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
- isScalarFPTypeInSSEReg(VA.getValVT()))
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
+ RoundAfterCopy = (CopyVT != VA.getLocVT());
+ }
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
SDValue Val = Chain.getValue(0);
- if (CopyVT != VA.getValVT())
+ if (RoundAfterCopy)
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
+
+ if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
InFlag = Chain.getValue(2);
InVals.push_back(Val);
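
RoundAfterCopy captures that values returned on the x87 stack are copied out at 80-bit precision and only then rounded back to the declared type; the FP_ROUND is what restores f32/f64 semantics. A rough standalone analogue, using long double as a stand-in for f80:

    #include <iostream>

    int main() {
      long double Wide = 1.0L / 3.0L; // stand-in for the f80 value copied out
      float Rounded = (float)Wide;    // stand-in for the FP_ROUND back to f32
      std::cout.precision(20);
      std::cout << Wide << '\n' << Rounded << '\n';
    }
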
@@ -2281,10 +2225,11 @@ static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
SDLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
+ /*isTailCall*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -2337,7 +2282,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// If value is passed by pointer we have address passed instead of the value
// itself.
- if (VA.getLocInfo() == CCValAssign::Indirect)
+ bool ExtendedInMem = VA.isExtInLoc() &&
+ VA.getValVT().getScalarType() == MVT::i1;
+
+ if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
@@ -2355,9 +2303,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- return DAG.getLoad(ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ return ExtendedInMem ?
+ DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
}
}
@@ -2393,12 +2343,11 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
}
const Function *Fn = MF.getFunction();
- bool NoImplicitFloatOps = Fn->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
- assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
+ bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
+ bool isSoftFloat = Subtarget->useSoftFloat();
+ assert(!(isSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
- !Subtarget->hasSSE1())
+ if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
// registers.
return None;
@@ -2421,6 +2370,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
@@ -2505,7 +2455,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
- if (RegVT.isVector())
+ if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
@@ -2523,24 +2473,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- // The x86-64 ABIs require that for returning structs by value we copy
- // the sret argument into %rax/%eax (depending on ABI) for the return.
- // Win32 requires us to put the sret argument to %eax as well.
- // Save the argument into a virtual register so that we can access it
- // from the return points.
- if (Ins[i].Flags.isSRet()) {
- unsigned Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- MVT PtrTy = getPointerTy();
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
- FuncInfo->setSRetReturnReg(Reg);
- }
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
- break;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // All x86 ABIs require that for returning structs by value we copy the
+ // sret argument into %rax/%eax (depending on ABI) for the return. Save
+ // the argument into a virtual register so that we can access it from the
+ // return points.
+ if (Ins[i].Flags.isSRet()) {
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
}
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ break;
}
}
@@ -2560,10 +2507,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MFI->CreateFixedObject(1, StackSize, true));
}
+ MachineModuleInfo &MMI = MF.getMMI();
+ const Function *WinEHParent = nullptr;
+ if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
+ WinEHParent = MMI.getWinEHParent(Fn);
+ bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
+ bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
+
// Figure out if XMM registers are in use.
- assert(!(MF.getTarget().Options.UseSoftFloat &&
- Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) &&
+ assert(!(Subtarget->useSoftFloat() &&
+ Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
// 64-bit calling conventions support varargs and register parameters, so we
@@ -2572,10 +2525,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Find the first unallocated argument registers.
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
- unsigned NumIntRegs =
- CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
- unsigned NumXMMRegs =
- CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
@@ -2599,7 +2550,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
if (IsWin64) {
- const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2625,7 +2575,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
@@ -2641,9 +2591,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getRegSaveFrameIndex()));
+ FuncInfo->getRegSaveFrameIndex(), dl));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getVarArgsFPOffset()));
+ FuncInfo->getVarArgsFPOffset(), dl));
SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
LiveXMMRegs.end());
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
@@ -2652,6 +2602,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ } else if (IsWinEHOutlined) {
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
+ /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
+
+ MMI.getWinEHFuncInfo(Fn)
+ .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
+ FuncInfo->getRegSaveFrameIndex();
+
+ // Store the second integer parameter (rdx) into rsp+16 relative to the
+ // stack pointer at the entry of the function.
+ SDValue RSFIN =
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
+ unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, RSFIN,
+ MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
+ /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
}
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
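
The rsp+16 arithmetic above follows the Win64 shadow-space layout: the return address is at [rsp] on entry and the four register parameters home at rsp+8 through rsp+32, so rdx, the second integer parameter, homes at rsp+16. A standalone restatement of that offset calculation, assuming the standard Win64 layout:

    #include <cassert>

    int main() {
      int OffsetOfLocalArea = 0;              // stand-in for the TFI value
      int HomeOffset = OffsetOfLocalArea + 8; // skip the return address
      int SecondParamHome = HomeOffset + 8;   // rcx homes first, rdx second
      assert(SecondParamHome == 16);
    }
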
@@ -2718,6 +2689,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
+ if (IsWinEHParent) {
+ int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
+ MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
+ SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
+ Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
+ MachinePointerInfo::getFixedStack(UnwindHelpFI),
+ /*isVolatile=*/true,
+ /*isNonTemporal=*/false, /*Alignment=*/0);
+ }
+
return Chain;
}
@@ -2728,7 +2710,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
@@ -2874,7 +2856,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(
- Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
+ Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
@@ -2888,8 +2870,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handled later.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2912,7 +2893,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
- if (RegVT.is128BitVector()) {
+ if (Arg.getValueType().isVector() &&
+ Arg.getValueType().getScalarType() == MVT::i1)
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
@@ -3002,12 +2986,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
- DAG.getConstant(NumXMMRegs, MVT::i8)));
+ DAG.getConstant(NumXMMRegs, dl,
+ MVT::i8)));
}
if (isVarArg && IsMustTail) {
@@ -3051,7 +3036,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Flags.isByVal()) {
// Copy relative to framepointer.
- SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl,
RegInfo->getStackRegister(),
@@ -3124,11 +3109,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
- } else if (Subtarget->isPICStyleRIPRel() &&
- isa<Function>(GV) &&
- cast<Function>(GV)->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NonLazyBind)) {
+ } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -3168,7 +3150,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
+ } else if (Subtarget->isTarget64BitILP32() &&
+ Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
@@ -3179,8 +3162,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytesToPop, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ DAG.getIntPtrConstant(NumBytesToPop, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
@@ -3188,7 +3171,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (isTailCall)
- Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+ Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
@@ -3197,8 +3180,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3212,6 +3195,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
+ MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -3237,8 +3221,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytesToPop, true),
- DAG.getIntPtrConstant(NumBytesForCalleeToPop,
+ DAG.getIntPtrConstant(NumBytesToPop, dl, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
@@ -3286,11 +3270,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetMachine &TM = MF.getTarget();
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
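
AlignMask is the usual power-of-two rounding mask: with StackAlignment a power of two, (Size + AlignMask) & ~AlignMask rounds a byte count up to the next aligned boundary. A standalone check of the idiom:

    #include <cassert>
    #include <cstdint>

    uint64_t alignTo(uint64_t Size, uint64_t StackAlignment) {
      uint64_t AlignMask = StackAlignment - 1; // requires power-of-two align
      return (Size + AlignMask) & ~AlignMask;
    }

    int main() {
      assert(alignTo(0, 16) == 0);
      assert(alignTo(1, 16) == 16);
      assert(alignTo(16, 16) == 16);
      assert(alignTo(17, 16) == 32);
    }
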
@@ -3327,7 +3308,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return false;
} else {
unsigned Opcode = Def->getOpcode();
- if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+ if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
+ Opcode == X86::LEA64_32r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
@@ -3392,6 +3374,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
+ // Win64 functions have extra shadow space for argument homing. Don't do the
+ // sibcall if the caller and callee have mismatched expectations for this
+ // space.
+ if (IsCalleeWin64 != IsCallerWin64)
+ return false;
+
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
@@ -3403,8 +3391,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3516,8 +3503,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const X86InstrInfo *TII =
- static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3614,17 +3600,6 @@ static bool isTargetShuffle(unsigned Opcode) {
}
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
- SDValue V1, SelectionDAG &DAG) {
- switch(Opc) {
- default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
- case X86ISD::MOVDDUP:
- return DAG.getNode(Opc, dl, VT, V1);
- }
-}
-
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
@@ -3634,21 +3609,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
case X86ISD::PSHUFLW:
case X86ISD::VPERMILPI:
case X86ISD::VPERMI:
- return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
- }
-}
-
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
- SDValue V1, SDValue V2, unsigned TargetMask,
- SelectionDAG &DAG) {
- switch(Opc) {
- default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGNR:
- case X86ISD::VALIGN:
- case X86ISD::SHUFP:
- case X86ISD::VPERM2X128:
- return DAG.getNode(Opc, dl, VT, V1, V2,
- DAG.getConstant(TargetMask, MVT::i8));
+ return DAG.getNode(Opc, dl, VT, V1,
+ DAG.getConstant(TargetMask, dl, MVT::i8));
}
}
@@ -3671,8 +3633,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3759,13 +3720,13 @@ static bool isX86CCUnsigned(unsigned X86CC) {
/// TranslateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code, returning the condition code and the LHS/RHS
/// of the comparison to make.
-static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
- RHS = DAG.getConstant(0, RHS.getValueType());
+ RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
@@ -3774,7 +3735,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
}
if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
- RHS = DAG.getConstant(0, RHS.getValueType());
+ RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
}
}
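
The integer rewrites in this hunk (X > -1 becomes a sign test, X < 1 becomes X <= 0) rely on ordinary signed-comparison identities. A quick self-contained sanity check, assuming nothing beyond standard C++:

    #include <cassert>

    int main() {
      for (int x = -4; x <= 4; ++x) {
        assert((x > -1) == (x >= 0)); // X > -1  ->  !sign(X)  (X86::COND_NS)
        assert((x < 1) == (x <= 0));  // X < 1   ->  X <= 0    (X86::COND_LE)
      }
      return 0;
    }
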
@@ -3939,849 +3900,6 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
-/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFD. That is, it doesn't reference the other
-/// operand; by default it matches the first operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT,
- bool TestSecondOperand = false) {
- if (VT != MVT::v4f32 && VT != MVT::v4i32 &&
- VT != MVT::v2f64 && VT != MVT::v2i64)
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
- unsigned Lo = TestSecondOperand ? NumElems : 0;
- unsigned Hi = Lo + NumElems;
-
- for (unsigned i = 0; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], (int)Lo, (int)Hi))
- return false;
-
- return true;
-}
-
-/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
- if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
- return false;
-
- // Lower quadword copied in order or undef.
- if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
- return false;
-
- // Upper quadword shuffled.
- for (unsigned i = 4; i != 8; ++i)
- if (!isUndefOrInRange(Mask[i], 4, 8))
- return false;
-
- if (VT == MVT::v16i16) {
- // Lower quadword copied in order or undef.
- if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
- return false;
-
- // Upper quadword shuffled.
- for (unsigned i = 12; i != 16; ++i)
- if (!isUndefOrInRange(Mask[i], 12, 16))
- return false;
- }
-
- return true;
-}
-
-/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
- if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
- return false;
-
- // Upper quadword copied in order.
- if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
- return false;
-
- // Lower quadword shuffled.
- for (unsigned i = 0; i != 4; ++i)
- if (!isUndefOrInRange(Mask[i], 0, 4))
- return false;
-
- if (VT == MVT::v16i16) {
- // Upper quadword copied in order.
- if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
- return false;
-
- // Lower quadword shuffled.
- for (unsigned i = 8; i != 12; ++i)
- if (!isUndefOrInRange(Mask[i], 8, 12))
- return false;
- }
-
- return true;
-}
-
-/// \brief Return true if the mask specifies a shuffle of elements that is
-/// suitable for input to intralane (palignr) or interlane (valign) vector
-/// right-shift.
-static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- // Do not handle 64-bit element shuffles with palignr.
- if (NumLaneElts == 2)
- return false;
-
- for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
- unsigned i;
- for (i = 0; i != NumLaneElts; ++i) {
- if (Mask[i+l] >= 0)
- break;
- }
-
- // Lane is all undef, go to next lane
- if (i == NumLaneElts)
- continue;
-
- int Start = Mask[i+l];
-
-    // Make sure it's in this lane in one of the sources
- if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
- !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
- return false;
-
- // If not lane 0, then we must match lane 0
- if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
- return false;
-
- // Correct second source to be contiguous with first source
- if (Start >= (int)NumElts)
- Start -= NumElts - NumLaneElts;
-
- // Make sure we're shifting in the right direction.
- if (Start <= (int)(i+l))
- return false;
-
- Start -= i;
-
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != NumLaneElts; ++i) {
- int Idx = Mask[i+l];
-
-      // Make sure it's in this lane
- if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
- !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
- return false;
-
- // If not lane 0, then we must match lane 0
- if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
- return false;
-
- if (Idx >= (int)NumElts)
- Idx -= NumElts - NumLaneElts;
-
- if (!isUndefOrEqual(Idx, Start+i))
- return false;
-
- }
- }
-
- return true;
-}
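
For intuition about what this matcher accepts: PALIGNR concatenates its two 16-byte sources and shifts the pair right by an immediate byte count. A toy reference model of that semantics (hypothetical helper, not an LLVM API):

    #include <array>
    #include <cstdint>

    // Reference PALIGNR on one 128-bit lane: shift the 32-byte concatenation
    // {Hi:Lo} right by Imm bytes and keep the low 16 bytes of the result.
    std::array<uint8_t, 16> palignr(const std::array<uint8_t, 16> &Lo,
                                    const std::array<uint8_t, 16> &Hi,
                                    unsigned Imm) {
      std::array<uint8_t, 16> Result{};
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Src = i + Imm; // position in {Hi:Lo} after the right shift
        if (Src < 16)
          Result[i] = Lo[Src];
        else if (Src < 32)
          Result[i] = Hi[Src - 16];
        // else the byte was shifted out entirely and stays zero
      }
      return Result;
    }
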
-
-/// \brief Return true if the node specifies a shuffle of elements that is
-/// suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
- (VT.is256BitVector() && !Subtarget->hasInt256()) ||
- VT.is512BitVector())
- // FIXME: Add AVX512BW.
- return false;
-
- return isAlignrMask(Mask, VT, false);
-}
-
-/// \brief Return true if the node specifies a shuffle of elements that is
-/// suitable for input to VALIGN.
-static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- // FIXME: Add AVX512VL.
- if (!VT.is512BitVector() || !Subtarget->hasAVX512())
- return false;
- return isAlignrMask(Mask, VT, true);
-}
-
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
- unsigned NumElems) {
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
-/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 128/256-bit
-/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
-/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
-
- unsigned NumElems = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElems = NumElems/NumLanes;
-
- if (NumLaneElems != 2 && NumLaneElems != 4)
- return false;
-
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- bool symetricMaskRequired =
- (VT.getSizeInBits() >= 256) && (EltSize == 32);
-
- // VSHUFPSY divides the resulting vector into 4 chunks.
-  // The sources are also split into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
-  //  SRC2 =>   Y7   Y6   Y5   Y4   Y3   Y2   Y1   Y0
- //
- // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
- // Y3..Y0, Y3..Y0, X3..X0, X3..X0
- //
- // VSHUFPDY divides the resulting vector into 4 chunks.
-  // The sources are also split into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X3 X2 X1 X0
- // SRC2 => Y3 Y2 Y1 Y0
- //
- // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
- //
- SmallVector<int, 4> MaskVal(NumLaneElems, -1);
- unsigned HalfLaneElems = NumLaneElems/2;
- for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
- for (unsigned i = 0; i != NumLaneElems; ++i) {
- int Idx = Mask[i+l];
- unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
- if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
- return false;
- // For VSHUFPSY, the mask of the second half must be the same as the
- // first but with the appropriate offsets. This works in the same way as
- // VPERMILPS works with masks.
- if (!symetricMaskRequired || Idx < 0)
- continue;
- if (MaskVal[i] < 0) {
- MaskVal[i] = Idx - l;
- continue;
- }
- if ((signed)(Idx - l) != MaskVal[i])
- return false;
- }
- }
-
- return true;
-}
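
A concrete mask the predicate above accepts, matching the VSHUFPSY diagram: within each 4-element lane the first two results come from V1's lane and the last two from V2's lane, and for 32-bit elements both lanes must use identical in-lane offsets. Illustrative only:

    // VSHUFPSY-compatible v8f32 mask: both lanes use offsets {0, 1, 0, 1}.
    static const int ValidShufpsMask[8] = {0, 1, 8,  9,   // lane 0
                                           4, 5, 12, 13}; // lane 1
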
-
-/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
- return isUndefOrEqual(Mask[0], 6) &&
- isUndefOrEqual(Mask[1], 7) &&
- isUndefOrEqual(Mask[2], 2) &&
- isUndefOrEqual(Mask[3], 3);
-}
-
-/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-/// <2, 3, 2, 3>
-static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- return isUndefOrEqual(Mask[0], 2) &&
- isUndefOrEqual(Mask[1], 3) &&
- isUndefOrEqual(Mask[2], 2) &&
- isUndefOrEqual(Mask[3], 3);
-}
-
-/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i + NumElems))
- return false;
-
- for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
- return false;
-
- return true;
-}
-
-/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
- return false;
-
- for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i + e], i + NumElems))
- return false;
-
- return true;
-}
-
-/// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to INSERTPS.
-/// i.e., if all but one element come from the same vector.
-static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
- // TODO: Deal with AVX's VINSERTPS
- if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
- return false;
-
- unsigned CorrectPosV1 = 0;
- unsigned CorrectPosV2 = 0;
- for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
- if (Mask[i] == -1) {
- ++CorrectPosV1;
- ++CorrectPosV2;
- continue;
- }
-
- if (Mask[i] == i)
- ++CorrectPosV1;
- else if (Mask[i] == i + 4)
- ++CorrectPosV2;
- }
-
- if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
- // We have 3 elements (undefs count as elements from any vector) from one
- // vector, and one from another.
- return true;
-
- return false;
-}
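
A worked instance of the counting logic above: for the mask <0, 1, 6, 3>, elements 0, 1 and 3 sit at their identity positions in V1 (CorrectPosV1 == 3) while element 2 comes from V2, so the mask is INSERTPS-eligible; <0, 5, 6, 3> is rejected because only two elements from either source are in place.

    static const int InsertPSMaskOK[4]  = {0, 1, 6, 3}; // accepted
    static const int InsertPSMaskBad[4] = {0, 5, 6, 3}; // rejected
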
-
-//
-// Some special combinations that can be optimized.
-//
-static
-SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getSimpleValueType(0);
- SDLoc dl(SVOp);
-
- if (VT != MVT::v8i32 && VT != MVT::v8f32)
- return SDValue();
-
- ArrayRef<int> Mask = SVOp->getMask();
-
- // These are the special masks that may be optimized.
- static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
- static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
- bool MatchEvenMask = true;
- bool MatchOddMask = true;
- for (int i=0; i<8; ++i) {
- if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
- MatchEvenMask = false;
- if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
- MatchOddMask = false;
- }
-
- if (!MatchEvenMask && !MatchOddMask)
- return SDValue();
-
- SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
-
- SDValue Op0 = SVOp->getOperand(0);
- SDValue Op1 = SVOp->getOperand(1);
-
- if (MatchEvenMask) {
- // Shift the second operand right to 32 bits.
- static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
- Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
- } else {
- // Shift the first operand left to 32 bits.
- static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
- Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
- }
- static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
- return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
-}
-
-/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
- bool HasInt256, bool V2IsSplat = false) {
-
- assert(VT.getSizeInBits() >= 128 &&
- "Unsupported vector type for unpckl");
-
- unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
- return false;
-
- assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
- "Unsupported vector type for unpckh");
-
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
- return false;
- }
- }
- }
-
- return true;
-}
-
-/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
- bool HasInt256, bool V2IsSplat = false) {
- assert(VT.getSizeInBits() >= 128 &&
- "Unsupported vector type for unpckh");
-
- unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
- return false;
-
- assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
- "Unsupported vector type for unpckh");
-
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j+NumElts))
- return false;
- }
- }
- }
- return true;
-}
-
-/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
-/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
-/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
- unsigned NumElts = VT.getVectorNumElements();
- bool Is256BitVec = VT.is256BitVector();
-
- if (VT.is512BitVector())
- return false;
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
-
- if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
- return false;
-
- // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
-  // FIXME: Need a better way to get rid of this; there's no latency difference
-  // between UNPCKLPD and MOVDDUP. The latter should always be checked first and
-  // the former later. We should also remove the "_undef" special mask.
- if (NumElts == 4 && Is256BitVec)
- return false;
-
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
-
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
- }
- }
-
- return true;
-}
-
-/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
-/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
-/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
- unsigned NumElts = VT.getVectorNumElements();
-
- if (VT.is512BitVector())
- return false;
-
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
-
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
- return false;
-
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
- }
- }
- return true;
-}
-
-// Match the INSERTI64x4/INSERTF64x4 instructions: (src0[0], src1[0]) or
-// (src1[0], src0[1]), i.e. manipulation of 256-bit sub-vectors.
-static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
- if (!VT.is512BitVector())
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
- unsigned HalfSize = NumElts/2;
- if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
- if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
- *Imm = 1;
- return true;
- }
- }
- if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
- if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
- *Imm = 0;
- return true;
- }
- }
- return false;
-}
-
-/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVSS,
-/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
- if (VT.getVectorElementType().getSizeInBits() < 32)
- return false;
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
-
- if (!isUndefOrEqual(Mask[0], NumElts))
- return false;
-
- for (unsigned i = 1; i != NumElts; ++i)
- if (!isUndefOrEqual(Mask[i], i))
- return false;
-
- return true;
-}
-
-/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
-/// as permutations between 128-bit chunks or halves. As an example: this
-/// shuffle below:
-/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
-/// The first half comes from the second half of V1 and the second half from
-/// the second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
- if (!HasFp256 || !VT.is256BitVector())
- return false;
-
- // The shuffle result is divided into half A and half B. In total the two
- // sources have 4 halves, namely: C, D, E, F. The final values of A and
- // B must come from C, D, E or F.
- unsigned HalfSize = VT.getVectorNumElements()/2;
- bool MatchA = false, MatchB = false;
-
- // Check if A comes from one of C, D, E, F.
- for (unsigned Half = 0; Half != 4; ++Half) {
- if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
- MatchA = true;
- break;
- }
- }
-
- // Check if B comes from one of C, D, E, F.
- for (unsigned Half = 0; Half != 4; ++Half) {
- if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
- MatchB = true;
- break;
- }
- }
-
- return MatchA && MatchB;
-}
-
-/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
-static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- MVT VT = SVOp->getSimpleValueType(0);
-
- unsigned HalfSize = VT.getVectorNumElements()/2;
-
- unsigned FstHalf = 0, SndHalf = 0;
- for (unsigned i = 0; i < HalfSize; ++i) {
- if (SVOp->getMaskElt(i) > 0) {
- FstHalf = SVOp->getMaskElt(i)/HalfSize;
- break;
- }
- }
- for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
- if (SVOp->getMaskElt(i) > 0) {
- SndHalf = SVOp->getMaskElt(i)/HalfSize;
- break;
- }
- }
-
- return (FstHalf | (SndHalf << 4));
-}
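
Tracing the immediate computation for the documentation example, vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> on an 8-element type (halves numbered C=0, D=1, E=2, F=3):

    // HalfSize = 8 / 2 = 4
    // FstHalf  = Mask[0] / HalfSize = 4 / 4  = 1  (high half of V1)
    // SndHalf  = Mask[4] / HalfSize = 12 / 4 = 3  (high half of V2)
    unsigned Imm = 1 | (3 << 4); // == 0x31
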
-
-// Symmetric in-lane mask. Each lane has 4 elements (for imm8).
-static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EltSize < 32)
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
- Imm8 = 0;
- if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
- for (unsigned i = 0; i != NumElts; ++i) {
- if (Mask[i] < 0)
- continue;
- Imm8 |= Mask[i] << (i*2);
- }
- return true;
- }
-
- unsigned LaneSize = 4;
- SmallVector<int, 4> MaskVal(LaneSize, -1);
-
- for (unsigned l = 0; l != NumElts; l += LaneSize) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
- return false;
- if (Mask[i+l] < 0)
- continue;
- if (MaskVal[i] < 0) {
- MaskVal[i] = Mask[i+l] - l;
- Imm8 |= MaskVal[i] << (i*2);
- continue;
- }
- if (Mask[i+l] != (signed)(MaskVal[i]+l))
- return false;
- }
- }
- return true;
-}
-
-/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
-/// Note that VPERMIL mask matching differs depending on whether the underlying
-/// type is 32- or 64-bit. In VPERMILPS the high half of the mask should point
-/// to the same elements as the low half, but in the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (VT.getSizeInBits() < 256 || EltSize < 32)
- return false;
- bool symetricMaskRequired = (EltSize == 32);
- unsigned NumElts = VT.getVectorNumElements();
-
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned LaneSize = NumElts/NumLanes;
- // 2 or 4 elements in one lane
-
- SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
- for (unsigned l = 0; l != NumElts; l += LaneSize) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
- return false;
- if (symetricMaskRequired) {
- if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
- ExpectedMaskVal[i] = Mask[i+l] - l;
- continue;
- }
- if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
- return false;
- }
- }
- }
- return true;
-}
-
-/// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of
-/// what x86 movss wants: the lowest element must be the lowest element of
-/// vector 2, and the other elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
- bool V2IsSplat = false, bool V2IsUndef = false) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned NumOps = VT.getVectorNumElements();
- if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
- return false;
-
- if (!isUndefOrEqual(Mask[0], 0))
- return false;
-
- for (unsigned i = 1; i != NumOps; ++i)
- if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
- (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
- (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
- return false;
-
- return true;
-}
-
-/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
-/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8) ||
- (VT.is512BitVector() && NumElems != 16))
- return false;
-
- // "i+1" is the value the indexed mask element must have
- for (unsigned i = 0; i != NumElems; i += 2)
- if (!isUndefOrEqual(Mask[i], i+1) ||
- !isUndefOrEqual(Mask[i+1], i+1))
- return false;
-
- return true;
-}
-
-/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
-/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3())
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8) ||
- (VT.is512BitVector() && NumElems != 16))
- return false;
-
- // "i" is the value the indexed mask element must have
- for (unsigned i = 0; i != NumElems; i += 2)
- if (!isUndefOrEqual(Mask[i], i) ||
- !isUndefOrEqual(Mask[i+1], i))
- return false;
-
- return true;
-}
-
-/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
- if (!HasFp256 || !VT.is256BitVector())
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
- if (NumElts != 4)
- return false;
-
- for (unsigned i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(Mask[i], 0))
- return false;
- for (unsigned i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(Mask[i], NumElts/2))
- return false;
- return true;
-}
-
-/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// version of MOVDDUP.
-static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- unsigned e = VT.getVectorNumElements() / 2;
- for (unsigned i = 0; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
- return false;
- for (unsigned i = 0; i != e; ++i)
- if (!isUndefOrEqual(Mask[e+i], i))
- return false;
- return true;
-}
-
/// isVEXTRACTIndex - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for instruction that extract 128 or 256 bit vectors
@@ -4835,125 +3953,6 @@ bool X86::isVEXTRACT256Index(SDNode *N) {
return isVEXTRACTIndex(N, 256);
}
-/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
-/// Handles 128-bit and 256-bit.
-static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getSimpleValueType(0);
-
- assert((VT.getSizeInBits() >= 128) &&
- "Unsupported vector type for PSHUF/SHUFP");
-
- // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
- // independently on 128-bit lanes.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
- "Only supports 2, 4 or 8 elements per lane");
-
- unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
- unsigned Mask = 0;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt < 0) continue;
- Elt &= NumLaneElts - 1;
- unsigned ShAmt = (i << Shift) % 8;
- Mask |= Elt << ShAmt;
- }
-
- return Mask;
-}
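
Tracing the loop above for the v4i32 reverse shuffle <3, 2, 1, 0> (a single lane with NumLaneElts == 4, so Shift == 1 and every element contributes 2 bits):

    // Mask = 3<<0 | 2<<2 | 1<<4 | 0<<6
    unsigned Mask = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6); // == 0x1B
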
-
-/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
-static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getSimpleValueType(0);
-
- assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
- "Unsupported vector type for PSHUFHW");
-
- unsigned NumElts = VT.getVectorNumElements();
-
- unsigned Mask = 0;
- for (unsigned l = 0; l != NumElts; l += 8) {
- // 8 nodes per lane, but we only care about the last 4.
- for (unsigned i = 0; i < 4; ++i) {
- int Elt = N->getMaskElt(l+i+4);
- if (Elt < 0) continue;
- Elt &= 0x3; // only 2-bits.
- Mask |= Elt << (i * 2);
- }
- }
-
- return Mask;
-}
-
-/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
-static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getSimpleValueType(0);
-
- assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
- "Unsupported vector type for PSHUFHW");
-
- unsigned NumElts = VT.getVectorNumElements();
-
- unsigned Mask = 0;
- for (unsigned l = 0; l != NumElts; l += 8) {
- // 8 nodes per lane, but we only care about the first 4.
- for (unsigned i = 0; i < 4; ++i) {
- int Elt = N->getMaskElt(l+i);
- if (Elt < 0) continue;
- Elt &= 0x3; // only 2-bits
- Mask |= Elt << (i * 2);
- }
- }
-
- return Mask;
-}
-
-/// \brief Return the appropriate immediate to shuffle the specified
-/// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
-/// VALIGN (if InterLane is true) instructions.
-static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
- bool InterLane) {
- MVT VT = SVOp->getSimpleValueType(0);
- unsigned EltSize = InterLane ? 1 :
- VT.getVectorElementType().getSizeInBits() >> 3;
-
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
-
- int Val = 0;
- unsigned i;
- for (i = 0; i != NumElts; ++i) {
- Val = SVOp->getMaskElt(i);
- if (Val >= 0)
- break;
- }
- if (Val >= (int)NumElts)
- Val -= NumElts - NumLaneElts;
-
- assert(Val - i > 0 && "PALIGNR imm should be positive");
- return (Val - i) * EltSize;
-}
-
-/// \brief Return the appropriate immediate to shuffle the specified
-/// VECTOR_SHUFFLE mask with the PALIGNR instruction.
-static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- return getShuffleAlignrImmediate(SVOp, false);
-}
-
-/// \brief Return the appropriate immediate to shuffle the specified
-/// VECTOR_SHUFFLE mask with the VALIGN instruction.
-static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
- return getShuffleAlignrImmediate(SVOp, true);
-}
-
-
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -5028,119 +4027,6 @@ bool X86::isZeroNode(SDValue Elt) {
return false;
}
-/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
-/// match movhlps. The lower half elements should come from the upper half of
-/// V1 (and in order), and the upper half elements should come from the upper
-/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
- if (VT.getVectorNumElements() != 4)
- return false;
- for (unsigned i = 0, e = 2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i+2))
- return false;
- for (unsigned i = 2; i != 4; ++i)
- if (!isUndefOrEqual(Mask[i], i+4))
- return false;
- return true;
-}
-
-/// isScalarLoadToVector - Returns true if the node is a scalar load that
-/// is promoted to a vector. It also returns the LoadSDNode by reference if
-/// required.
-static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
- if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
- return false;
- N = N->getOperand(0).getNode();
- if (!ISD::isNON_EXTLoad(N))
- return false;
- if (LD)
- *LD = cast<LoadSDNode>(N);
- return true;
-}
-
-// Test whether the given value is a vector value which will be legalized
-// into a load.
-static bool WillBeConstantPoolLoad(SDNode *N) {
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
-
- // Check for any non-constant elements.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- switch (N->getOperand(i).getNode()->getOpcode()) {
- case ISD::UNDEF:
- case ISD::ConstantFP:
- case ISD::Constant:
- break;
- default:
- return false;
- }
-
- // Vectors of all-zeros and all-ones are materialized with special
- // instructions rather than being loaded.
- return !ISD::isBuildVectorAllZeros(N) &&
- !ISD::isBuildVectorAllOnes(N);
-}
-
-/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
-/// match movlp{s|d}. The lower half elements should come from the lower half of
-/// V1 (and in order), and the upper half elements should come from the upper
-/// half of V2 (and in order). And since V1 will become the source of the
-/// MOVLP, it must be either a vector load or a scalar load to vector.
-static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ArrayRef<int> Mask, MVT VT) {
- if (!VT.is128BitVector())
- return false;
-
- if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
- return false;
-  // If V2 is a vector load, don't do this transformation. We will try to use
-  // the load-folding SHUFPS op instead.
- if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
- return false;
-
- unsigned NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
- for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i))
- return false;
- for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
- if (!isUndefOrEqual(Mask[i], i+NumElems))
- return false;
- return true;
-}
-
-/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
-/// to an zero vector.
-/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
-static bool isZeroShuffle(ShuffleVectorSDNode *N) {
- SDValue V1 = N->getOperand(0);
- SDValue V2 = N->getOperand(1);
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = N->getMaskElt(i);
- if (Idx >= (int)NumElems) {
- unsigned Opc = V2.getOpcode();
- if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
- return false;
- } else if (Idx >= 0) {
- unsigned Opc = V1.getOpcode();
- if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !X86::isZeroNode(V1.getOperand(Idx)))
- return false;
- }
- }
- return true;
-}
-
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
@@ -5152,33 +4038,37 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Vec;
if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
- SDValue Cst = DAG.getConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
- SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
- SDValue Cst = DAG.getConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
- SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
}
} else if (VT.is512BitVector()) { // AVX-512
- SDValue Cst = DAG.getConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
- assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
- SDValue Cst = DAG.getConstant(0, MVT::i1);
- SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+
+ assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
+ && "Unexpected vector type");
+ assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
+ && "Unexpected vector type");
+ SDValue Cst = DAG.getConstant(0, dl, MVT::i1);
+ SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
llvm_unreachable("Unexpected vector type");
@@ -5186,6 +4076,162 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
+static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl,
+ unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/vectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
+ ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
+}
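
To see the index normalization in action: extracting the 128-bit chunk that contains element 5 of a v8i32 snaps the index down to the chunk boundary, so the node covers elements [4, 8):

    unsigned ElemsPerChunk    = 128 / 32;                          // 4
    unsigned NormalizedIdxVal = ((5 * 32) / 128) * ElemsPerChunk;  // 4
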
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
+/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
+/// instructions or a simple subregister reference. Idx is an index in the
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert((Vec.getValueType().is256BitVector() ||
+ Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
+}
+
+/// Generate a DAG to grab 256-bits from a 512-bit vector.
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
+}
+
+static SDValue InsertSubVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ SDLoc dl, unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant vectorWidth bits.
+ unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
+/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
+
+ // For insertion into the zero index (low half) of a 256-bit vector, it is
+ // more efficient to generate a blend with immediate instead of an insert*128.
+ // We are still creating an INSERT_SUBVECTOR below with an undef node to
+ // extend the subvector to the size of the result vector. Make sure that
+ // we are not recursing on that node by checking for undef here.
+ if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
+ Result.getOpcode() != ISD::UNDEF) {
+ EVT ResultVT = Result.getValueType();
+ SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(ResultVT);
+ SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
+ Vec, ZeroIndex);
+
+ // The blend instruction, and therefore its mask, depend on the data type.
+ MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ if (ScalarType.isFloatingPoint()) {
+ // Choose either vblendps (float) or vblendpd (double).
+ unsigned ScalarSize = ScalarType.getSizeInBits();
+ assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
+ unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
+ SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
+ }
+
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+
+ // AVX2 is needed for 256-bit integer blend support.
+ // Integers must be cast to 32-bit because there is only vpblendd;
+ // vpblendw can't be used for this because it has a handicapped mask.
+
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
+ Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ }
+
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
+}
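
The blend immediates chosen above follow the VBLENDPS/VBLENDPD convention that a set bit i selects element i from the second source. Because the freshly built Vec256 carries the new data in its low 128 bits, the mask covers exactly the low half (illustrative restatement of the constants used):

    unsigned MaskF64 = 0x03; // v4f64 (vblendpd): low two elements from Vec256
    unsigned MaskF32 = 0x0f; // v8f32/v8i32 (vblendps/vpblendd): low four
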
+
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
+}
+
+/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTOR nodes of
+/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORS.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
/// getOnesVector - Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
@@ -5194,7 +4240,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- SDValue Cst = DAG.getConstant(~0U, MVT::i32);
+ SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
SDValue Vec;
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
@@ -5212,16 +4258,6 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
-/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
-/// that point to V2 points to its first element.
-static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
- for (unsigned i = 0; i != NumElems; ++i) {
- if (Mask[i] > (int)NumElems) {
- Mask[i] = NumElems;
- }
- }
-}
-
/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of specified width.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
@@ -5258,92 +4294,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-// PromoteSplati8i16 - i16 and i8 vector types can't be used directly by
-// a generic shuffle instruction because the target has no such instructions.
-// Generate shuffles which repeat i16 and i8 several times until they can be
-// represented by v4f32 and then be manipulated by target-supported shuffles.
-static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
- MVT VT = V.getSimpleValueType();
- int NumElems = VT.getVectorNumElements();
- SDLoc dl(V);
-
- while (NumElems > 4) {
- if (EltNo < NumElems/2) {
- V = getUnpackl(DAG, dl, VT, V, V);
- } else {
- V = getUnpackh(DAG, dl, VT, V, V);
- EltNo -= NumElems/2;
- }
- NumElems >>= 1;
- }
- return V;
-}
-
-/// getLegalSplat - Generate a legal splat with supported x86 shuffles
-static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
- MVT VT = V.getSimpleValueType();
- SDLoc dl(V);
-
- if (VT.is128BitVector()) {
- V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
- int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
- &SplatMask[0]);
- } else if (VT.is256BitVector()) {
-    // To use VPERMILPS to splat scalars, the second half of indices must
- // refer to the higher part, which is a duplication of the lower one,
- // because VPERMILPS can only handle in-lane permutations.
- int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
- EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
-
- V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
- V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
- &SplatMask[0]);
- } else
- llvm_unreachable("Vector size not supported");
-
- return DAG.getNode(ISD::BITCAST, dl, VT, V);
-}
-
-/// PromoteSplat - Splat is promoted to target supported vector shuffles.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- MVT SrcVT = SV->getSimpleValueType(0);
- SDValue V1 = SV->getOperand(0);
- SDLoc dl(SV);
-
- int EltNo = SV->getSplatIndex();
- int NumElems = SrcVT.getVectorNumElements();
- bool Is256BitVec = SrcVT.is256BitVector();
-
- assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
- "Unknown how to promote splat for type");
-
- // Extract the 128-bit part containing the splat element and update
- // the splat element index when it refers to the higher register.
- if (Is256BitVec) {
- V1 = Extract128BitVector(V1, EltNo, DAG, dl);
- if (EltNo >= NumElems/2)
- EltNo -= NumElems/2;
- }
-
- // All i16 and i8 vector types can't be used directly by a generic shuffle
- // instruction because the target has no such instruction. Generate shuffles
- // which repeat i16 and i8 several times until they fit in i32, and then can
-  // be manipulated by target-supported shuffles.
- MVT EltVT = SrcVT.getVectorElementType();
- if (EltVT == MVT::i8 || EltVT == MVT::i16)
- V1 = PromoteSplati8i16(V1, DAG, EltNo);
-
- // Recreate the 256-bit vector and place the same 128-bit vector
- // into the low and high part. This is necessary because we want
- // to use VPERM* to shuffle the vectors
- if (Is256BitVec) {
- V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
- }
-
- return getLegalSplat(DAG, V1, EltNo);
-}
-
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
@@ -5467,7 +4417,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
return false;
SDValue Ptr = MaskLoad->getBasePtr();
- if (Ptr->getOpcode() == X86ISD::Wrapper)
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr->getOperand(0);
auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
@@ -5489,16 +4440,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
IsUnary = true;
break;
case X86ISD::MOVSS:
- case X86ISD::MOVSD: {
- // The index 0 always comes from the first element of the second source,
- // this is why MOVSS and MOVSD are used in the first place. The other
- // elements come from the other positions of the first source vector
- Mask.push_back(NumElems);
- for (unsigned i = 1; i != NumElems; ++i) {
- Mask.push_back(i);
- }
+ case X86ISD::MOVSD:
+ DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
break;
- }
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
@@ -5506,11 +4450,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
+ IsUnary = true;
break;
case X86ISD::MOVSHDUP:
DecodeMOVSHDUPMask(VT, Mask);
+ IsUnary = true;
break;
case X86ISD::MOVDDUP:
+ DecodeMOVDDUPMask(VT, Mask);
+ IsUnary = true;
+ break;
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
@@ -5594,148 +4543,6 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
return SDValue();
}
-/// getNumOfConsecutiveZeros - Return the number of elements of a vector
-/// shuffle operation which consecutively come from zeros. The
-/// search can start in two different directions, from left or right.
-/// We count undefs as zeros until PreferredNum is reached.
-static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
- unsigned NumElems, bool ZerosFromLeft,
- SelectionDAG &DAG,
- unsigned PreferredNum = -1U) {
- unsigned NumZeros = 0;
- for (unsigned i = 0; i != NumElems; ++i) {
- unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
- SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
- if (!Elt.getNode())
- break;
-
- if (X86::isZeroNode(Elt))
- ++NumZeros;
- else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
- NumZeros = std::min(NumZeros + 1, PreferredNum);
- else
- break;
- }
-
- return NumZeros;
-}
-
-/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
-/// correspond consecutively to elements from one of the vector operands,
-/// starting from its index OpIdx. OpNum reports which source operand was used.
-static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
- unsigned MaskI, unsigned MaskE, unsigned OpIdx,
- unsigned NumElems, unsigned &OpNum) {
- bool SeenV1 = false;
- bool SeenV2 = false;
-
- for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
- int Idx = SVOp->getMaskElt(i);
-    // Ignore undef indices
- if (Idx < 0)
- continue;
-
- if (Idx < (int)NumElems)
- SeenV1 = true;
- else
- SeenV2 = true;
-
- // Only accept consecutive elements from the same vector
- if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
- return false;
- }
-
- OpNum = SeenV1 ? 0 : 1;
- return true;
-}
-
-/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
-/// logical right shift of a vector.
-static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems =
- SVOp->getSimpleValueType(0).getVectorNumElements();
- unsigned NumZeros = getNumOfConsecutiveZeros(
- SVOp, NumElems, false /* check zeros from right */, DAG,
- SVOp->getMaskElt(0));
- unsigned OpSrc;
-
- if (!NumZeros)
- return false;
-
- // Considering the elements in the mask that are not consecutive zeros,
- // check if they consecutively come from only one of the source vectors.
- //
- // V1 = {X, A, B, C} 0
- // \ \ \ /
- // vector_shuffle V1, V2 <1, 2, 3, X>
- //
- if (!isShuffleMaskConsecutive(SVOp,
- 0, // Mask Start Index
- NumElems-NumZeros, // Mask End Index(exclusive)
- NumZeros, // Where to start looking in the src vector
- NumElems, // Number of elements in vector
- OpSrc)) // Which source operand ?
- return false;
-
- isLeft = false;
- ShAmt = NumZeros;
- ShVal = SVOp->getOperand(OpSrc);
- return true;
-}
-
-/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
-/// logical left shift of a vector.
-static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems =
- SVOp->getSimpleValueType(0).getVectorNumElements();
- unsigned NumZeros = getNumOfConsecutiveZeros(
- SVOp, NumElems, true /* check zeros from left */, DAG,
- NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
- unsigned OpSrc;
-
- if (!NumZeros)
- return false;
-
- // Considering the elements in the mask that are not consecutive zeros,
- // check if they consecutively come from only one of the source vectors.
- //
- // 0 { A, B, X, X } = V2
- // / \ / /
- // vector_shuffle V1, V2 <X, X, 4, 5>
- //
- if (!isShuffleMaskConsecutive(SVOp,
- NumZeros, // Mask Start Index
- NumElems, // Mask End Index(exclusive)
- 0, // Where to start looking in the src vector
- NumElems, // Number of elements in vector
- OpSrc)) // Which source operand ?
- return false;
-
- isLeft = true;
- ShAmt = NumZeros;
- ShVal = SVOp->getOperand(OpSrc);
- return true;
-}
-
-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- // Although the logic below support any bitwidth size, there are no
- // shift instructions which handle more than 128-bit vectors.
- if (!SVOp->getSimpleValueType(0).is128BitVector())
- return false;
-
- if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
- isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
- return true;
-
- return false;
-}
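To make the removed matching logic above concrete: a shuffle qualifies as a vector shift when, after stripping the zeroable positions, the surviving mask indices select consecutive elements from a single source. A minimal standalone sketch of that consecutiveness test in plain C++, with no LLVM types (the helper name is mine; the mask convention — 0..N-1 from V1, N..2N-1 from V2, -1 undef — mirrors the code above):

    #include <cassert>
    #include <vector>

    // Check that mask elements [MaskI, MaskE) select consecutive elements,
    // starting at OpIdx, from exactly one of the two source vectors; -1 is
    // undef and matches anything. Mirrors isShuffleMaskConsecutive above.
    static bool isConsecutive(const std::vector<int> &Mask, unsigned MaskI,
                              unsigned MaskE, unsigned OpIdx, unsigned NumElems,
                              unsigned &OpNum) {
      bool SeenV1 = false, SeenV2 = false;
      for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
        int Idx = Mask[i];
        if (Idx < 0)
          continue;
        (Idx < (int)NumElems ? SeenV1 : SeenV2) = true;
        if (Idx % NumElems != OpIdx || (SeenV1 && SeenV2))
          return false;
      }
      OpNum = SeenV1 ? 0 : 1;
      return true;
    }

    int main() {
      unsigned Op;
      // <1, 2, 3, zz>: elements 0..2 are V1[1..3] with one zero shifted in,
      // i.e. a right shift of V1 by one element.
      assert(isConsecutive({1, 2, 3, -1}, 0, 3, 1, 4, Op) && Op == 0);
      // <1, 6, 3, zz> mixes both sources and is rejected.
      assert(!isConsecutive({1, 6, 3, -1}, 0, 3, 1, 4, Op));
    }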
-
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
@@ -5749,6 +4556,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
SDLoc dl(Op);
SDValue V;
bool First = true;
+
+ // SSE4.1 - use PINSRB to insert each byte directly.
+ if (Subtarget->hasSSE41()) {
+ for (unsigned i = 0; i < 16; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v16i8);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v16i8, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i, dl));
+ }
+ }
+
+ return V;
+ }
+
+ // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
@@ -5769,7 +4599,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
- ThisElt, DAG.getConstant(8, MVT::i8));
+ ThisElt, DAG.getConstant(8, dl, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
@@ -5777,7 +4607,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
if (ThisElt.getNode())
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i/2));
+ DAG.getIntPtrConstant(i/2, dl));
}
}
@@ -5809,7 +4639,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
MVT::v8i16, V, Op.getOperand(i),
- DAG.getIntPtrConstant(i));
+ DAG.getIntPtrConstant(i, dl));
}
}
@@ -5821,13 +4651,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
// Find all zeroable elements.
- bool Zeroable[4];
+ std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
SDValue Elt = Op->getOperand(i);
Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
}
- assert(std::count_if(&Zeroable[0], &Zeroable[4],
- [](bool M) { return !M; }) > 1 &&
+ assert(Zeroable.size() - Zeroable.count() > 1 &&
"We expect at least two non-zero elements!");
// We only know how to deal with build_vector nodes where elements are either
@@ -5913,31 +4742,29 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
// Ok, we can emit an INSERTPS instruction.
- unsigned ZMask = 0;
- for (int i = 0; i < 4; ++i)
- if (Zeroable[i])
- ZMask |= 1 << i;
+ unsigned ZMask = Zeroable.to_ulong();
unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
- SDValue Result = DAG.getNode(X86ISD::INSERTPS, SDLoc(Op), MVT::v4f32, V1, V2,
- DAG.getIntPtrConstant(InsertPSMask));
- return DAG.getNode(ISD::BITCAST, SDLoc(Op), VT, Result);
+ SDLoc DL(Op);
+ SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ DAG.getIntPtrConstant(InsertPSMask, DL));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Result);
}
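For reference, the INSERTPS immediate assembled above packs three fields into one byte: bits 7:6 pick the source element, bits 5:4 pick the destination lane, and bits 3:0 zero destination lanes (Intel's documented COUNT_S/COUNT_D/ZMASK encoding). A tiny sketch of that packing (the helper name is mine):

    #include <cassert>
    #include <cstdint>

    // Build an SSE4.1 INSERTPS immediate: bits [7:6] select the source
    // element, bits [5:4] the destination lane, bits [3:0] zero lanes.
    static uint8_t insertPSImm(unsigned SrcElt, unsigned DstElt, unsigned ZMask) {
      assert(SrcElt < 4 && DstElt < 4 && ZMask < 16 && "fields out of range");
      return (uint8_t)(SrcElt << 6 | DstElt << 4 | ZMask);
    }

    int main() {
      // Insert source element 2 into destination lane 1, zeroing lane 3.
      assert(insertPSImm(2, 1, 0b1000) == 0x98);
    }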
-/// getVShift - Return a vector logical shift node.
-///
+/// Return a vector logical shift node.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, SDLoc dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
- EVT ShVT = MVT::v2i64;
+ MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
+ assert(NumBits % 8 == 0 && "Only support byte sized shifts");
+ SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opc, dl, ShVT, SrcOp,
- DAG.getConstant(NumBits,
- TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
+ DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
static SDValue
@@ -5992,9 +4819,11 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
if ((Offset % RequiredAlign) & 3)
return SDValue();
int64_t StartOffset = Offset & ~(RequiredAlign-1);
- if (StartOffset)
- Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
- Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+ if (StartOffset) {
+ SDLoc DL(Ptr);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
+ }
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
@@ -6004,9 +4833,7 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != NumElems; ++i)
- Mask.push_back(EltNo);
+ SmallVector<int, 8> Mask(NumElems, EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
@@ -6014,19 +4841,18 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
return SDValue();
}
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
-/// vector of type 'VT', see if the elements can be replaced by a single large
-/// load which has the same value as a build_vector whose operands are 'elts'.
+/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
+/// elements can be replaced by a single large load which has the same value as
+/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
-static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDLoc &DL, SelectionDAG &DAG,
bool isAfterLegalize) {
- EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
LoadSDNode *LDBase = nullptr;
@@ -6037,7 +4863,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
-
+ // Look through a bitcast.
+ if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST)
+ Elt = Elt.getOperand(0);
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
@@ -6052,7 +4880,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ EVT LdVT = Elt.getValueType();
+ // Each loaded element must be the correct fractional portion of the
+ // requested vector load.
+ if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems)
+ return SDValue();
+ if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i))
return SDValue();
LastLoadedElt = i;
}
@@ -6061,6 +4894,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
+ assert(LDBase && "Did not find base load for merging consecutive loads");
+ EVT EltVT = LDBase->getValueType(0);
+ // Ensure that the input vector size for the merged loads matches the
+ // cumulative size of the input elements.
+ if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
+ return SDValue();
if (isAfterLegalize &&
!DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
@@ -6087,6 +4926,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// TODO: The code below fires only for loading the low v2i32 / v2f32
// of a v4i32 / v4f32. It's probably worth generalizing.
+ EVT EltVT = VT.getVectorElementType();
if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
@@ -6212,8 +5052,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
const Function *F = DAG.getMachineFunction().getFunction();
- bool OptForSize = F->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
@@ -6377,95 +5216,117 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
unsigned Idx = InsertIndices[i];
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
- DAG.getIntPtrConstant(Idx));
+ DAG.getIntPtrConstant(Idx, DL));
}
return NV;
}
+static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
+ assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+ Op.getScalarValueSizeInBits() == 1 &&
+ "Can not convert non-constant vector");
+ uint64_t Immediate = 0;
+ for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
+ SDValue In = Op.getOperand(idx);
+ if (In.getOpcode() != ISD::UNDEF)
+ Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ }
+ SDLoc dl(Op);
+ MVT VT =
+ MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
+ return DAG.getConstant(Immediate, dl, VT);
+}
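The conversion above is plain bit packing: constant lane i of the i1 vector lands in bit i of the immediate, and undef lanes contribute nothing. A standalone model (name and types are mine):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Pack a vector of 0/1 lane values (-1 for undef) into a bit immediate,
    // lane i -> bit i, as the ConvertI1VectorToInteger lowering above does.
    static uint64_t packI1Vector(const std::vector<int> &Lanes) {
      uint64_t Imm = 0;
      for (size_t i = 0; i < Lanes.size(); ++i)
        if (Lanes[i] > 0) // undef (-1) and 0 lanes contribute nothing
          Imm |= uint64_t(1) << i;
      return Imm;
    }

    int main() {
      // v8i1 <1,0,1,1,undef,0,0,1> -> bits 0,2,3,7 -> 0x8D.
      assert(packI1Vector({1, 0, 1, 1, -1, 0, 0, 1}) == 0x8D);
    }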
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
- assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
+ assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
- SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
+ SDValue Cst = DAG.getTargetConstant(0, dl, MVT::i1);
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
+ SDValue Cst = DAG.getTargetConstant(1, dl, MVT::i1);
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
- bool AllContants = true;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
+ if (Imm.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // Vector has one or more non-const elements
uint64_t Immediate = 0;
- int NonConstIdx = -1;
+ SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
- unsigned NumNonConsts = 0;
- unsigned NumConsts = 0;
+ bool HasConstElts = false;
+ int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
- if (!isa<ConstantSDNode>(In)) {
- AllContants = false;
- NonConstIdx = idx;
- NumNonConsts++;
- } else {
- NumConsts++;
- if (cast<ConstantSDNode>(In)->getZExtValue())
- Immediate |= (1ULL << idx);
+ if (!isa<ConstantSDNode>(In))
+ NonConstIdx.push_back(idx);
+ else {
+ Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ HasConstElts = true;
}
- if (In != Op.getOperand(0))
+ if (SplatIdx == -1)
+ SplatIdx = idx;
+ else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
- if (AllContants) {
- SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
- DAG.getConstant(Immediate, MVT::i16));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
- DAG.getIntPtrConstant(0));
+ // For a splat, use "(select i1 splat_elt, all-ones, all-zeroes)".
+ if (IsSplat)
+ return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
+ DAG.getConstant(1, dl, VT),
+ DAG.getConstant(0, dl, VT));
+
+ // Insert the non-constant elements one by one.
+ SDValue DstVec;
+ SDValue Imm;
+ if (Immediate) {
+ MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
+ Imm = DAG.getConstant(Immediate, dl, ImmVT);
+ }
+ else if (HasConstElts)
+ Imm = DAG.getConstant(0, dl, VT);
+ else
+ Imm = DAG.getUNDEF(VT);
+ if (Imm.getValueSizeInBits() == VT.getSizeInBits())
+ DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ else {
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
}
- if (NumNonConsts == 1 && NonConstIdx != 0) {
- SDValue DstVec;
- if (NumConsts) {
- SDValue VecAsImm = DAG.getConstant(Immediate,
- MVT::getIntegerVT(VT.getSizeInBits()));
- DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
- }
- else
- DstVec = DAG.getUNDEF(VT);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
- Op.getOperand(NonConstIdx),
- DAG.getIntPtrConstant(NonConstIdx));
- }
- if (!IsSplat && (NonConstIdx != 0))
- llvm_unreachable("Unsupported BUILD_VECTOR operation");
- MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
- SDValue Select;
- if (IsSplat)
- Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
- DAG.getConstant(-1, SelectVT),
- DAG.getConstant(0, SelectVT));
- else
- Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
- DAG.getConstant((Immediate | 1), SelectVT),
- DAG.getConstant(Immediate, SelectVT));
- return DAG.getNode(ISD::BITCAST, dl, VT, Select);
+ for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
+ unsigned InsertIdx = NonConstIdx[i];
+ DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ Op.getOperand(InsertIdx),
+ DAG.getIntPtrConstant(InsertIdx, dl));
+ }
+ return DstVec;
}
/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
///
-/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
/// operation to match.
@@ -6528,11 +5389,17 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
if (i * 2 < NumElts) {
- if (V0.getOpcode() == ISD::UNDEF)
+ if (V0.getOpcode() == ISD::UNDEF) {
V0 = Op0.getOperand(0);
+ if (V0.getValueType() != VT)
+ return false;
+ }
} else {
- if (V1.getOpcode() == ISD::UNDEF)
+ if (V1.getOpcode() == ISD::UNDEF) {
V1 = Op0.getOperand(0);
+ if (V1.getValueType() != VT)
+ return false;
+ }
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
@@ -6556,7 +5423,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
/// a concat_vector.
///
-/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This is a helper function of LowerToHorizontalOp().
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
@@ -6622,12 +5489,16 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
-/// \brief Try to fold a build_vector that performs an 'addsub' into the
-/// sequence of 'vadd + vsub + blendi'.
-static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- SDLoc DL(BV);
+/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
+/// node.
+static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
EVT VT = BV->getValueType(0);
+ if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+ (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+ return SDValue();
+
+ SDLoc DL(BV);
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
@@ -6644,7 +5515,7 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
bool AddFound = false;
bool SubFound = false;
- for (unsigned i = 0, e = NumElts; i != e; i++) {
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
// Skip 'undef' values.
@@ -6682,10 +5553,16 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
SubFound = true;
// Update InVec0 and InVec1.
- if (InVec0.getOpcode() == ISD::UNDEF)
+ if (InVec0.getOpcode() == ISD::UNDEF) {
InVec0 = Op0.getOperand(0);
- if (InVec1.getOpcode() == ISD::UNDEF)
+ if (InVec0.getValueType() != VT)
+ return SDValue();
+ }
+ if (InVec1.getOpcode() == ISD::UNDEF) {
InVec1 = Op1.getOperand(0);
+ if (InVec1.getValueType() != VT)
+ return SDValue();
+ }
// Make sure that operands in input to each add/sub node always
// come from a same pair of vectors.
@@ -6715,23 +5592,12 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
+/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
+static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT VT = BV->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
- SDValue InVec0, InVec1;
-
- // Try to match an ADDSUB.
- if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
- SDValue Value = matchAddSub(BV, DAG, Subtarget);
- if (Value.getNode())
- return Value;
- }
-
- // Try to match horizontal ADD/SUB.
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
unsigned Half = NumElts/2;
@@ -6750,6 +5616,8 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
return SDValue();
+ SDLoc DL(BV);
+ SDValue InVec0, InVec1;
if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
@@ -6894,8 +5762,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
- if (Broadcast.getNode())
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
+ if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
+ return AddSub;
+ if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
+ return HorizontalOp;
+ if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG))
return Broadcast;
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -6941,32 +5813,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
EVT VecVT = MVT::v4i32;
- unsigned VecElts = 4;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
-
- // If using the new shuffle lowering, just directly insert this.
- if (ExperimentalVectorShuffleLowering)
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
-
- Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
-
- // Now we have our 32-bit value zero extended in the low element of
- // a vector. If Idx != 0, swizzle it into place.
- if (Idx != 0) {
- SmallVector<int, 4> Mask;
- Mask.push_back(Idx);
- for (unsigned i = 1; i != VecElts; ++i)
- Mask.push_back(i);
- Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
- &Mask[0]);
- }
- return DAG.getNode(ISD::BITCAST, dl, VT, Item);
+ return DAG.getNode(
+ ISD::BITCAST, dl, VT,
+ getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
}
}
@@ -6980,25 +5834,36 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector() || VT.is512BitVector()) {
+ if (VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
- Item, DAG.getIntPtrConstant(0));
+ Item, DAG.getIntPtrConstant(0, dl));
}
- assert(VT.is128BitVector() && "Expected an SSE value type!");
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
+ // We can't directly insert an i8 or i16 into a vector, so zero extend
+ // it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
- Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ if (Subtarget->hasAVX()) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ } else {
+ // Without AVX, we need to extend to a 128-bit vector and then
+ // insert into the 256-bit vector.
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ }
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -7026,17 +5891,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
-
- // If using the new shuffle lowering, just directly insert this.
- if (ExperimentalVectorShuffleLowering)
- return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
-
- // Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
- SmallVector<int, 8> MaskVec;
- for (unsigned i = 0; i != NumElems; ++i)
- MaskVec.push_back(i == Idx ? 0 : 1);
- return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
}
}
@@ -7064,9 +5919,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// elements, otherwise build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector() || VT.is512BitVector()) {
- SmallVector<SDValue, 64> V;
- for (unsigned i = 0; i != NumElems; ++i)
- V.push_back(Op.getOperand(i));
+ SmallVector<SDValue, 64> V(Op->op_begin(), Op->op_begin() + NumElems);
// Check for a build vector of consecutive loads.
if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
@@ -7099,24 +5952,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
- if (EVTBits == 8 && NumElems == 16) {
- SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 8 && NumElems == 16)
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
- if (EVTBits == 16 && NumElems == 8) {
- SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 16 && NumElems == 8)
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
- if (EVTBits == 32 && NumElems == 4) {
- SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
- if (V.getNode())
+ if (EVTBits == 32 && NumElems == 4)
+ if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this))
return V;
- }
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDValue, 8> V(NumElems);
@@ -7164,17 +6013,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
- if (LD.getNode())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
return LD;
// Check for a build vector from mostly shuffle plus few inserting.
- SDValue Sh = buildFromShuffleMostly(Op, DAG);
- if (Sh.getNode())
+ if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41()) {
+ if (Subtarget->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -7184,7 +6031,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
- Op.getOperand(i), DAG.getIntPtrConstant(i));
+ Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
return Result;
}
@@ -7236,7 +6083,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
- if(ResVT.is256BitVector())
+ if (ResVT.is256BitVector())
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
@@ -7250,8 +6097,64 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
+static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ MVT ResVT = Op.getSimpleValueType();
+ unsigned NumOfOperands = Op.getNumOperands();
+
+ assert(isPowerOf2_32(NumOfOperands) &&
+ "Unexpected number of operands in CONCAT_VECTORS");
+
+ if (NumOfOperands > 2) {
+ MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ ResVT.getVectorNumElements()/2);
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned i = 0; i < NumOfOperands/2; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ Ops.clear();
+ for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+ }
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ if (IsZeroV1 && IsZeroV2)
+ return getZeroVector(ResVT, Subtarget, DAG, dl);
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(ResVT);
+ unsigned NumElems = ResVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
+
+ V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx);
+ V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits);
+ if (IsZeroV1)
+ return V2;
+
+ V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ // Zero the upper bits of V1
+ V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits);
+ V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits);
+ if (IsZeroV2)
+ return V1;
+ return DAG.getNode(ISD::OR, dl, ResVT, V1, V2);
+}
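Since vXi1 masks live in integer mask registers, the two-operand concat above reduces to bit arithmetic: shift the second operand's bits into the upper half, clear the first operand's upper bits, then OR. A scalar model of the same sequence (names are mine):

    #include <cassert>
    #include <cstdint>

    // Concatenate two HalfLanes-wide i1 masks into one 2*HalfLanes mask,
    // represented as packed bits, mirroring LowerCONCAT_VECTORSvXi1 above.
    static uint64_t concatMasks(uint64_t Lo, uint64_t Hi, unsigned HalfLanes) {
      uint64_t LoMask = (uint64_t(1) << HalfLanes) - 1;
      return (Hi << HalfLanes) | (Lo & LoMask); // clear Lo's upper bits, OR
    }

    int main() {
      // concat v4i1 0b1010 and v4i1 0b0011 -> v8i1 0b00111010
      assert(concatMasks(0b1010, 0b0011, 4) == 0b00111010);
    }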
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getVectorElementType() == MVT::i1)
+ return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
+
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
@@ -7354,38 +6257,40 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return true;
}
-// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
-// 2013 will allow us to use it as a non-type template parameter.
-namespace {
-
-/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
-///
-/// See its documentation for details.
-bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
- if (Mask.size() != Args.size())
- return false;
- for (int i = 0, e = Mask.size(); i < e; ++i) {
- assert(*Args[i] >= 0 && "Arguments must be positive integers!");
- if (Mask[i] != -1 && Mask[i] != *Args[i])
- return false;
- }
- return true;
-}
-
-} // namespace
-
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
/// This is a fast way to test a shuffle mask against a fixed pattern:
///
-/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
+/// if (isShuffleEquivalent(V1, V2, Mask, {3, 2, 1, 0})) { ... }
///
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
-static const VariadicFunction1<
- bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ ArrayRef<int> ExpectedMask) {
+ if (Mask.size() != ExpectedMask.size())
+ return false;
+
+ int Size = Mask.size();
+
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
+
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] != -1 && Mask[i] != ExpectedMask[i]) {
+ auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
+ auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
+ if (!MaskBV || !ExpectedBV ||
+ MaskBV->getOperand(Mask[i] % Size) !=
+ ExpectedBV->getOperand(ExpectedMask[i] % Size))
+ return false;
+ }
+
+ return true;
+}
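Setting aside the build_vector look-through, the equivalence test above is element-wise comparison with -1 acting as a wildcard. A standalone sketch of that core:

    #include <cassert>
    #include <vector>

    // True if Mask matches Expected, treating -1 (undef) as a wildcard.
    // This is isShuffleEquivalent above without the build_vector
    // look-through.
    static bool masksEquivalent(const std::vector<int> &Mask,
                                const std::vector<int> &Expected) {
      if (Mask.size() != Expected.size())
        return false;
      for (size_t i = 0; i < Mask.size(); ++i)
        if (Mask[i] != -1 && Mask[i] != Expected[i])
          return false;
      return true;
    }

    int main() {
      assert(masksEquivalent({3, -1, 1, 0}, {3, 2, 1, 0}));
      assert(!masksEquivalent({3, 2, 1, 1}, {3, 2, 1, 0}));
    }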
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
@@ -7395,7 +6300,7 @@ static const VariadicFunction1<
/// example.
///
/// NB: We rely heavily on "undef" masks preserving the input lane.
-static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
SelectionDAG &DAG) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
@@ -7408,7 +6313,39 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
- return DAG.getConstant(Imm, MVT::i8);
+ return DAG.getConstant(Imm, DL, MVT::i8);
+}
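The immediate built above is the standard PSHUFD/SHUFPS encoding: two bits per destination lane, lowest pair first, with undef lanes defaulting to their own index. A sketch of the same packing (the helper name is mine):

    #include <cassert>
    #include <cstdint>

    // Encode a 4-lane shuffle mask as a PSHUFD-style imm8: two bits per
    // destination lane. Undef lanes (-1) keep their identity index, as
    // getV4X86ShuffleImm8ForMask does above.
    static uint8_t shuffleImm8(const int Mask[4]) {
      uint8_t Imm = 0;
      for (int i = 0; i < 4; ++i)
        Imm |= uint8_t(Mask[i] == -1 ? i : Mask[i]) << (2 * i);
      return Imm;
    }

    int main() {
      const int Reverse[4] = {3, 2, 1, 0};
      assert(shuffleImm8(Reverse) == 0x1B); // 0b00011011
    }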
+
+/// \brief Try to emit a blend instruction for a shuffle using bit math.
+///
+/// This is used as a fallback approach when first class blend instructions are
+/// unavailable. Currently it is only suitable for integer vectors, but could
+/// be generalized for floating point vectors if desirable.
+static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(VT.isInteger() && "Only supports integer vector types!");
+ MVT EltVT = VT.getScalarType();
+ int NumEltBits = EltVT.getSizeInBits();
+ SDValue Zero = DAG.getConstant(0, DL, EltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
+ EltVT);
+ SmallVector<SDValue, 16> MaskOps;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] != -1 && Mask[i] != i && Mask[i] != i + Size)
+ return SDValue(); // Shuffled input!
+ MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero);
+ }
+
+ SDValue V1Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, MaskOps);
+ V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
+ // We have to cast V2 around.
+ MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
+ DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask),
+ DAG.getNode(ISD::BITCAST, DL, MaskVT, V2)));
+ return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
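The bit-math blend above computes (V1 & M) | (V2 & ~M) with a constant per-lane mask M; the ANDNP node supplies the "& ~M" half. A scalar model on one 64-bit value standing in for a vector of two 32-bit lanes:

    #include <cassert>
    #include <cstdint>

    // Lanes where the mask is all-ones take V1; lanes of zeros take V2.
    // Models the AND + ANDN + OR sequence emitted above.
    static uint64_t bitBlend(uint64_t V1, uint64_t V2, uint64_t V1Mask) {
      return (V1 & V1Mask) | (V2 & ~V1Mask);
    }

    int main() {
      // Take the low 32-bit lane from V1 and the high lane from V2.
      assert(bitBlend(0x1111111122222222ULL, 0x3333333344444444ULL,
                      0x00000000FFFFFFFFULL) == 0x3333333322222222ULL);
    }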
/// \brief Try to emit a blend instruction for a shuffle.
@@ -7421,7 +6358,6 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
-
unsigned BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= Size) {
@@ -7439,7 +6375,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
case MVT::v4f64:
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8));
+ DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
@@ -7463,7 +6399,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8)));
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
}
// FALLTHROUGH
case MVT::v8i16: {
@@ -7480,7 +6416,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8)));
+ DAG.getConstant(BlendMask, DL, MVT::i8)));
}
case MVT::v16i16: {
@@ -7494,15 +6430,21 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
if (RepeatedMask[i] >= 16)
BlendMask |= 1u << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
- DAG.getConstant(BlendMask, MVT::i8));
+ DAG.getConstant(BlendMask, DL, MVT::i8));
}
}
// FALLTHROUGH
+ case MVT::v16i8:
case MVT::v32i8: {
- assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+ "256-bit byte-blends require AVX2 support!");
+
// Scale the blend by the number of bytes per element.
- int Scale = VT.getScalarSizeInBits() / 8;
- assert(Mask.size() * Scale == 32 && "Not a 256-bit vector!");
+ int Scale = VT.getScalarSizeInBits() / 8;
+
+ // This form of blend is always done on bytes. Compute the byte vector
+ // type.
+ MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
@@ -7515,19 +6457,20 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// the LLVM model for boolean values in vector elements gets the relevant
// bit set, it is set backwards and over constrained relative to x86's
// actual model.
- SDValue VSELECTMask[32];
+ SmallVector<SDValue, 32> VSELECTMask;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
- VSELECTMask[Scale * i + j] =
+ VSELECTMask.push_back(
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
- : DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8);
+ : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
+ MVT::i8));
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V2);
+ V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
return DAG.getNode(
ISD::BITCAST, DL, VT,
- DAG.getNode(ISD::VSELECT, DL, MVT::v32i8,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, VSELECTMask),
+ DAG.getNode(ISD::VSELECT, DL, BlendVT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, BlendVT, VSELECTMask),
V1, V2));
}
@@ -7536,12 +6479,45 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
}
}
-/// \brief Generic routine to lower a shuffle and blend as a decomposed set of
-/// unblended shuffles followed by an unshuffled blend.
+/// \brief Try to lower as a blend of elements from two inputs followed by
+/// a single-input permutation.
+///
+/// This matches the pattern where we can blend elements from two inputs and
+/// then reduce the shuffle to a single-input permutation.
+static SDValue lowerVectorShuffleAsBlendAndPermute(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // We build up the blend mask while checking whether a blend is a viable way
+ // to reduce the shuffle.
+ SmallVector<int, 32> BlendMask(Mask.size(), -1);
+ SmallVector<int, 32> PermuteMask(Mask.size(), -1);
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
+
+ if (BlendMask[Mask[i] % Size] == -1)
+ BlendMask[Mask[i] % Size] = Mask[i];
+ else if (BlendMask[Mask[i] % Size] != Mask[i])
+ return SDValue(); // Can't blend in the needed input!
+
+ PermuteMask[i] = Mask[i] % Size;
+ }
+
+ SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+ return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
+}
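The derivation above is pure mask arithmetic: every referenced element Mask[i] must be blendable into lane Mask[i] % Size, and the permute then routes that lane to position i; two inputs competing for one blend lane kill the transform. A sketch of just the mask split (names are mine):

    #include <cassert>
    #include <vector>

    // Split a two-input shuffle mask into a lane-preserving blend mask plus
    // a single-input permute mask, as lowerVectorShuffleAsBlendAndPermute
    // does above. Returns false when two inputs compete for one blend lane.
    static bool splitBlendPermute(const std::vector<int> &Mask,
                                  std::vector<int> &Blend,
                                  std::vector<int> &Permute) {
      int Size = (int)Mask.size();
      Blend.assign(Size, -1);
      Permute.assign(Size, -1);
      for (int i = 0; i < Size; ++i) {
        if (Mask[i] < 0)
          continue;
        int Lane = Mask[i] % Size;
        if (Blend[Lane] != -1 && Blend[Lane] != Mask[i])
          return false; // both inputs needed in one lane
        Blend[Lane] = Mask[i];
        Permute[i] = Lane;
      }
      return true;
    }

    int main() {
      std::vector<int> B, P;
      // <6, 0, 5, 3>: blend to <0, 5, 6, 3>, then permute <2, 0, 1, 3>.
      assert(splitBlendPermute({6, 0, 5, 3}, B, P));
      assert(B == std::vector<int>({0, 5, 6, 3}));
      assert(P == std::vector<int>({2, 0, 1, 3}));
    }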
+
+/// \brief Generic routine to decompose a shuffle and blend into independent
+/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
-/// operations.
+/// operations. It will try to pick the best arrangement of shuffles and
+/// blends.
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
SDValue V1,
SDValue V2,
@@ -7561,6 +6537,16 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
BlendMask[i] = i + Size;
}
+ // Try to lower with the simpler initial blend strategy unless one of the
+ // input shuffles would be a no-op. We prefer to shuffle inputs as the
+ // shuffle may be able to fold with a load or other benefit. However, when
+ // we'll have to do 2x as many shuffles in order to achieve this, blending
+ // first is a better strategy.
+ if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
+ if (SDValue BlendPerm =
+ lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
+ return BlendPerm;
+
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
@@ -7582,8 +6568,6 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
-///
-/// Note that this only handles 128-bit vector widths currently.
static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
SDValue V2,
ArrayRef<int> Mask,
@@ -7591,6 +6575,10 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumLaneElts = NumElts / NumLanes;
+
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]
@@ -7600,44 +6588,52 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] == -1)
- continue;
- assert(Mask[i] >= 0 && "Only -1 is a valid negative mask element!");
+ for (int l = 0; l < NumElts; l += NumLaneElts) {
+ for (int i = 0; i < NumLaneElts; ++i) {
+ if (Mask[l + i] == -1)
+ continue;
+ assert(Mask[l + i] >= 0 && "Only -1 is a valid negative mask element!");
- // Based on the mod-Size value of this mask element determine where
- // a rotated vector would have started.
- int StartIdx = i - (Mask[i] % Size);
- if (StartIdx == 0)
- // The identity rotation isn't interesting, stop.
- return SDValue();
+ // Get the mod-Size index and lane correct it.
+ int LaneIdx = (Mask[l + i] % NumElts) - l;
+ // Make sure it was in this lane.
+ if (LaneIdx < 0 || LaneIdx >= NumLaneElts)
+ return SDValue();
- // If we found the tail of a vector the rotation must be the missing
- // front. If we found the head of a vector, it must be how much of the head.
- int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+ // Determine where a rotated vector would have started.
+ int StartIdx = i - LaneIdx;
+ if (StartIdx == 0)
+ // The identity rotation isn't interesting, stop.
+ return SDValue();
- if (Rotation == 0)
- Rotation = CandidateRotation;
- else if (Rotation != CandidateRotation)
- // The rotations don't match, so we can't match this mask.
- return SDValue();
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : NumLaneElts - StartIdx;
- // Compute which value this mask is pointing at.
- SDValue MaskV = Mask[i] < Size ? V1 : V2;
-
- // Compute which of the two target values this index should be assigned to.
- // This reflects whether the high elements are remaining or the low elements
- // are remaining.
- SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
-
- // Either set up this value if we've not encountered it before, or check
- // that it remains consistent.
- if (!TargetV)
- TargetV = MaskV;
- else if (TargetV != MaskV)
- // This may be a rotation, but it pulls from the inputs in some
- // unsupported interleaving.
- return SDValue();
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return SDValue();
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = Mask[l + i] < NumElts ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned
+ // to. This reflects whether the high elements are remaining or the low
+ // elements are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return SDValue();
+ }
}
// Check that we successfully analyzed the mask, and normalize the results.
@@ -7648,26 +6644,28 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
else if (!Hi)
Hi = Lo;
- assert(VT.getSizeInBits() == 128 &&
- "Rotate-based lowering only supports 128-bit lowering!");
- assert(Mask.size() <= 16 &&
- "Can shuffle at most 16 bytes in a 128-bit vector!");
-
// The actual rotate instruction rotates bytes, so we need to scale the
- // rotation based on how many bytes are in the vector.
- int Scale = 16 / Mask.size();
+ // rotation based on how many bytes are in the vector lane.
+ int Scale = 16 / NumLaneElts;
- // SSSE3 targets can use the palignr instruction
+ // SSSE3 targets can use the palignr instruction.
if (Subtarget->hasSSSE3()) {
- // Cast the inputs to v16i8 to match PALIGNR.
- Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);
+ // Cast the inputs to i8 vector of correct length to match PALIGNR.
+ MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
+ Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi);
return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
- DAG.getConstant(Rotation * Scale, MVT::i8)));
+ DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ DAG.getConstant(Rotation * Scale, DL,
+ MVT::i8)));
}
+ assert(VT.getSizeInBits() == 128 &&
+ "Rotate-based lowering only supports 128-bit lowering!");
+ assert(Mask.size() <= 16 &&
+ "Can shuffle at most 16 bytes in a 128-bit vector!");
+
// Default SSE2 implementation
int LoByteShift = 16 - Rotation * Scale;
int HiByteShift = Rotation * Scale;
@@ -7677,9 +6675,9 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
- DAG.getConstant(8 * LoByteShift, MVT::i8));
+ DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
- DAG.getConstant(8 * HiByteShift, MVT::i8));
+ DAG.getConstant(HiByteShift, DL, MVT::i8));
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
}
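The rotation detection above boils down to this: within each lane, every defined mask element implies a start offset i - (Mask[i] mod lane size), and all such offsets must agree on one nonzero rotation. A single-lane sketch (the name is mine; it returns 0 when no rotation matches):

    #include <cassert>
    #include <vector>

    // Detect the rotation amount implied by a one-lane shuffle mask, in the
    // spirit of lowerVectorShuffleAsByteRotate above; -1 means undef.
    static int matchRotation(const std::vector<int> &Mask) {
      int Size = (int)Mask.size(), Rotation = 0;
      for (int i = 0; i < Size; ++i) {
        if (Mask[i] < 0)
          continue;
        int StartIdx = i - (Mask[i] % Size);
        if (StartIdx == 0)
          return 0; // identity rotation isn't interesting
        int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
        if (Rotation && Rotation != Candidate)
          return 0; // inconsistent offsets: not a rotation
        Rotation = Candidate;
      }
      return Rotation;
    }

    int main() {
      // Both spellings from the comment above rotate v8 inputs by 3.
      assert(matchRotation({11, 12, 13, 14, 15, 0, 1, 2}) == 3);
      assert(matchRotation({-1, 12, 13, 14, -1, -1, 1, -1}) == 3);
    }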
@@ -7696,6 +6694,11 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2) {
SmallBitVector Zeroable(Mask.size(), false);
+ while (V1.getOpcode() == ISD::BITCAST)
+ V1 = V1->getOperand(0);
+ while (V2.getOpcode() == ISD::BITCAST)
+ V2 = V2->getOperand(0);
+
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
@@ -7707,10 +6710,10 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
continue;
}
- // If this is an index into a build_vector node, dig out the input value and
- // use it.
+ // If this is an index into a build_vector node (which has the same number
+ // of elements), dig out the input value and use it.
SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR)
+ if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
continue;
SDValue Input = V.getOperand(M % Size);
@@ -7723,78 +6726,135 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
return Zeroable;
}
-/// \brief Try to lower a vector shuffle as a byte shift (shifts in zeros).
-///
-/// Attempts to match a shuffle mask against the PSRLDQ and PSLLDQ SSE2
-/// byte-shift instructions. The mask must consist of a shifted sequential
-/// shuffle from one of the input vectors and zeroable elements for the
-/// remaining 'shifted in' elements.
+/// \brief Try to emit a bitmask instruction for a shuffle.
///
-/// Note that this only handles 128-bit vector widths currently.
-static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
- assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+/// This handles cases where we can model a blend exactly as a bitmask due to
+/// one of the inputs being zeroable.
+static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ MVT EltVT = VT.getScalarType();
+ int NumEltBits = EltVT.getSizeInBits();
+ MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
+ SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
+ IntEltVT);
+ if (EltVT.isFloatingPoint()) {
+ Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero);
+ AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes);
+ }
+ SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Zeroable[i])
+ continue;
+ if (Mask[i] % Size != i)
+ return SDValue(); // Not a blend.
+ if (!V)
+ V = Mask[i] < Size ? V1 : V2;
+ else if (V != (Mask[i] < Size ? V1 : V2))
+ return SDValue(); // Can only let one input through the mask.
+
+ VMaskOps[i] = AllOnes;
+ }
+ if (!V)
+ return SDValue(); // No non-zeroable elements!
+
+ SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
+ V = DAG.getNode(VT.isFloatingPoint()
+ ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
+ DL, VT, V, VMask);
+ return V;
+}
+/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
+///
+/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
+/// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
+/// matches elements from one of the input vectors shuffled to the left or
+/// right with zeroable elements 'shifted in'. It handles both the strictly
+/// bit-wise element shifts and the byte shift across an entire 128-bit double
+/// quad word lane.
+///
+/// PSLL : (little-endian) left bit shift.
+/// [ zz, 0, zz, 2 ]
+/// [ -1, 4, zz, -1 ]
+/// PSRL : (little-endian) right bit shift.
+/// [ 1, zz, 3, zz]
+/// [ -1, -1, 7, zz]
+/// PSLLDQ : (little-endian) left byte shift
+/// [ zz, 0, 1, 2, 3, 4, 5, 6]
+/// [ zz, zz, -1, -1, 2, 3, 4, -1]
+/// [ zz, zz, zz, zz, zz, zz, -1, 1]
+/// PSRLDQ : (little-endian) right byte shift
+/// [ 5, 6, 7, zz, zz, zz, zz, zz]
+/// [ -1, 5, 6, 7, zz, zz, zz, zz]
+/// [ 1, 2, -1, -1, -1, -1, zz, zz]
+static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Size = Mask.size();
- int Scale = 16 / Size;
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- for (int Shift = 1; Shift < Size; Shift++) {
- int ByteShift = Shift * Scale;
-
- // PSRLDQ : (little-endian) right byte shift
- // [ 5, 6, 7, zz, zz, zz, zz, zz]
- // [ -1, 5, 6, 7, zz, zz, zz, zz]
- // [ 1, 2, -1, -1, -1, -1, zz, zz]
- bool ZeroableRight = true;
- for (int i = Size - Shift; i < Size; i++) {
- ZeroableRight &= Zeroable[i];
- }
-
- if (ZeroableRight) {
- bool ValidShiftRight1 =
- isSequentialOrUndefInRange(Mask, 0, Size - Shift, Shift);
- bool ValidShiftRight2 =
- isSequentialOrUndefInRange(Mask, 0, Size - Shift, Size + Shift);
-
- if (ValidShiftRight1 || ValidShiftRight2) {
- // Cast the inputs to v2i64 to match PSRLDQ.
- SDValue &TargetV = ValidShiftRight1 ? V1 : V2;
- SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
- SDValue Shifted = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, V,
- DAG.getConstant(ByteShift * 8, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
- }
- }
+ auto CheckZeros = [&](int Shift, int Scale, bool Left) {
+ for (int i = 0; i < Size; i += Scale)
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
+ return false;
- // PSLLDQ : (little-endian) left byte shift
- // [ zz, 0, 1, 2, 3, 4, 5, 6]
- // [ zz, zz, -1, -1, 2, 3, 4, -1]
- // [ zz, zz, zz, zz, zz, zz, -1, 1]
- bool ZeroableLeft = true;
- for (int i = 0; i < Shift; i++) {
- ZeroableLeft &= Zeroable[i];
- }
-
- if (ZeroableLeft) {
- bool ValidShiftLeft1 =
- isSequentialOrUndefInRange(Mask, Shift, Size - Shift, 0);
- bool ValidShiftLeft2 =
- isSequentialOrUndefInRange(Mask, Shift, Size - Shift, Size);
-
- if (ValidShiftLeft1 || ValidShiftLeft2) {
- // Cast the inputs to v2i64 to match PSLLDQ.
- SDValue &TargetV = ValidShiftLeft1 ? V1 : V2;
- SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
- SDValue Shifted = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, V,
- DAG.getConstant(ByteShift * 8, MVT::i8));
- return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
- }
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, int Scale, bool Left, SDValue V) {
+ for (int i = 0; i != Size; i += Scale) {
+ unsigned Pos = Left ? i + Shift : i;
+ unsigned Low = Left ? i : i + Shift;
+ unsigned Len = Scale - Shift;
+ if (!isSequentialOrUndefInRange(Mask, Pos, Len,
+ Low + (V == V1 ? 0 : Size)))
+ return SDValue();
}
- }
+ int ShiftEltBits = VT.getScalarSizeInBits() * Scale;
+ bool ByteShift = ShiftEltBits > 64;
+ unsigned OpCode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
+ : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
+ int ShiftAmt = Shift * VT.getScalarSizeInBits() / (ByteShift ? 8 : 1);
+
+ // Normalize the scale for byte shifts to still produce an i64 element
+ // type.
+ Scale = ByteShift ? Scale / 2 : Scale;
+
+ // We need to round trip through the appropriate type for the shift.
+ MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
+ MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
+ "Illegal integer vector type");
+ V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
+
+ V = DAG.getNode(OpCode, DL, ShiftVT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ };
+
+ // SSE/AVX supports logical shifts up to 64-bit integers - so we can just
+ // keep doubling the size of the integer elements up to that. We can
+ // then shift the elements of the integer vector by whole multiples of
+ // their width within the elements of the larger integer vector. Test each
+ // multiple to see if we can find a match with the moved element indices
+ // and that the shifted in elements are all zeroable.
+ for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 128; Scale *= 2)
+ for (int Shift = 1; Shift != Scale; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Scale, Left))
+ for (SDValue V : {V1, V2})
+ if (SDValue Match = MatchShift(Shift, Scale, Left, V))
+ return Match;
+
+ // No match.
return SDValue();
}
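Concretely, each (Shift, Scale, Left) candidate tried above demands that, in every Scale-wide group, the shifted-in slots be zeroable and the surviving run keep sequential source indices. A sketch of that per-candidate test for a single-input mask (names are mine):

    #include <cassert>
    #include <vector>

    // Check one (Shift, Scale, Left) candidate the way the CheckZeros and
    // MatchShift lambdas above do: within every Scale-sized group, the Shift
    // shifted-in positions must be zeroable and the rest must keep
    // sequential single-input indices.
    static bool matchesShift(const std::vector<int> &Mask,
                             const std::vector<bool> &Zeroable, int Shift,
                             int Scale, bool Left) {
      int Size = (int)Mask.size();
      for (int i = 0; i < Size; i += Scale) {
        for (int j = 0; j < Shift; ++j)
          if (!Zeroable[i + j + (Left ? 0 : Scale - Shift)])
            return false;
        int Pos = Left ? i + Shift : i; // where the surviving run sits
        int Low = Left ? i : i + Shift; // which source elements it holds
        for (int k = 0; k < Scale - Shift; ++k)
          if (Mask[Pos + k] != -1 && Mask[Pos + k] != Low + k)
            return false;
      }
      return true;
    }

    int main() {
      // PSRLQ-style: v4i32 <1, zz, 3, zz> is a right shift by one i32
      // within each i64 half (Shift=1, Scale=2, Left=false).
      std::vector<int> Mask = {1, -1, 3, -1};
      std::vector<bool> Zero = {false, true, false, true};
      assert(matchesShift(Mask, Zero, 1, 2, /*Left=*/false));
    }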
@@ -7804,10 +6864,11 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- SDLoc DL, MVT VT, int NumElements, int Scale, bool AnyExt, SDValue InputV,
+ SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
- int EltBits = VT.getSizeInBits() / NumElements;
+ int NumElements = VT.getVectorNumElements();
+ int EltBits = VT.getScalarSizeInBits();
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
@@ -7815,10 +6876,8 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget->hasSSE41()) {
- MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
- InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
}
@@ -7831,19 +6890,19 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {0, -1, 0, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFHWMask[4] = {1, -1, -1, -1};
return DAG.getNode(
ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFHWMask, DAG)));
+ getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
}
// If this would require more than 2 unpack instructions to expand, use
@@ -7854,7 +6913,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i)
PSHUFBMask[i] =
- DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, MVT::i8);
+ DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8);
InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
@@ -7876,7 +6935,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
}
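[Illustration, not part of the patch] On SSE4.1 the VZEXT path above is a single PMOVZX instruction. For example, zero-extending the low four i16 lanes of a v8i16 into a v4i32 corresponds to the shuffle mask <0, Z, 1, Z, 2, Z, 3, Z> (viewed as v8i16), and with standard intrinsics it is just:

    #include <smmintrin.h>  // SSE4.1
    // PMOVZXWD: widen the low four u16 lanes to u32, zero-filling the rest.
    __m128i zext_lo_u16_to_u32(__m128i v) {
      return _mm_cvtepu16_epi32(v);
    }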
-/// \brief Try to lower a vector shuffle as a zero extension on any micrarch.
+/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
///
/// This routine will try to do everything in its power to cleverly lower
/// a shuffle which happens to match the pattern of a zero extend. It doesn't
@@ -7894,7 +6953,10 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Bits = VT.getSizeInBits();
- int NumElements = Mask.size();
+ int NumElements = VT.getVectorNumElements();
+ assert(VT.getScalarSizeInBits() <= 32 &&
+ "Exceeds 32-bit integer zero extension limit");
+ assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
// Define a helper function to check a particular ext-scale and lower to it if
// valid.
@@ -7905,11 +6967,11 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
if (Mask[i] == -1)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
- // Each of the extend elements needs to be zeroable.
+ // Each of the extended elements needs to be zeroable.
if (!Zeroable[i])
return SDValue();
- // We no lorger are in the anyext case.
+ // We are no longer in the anyext case.
AnyExt = false;
continue;
}
@@ -7923,7 +6985,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
return SDValue(); // Flip-flopping inputs.
if (Mask[i] % NumElements != i / Scale)
- return SDValue(); // Non-consecutive strided elemenst.
+ return SDValue(); // Non-consecutive strided elements.
}
// If we fail to find an input, we have a zero-shuffle which should always
@@ -7933,7 +6995,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, NumElements, Scale, AnyExt, InputV, Subtarget, DAG);
+ DL, VT, Scale, AnyExt, InputV, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -7945,11 +7007,34 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
// many elements.
for (; NumExtElements < NumElements; NumExtElements *= 2) {
assert(NumElements % NumExtElements == 0 &&
- "The input vector size must be divisble by the extended size.");
+ "The input vector size must be divisible by the extended size.");
if (SDValue V = Lower(NumElements / NumExtElements))
return V;
}
+ // General extends failed, but 128-bit vectors may be able to use MOVQ.
+ if (Bits != 128)
+ return SDValue();
+
+ // Returns one of the source operands if the shuffle can be reduced to a
+ // MOVQ, copying the lower 64 bits and zero-filling the upper 64 bits.
+ auto CanZExtLowHalf = [&]() {
+ for (int i = NumElements / 2; i != NumElements; ++i)
+ if (!Zeroable[i])
+ return SDValue();
+ if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
+ return V1;
+ if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
+ return V2;
+ return SDValue();
+ };
+
+ if (SDValue V = CanZExtLowHalf()) {
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
+ }
+
// No viable ext lowering found.
return SDValue();
}
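[Illustration, not part of the patch] The CanZExtLowHalf case corresponds to MOVQ. For a v4i32 shuffle, mask <0, 1, Z, Z> keeps the low 64 bits of one input and zeroes the high 64 bits:

    #include <emmintrin.h>  // SSE2
    // MOVQ: copy the low 64 bits, zero the high 64 bits (v4i32 mask <0,1,Z,Z>).
    __m128i zext_low_half(__m128i v) {
      return _mm_move_epi64(v);
    }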
@@ -7970,8 +7055,13 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
- (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR))
- return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, V.getOperand(Idx));
+ (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) {
+ // Ensure the scalar operand is the same size as the destination.
+ // FIXME: Add support for scalar truncation where possible.
+ SDValue S = V.getOperand(Idx);
+ if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, S);
+ }
return SDValue();
}
@@ -7992,7 +7082,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
/// This is a common pattern that we have especially efficient patterns to lower
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
- MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
MVT ExtVT = VT;
@@ -8059,6 +7149,10 @@ static SDValue lowerVectorShuffleAsElementInsertion(
ExtVT, V1, V2);
}
+ // This lowering only works for the low element of floating-point vectors.
+ if (VT.isFloatingPoint() && V2Index != 0)
+ return SDValue();
+
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
@@ -8077,7 +7171,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
DAG.getConstant(
- V2Index * EltVT.getSizeInBits(),
+ V2Index * EltVT.getSizeInBits()/8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
}
@@ -8090,7 +7184,7 @@ static SDValue lowerVectorShuffleAsElementInsertion(
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
-static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
+static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
@@ -8111,8 +7205,8 @@ static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
"a sorted mask where the broadcast "
"comes from V1.");
- // Go up the chain of (vector) values to try and find a scalar load that
- // we can combine with the broadcast.
+ // Go up the chain of (vector) values to find a scalar load that we can
+ // combine with the broadcast.
for (;;) {
switch (V.getOpcode()) {
case ISD::CONCAT_VECTORS: {
@@ -8149,12 +7243,12 @@ static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
- // If the scalar isn't a load we can't broadcast from it in AVX1, only with
- // AVX2.
+ // If the scalar isn't a load, we can't broadcast from it in AVX1.
+ // Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
} else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
- // We can't broadcast from a vector register w/o AVX2, and we can only
+ // We can't broadcast from a vector register without AVX2, and we can only
// broadcast from the zero-element of a vector register.
return SDValue();
}
@@ -8183,7 +7277,7 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
int V2DstIndex = -1;
bool V1UsedInPlace = false;
- for (int i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; ++i) {
// Synthesize a zero mask from the zeroable elements (includes undefs).
if (Zeroable[i]) {
ZMask |= 1 << i;
@@ -8237,7 +7331,122 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
// Insert the V2 element into the desired position.
SDLoc DL(Op);
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- DAG.getConstant(InsertPSMask, MVT::i8));
+ DAG.getConstant(InsertPSMask, DL, MVT::i8));
+}
+
+/// \brief Try to lower a shuffle as a permute of the inputs followed by an
+/// UNPCK instruction.
+///
+/// This specifically targets cases where we end up alternating between
+/// the two inputs, and so can permute them into something that feeds a single
+/// UNPCK instruction. Note that this routine only targets integer vectors
+/// because for floating point vectors we have a generalized SHUFPS lowering
+/// strategy that handles everything that doesn't *exactly* match an unpack,
+/// making this clever lowering unnecessary.
+static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(!VT.isFloatingPoint() &&
+ "This routine only supports integer vectors.");
+ assert(!isSingleInputShuffleMask(Mask) &&
+ "This routine should only be used when blending two inputs.");
+ assert(Mask.size() >= 2 && "Single element masks are invalid.");
+
+ int Size = Mask.size();
+
+ int NumLoInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) {
+ return M >= 0 && M % Size < Size / 2;
+ });
+ int NumHiInputs = std::count_if(
+ Mask.begin(), Mask.end(), [Size](int M) { return M % Size >= Size / 2; });
+
+ bool UnpackLo = NumLoInputs >= NumHiInputs;
+
+ auto TryUnpack = [&](MVT UnpackVT, int Scale) {
+ SmallVector<int, 32> V1Mask(Mask.size(), -1);
+ SmallVector<int, 32> V2Mask(Mask.size(), -1);
+
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ // Each element of the unpack contains Scale elements from this mask.
+ int UnpackIdx = i / Scale;
+
+ // We only handle the case where V1 feeds the first slots of the unpack.
+ // We rely on canonicalization to ensure this is the case.
+ if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
+ return SDValue();
+
+ // Setup the mask for this input. The indexing is tricky as we have to
+ // handle the unpack stride.
+ SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
+ VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
+ Mask[i] % Size;
+ }
+
+ // If we will have to shuffle both inputs to use the unpack, check whether
+ // we can just unpack first and shuffle the result. If so, skip this unpack.
+ if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
+ !isNoopShuffleMask(V2Mask))
+ return SDValue();
+
+ // Shuffle the inputs into place.
+ V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+
+ // Cast the inputs to the type we will use to unpack them.
+ V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2);
+
+ // Unpack the inputs and cast the result back to the desired type.
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
+ DL, UnpackVT, V1, V2));
+ };
+
+ // Try each unpack type from the largest to the smallest, looking for one
+ // that fits this mask.
+ int OrigNumElements = VT.getVectorNumElements();
+ int OrigScalarSize = VT.getScalarSizeInBits();
+ for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2) {
+ int Scale = ScalarSize / OrigScalarSize;
+ int NumElements = OrigNumElements / Scale;
+ MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), NumElements);
+ if (SDValue Unpack = TryUnpack(UnpackVT, Scale))
+ return Unpack;
+ }
+
+ // If none of the unpack-rooted lowerings worked (or were profitable) try an
+ // initial unpack.
+ if (NumLoInputs == 0 || NumHiInputs == 0) {
+ assert((NumLoInputs > 0 || NumHiInputs > 0) &&
+ "We have to have *some* inputs!");
+ int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
+
+ // FIXME: We could consider the total complexity of the permute of each
+ // possible unpacking. Or at the least we should consider how many
+ // half-crossings are created.
+ // FIXME: We could consider commuting the unpacks.
+
+ SmallVector<int, 32> PermMask;
+ PermMask.assign(Size, -1);
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
+
+ PermMask[i] =
+ 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
+ }
+ return DAG.getVectorShuffle(
+ VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
+ DL, VT, V1, V2),
+ DAG.getUNDEF(VT), PermMask);
+ }
+
+ return SDValue();
}
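[Illustration, not part of the patch] A worked example of the permute+unpack strategy: the v4i32 mask <1, 4, 3, 6> alternates between the inputs, so each input is pre-permuted and a single UNPCKL finishes the job. With standard intrinsics (assuming this exact mask):

    #include <emmintrin.h>  // SSE2
    __m128i shuffle_1_4_3_6(__m128i a, __m128i b) {
      __m128i pa = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 1)); // <a1,a3,_,_>
      __m128i pb = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); // <b0,b2,_,_>
      return _mm_unpacklo_epi32(pa, pb);                          // <a1,b0,a3,b2>
    }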
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
@@ -8259,6 +7468,11 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (isSingleInputShuffleMask(Mask)) {
+ // Use low duplicate instructions for masks that match their pattern.
+ if (Subtarget->hasSSE3())
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0}))
+ return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
+
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction.
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
@@ -8267,38 +7481,33 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
- DAG.getConstant(SHUFPDMask, MVT::i8));
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
- return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
- DAG.getConstant(SHUFPDMask, MVT::i8));
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V1,
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
-
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- MVT::v2f64, DL, V1, V2, Mask, Subtarget, DAG))
+ DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- MVT::v2f64, DL, V2, V1, InverseMask, Subtarget, DAG))
+ DL, MVT::v2f64, V2, V1, InverseMask, Subtarget, DAG))
return Insertion;
}
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
- if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
+ isShuffleEquivalent(V1, V2, Mask, {1, 3}))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
@@ -8312,9 +7521,15 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Subtarget, DAG))
return Blend;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
- return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
- DAG.getConstant(SHUFPDMask, MVT::i8));
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
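[Illustration, not part of the patch] The SHUFPD immediate above packs one selector bit per lane. For the two-input mask <1, 3>, bit 0 = (Mask[0] == 1) = 1 and bit 1 = ((Mask[1] - 2) == 1) << 1 = 2, giving imm = 3:

    #include <emmintrin.h>  // SSE2
    // SHUFPD $3: take the high double of each input -> <a[1], b[1]>.
    __m128d shuffle_1_3(__m128d a, __m128d b) {
      return _mm_shuffle_pd(a, b, 0x3);
    }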
/// \brief Handle lowering of 2-lane 64-bit integer shuffles.
@@ -8336,7 +7551,7 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isSingleInputShuffleMask(Mask)) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v2i64, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v2i64, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -8349,41 +7564,64 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
return DAG.getNode(
ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1,
- getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
- }
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
+ getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
+ }
+ assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
+ assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
+ assert(Mask[0] < 2 && "We sort V1 to be the first input.");
+ assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
+
+ // If we have a blend of two PACKUS operations and the blend aligns with the
+ // low and high halves, we can just merge the PACKUS operations. This is
+ // particularly important as it lets us merge shuffles that this routine
+ // itself creates.
+ auto GetPackNode = [](SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v2i64, V1, V2, Mask, DAG))
+ return V.getOpcode() == X86ISD::PACKUS ? V : SDValue();
+ };
+ if (SDValue V1Pack = GetPackNode(V1))
+ if (SDValue V2Pack = GetPackNode(V2))
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8,
+ Mask[0] == 0 ? V1Pack.getOperand(0)
+ : V1Pack.getOperand(1),
+ Mask[1] == 2 ? V2Pack.getOperand(0)
+ : V2Pack.getOperand(1)));
+
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, DAG))
return Shift;
- // If we have a single input from V2 insert that into V1 if we can do so
- // cheaply.
- if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
- // Try inverting the insertion since for v2 masks it is easy to do and we
- // can't reliably sort the mask one way or the other.
- int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
- Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- MVT::v2i64, DL, V2, V1, InverseMask, Subtarget, DAG))
- return Insertion;
- }
+ // When loading a scalar and then shuffling it into a vector we can often do
+ // the insertion cheaply.
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+ // Try inverting the insertion since for v2 masks it is easy to do and we
+ // can't reliably sort the mask one way or the other.
+ int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v2i64, V2, V1, InverseMask, Subtarget, DAG))
+ return Insertion;
+
+ // We have different paths for blend lowering, but they all must use the
+ // *exact* same predicate.
+ bool IsBlendSupported = Subtarget->hasSSE41();
+ if (IsBlendSupported)
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
- if (Subtarget->hasSSE41())
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
- Subtarget, DAG))
- return Blend;
-
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())
@@ -8391,6 +7629,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ // If we have direct support for blends, we should lower by decomposing into
+ // single-input permutes and a blend. That is faster than the domain cross.
+ if (IsBlendSupported)
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
+ Mask, DAG);
+
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
@@ -8401,6 +7645,24 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
+/// \brief Test whether this can be lowered with a single SHUFPS instruction.
+///
+/// This is used to disable more specialized lowerings when the shufps lowering
+/// will happen to be efficient.
+static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
+ // This routine only handles 128-bit shufps.
+ assert(Mask.size() == 4 && "Unsupported mask size!");
+
+ // To lower with a single SHUFPS we need to have the low half and high half
+ // each requiring a single input.
+ if (Mask[0] != -1 && Mask[1] != -1 && (Mask[0] < 4) != (Mask[1] < 4))
+ return false;
+ if (Mask[2] != -1 && Mask[3] != -1 && (Mask[2] < 4) != (Mask[3] < 4))
+ return false;
+
+ return true;
+}
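[Illustration, not part of the patch] isSingleSHUFPSMask accepts masks such as <2, 0, 7, 5>: both low lanes come from one input and both high lanes from the other, so a single SHUFPS suffices:

    #include <xmmintrin.h>  // SSE
    __m128 shuffle_2_0_7_5(__m128 a, __m128 b) {
      // Low half selects a[2], a[0]; high half selects b[3], b[1].
      return _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 3, 0, 2));
    }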
+
/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
@@ -8437,7 +7699,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
- getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+ getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
// high or low half formed.
@@ -8476,7 +7738,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
- getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+ getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
// a blend.
@@ -8488,7 +7750,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
}
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
- getV4X86ShuffleImm8ForMask(NewMask, DAG));
+ getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
/// \brief Lower 4-lane 32-bit floating point shuffles.
@@ -8512,36 +7774,38 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f32, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f32, V1,
Mask, Subtarget, DAG))
return Broadcast;
+ // Use even/odd duplicate instructions for masks that match their pattern.
+ if (Subtarget->hasSSE3()) {
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
+ return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
+ return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
+ }
+
if (Subtarget->hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Otherwise, use a straight shuffle of a single input vector. We pass the
// input vector to both operands to simulate this with a SHUFPS.
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
-
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4f32, DL, V1, V2,
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2,
Mask, Subtarget, DAG))
return V;
@@ -8553,8 +7817,23 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V = lowerVectorShuffleAsInsertPS(Op, V1, V2, Mask, DAG))
return V;
+
+ if (!isSingleSHUFPSMask(Mask))
+ if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
+ DL, MVT::v4f32, V1, V2, Mask, DAG))
+ return BlendPerm;
}
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1);
+
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
@@ -8586,7 +7865,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i32, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i32, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -8597,37 +7876,48 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1}))
Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
+ else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3}))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v4i32, V1, V2, Mask, DAG))
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2,
Mask, Subtarget, DAG))
return V;
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
-
- if (Subtarget->hasSSE41())
+ // We have different paths for blend lowering, but they all must use the
+ // *exact* same predicate.
+ bool IsBlendSupported = Subtarget->hasSSE41();
+ if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Masked;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1);
+
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())
@@ -8635,6 +7925,17 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ // If we have direct support for blends, we should lower by decomposing into
+ // single-input permutes and a blend. That is faster than the domain cross.
+ if (IsBlendSupported)
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
+ Mask, DAG);
+
+ // Try to lower by permuting the inputs into an unpack instruction.
+ if (SDValue Unpack =
+ lowerVectorShuffleAsUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Unpack;
+
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
@@ -8658,10 +7959,18 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
-static SDValue lowerV8I16SingleInputVectorShuffle(
- SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
+///
+/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
+/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
+/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
+/// vector, form the analogous 128-bit 8-element Mask.
+static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
+ SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ assert(VT.getScalarType() == MVT::i16 && "Bad input type!");
+ MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+
+ assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
@@ -8686,27 +7995,6 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
- // Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i16, DL, V,
- Mask, Subtarget, DAG))
- return Broadcast;
-
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v8i16, V, V, Mask, DAG))
- return Shift;
-
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
- if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
-
- // Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
- return Rotate;
-
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <=2 inputs to each half in each half. Once there, we can fall through
@@ -8811,7 +8099,7 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DAG));
+ getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
for (int &M : Mask)
if (M != -1 && M == FixIdx)
@@ -8835,10 +8123,11 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
+ DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
@@ -8849,8 +8138,8 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
- return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
- Mask);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
+ DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
@@ -9073,16 +8362,17 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL,
+ DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
@@ -9095,172 +8385,70 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(LoMask, DAG));
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
// Do a half shuffle with the high mask after shifting its values down.
for (int &M : HiMask)
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
- getV4X86ShuffleImm8ForMask(HiMask, DAG));
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
return V;
}
-/// \brief Detect whether the mask pattern should be lowered through
-/// interleaving.
-///
-/// This essentially tests whether viewing the mask as an interleaving of two
-/// sub-sequences reduces the cross-input traffic of a blend operation. If so,
-/// lowering it through interleaving is a significantly better strategy.
-static bool shouldLowerAsInterleaving(ArrayRef<int> Mask) {
- int NumEvenInputs[2] = {0, 0};
- int NumOddInputs[2] = {0, 0};
- int NumLoInputs[2] = {0, 0};
- int NumHiInputs[2] = {0, 0};
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] < 0)
- continue;
-
- int InputIdx = Mask[i] >= Size;
-
- if (i < Size / 2)
- ++NumLoInputs[InputIdx];
- else
- ++NumHiInputs[InputIdx];
-
- if ((i % 2) == 0)
- ++NumEvenInputs[InputIdx];
- else
- ++NumOddInputs[InputIdx];
- }
-
- // The minimum number of cross-input results for both the interleaved and
- // split cases. If interleaving results in fewer cross-input results, return
- // true.
- int InterleavedCrosses = std::min(NumEvenInputs[1] + NumOddInputs[0],
- NumEvenInputs[0] + NumOddInputs[1]);
- int SplitCrosses = std::min(NumLoInputs[1] + NumHiInputs[0],
- NumLoInputs[0] + NumHiInputs[1]);
- return InterleavedCrosses < SplitCrosses;
-}
-
-/// \brief Blend two v8i16 vectors using a naive unpack strategy.
-///
-/// This strategy only works when the inputs from each vector fit into a single
-/// half of that vector, and generally there are not so many inputs as to leave
-/// the in-place shuffles required highly constrained (and thus expensive). It
-/// shifts all the inputs into a single side of both input vectors and then
-/// uses an unpack to interleave these inputs in a single vector. At that
-/// point, we will fall back on the generic single input shuffle lowering.
-static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
- SDValue V2,
- MutableArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
- assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
- SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
- for (int i = 0; i < 8; ++i)
- if (Mask[i] >= 0 && Mask[i] < 4)
- LoV1Inputs.push_back(i);
- else if (Mask[i] >= 4 && Mask[i] < 8)
- HiV1Inputs.push_back(i);
- else if (Mask[i] >= 8 && Mask[i] < 12)
- LoV2Inputs.push_back(i);
- else if (Mask[i] >= 12)
- HiV2Inputs.push_back(i);
-
- int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
- int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
- (void)NumV1Inputs;
- (void)NumV2Inputs;
- assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
- assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
- assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
-
- bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
- HiV1Inputs.size() + HiV2Inputs.size();
-
- auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
- ArrayRef<int> HiInputs, bool MoveToLo,
- int MaskOffset) {
- ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
- ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
- if (BadInputs.empty())
- return V;
-
- int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int MoveOffset = MoveToLo ? 0 : 4;
+/// \brief Helper to form a PSHUFB-based shuffle+blend.
+static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, bool &V1InUse,
+ bool &V2InUse) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V1Mask[16];
+ SDValue V2Mask[16];
+ V1InUse = false;
+ V2InUse = false;
- if (GoodInputs.empty()) {
- for (int BadInput : BadInputs) {
- MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
- Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
- }
+ int Size = Mask.size();
+ int Scale = 16 / Size;
+ for (int i = 0; i < 16; ++i) {
+ if (Mask[i / Scale] == -1) {
+ V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
- if (GoodInputs.size() == 2) {
- // If the low inputs are spread across two dwords, pack them into
- // a single dword.
- MoveMask[MoveOffset] = Mask[GoodInputs[0]] - MaskOffset;
- MoveMask[MoveOffset + 1] = Mask[GoodInputs[1]] - MaskOffset;
- Mask[GoodInputs[0]] = MoveOffset + MaskOffset;
- Mask[GoodInputs[1]] = MoveOffset + 1 + MaskOffset;
- } else {
- // Otherwise pin the good inputs.
- for (int GoodInput : GoodInputs)
- MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
- }
-
- if (BadInputs.size() == 2) {
- // If we have two bad inputs then there may be either one or two good
- // inputs fixed in place. Find a fixed input, and then find the *other*
- // two adjacent indices by using modular arithmetic.
- int GoodMaskIdx =
- std::find_if(std::begin(MoveMask) + MoveOffset, std::end(MoveMask),
- [](int M) { return M >= 0; }) -
- std::begin(MoveMask);
- int MoveMaskIdx =
- ((((GoodMaskIdx - MoveOffset) & ~1) + 2) % 4) + MoveOffset;
- assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
- assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
- MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
- MoveMask[MoveMaskIdx + 1] = Mask[BadInputs[1]] - MaskOffset;
- Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
- Mask[BadInputs[1]] = MoveMaskIdx + 1 + MaskOffset;
- } else {
- assert(BadInputs.size() == 1 && "All sizes handled");
- int MoveMaskIdx = std::find(std::begin(MoveMask) + MoveOffset,
- std::end(MoveMask), -1) -
- std::begin(MoveMask);
- MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
- Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
- }
- }
-
- return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
- MoveMask);
- };
- V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo,
- /*MaskOffset*/ 0);
- V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo,
- /*MaskOffset*/ 8);
-
- // FIXME: Select an interleaving of the merge of V1 and V2 that minimizes
- // cross-half traffic in the final shuffle.
-
- // Munge the mask to be a single-input mask after the unpack merges the
- // results.
- for (int &M : Mask)
- if (M != -1)
- M = 2 * (M % 4) + (M / 8);
+ const int ZeroMask = 0x80;
+ int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
+ : ZeroMask;
+ int V2Idx = Mask[i / Scale] < Size
+ ? ZeroMask
+ : (Mask[i / Scale] - Size) * Scale + i % Scale;
+ if (Zeroable[i / Scale])
+ V1Idx = V2Idx = ZeroMask;
+ V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
+ }
+ }
+
+ if (V1InUse)
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V1),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (V2InUse)
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V2),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+
+ // If we need shuffled inputs from both, blend the two.
+ SDValue V;
+ if (V1InUse && V2InUse)
+ V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ else
+ V = V1InUse ? V1 : V2;
- return DAG.getVectorShuffle(
- MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
- DL, MVT::v8i16, V1, V2),
- DAG.getUNDEF(MVT::v8i16), Mask);
+ // Cast the result back to the correct type.
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
}
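[Illustration, not part of the patch] The byte-mask expansion above scales a lane index to byte indices and routes zeros with 0x80. A standalone sketch for one v8i16 lane (Size = 8, Scale = 2), using a hypothetical helper name:

    // Expand one v8i16 shuffle index M into the two PSHUFB byte selectors.
    // Bytes pulled from the other input are set to 0x80 (PSHUFB writes zero
    // when the high bit of the selector is set), so the results can be OR'd.
    static void expandLaneToBytes(int M, unsigned char V1B[2],
                                  unsigned char V2B[2]) {
      const int Size = 8;
      const unsigned char Zero = 0x80;
      for (int b = 0; b < 2; ++b) {
        V1B[b] = M < Size ? (unsigned char)(2 * M + b) : Zero;
        V2B[b] = M < Size ? Zero : (unsigned char)(2 * (M - Size) + b);
      }
    }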
/// \brief Generic lowering of 8-lane i16 shuffles.
@@ -9297,85 +8485,95 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return ZExt;
auto isV1 = [](int M) { return M >= 0 && M < 8; };
+ (void)isV1;
auto isV2 = [](int M) { return M >= 8; };
- int NumV1Inputs = std::count_if(Mask.begin(), Mask.end(), isV1);
int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2);
- if (NumV2Inputs == 0)
- return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG);
+ if (NumV2Inputs == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i16, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
- assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
- "to be V1-input shuffles.");
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, DAG))
+ return Shift;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V1, Mask, {0, 0, 1, 1, 2, 2, 3, 3}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V1);
+ if (isShuffleEquivalent(V1, V1, Mask, {4, 4, 5, 5, 6, 6, 7, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V1);
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v8i16, V1, V2, Mask, DAG))
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, Mask,
+ Subtarget, DAG);
+ }
+
+ assert(std::any_of(Mask.begin(), Mask.end(), isV1) &&
+ "All single-input shuffles should be canonicalized to be V1-input "
+ "shuffles.");
+
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v8i16, DL, V1, V2,
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
return V;
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
- if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
-
- if (Subtarget->hasSSE41())
+ // We have different paths for blend lowering, but they all must use the
+ // *exact* same predicate.
+ bool IsBlendSupported = Subtarget->hasSSE41();
+ if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Masked;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 2, 10, 3, 11}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {4, 12, 5, 13, 6, 14, 7, 15}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
- if (NumV1Inputs + NumV2Inputs <= 4)
- return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
-
- // Check whether an interleaving lowering is likely to be more efficient.
- // This isn't perfect but it is a strong heuristic that tends to work well on
- // the kinds of shuffles that show up in practice.
- //
- // FIXME: Handle 1x, 2x, and 4x interleaving.
- if (shouldLowerAsInterleaving(Mask)) {
- // FIXME: Figure out whether we should pack these into the low or high
- // halves.
-
- int EMask[8], OMask[8];
- for (int i = 0; i < 4; ++i) {
- EMask[i] = Mask[2*i];
- OMask[i] = Mask[2*i + 1];
- EMask[i + 4] = -1;
- OMask[i + 4] = -1;
- }
+ if (SDValue BitBlend =
+ lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return BitBlend;
- SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask);
- SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask);
+ if (SDValue Unpack =
+ lowerVectorShuffleAsUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Unpack;
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds);
+ // If we can't directly blend but can use PSHUFB, that will be better as it
+ // can both shuffle and set up the inefficient blend.
+ if (!IsBlendSupported && Subtarget->hasSSSE3()) {
+ bool V1InUse, V2InUse;
+ return lowerVectorShuffleAsPSHUFB(DL, MVT::v8i16, V1, V2, Mask, DAG,
+ V1InUse, V2InUse);
}
- int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
-
- for (int i = 0; i < 4; ++i) {
- LoBlendMask[i] = Mask[i];
- HiBlendMask[i] = Mask[i + 4];
- }
-
- SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
- SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
- LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV);
- HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV);
-
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
+ // We can always bit-blend if we have to, so the fallback strategy is to
+ // decompose into single-input permutes and blends.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
+ Mask, DAG);
}
/// \brief Check whether a compaction lowering can be done by dropping even
@@ -9461,40 +8659,31 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> OrigMask = SVOp->getMask();
- assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v16i8, V1, V2, OrigMask, DAG))
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to use a zext lowering.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return ZExt;
- int MaskStorage[16] = {
- OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
- OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
- OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11],
- OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]};
- MutableArrayRef<int> Mask(MaskStorage);
- MutableArrayRef<int> LoMask = Mask.slice(0, 8);
- MutableArrayRef<int> HiMask = Mask.slice(8, 8);
-
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
// For single-input shuffles, there are some nicer lowering tricks we can use.
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i8, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i8, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -9591,36 +8780,17 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return V;
}
- // Check whether an interleaving lowering is likely to be more efficient.
- // This isn't perfect but it is a strong heuristic that tends to work well on
- // the kinds of shuffles that show up in practice.
- //
- // FIXME: We need to handle other interleaving widths (i16, i32, ...).
- if (shouldLowerAsInterleaving(Mask)) {
- int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
- return (M >= 0 && M < 8) || (M >= 16 && M < 24);
- });
- int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
- return (M >= 8 && M < 16) || M >= 24;
- });
- int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1};
- int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1};
- bool UnpackLo = NumLoHalf >= NumHiHalf;
- MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
- MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
- for (int i = 0; i < 8; ++i) {
- TargetEMask[i] = Mask[2 * i];
- TargetOMask[i] = Mask[2 * i + 1];
- }
-
- SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
- SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
-
- return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
- MVT::v16i8, Evens, Odds);
- }
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
+ 0, 16, 1, 17, 2, 18, 3, 19,
+ // High half.
+ 4, 20, 5, 21, 6, 22, 7, 23}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
+ 8, 24, 9, 25, 10, 26, 11, 27,
+ // High half.
+ 12, 28, 13, 29, 14, 30, 15, 31}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, V2);
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
@@ -9636,52 +8806,47 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
if (Subtarget->hasSSSE3()) {
- SDValue V1Mask[16];
- SDValue V2Mask[16];
bool V1InUse = false;
bool V2InUse = false;
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- for (int i = 0; i < 16; ++i) {
- if (Mask[i] == -1) {
- V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
- } else {
- const int ZeroMask = 0x80;
- int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
- int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
- if (Zeroable[i])
- V1Idx = V2Idx = ZeroMask;
- V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
- V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
- V1InUse |= (ZeroMask != V1Idx);
- V2InUse |= (ZeroMask != V2Idx);
- }
- }
+ SDValue PSHUFB = lowerVectorShuffleAsPSHUFB(DL, MVT::v16i8, V1, V2, Mask,
+ DAG, V1InUse, V2InUse);
- if (V1InUse)
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
- if (V2InUse)
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+ // If both V1 and V2 are in use and we can use a direct blend or an unpack,
+ // do so. This avoids using them to handle blends-with-zero, which is
+ // important as a single pshufb is significantly faster for that.
+ if (V1InUse && V2InUse) {
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
+ Mask, Subtarget, DAG))
+ return Blend;
- // If we need shuffled inputs from both, blend the two.
- if (V1InUse && V2InUse)
- return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
- if (V1InUse)
- return V1; // Single inputs are easy.
- if (V2InUse)
- return V2; // Single inputs are easy.
- // Shuffling to a zeroable vector.
- return getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
+ // In some cases we can use an unpack to do the blending rather than an or.
+ // Even though the or may be marginally more efficient, we prefer this
+ // lowering because there are common cases where part of the complexity of
+ // the shuffles goes away when we do the final blend as an unpack.
+ // FIXME: It might be worth trying to detect if the unpack-feeding
+ // shuffles will both be pshufb, in which case we shouldn't bother with
+ // this.
+ if (SDValue Unpack =
+ lowerVectorShuffleAsUnpack(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return Unpack;
+ }
+
+ return PSHUFB;
}
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v16i8, DL, V1, V2,
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2,
Mask, Subtarget, DAG))
return V;
+ if (SDValue BitBlend =
+ lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return BitBlend;
+
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
// details.
@@ -9703,7 +8868,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
SDValue ByteClearMask =
DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
- DAG.getConstant(0xFF, MaskVTs[NumEvenDrops - 1]));
+ DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
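[Illustration, not part of the patch] The byte-clear-then-pack trick in this hunk: clearing the odd bytes makes every i16 lane <= 0xFF, so PACKUSWB saturates losslessly and implements the even-element compaction. For one drop over two inputs (v16i8 mask <0,2,...,14,16,18,...,30>):

    #include <emmintrin.h>  // SSE2
    __m128i even_bytes(__m128i a, __m128i b) {
      __m128i keep = _mm_set1_epi16(0x00FF);            // clear odd bytes
      return _mm_packus_epi16(_mm_and_si128(a, keep),   // words are 0..255,
                              _mm_and_si128(b, keep));  // so the pack is exact
    }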
@@ -9720,72 +8885,58 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Result;
}
- int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ // Handle multi-input cases by blending single-input shuffles.
+ if (NumV2Elements > 0)
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
+ Mask, DAG);
- auto buildBlendMasks = [](MutableArrayRef<int> HalfMask,
- MutableArrayRef<int> V1HalfBlendMask,
- MutableArrayRef<int> V2HalfBlendMask) {
- for (int i = 0; i < 8; ++i)
- if (HalfMask[i] >= 0 && HalfMask[i] < 16) {
- V1HalfBlendMask[i] = HalfMask[i];
- HalfMask[i] = i;
- } else if (HalfMask[i] >= 16) {
- V2HalfBlendMask[i] = HalfMask[i] - 16;
- HalfMask[i] = i + 8;
- }
- };
- buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask);
- buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask);
+ // The fallback path for single-input shuffles widens this into two v8i16
+ // vectors with unpacks, shuffles those, and then pulls them back together
+ // with a pack.
+ SDValue V = V1;
- SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
+ int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ for (int i = 0; i < 16; ++i)
+ if (Mask[i] >= 0)
+ (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
- auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
- MutableArrayRef<int> HiBlendMask) {
- SDValue V1, V2;
- // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
- // them out and avoid using UNPCK{L,H} to extract the elements of V as
- // i16s.
- if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
- [](int M) { return M >= 0 && M % 2 == 1; }) &&
- std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
- [](int M) { return M >= 0 && M % 2 == 1; })) {
- // Use a mask to drop the high bytes.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
- V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
- DAG.getConstant(0x00FF, MVT::v8i16));
-
- // This will be a single vector shuffle instead of a blend so nuke V2.
- V2 = DAG.getUNDEF(MVT::v8i16);
-
- // Squash the masks to point directly into V1.
- for (int &M : LoBlendMask)
- if (M >= 0)
- M /= 2;
- for (int &M : HiBlendMask)
- if (M >= 0)
- M /= 2;
- } else {
- // Otherwise just unpack the low half of V into V1 and the high half into
- // V2 so that we can blend them as i16s.
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
- V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
- }
+ SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
- SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
- SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
- return std::make_pair(BlendedLo, BlendedHi);
- };
- SDValue V1Lo, V1Hi, V2Lo, V2Hi;
- std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask);
- std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask);
+ SDValue VLoHalf, VHiHalf;
+ // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
+ // them out and avoid using UNPCK{L,H} to extract the elements of V as
+ // i16s.
+ if (std::none_of(std::begin(LoBlendMask), std::end(LoBlendMask),
+ [](int M) { return M >= 0 && M % 2 == 1; }) &&
+ std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask),
+ [](int M) { return M >= 0 && M % 2 == 1; })) {
+ // Use a mask to drop the high bytes.
+ VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
+ DAG.getConstant(0x00FF, DL, MVT::v8i16));
+
+ // This will be a single vector shuffle instead of a blend so nuke VHiHalf.
+ VHiHalf = DAG.getUNDEF(MVT::v8i16);
+
+ // Squash the masks to point directly into VLoHalf.
+ for (int &M : LoBlendMask)
+ if (M >= 0)
+ M /= 2;
+ for (int &M : HiBlendMask)
+ if (M >= 0)
+ M /= 2;
+ } else {
+    // Otherwise just unpack the low half of V into VLoHalf and the high
+    // half into VHiHalf so that we can blend them as i16s.
+ VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+ VHiHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+ }
- SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask);
- SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask);
+ SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
+ SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
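
The mask bookkeeping of this fallback is easy to restate on its own. A sketch with illustrative names, mirroring the splitting loop and the odd-byte test above:

    #include <algorithm>
    #include <array>

    struct WidenPlan {
      std::array<int, 8> Lo, Hi;
      bool CanUseAndMask; // true: AND with 0x00FF widens; false: UNPCK needed
    };

    WidenPlan planWiden(const std::array<int, 16> &Mask) {
      WidenPlan P;
      P.Lo.fill(-1);
      P.Hi.fill(-1);
      for (int i = 0; i < 16; ++i)
        if (Mask[i] >= 0)
          (i < 8 ? P.Lo[i] : P.Hi[i % 8]) = Mask[i];
      auto Odd = [](int M) { return M >= 0 && M % 2 == 1; };
      P.CanUseAndMask = std::none_of(P.Lo.begin(), P.Lo.end(), Odd) &&
                        std::none_of(P.Hi.begin(), P.Hi.end(), Odd);
      if (P.CanUseAndMask) { // squash byte indices down to word indices
        for (int &M : P.Lo)
          if (M >= 0)
            M /= 2;
        for (int &M : P.Hi)
          if (M >= 0)
            M /= 2;
      }
      return P;
    }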
@@ -9871,7 +9022,7 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
return true;
}
-/// \brief Generic routine to split ector shuffle into half-sized shuffles.
+/// \brief Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
@@ -9892,14 +9043,43 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
MVT ScalarVT = VT.getScalarType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
- SDValue LoV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(SplitNumElements));
- SDValue LoV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(0));
- SDValue HiV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(SplitNumElements));
+ // Rather than splitting build-vectors, just build two narrower build
+ // vectors. This helps shuffling with splats and zeros.
+ auto SplitVector = [&](SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ MVT OrigVT = V.getSimpleValueType();
+ int OrigNumElements = OrigVT.getVectorNumElements();
+ int OrigSplitNumElements = OrigNumElements / 2;
+ MVT OrigScalarVT = OrigVT.getScalarType();
+ MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
+
+ SDValue LoV, HiV;
+
+ auto *BV = dyn_cast<BuildVectorSDNode>(V);
+ if (!BV) {
+ LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(0, DL));
+ HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(OrigSplitNumElements, DL));
+ } else {
+
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (int i = 0; i < OrigSplitNumElements; ++i) {
+ LoOps.push_back(BV->getOperand(i));
+ HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
+ }
+ LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps);
+ HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps);
+ }
+ return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV),
+ DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV));
+ };
+
+ SDValue LoV1, HiV1, LoV2, HiV2;
+ std::tie(LoV1, HiV1) = SplitVector(V1);
+ std::tie(LoV2, HiV2) = SplitVector(V2);
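
A toy model of the build-vector case (illustrative template, not an LLVM API): splitting the operand list itself keeps each constant or splat element directly visible in the two narrower build_vectors, instead of hiding them behind EXTRACT_SUBVECTOR nodes:

    #include <utility>
    #include <vector>

    template <typename T>
    std::pair<std::vector<T>, std::vector<T>>
    splitBuildVectorOps(const std::vector<T> &Ops) {
      auto Mid = Ops.begin() + Ops.size() / 2;
      return {std::vector<T>(Ops.begin(), Mid), std::vector<T>(Mid, Ops.end())};
    }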
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
@@ -10046,7 +9226,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
- // less expensive. The flags track wether the given lane contains an element
+ // less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool LaneCrossing[2] = {false, false};
for (int i = 0, Size = Mask.size(); i < Size; ++i)
@@ -10071,7 +9251,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
// allow folding it into a memory operand.
unsigned PERMMask = 3 | 2 << 4;
SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
- V1, DAG.getConstant(PERMMask, MVT::i8));
+ V1, DAG.getConstant(PERMMask, DL, MVT::i8));
return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
}
@@ -10086,33 +9266,49 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+  // TODO: If minimizing size and one of the inputs is a zero vector and
+  // the zero vector has only one use, we could use a VPERM2X128 to save
+  // the instruction bytes needed to explicitly generate the zero vector.
+
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Subtarget, DAG))
return Blend;
- MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
- // Check for patterns which can be matched with a single insert of a 128-bit
- // subvector.
- bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1);
- if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
- }
- if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
- DAG.getIntPtrConstant(2));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
- }
-
- // Otherwise form a 128-bit permutation.
- // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
+ bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // If either input operand is a zero vector, use VPERM2X128 because its mask
+ // allows us to replace the zero input with an implicit zero.
+ if (!IsV1Zero && !IsV2Zero) {
+ // Check for patterns which can be matched with a single insert of a 128-bit
+ // subvector.
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
+ if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+ }
+
+ // Otherwise form a 128-bit permutation. After accounting for undefs,
+ // convert the 64-bit shuffle mask selection values into 128-bit
+ // selection bits by dividing the indexes by 2 and shifting into positions
+ // defined by a vperm2*128 instruction's immediate control byte.
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
int MaskLO = Mask[0];
if (MaskLO == SM_SentinelUndef)
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
@@ -10122,8 +9318,29 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+
+ // If either input is a zero vector, replace it with an undef input.
+ // Shuffle mask values < 4 are selecting elements of V1.
+ // Shuffle mask values >= 4 are selecting elements of V2.
+ // Adjust each half of the permute mask by clearing the half that was
+ // selecting the zero vector and setting the zero mask bit.
+ if (IsV1Zero) {
+ V1 = DAG.getUNDEF(VT);
+ if (MaskLO < 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI < 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
+ }
+ if (IsV2Zero) {
+ V2 = DAG.getUNDEF(VT);
+ if (MaskLO >= 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI >= 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
+ }
+
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
- DAG.getConstant(PermMask, MVT::i8));
+ DAG.getConstant(PermMask, DL, MVT::i8));
}
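
Pulling the immediate computation together, a standalone sketch of the arithmetic above (the function name and plain-array mask are illustrative; -1 stands in for SM_SentinelUndef):

    #include <cstdint>

    uint8_t vperm2x128Imm(const int Mask[4], bool V1Zero, bool V2Zero) {
      auto Pick = [](int A, int B) { return A >= 0 ? A : (B >= 0 ? B : 0); };
      int MaskLO = Pick(Mask[0], Mask[1]);
      int MaskHI = Pick(Mask[2], Mask[3]);
      uint8_t Imm = uint8_t(MaskLO / 2) | uint8_t((MaskHI / 2) << 4);
      if (V1Zero) { // halves that selected V1 become implicit zeros
        if (MaskLO < 4) Imm = (Imm & 0xf0) | 0x08;
        if (MaskHI < 4) Imm = (Imm & 0x0f) | 0x80;
      }
      if (V2Zero) { // halves that selected V2 become implicit zeros
        if (MaskLO >= 4) Imm = (Imm & 0xf0) | 0x08;
        if (MaskHI >= 4) Imm = (Imm & 0x0f) | 0x80;
      }
      return Imm; // e.g. Mask={0,1,6,7} gives 0x30; with V2 zero, 0x80
    }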
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
@@ -10245,23 +9462,27 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isSingleInputShuffleMask(Mask)) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f64, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f64, V1,
Mask, Subtarget, DAG))
return Broadcast;
+ // Use low duplicate instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
+ return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
+
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
      // Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
- DAG.getConstant(VPERMILPMask, MVT::i8));
+ DAG.getConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
if (Subtarget->hasAVX2())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
@@ -10270,19 +9491,14 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
-
- // If we have a single input to the zero element, insert that into V1 if we
- // can do so cheaply.
- int NumV2Elements =
- std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
- if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- MVT::v4f64, DL, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
+ if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
@@ -10296,7 +9512,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
- DAG.getConstant(SHUFPDMask, MVT::i8));
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
(Mask[1] == -1 || Mask[1] < 2) &&
@@ -10305,7 +9521,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
- DAG.getConstant(SHUFPDMask, MVT::i8));
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
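
The two SHUFPD cases above build their immediates the same way; a hedged sketch of the uncommuted case, with illustrative names. Within each 128-bit lane, result element 0 comes from V1 and result element 1 from V2, and each immediate bit selects the low or high double of that source's lane:

    #include <cstdint>
    #include <optional>

    std::optional<uint8_t> shufpdImmV1V2(const int Mask[4]) {
      // Allowed sources per position (-1 = undef): {0,1},{4,5},{2,3},{6,7}.
      const int Base[4] = {0, 4, 2, 6};
      uint8_t Imm = 0;
      for (int i = 0; i < 4; ++i) {
        if (Mask[i] == -1)
          continue;
        if (Mask[i] != Base[i] && Mask[i] != Base[i] + 1)
          return std::nullopt; // needs a different source or lane
        if (Mask[i] == Base[i] + 1)
          Imm |= uint8_t(1u << i);
      }
      return Imm; // e.g. Mask={0,5,2,7} gives 0b1010
    }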
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -10353,7 +9569,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i64, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -10372,21 +9588,30 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ISD::BITCAST, DL, MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1),
- getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
-
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
}
// AVX2 provides a direct instruction for permuting a single input across
// lanes.
if (isSingleInputShuffleMask(Mask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
+
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1);
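
All of the isShuffleEquivalent checks here treat undef mask entries as wildcards. A simplified standalone model (the real helper also looks through the V1/V2 operands, e.g. to match splats, which is omitted here):

    #include <initializer_list>

    template <typename Range>
    bool maskEquivalent(const Range &Mask, std::initializer_list<int> Ref) {
      if (Mask.size() != Ref.size())
        return false;
      auto It = Ref.begin();
      for (int M : Mask) {
        if (M >= 0 && M != *It) // -1 (undef) matches anything
          return false;
        ++It;
      }
      return true;
    }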
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
@@ -10422,7 +9647,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8f32, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -10432,15 +9657,26 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 &&
"Repeated masks must be half the mask width!");
+
+ // Use even/odd duplicate instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
+ return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7}))
+ return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
+
if (isSingleInputShuffleMask(Mask))
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1);
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends. We also need to squash the
@@ -10457,7 +9693,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
SDValue VPermMask[8];
for (int i = 0; i < 8; ++i)
VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
- : DAG.getConstant(Mask[i], MVT::i32);
+ : DAG.getConstant(Mask[i], DL, MVT::i32);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
return DAG.getNode(
X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
@@ -10506,12 +9742,19 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return ZExt;
+
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i32, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -10523,22 +9766,35 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (isSingleInputShuffleMask(Mask))
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
- getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1);
}
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return Shift;
+
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
if (isSingleInputShuffleMask(Mask)) {
SDValue VPermMask[8];
for (int i = 0; i < 8; ++i)
VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
- : DAG.getConstant(Mask[i], MVT::i32);
+ : DAG.getConstant(Mask[i], DL, MVT::i32);
return DAG.getNode(
X86ISD::VPERMV, DL, MVT::v8i32,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
@@ -10570,8 +9826,15 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v16i16, V1, V2,
+ Mask, Subtarget, DAG))
+ return ZExt;
+
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i16, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -10580,19 +9843,29 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
- // First 128-bit lane:
- 0, 16, 1, 17, 2, 18, 3, 19,
- // Second 128-bit lane:
- 8, 24, 9, 25, 10, 26, 11, 27))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane:
+ 0, 16, 1, 17, 2, 18, 3, 19,
+ // Second 128-bit lane:
+ 8, 24, 9, 25, 10, 26, 11, 27}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
- if (isShuffleEquivalent(Mask,
- // First 128-bit lane:
- 4, 20, 5, 21, 6, 22, 7, 23,
- // Second 128-bit lane:
- 12, 28, 13, 29, 14, 30, 15, 31))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane:
+ 4, 20, 5, 21, 6, 22, 7, 23,
+ // Second 128-bit lane:
+ 12, 28, 13, 29, 14, 30, 15, 31}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (isSingleInputShuffleMask(Mask)) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
@@ -10600,6 +9873,15 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG);
+ SmallVector<int, 8> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
+ // As this is a single-input shuffle, the repeated mask should be
+ // a strictly valid v8i16 mask that we can pass through to the v8i16
+ // lowering to handle even the v16 case.
+ return lowerV8I16GeneralSingleInputVectorShuffle(
+ DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
+ }
+
SDValue PSHUFBMask[32];
for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
@@ -10609,8 +9891,8 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
int M = i < 8 ? Mask[i] : Mask[i] - 8;
assert(M >= 0 && M < 8 && "Invalid single-input mask!");
- PSHUFBMask[2 * i] = DAG.getConstant(2 * M, MVT::i8);
- PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, MVT::i8);
+ PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8);
+ PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8);
}
return DAG.getNode(
ISD::BITCAST, DL, MVT::v16i16,
@@ -10645,8 +9927,15 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2,
+ Mask, Subtarget, DAG))
+ return ZExt;
+
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v32i8, DL, V1,
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1,
Mask, Subtarget, DAG))
return Broadcast;
@@ -10658,20 +9947,30 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Note that these are repeated 128-bit lane unpacks, not unpacks across all
// 256-bit lanes.
if (isShuffleEquivalent(
- Mask,
- // First 128-bit lane:
- 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
- // Second 128-bit lane:
- 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
+ V1, V2, Mask,
+ {// First 128-bit lane:
+ 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
+ // Second 128-bit lane:
+ 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
if (isShuffleEquivalent(
- Mask,
- // First 128-bit lane:
- 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
- // Second 128-bit lane:
- 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63))
+ V1, V2, Mask,
+ {// First 128-bit lane:
+ 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
+ // Second 128-bit lane:
+ 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+ // Try to use shift instructions.
+ if (SDValue Shift =
+ lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (isSingleInputShuffleMask(Mask)) {
// There are no generalized cross-lane shuffle operations available on i8
// element types.
@@ -10684,7 +9983,8 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
PSHUFBMask[i] =
Mask[i] < 0
? DAG.getUNDEF(MVT::i8)
- : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, MVT::i8);
+ : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL,
+ MVT::i8);
return DAG.getNode(
X86ISD::PSHUFB, DL, MVT::v32i8, V1,
@@ -10713,6 +10013,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumElts = VT.getVectorNumElements();
+ int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) {
+ return M >= NumElts;
+ });
+
+ if (NumV2Elements == 1 && Mask[0] >= NumElts)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
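The gating test here is small enough to restate standalone (illustrative name):

    #include <algorithm>
    #include <vector>

    bool isSingleV2ElementAtFront(const std::vector<int> &Mask, int NumElts) {
      int NumV2 = int(std::count_if(Mask.begin(), Mask.end(),
                                    [NumElts](int M) { return M >= NumElts; }));
      return NumV2 == 1 && Mask[0] >= NumElts;
    }
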
// There is a really nice hard cut-over between AVX1 and AVX2 that means we can
// check for those subtargets here and avoid much of the subtarget querying in
// the per-vector-type lowering routines. With AVX1 we have essentially *zero*
@@ -10765,9 +10077,9 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
// FIXME: Implement direct support for this type!
@@ -10786,13 +10098,17 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
- 0, 16, 1, 17, 4, 20, 5, 21,
- 8, 24, 9, 25, 12, 28, 13, 29))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ // Second 128-bit lane.
+ 8, 24, 9, 25, 12, 28, 13, 29}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(Mask,
- 2, 18, 3, 19, 6, 22, 7, 23,
- 10, 26, 11, 27, 14, 30, 15, 31))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ // Second 128-bit lane.
+ 10, 26, 11, 27, 14, 30, 15, 31}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
// FIXME: Implement direct support for this type!
@@ -10812,9 +10128,9 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
// FIXME: Implement direct support for this type!
@@ -10833,13 +10149,17 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
- 0, 16, 1, 17, 4, 20, 5, 21,
- 8, 24, 9, 25, 12, 28, 13, 29))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 0, 16, 1, 17, 4, 20, 5, 21,
+ // Second 128-bit lane.
+ 8, 24, 9, 25, 12, 28, 13, 29}))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(Mask,
- 2, 18, 3, 19, 6, 22, 7, 23,
- 10, 26, 11, 27, 14, 30, 15, 31))
+ if (isShuffleEquivalent(V1, V2, Mask,
+ {// First 128-bit lane.
+ 2, 18, 3, 19, 6, 22, 7, 23,
+ // Second 128-bit lane.
+ 10, 26, 11, 27, 14, 30, 15, 31}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
// FIXME: Implement direct support for this type!
@@ -10893,8 +10213,8 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
"Cannot lower 512-bit vectors w/ basic ISA!");
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast =
+ lowerVectorShuffleAsBroadcast(DL, VT, V1, Mask, Subtarget, DAG))
return Broadcast;
  // Dispatch to each element type for lowering. If we don't have support for
@@ -10970,6 +10290,13 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
}
+ // We actually see shuffles that are entirely re-arrangements of a set of
+ // zero inputs. This mostly happens while decomposing complex shuffles into
+ // simple ones. Directly lower these as a buildvector of zeros.
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ if (Zeroable.all())
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
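A simplified model of the zeroable computation (the real computeZeroableShuffleElements inspects build_vector constants; here per-lane zero flags are assumed to be precomputed):

    #include <vector>

    std::vector<bool> zeroableLanes(const std::vector<int> &Mask,
                                    const std::vector<bool> &V1LaneZero,
                                    const std::vector<bool> &V2LaneZero) {
      int N = int(Mask.size());
      std::vector<bool> Z(N);
      for (int i = 0; i < N; ++i) {
        int M = Mask[i];
        Z[i] = (M < 0) || // undef lanes can be anything, including zero
               (M < N ? V1LaneZero[M] : V2LaneZero[M - N]);
      }
      return Z; // if every lane is zeroable, the shuffle is a zero vector
    }
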
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
@@ -11057,1586 +10384,6 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
llvm_unreachable("Unimplemented!");
}
-
-//===----------------------------------------------------------------------===//
-// Legacy vector shuffle lowering
-//
-// This code is the legacy code handling vector shuffles until the above
-// replaces its functionality and performance.
-//===----------------------------------------------------------------------===//
-
-static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
- bool hasInt256, unsigned *MaskOut = nullptr) {
- MVT EltVT = VT.getVectorElementType();
-
- // There is no blend with immediate in AVX-512.
- if (VT.is512BitVector())
- return false;
-
- if (!hasSSE41 || EltVT == MVT::i8)
- return false;
- if (!hasInt256 && VT == MVT::v16i16)
- return false;
-
- unsigned MaskValue = 0;
- unsigned NumElems = VT.getVectorNumElements();
- // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems - 1) / 8 + 1;
- unsigned NumElemsInLane = NumElems / NumLanes;
-
- // Blend for v16i16 should be symmetric for both lanes.
- for (unsigned i = 0; i < NumElemsInLane; ++i) {
-
- int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
- int EltIdx = MaskVals[i];
-
- if ((EltIdx < 0 || EltIdx == (int)i) &&
- (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
- continue;
-
- if (((unsigned)EltIdx == (i + NumElems)) &&
- (SndLaneEltIdx < 0 ||
- (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
- MaskValue |= (1 << i);
- else
- return false;
- }
-
- if (MaskOut)
- *MaskOut = MaskValue;
- return true;
-}
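
The lane-symmetry rule this legacy routine enforces can be sketched standalone (illustrative name; std::optional stands in for the bool-plus-out-parameter interface):

    #include <cstdint>
    #include <optional>
    #include <vector>

    std::optional<uint32_t> blendImm(const std::vector<int> &Mask) {
      unsigned N = unsigned(Mask.size());
      unsigned Lanes = (N - 1) / 8 + 1; // 2 lanes iff N > 8
      unsigned PerLane = N / Lanes;
      uint32_t Imm = 0;
      for (unsigned i = 0; i < PerLane; ++i) {
        int Lo = Mask[i];
        int Hi = Lanes == 2 ? Mask[i + PerLane] : -1;
        bool FromV1 = (Lo < 0 || Lo == int(i)) &&
                      (Hi < 0 || Hi == int(i + PerLane));
        bool FromV2 = Lo == int(i + N) &&
                      (Hi < 0 || Hi == int(i + N + PerLane));
        if (FromV1)
          continue;
        if (!FromV2)
          return std::nullopt; // not a per-element, lane-symmetric blend
        Imm |= 1u << i;
      }
      return Imm;
    }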
-
-// Try to lower a shuffle node into a simple blend instruction.
-// This function assumes isBlendMask returns true for this
- // ShuffleVectorSDNode
-static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
- unsigned MaskValue,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getSimpleValueType(0);
- MVT EltVT = VT.getVectorElementType();
- assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
- Subtarget->hasInt256() && "Trying to lower a "
- "VECTOR_SHUFFLE to a Blend but "
- "with the wrong mask"));
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- unsigned NumElems = VT.getVectorNumElements();
-
- // Convert i32 vectors to floating point if it is not AVX2.
- // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
- MVT BlendVT = VT;
- if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
- V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
- }
-
- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
- DAG.getConstant(MaskValue, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
-}
-
-/// In vector type \p VT, return true if the element at index \p InputIdx
-/// falls on a different 128-bit lane than \p OutputIdx.
-static bool ShuffleCrosses128bitLane(MVT VT, unsigned InputIdx,
- unsigned OutputIdx) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- return InputIdx * EltSize / 128 != OutputIdx * EltSize / 128;
-}
-
-/// Generate a PSHUFB if possible. Selects elements from \p V1 according to
-/// \p MaskVals. MaskVals[OutputIdx] = InputIdx specifies that we want to
-/// shuffle the element at InputIdx in V1 to OutputIdx in the result. If \p
-/// MaskVals refers to elements outside of \p V1 or is undef (-1), insert a
-/// zero.
-static SDValue getPSHUFB(ArrayRef<int> MaskVals, SDValue V1, SDLoc &dl,
- SelectionDAG &DAG) {
- MVT VT = V1.getSimpleValueType();
- assert(VT.is128BitVector() || VT.is256BitVector());
-
- MVT EltVT = VT.getVectorElementType();
- unsigned EltSizeInBytes = EltVT.getSizeInBits() / 8;
- unsigned NumElts = VT.getVectorNumElements();
-
- SmallVector<SDValue, 32> PshufbMask;
- for (unsigned OutputIdx = 0; OutputIdx < NumElts; ++OutputIdx) {
- int InputIdx = MaskVals[OutputIdx];
- unsigned InputByteIdx;
-
- if (InputIdx < 0 || NumElts <= (unsigned)InputIdx)
- InputByteIdx = 0x80;
- else {
- // Cross lane is not allowed.
- if (ShuffleCrosses128bitLane(VT, InputIdx, OutputIdx))
- return SDValue();
- InputByteIdx = InputIdx * EltSizeInBytes;
- // Index is a byte offset within the 128-bit lane.
- InputByteIdx &= 0xf;
- }
-
- for (unsigned j = 0; j < EltSizeInBytes; ++j) {
- PshufbMask.push_back(DAG.getConstant(InputByteIdx, MVT::i8));
- if (InputByteIdx != 0x80)
- ++InputByteIdx;
- }
- }
-
- MVT ShufVT = MVT::getVectorVT(MVT::i8, PshufbMask.size());
- if (ShufVT != VT)
- V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1);
- return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1,
- DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, PshufbMask));
-}
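
The per-byte expansion and lane restriction above, as a standalone sketch (illustrative name; 0x80 is the PSHUFB zeroing index, as in the code):

    #include <cstdint>
    #include <optional>
    #include <vector>

    std::optional<std::vector<uint8_t>>
    pshufbByteMask(const std::vector<int> &Mask, unsigned EltBytes) {
      std::vector<uint8_t> Bytes;
      unsigned N = unsigned(Mask.size());
      for (unsigned Out = 0; Out < N; ++Out) {
        int In = Mask[Out];
        if (In < 0 || unsigned(In) >= N) {
          Bytes.insert(Bytes.end(), EltBytes, 0x80); // zero this element
          continue;
        }
        // PSHUFB cannot move bytes across a 128-bit lane boundary.
        if (unsigned(In) * EltBytes / 16 != Out * EltBytes / 16)
          return std::nullopt;
        uint8_t Byte = uint8_t((unsigned(In) * EltBytes) & 0xf);
        for (unsigned j = 0; j < EltBytes; ++j)
          Bytes.push_back(uint8_t(Byte + j));
      }
      return Bytes;
    }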
-
-// v8i16 shuffles - Prefer shuffles in the following order:
-// 1. [all] pshuflw, pshufhw, optional move
-// 2. [ssse3] 1 x pshufb
-// 3. [ssse3] 2 x pshufb + 1 x por
-// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-static SDValue
-LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- SmallVector<int, 8> MaskVals;
-
- // Determine if more than 1 of the words in each of the low and high quadwords
- // of the result come from the same quadword of one of the two inputs. Undef
- // mask values count as coming from any quadword, for better codegen.
- //
- // Lo/HiQuad[i] = j indicates how many words from the ith quad of the input
- // feeds this quad. For i, 0 and 1 refer to V1, 2 and 3 refer to V2.
- unsigned LoQuad[] = { 0, 0, 0, 0 };
- unsigned HiQuad[] = { 0, 0, 0, 0 };
- // Indices of quads used.
- std::bitset<4> InputQuads;
- for (unsigned i = 0; i < 8; ++i) {
- unsigned *Quad = i < 4 ? LoQuad : HiQuad;
- int EltIdx = SVOp->getMaskElt(i);
- MaskVals.push_back(EltIdx);
- if (EltIdx < 0) {
- ++Quad[0];
- ++Quad[1];
- ++Quad[2];
- ++Quad[3];
- continue;
- }
- ++Quad[EltIdx / 4];
- InputQuads.set(EltIdx / 4);
- }
-
- int BestLoQuad = -1;
- unsigned MaxQuad = 1;
- for (unsigned i = 0; i < 4; ++i) {
- if (LoQuad[i] > MaxQuad) {
- BestLoQuad = i;
- MaxQuad = LoQuad[i];
- }
- }
-
- int BestHiQuad = -1;
- MaxQuad = 1;
- for (unsigned i = 0; i < 4; ++i) {
- if (HiQuad[i] > MaxQuad) {
- BestHiQuad = i;
- MaxQuad = HiQuad[i];
- }
- }
-
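For a concrete reading of this scoring, take the v8i16 mask <0,1,2,3, 12,13,14,15>: all four low-half words come from input quad 0 and all four high-half words from quad 3, so LoQuad ends up {4,0,0,0}, HiQuad ends up {0,0,0,4}, and the scans above pick BestLoQuad = 0 and BestHiQuad = 3.
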
- // For SSSE3, if all 8 words of the result come from only 1 quadword of each
- // of the two input vectors, shuffle them into one input vector so only a
- // single pshufb instruction is necessary. If there are more than 2 input
- // quads, disable the next transformation since it does not help SSSE3.
- bool V1Used = InputQuads[0] || InputQuads[1];
- bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3()) {
- if (InputQuads.count() == 2 && V1Used && V2Used) {
- BestLoQuad = InputQuads[0] ? 0 : 1;
- BestHiQuad = InputQuads[2] ? 2 : 3;
- }
- if (InputQuads.count() > 2) {
- BestLoQuad = -1;
- BestHiQuad = -1;
- }
- }
-
- // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
- // the shuffle mask. If a quad is scored as -1, that means that it contains
- // words from all 4 input quadwords.
- SDValue NewV;
- if (BestLoQuad >= 0 || BestHiQuad >= 0) {
- int MaskV[] = {
- BestLoQuad < 0 ? 0 : BestLoQuad,
- BestHiQuad < 0 ? 1 : BestHiQuad
- };
- NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
- NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
-
- // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
- // source words for the shuffle, to aid later transformations.
- bool AllWordsInNewV = true;
- bool InOrder[2] = { true, true };
- for (unsigned i = 0; i != 8; ++i) {
- int idx = MaskVals[i];
- if (idx != (int)i)
- InOrder[i/4] = false;
- if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
- continue;
- AllWordsInNewV = false;
- break;
- }
-
- bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
- if (AllWordsInNewV) {
- for (int i = 0; i != 8; ++i) {
- int idx = MaskVals[i];
- if (idx < 0)
- continue;
- idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
- if ((idx != i) && idx < 4)
- pshufhw = false;
- if ((idx != i) && idx > 3)
- pshuflw = false;
- }
- V1 = NewV;
- V2Used = false;
- BestLoQuad = 0;
- BestHiQuad = 1;
- }
-
- // If we've eliminated the use of V2, and the new mask is a pshuflw or
- // pshufhw, that's as cheap as it gets. Return the new shuffle.
- if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
- unsigned TargetMask = 0;
- NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
- DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
- TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
- getShufflePSHUFLWImmediate(SVOp);
- V1 = NewV.getOperand(0);
- return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
- }
- }
-
- // Promote splats to a larger type which usually leads to more efficient code.
- // FIXME: Is this true if pshufb is available?
- if (SVOp->isSplat())
- return PromoteSplat(SVOp, DAG);
-
- // If we have SSSE3, and all words of the result are from 1 input vector,
- // case 2 is generated, otherwise case 3 is generated. If no SSSE3
- // is present, fall back to case 4.
- if (Subtarget->hasSSSE3()) {
- SmallVector<SDValue,16> pshufbMask;
-
- // If we have elements from both input vectors, set the high bit of the
- // shuffle mask element to zero out elements that come from V2 in the V1
- // mask, and elements that come from V1 in the V2 mask, so that the two
- // results can be OR'd together.
- bool TwoInputs = V1Used && V2Used;
- V1 = getPSHUFB(MaskVals, V1, dl, DAG);
- if (!TwoInputs)
- return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
-
- // Calculate the shuffle mask for the second input, shuffle it, and
- // OR it with the first shuffled input.
- CommuteVectorShuffleMask(MaskVals, 8);
- V2 = getPSHUFB(MaskVals, V2, dl, DAG);
- V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
- }
-
- // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
- // and update MaskVals with new element order.
- std::bitset<8> InOrder;
- if (BestLoQuad >= 0) {
- int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
- for (int i = 0; i != 4; ++i) {
- int idx = MaskVals[i];
- if (idx < 0) {
- InOrder.set(i);
- } else if ((idx / 4) == BestLoQuad) {
- MaskV[i] = idx & 3;
- InOrder.set(i);
- }
- }
- NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
- &MaskV[0]);
-
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
- NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
- NewV.getOperand(0),
- getShufflePSHUFLWImmediate(SVOp), DAG);
- }
- }
-
- // If BestHi >= 0, generate a pshufhw to put the high elements in order,
- // and update MaskVals with the new element order.
- if (BestHiQuad >= 0) {
- int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
- for (unsigned i = 4; i != 8; ++i) {
- int idx = MaskVals[i];
- if (idx < 0) {
- InOrder.set(i);
- } else if ((idx / 4) == BestHiQuad) {
- MaskV[i] = (idx & 3) + 4;
- InOrder.set(i);
- }
- }
- NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
- &MaskV[0]);
-
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
- NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
- NewV.getOperand(0),
- getShufflePSHUFHWImmediate(SVOp), DAG);
- }
- }
-
- // In case BestHi & BestLo were both -1, which means each quadword has a word
- // from each of the four input quadwords, calculate the InOrder bitvector now
- // before falling through to the insert/extract cleanup.
- if (BestLoQuad == -1 && BestHiQuad == -1) {
- NewV = V1;
- for (int i = 0; i != 8; ++i)
- if (MaskVals[i] < 0 || MaskVals[i] == i)
- InOrder.set(i);
- }
-
- // The other elements are put in the right place using pextrw and pinsrw.
- for (unsigned i = 0; i != 8; ++i) {
- if (InOrder[i])
- continue;
- int EltIdx = MaskVals[i];
- if (EltIdx < 0)
- continue;
- SDValue ExtOp = (EltIdx < 8) ?
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
- DAG.getIntPtrConstant(EltIdx)) :
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
- DAG.getIntPtrConstant(EltIdx - 8));
- NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
- DAG.getIntPtrConstant(i));
- }
- return NewV;
-}
-
-/// \brief v16i16 shuffles
-///
-/// FIXME: We only support generation of a single pshufb currently. We can
-/// generalize the other applicable cases from LowerVECTOR_SHUFFLEv8i16 as
-/// well (e.g 2 x pshufb + 1 x por).
-static SDValue
-LowerVECTOR_SHUFFLEv16i16(SDValue Op, SelectionDAG &DAG) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
-
- if (V2.getOpcode() != ISD::UNDEF)
- return SDValue();
-
- SmallVector<int, 16> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
- return getPSHUFB(MaskVals, V1, dl, DAG);
-}
-
-// v16i8 shuffles - Prefer shuffles in the following order:
-// 1. [ssse3] 1 x pshufb
-// 2. [ssse3] 2 x pshufb + 1 x por
-// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
-static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
- const X86Subtarget* Subtarget,
- SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- ArrayRef<int> MaskVals = SVOp->getMask();
-
- // Promote splats to a larger type which usually leads to more efficient code.
- // FIXME: Is this true if pshufb is available?
- if (SVOp->isSplat())
- return PromoteSplat(SVOp, DAG);
-
- // If we have SSSE3, case 1 is generated when all result bytes come from
- // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
- // present, fall back to case 3.
-
- // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (Subtarget->hasSSSE3()) {
- SmallVector<SDValue,16> pshufbMask;
-
- // If all result elements are from one input vector, then only translate
- // undef mask values to 0x80 (zero out result) in the pshufb mask.
- //
- // Otherwise, we have elements from both input vectors, and must zero out
- // elements that come from V2 in the first mask, and V1 in the second mask
- // so that we can OR them together.
- for (unsigned i = 0; i != 16; ++i) {
- int EltIdx = MaskVals[i];
- if (EltIdx < 0 || EltIdx >= 16)
- EltIdx = 0x80;
- pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
- }
- V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::v16i8, pshufbMask));
-
- // As PSHUFB will zero elements with negative indices, it's safe to ignore
- // the 2nd operand if it's undefined or zero.
- if (V2.getOpcode() == ISD::UNDEF ||
- ISD::isBuildVectorAllZeros(V2.getNode()))
- return V1;
-
- // Calculate the shuffle mask for the second input, shuffle it, and
- // OR it with the first shuffled input.
- pshufbMask.clear();
- for (unsigned i = 0; i != 16; ++i) {
- int EltIdx = MaskVals[i];
- EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
- pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
- }
- V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
- DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::v16i8, pshufbMask));
- return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
- }
-
- // No SSSE3 - Calculate in-place words and then fix all out-of-place words
- // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
- // the 16 different words that comprise the two doublequadword input vectors.
- V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
- SDValue NewV = V1;
- for (int i = 0; i != 8; ++i) {
- int Elt0 = MaskVals[i*2];
- int Elt1 = MaskVals[i*2+1];
-
- // This word of the result is all undef, skip it.
- if (Elt0 < 0 && Elt1 < 0)
- continue;
-
- // This word of the result is already in the correct place, skip it.
- if ((Elt0 == i*2) && (Elt1 == i*2+1))
- continue;
-
- SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
- SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
- SDValue InsElt;
-
- // If Elt0 and Elt1 are defined, are consecutive, and can be fetched
- // together with a single extract, extract the word and insert it.
- if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
- InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
- DAG.getIntPtrConstant(Elt1 / 2));
- NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
- DAG.getIntPtrConstant(i));
- continue;
- }
-
- // If Elt1 is defined, extract it from the appropriate source. If the
- // source byte is not also odd, shift the extracted word left 8 bits;
- // otherwise, clear the bottom 8 bits if we need to do an or.
- if (Elt1 >= 0) {
- InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
- DAG.getIntPtrConstant(Elt1 / 2));
- if ((Elt1 & 1) == 0)
- InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
- DAG.getConstant(8,
- TLI.getShiftAmountTy(InsElt.getValueType())));
- else if (Elt0 >= 0)
- InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
- DAG.getConstant(0xFF00, MVT::i16));
- }
- // If Elt0 is defined, extract it from the appropriate source. If the
- // source byte is not also even, shift the extracted word right 8 bits. If
- // Elt1 was also defined, OR the extracted values together before
- // inserting them in the result.
- if (Elt0 >= 0) {
- SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
- Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
- if ((Elt0 & 1) != 0)
- InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
- DAG.getConstant(8,
- TLI.getShiftAmountTy(InsElt0.getValueType())));
- else if (Elt1 >= 0)
- InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
- DAG.getConstant(0x00FF, MVT::i16));
- InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
- : InsElt0;
- }
- NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
- DAG.getIntPtrConstant(i));
- }
- return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
-}
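
The shift/mask recombination in this loop is easier to see on plain integers; a minimal sketch of the word-merging arithmetic, with illustrative parameter names:

    #include <cstdint>

    // Each source byte arrives inside a 16-bit word; the target word wants
    // Elt1 in its high byte and Elt0 in its low byte.
    uint16_t mergeWordBytes(uint16_t WordWithElt1, bool Elt1IsLowByte,
                            uint16_t WordWithElt0, bool Elt0IsHighByte) {
      uint16_t Hi = Elt1IsLowByte ? uint16_t(WordWithElt1 << 8)
                                  : uint16_t(WordWithElt1 & 0xFF00);
      uint16_t Lo = Elt0IsHighByte ? uint16_t(WordWithElt0 >> 8)
                                   : uint16_t(WordWithElt0 & 0x00FF);
      return Hi | Lo;
    }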
-
-// v32i8 shuffles - Translate to VPSHUFB if possible.
-static
-SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getSimpleValueType(0);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
-
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
- bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
-
- // VPSHUFB may be generated if
- // (1) one of the input vectors is undefined or zeroinitializer.
- // The mask value 0x80 puts 0 in the corresponding slot of the vector.
- // And (2) the mask indexes don't cross the 128-bit lane.
- if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
- (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
- return SDValue();
-
- if (V1IsAllZero && !V2IsAllZero) {
- CommuteVectorShuffleMask(MaskVals, 32);
- V1 = V2;
- }
- return getPSHUFB(MaskVals, V1, dl, DAG);
-}
-
-/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
-/// done when every pair / quad of shuffle mask elements point to elements in
-/// the right sequence. e.g.
-/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
-static
-SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getSimpleValueType(0);
- SDLoc dl(SVOp);
- unsigned NumElems = VT.getVectorNumElements();
- MVT NewVT;
- unsigned Scale;
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected!");
- case MVT::v2i64:
- case MVT::v2f64:
- return SDValue(SVOp, 0);
- case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
- case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
- case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
- case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
- case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
- case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
- }
-
- SmallVector<int, 8> MaskVec;
- for (unsigned i = 0; i != NumElems; i += Scale) {
- int StartIdx = -1;
- for (unsigned j = 0; j != Scale; ++j) {
- int EltIdx = SVOp->getMaskElt(i+j);
- if (EltIdx < 0)
- continue;
- if (StartIdx < 0)
- StartIdx = (EltIdx / Scale);
- if (EltIdx != (int)(StartIdx*Scale + j))
- return SDValue();
- }
- MaskVec.push_back(StartIdx);
- }
-
- SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0));
- SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1));
- return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
-}
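
Working the comment's own example through the check: a standalone sketch of the narrowing test (illustrative name). The v8i16 mask <2,3, 10,11, 0,1, 14,15> with Scale = 2 narrows to the v4i32 mask <1, 5, 0, 7>, because each pair is two consecutive elements starting at an even index:

    #include <cstddef>
    #include <optional>
    #include <vector>

    std::optional<std::vector<int>> narrowMask(const std::vector<int> &Mask,
                                               int Scale) {
      std::vector<int> Out;
      for (size_t i = 0; i < Mask.size(); i += size_t(Scale)) {
        int Start = -1;
        for (int j = 0; j < Scale; ++j) {
          int E = Mask[i + size_t(j)];
          if (E < 0)
            continue; // undef matches anything
          if (Start < 0)
            Start = E / Scale;
          if (E != Start * Scale + j)
            return std::nullopt; // group is not contiguous
        }
        Out.push_back(Start); // stays -1 if the whole group is undef
      }
      return Out; // <2,3,10,11,0,1,14,15>, Scale=2  ->  <1,5,0,7>
    }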
-
-/// getVZextMovL - Return a zero-extending vector move low node.
-///
-static SDValue getVZextMovL(MVT VT, MVT OpVT,
- SDValue SrcOp, SelectionDAG &DAG,
- const X86Subtarget *Subtarget, SDLoc dl) {
- if (VT == MVT::v2f64 || VT == MVT::v4f32) {
- LoadSDNode *LD = nullptr;
- if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
- LD = dyn_cast<LoadSDNode>(SrcOp);
- if (!LD) {
- // movssrr and movsdrr do not clear top bits. Try to use movd, movq
- // instead.
- MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
- SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
- SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
- // PR2108
- OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- OpVT,
- SrcOp.getOperand(0)
- .getOperand(0))));
- }
- }
- }
-
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
- DAG.getNode(ISD::BITCAST, dl,
- OpVT, SrcOp)));
-}
-
- /// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
- /// which could not be matched by any known target-specific shuffle
-static SDValue
-LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
-
- SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
- if (NewOp.getNode())
- return NewOp;
-
- MVT VT = SVOp->getSimpleValueType(0);
-
- unsigned NumElems = VT.getVectorNumElements();
- unsigned NumLaneElems = NumElems / 2;
-
- SDLoc dl(SVOp);
- MVT EltVT = VT.getVectorElementType();
- MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
- SDValue Output[2];
-
- SmallVector<int, 16> Mask;
- for (unsigned l = 0; l < 2; ++l) {
- // Build a shuffle mask for the output, discovering on the fly which
- // input vectors to use as shuffle operands (recorded in InputUsed).
- // If building a suitable shuffle vector proves too hard, then bail
- // out with UseBuildVector set.
- bool UseBuildVector = false;
- int InputUsed[2] = { -1, -1 }; // Not yet discovered.
- unsigned LaneStart = l * NumLaneElems;
- for (unsigned i = 0; i != NumLaneElems; ++i) {
- // The mask element. This indexes into the input.
- int Idx = SVOp->getMaskElt(i+LaneStart);
- if (Idx < 0) {
-      // The mask element does not index into any input vector.
- Mask.push_back(-1);
- continue;
- }
-
- // The input vector this mask element indexes into.
- int Input = Idx / NumLaneElems;
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NumLaneElems;
-
- // Find or create a shuffle vector operand to hold this input.
- unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
- if (InputUsed[OpNo] == Input)
- // This input vector is already an operand.
- break;
- if (InputUsed[OpNo] < 0) {
- // Create a new operand for this input vector.
- InputUsed[OpNo] = Input;
- break;
- }
- }
-
- if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up on trying to create a
- // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
- UseBuildVector = true;
- break;
- }
-
- // Add the mask index for the new shuffle vector.
- Mask.push_back(Idx + OpNo * NumLaneElems);
- }
-
- if (UseBuildVector) {
- SmallVector<SDValue, 16> SVOps;
- for (unsigned i = 0; i != NumLaneElems; ++i) {
- // The mask element. This indexes into the input.
- int Idx = SVOp->getMaskElt(i+LaneStart);
- if (Idx < 0) {
- SVOps.push_back(DAG.getUNDEF(EltVT));
- continue;
- }
-
- // The input vector this mask element indexes into.
- int Input = Idx / NumElems;
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NumElems;
-
- // Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- SVOp->getOperand(Input),
- DAG.getIntPtrConstant(Idx)));
- }
-
- // Construct the output using a BUILD_VECTOR.
- Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, SVOps);
- } else if (InputUsed[0] < 0) {
- // No input vectors were used! The result is undefined.
- Output[l] = DAG.getUNDEF(NVT);
- } else {
- SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
- (InputUsed[0] % 2) * NumLaneElems,
- DAG, dl);
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
- Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
- (InputUsed[1] % 2) * NumLaneElems, DAG, dl);
- // At least one input vector was used. Create a new shuffle vector.
- Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
- }
-
- Mask.clear();
- }
-
- // Concatenate the result back
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]);
-}
-
-/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
-/// 4 elements, and match them with several different shuffle types.
-static SDValue
-LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- MVT VT = SVOp->getSimpleValueType(0);
-
- assert(VT.is128BitVector() && "Unsupported vector size");
-
- std::pair<int, int> Locs[4];
- int Mask1[] = { -1, -1, -1, -1 };
- SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
-
- unsigned NumHi = 0;
- unsigned NumLo = 0;
- for (unsigned i = 0; i != 4; ++i) {
- int Idx = PermMask[i];
- if (Idx < 0) {
- Locs[i] = std::make_pair(-1, -1);
- } else {
- assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
- if (Idx < 4) {
- Locs[i] = std::make_pair(0, NumLo);
- Mask1[NumLo] = Idx;
- NumLo++;
- } else {
- Locs[i] = std::make_pair(1, NumHi);
- if (2+NumHi < 4)
- Mask1[2+NumHi] = Idx;
- NumHi++;
- }
- }
- }
-
- if (NumLo <= 2 && NumHi <= 2) {
-    // If no more than two elements come from either vector, this can be
-    // implemented with two shuffles. The first shuffle gathers the elements.
-    // The second shuffle, which takes the first shuffle as both of its
-    // vector operands, puts the elements into the right order.
- V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
-
- int Mask2[] = { -1, -1, -1, -1 };
-
- for (unsigned i = 0; i != 4; ++i)
- if (Locs[i].first != -1) {
- unsigned Idx = (i < 2) ? 0 : 4;
- Idx += Locs[i].first * 2 + Locs[i].second;
- Mask2[i] = Idx;
- }
-
- return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
- }
-
- if (NumLo == 3 || NumHi == 3) {
- // Otherwise, we must have three elements from one vector, call it X, and
- // one element from the other, call it Y. First, use a shufps to build an
- // intermediate vector with the one element from Y and the element from X
- // that will be in the same half in the final destination (the indexes don't
- // matter). Then, use a shufps to build the final vector, taking the half
- // containing the element from Y from the intermediate, and the other half
- // from X.
- if (NumHi == 3) {
- // Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, 4);
- std::swap(V1, V2);
- }
-
- // Find the element from V2.
- unsigned HiIndex;
- for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
- int Val = PermMask[HiIndex];
- if (Val < 0)
- continue;
- if (Val >= 4)
- break;
- }
-
- Mask1[0] = PermMask[HiIndex];
- Mask1[1] = -1;
- Mask1[2] = PermMask[HiIndex^1];
- Mask1[3] = -1;
- V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
-
- if (HiIndex >= 2) {
- Mask1[0] = PermMask[0];
- Mask1[1] = PermMask[1];
- Mask1[2] = HiIndex & 1 ? 6 : 4;
- Mask1[3] = HiIndex & 1 ? 4 : 6;
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
- }
-
- Mask1[0] = HiIndex & 1 ? 2 : 0;
- Mask1[1] = HiIndex & 1 ? 0 : 2;
- Mask1[2] = PermMask[2];
- Mask1[3] = PermMask[3];
- if (Mask1[2] >= 0)
- Mask1[2] += 4;
- if (Mask1[3] >= 0)
- Mask1[3] += 4;
- return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
- }
-
- // Break it into (shuffle shuffle_hi, shuffle_lo).
- int LoMask[] = { -1, -1, -1, -1 };
- int HiMask[] = { -1, -1, -1, -1 };
-
- int *MaskPtr = LoMask;
- unsigned MaskIdx = 0;
- unsigned LoIdx = 0;
- unsigned HiIdx = 2;
- for (unsigned i = 0; i != 4; ++i) {
- if (i == 2) {
- MaskPtr = HiMask;
- MaskIdx = 1;
- LoIdx = 0;
- HiIdx = 2;
- }
- int Idx = PermMask[i];
- if (Idx < 0) {
- Locs[i] = std::make_pair(-1, -1);
- } else if (Idx < 4) {
- Locs[i] = std::make_pair(MaskIdx, LoIdx);
- MaskPtr[LoIdx] = Idx;
- LoIdx++;
- } else {
- Locs[i] = std::make_pair(MaskIdx, HiIdx);
- MaskPtr[HiIdx] = Idx;
- HiIdx++;
- }
- }
-
- SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
- SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
- int MaskOps[] = { -1, -1, -1, -1 };
- for (unsigned i = 0; i != 4; ++i)
- if (Locs[i].first != -1)
- MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
- return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
-}
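
For the NumLo <= 2 && NumHi <= 2 case above, the two-shuffle plan is pure index arithmetic; a standalone sketch under that precondition (indices 0-3 address V1, 4-7 address V2, -1 is undef; the helper name is made up):

    #include <utility>

    // Split a 4-element mask where at most two lanes come from each input.
    // Mask1 gathers the V1 lanes into slots 0..1 and the V2 lanes into slots
    // 2..3; Mask2 then permutes the gathered vector, used as both shuffle
    // operands, into the requested order (so indices 0..3 suffice).
    static void decompose4(const int Mask[4], int Mask1[4], int Mask2[4]) {
      std::pair<int, int> Locs[4];
      unsigned NumLo = 0, NumHi = 0;
      for (unsigned i = 0; i != 4; ++i) {
        Mask1[i] = Mask2[i] = -1;
        Locs[i] = std::make_pair(-1, -1);
      }
      for (unsigned i = 0; i != 4; ++i) {
        int Idx = Mask[i];
        if (Idx < 0)
          continue;                          // Undef lane: leave both masks -1.
        if (Idx < 4) {                       // Lane comes from V1.
          Locs[i] = std::make_pair(0, (int)NumLo);
          Mask1[NumLo++] = Idx;
        } else {                             // Lane comes from V2.
          Locs[i] = std::make_pair(1, (int)NumHi);
          if (2 + NumHi < 4)                 // Holds whenever NumHi <= 2.
            Mask1[2 + NumHi] = Idx;
          NumHi++;
        }
      }
      for (unsigned i = 0; i != 4; ++i)
        if (Locs[i].first != -1)
          Mask2[i] = Locs[i].first * 2 + Locs[i].second;
    }

For example, mask <4, 1, 6, 3> yields Mask1 = <1, 3, 4, 6> and Mask2 = <2, 0, 3, 1>: the first shuffle gathers [V1[1], V1[3], V2[0], V2[2]], and the second permutes that vector into the requested order.
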
-
-static bool MayFoldVectorLoad(SDValue V) {
- while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- V = V.getOperand(0);
- if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
- V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
- // BUILD_VECTOR (load), undef
- V = V.getOperand(0);
-
- return MayFoldLoad(V);
-}
-
-static
-SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
-
-  // Canonicalize to v2f64.
- V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
- V1, DAG));
-}
-
-static
-SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG,
- bool HasSSE2) {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getSimpleValueType();
-
- assert(VT != MVT::v2i64 && "unsupported shuffle type");
-
- if (HasSSE2 && VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
-
-  // v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
- return DAG.getNode(ISD::BITCAST, dl, VT,
- getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
- DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
-}
-
-static
-SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getSimpleValueType();
-
- assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
- "unsupported shuffle type");
-
- if (V2.getOpcode() == ISD::UNDEF)
- V2 = V1;
-
- // v4i32 or v4f32
- return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
-}
-
-static
-SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getSimpleValueType();
- unsigned NumElems = VT.getVectorNumElements();
-
-  // Use MOVLPS and MOVLPD in case V1 or V2 is a load. During isel, the second
-  // operand of these instructions is memory-only, so check if there's a
-  // potential load folding here; otherwise use SHUFPS or MOVSD to match the
-  // same masks.
- bool CanFoldLoad = false;
-
- // Trivial case, when V2 comes from a load.
- if (MayFoldVectorLoad(V2))
- CanFoldLoad = true;
-
- // When V1 is a load, it can be folded later into a store in isel, example:
- // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
- // turns into:
- // (MOVLPSmr addr:$src1, VR128:$src2)
- // So, recognize this potential and also use MOVLPS or MOVLPD
- else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
- CanFoldLoad = true;
-
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- if (CanFoldLoad) {
- if (HasSSE2 && NumElems == 2)
- return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
-
- if (NumElems == 4)
- // If we don't care about the second element, proceed to use movss.
- if (SVOp->getMaskElt(1) != -1)
- return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
- }
-
-  // movl and movlp will both match v2i64, but v2i64 is never matched by
-  // movl earlier because we keep it strict to avoid messing with the movlp
-  // load folding logic (see the code above the getMOVLP call). Match it here
-  // instead; this is horrible, but it will stay like this until we move all
-  // shuffle matching to x86 specific nodes. Note that for the 1st condition
-  // all types are matched with movsd.
- if (HasSSE2) {
- // FIXME: isMOVLMask should be checked and matched before getMOVLP,
- // as to remove this logic from here, as much as possible
- if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
- return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
- return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
- }
-
- assert(VT != MVT::v4i32 && "unsupported shuffle type");
-
- // Invert the operand order and use SHUFPS to match it.
- return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
- getShuffleSHUFImmediate(SVOp), DAG);
-}
-
-static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
- SelectionDAG &DAG) {
- SDLoc dl(Load);
- MVT VT = Load->getSimpleValueType(0);
- MVT EVT = VT.getVectorElementType();
- SDValue Addr = Load->getOperand(1);
- SDValue NewAddr = DAG.getNode(
- ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
- DAG.getConstant(Index * EVT.getStoreSize(), Addr.getSimpleValueType()));
-
- SDValue NewLoad =
- DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
- DAG.getMachineFunction().getMachineMemOperand(
- Load->getMemOperand(), 0, EVT.getStoreSize()));
- return NewLoad;
-}
-
-// It is only safe to call this function if isINSERTPSMask is true for
-// this shufflevector mask.
-static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
- SelectionDAG &DAG) {
- // Generate an insertps instruction when inserting an f32 from memory onto a
- // v4f32 or when copying a member from one v4f32 to another.
- // We also use it for transferring i32 from one register to another,
- // since it simply copies the same bits.
- // If we're transferring an i32 from memory to a specific element in a
- // register, we output a generic DAG that will match the PINSRD
- // instruction.
- MVT VT = SVOp->getSimpleValueType(0);
- MVT EVT = VT.getVectorElementType();
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- auto Mask = SVOp->getMask();
- assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
- "unsupported vector type for insertps/pinsrd");
-
- auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
- auto FromV2Predicate = [](const int &i) { return i >= 4; };
- int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
-
- SDValue From;
- SDValue To;
- unsigned DestIndex;
- if (FromV1 == 1) {
- From = V1;
- To = V2;
- DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
- Mask.begin();
-
- // If we have 1 element from each vector, we have to check if we're
- // changing V1's element's place. If so, we're done. Otherwise, we
- // should assume we're changing V2's element's place and behave
- // accordingly.
- int FromV2 = std::count_if(Mask.begin(), Mask.end(), FromV2Predicate);
- assert(DestIndex <= INT32_MAX && "truncated destination index");
- if (FromV1 == FromV2 &&
- static_cast<int>(DestIndex) == Mask[DestIndex] % 4) {
- From = V2;
- To = V1;
- DestIndex =
- std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
- }
- } else {
- assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
- "More than one element from V1 and from V2, or no elements from one "
- "of the vectors. This case should not have returned true from "
- "isINSERTPSMask");
- From = V2;
- To = V1;
- DestIndex =
- std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
- }
-
- // Get an index into the source vector in the range [0,4) (the mask is
- // in the range [0,8) because it can address V1 and V2)
- unsigned SrcIndex = Mask[DestIndex] % 4;
- if (MayFoldLoad(From)) {
- // Trivial case, when From comes from a load and is only used by the
- // shuffle. Make it use insertps from the vector that we need from that
- // load.
- SDValue NewLoad =
- NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG);
- if (!NewLoad.getNode())
- return SDValue();
-
- if (EVT == MVT::f32) {
- // Create this as a scalar to vector to match the instruction pattern.
- SDValue LoadScalarToVector =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, NewLoad);
- SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4);
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, LoadScalarToVector,
- InsertpsMask);
- } else { // EVT == MVT::i32
- // If we're getting an i32 from memory, use an INSERT_VECTOR_ELT
- // instruction, to match the PINSRD instruction, which loads an i32 to a
- // certain vector element.
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, To, NewLoad,
- DAG.getConstant(DestIndex, MVT::i32));
- }
- }
-
- // Vector-element-to-vector
- SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6);
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask);
-}
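
The insertps immediate built above packs three fields; a tiny standalone sketch of that encoding (assuming the field layout described in the comments, which matches the SSE4.1 manual):

    #include <cstdint>

    // insertps immediate: bits [7:6] pick the source lane (count_s, meaningful
    // for register sources), bits [5:4] pick the destination lane (count_d),
    // bits [3:0] zero destination lanes (zmask). The memory form above passes
    // only DestIndex << 4; the register form adds SrcIndex << 6.
    static uint8_t insertpsImm(unsigned SrcIndex, unsigned DestIndex,
                               unsigned ZeroMask = 0) {
      return (uint8_t)((SrcIndex << 6) | (DestIndex << 4) | (ZeroMask & 0xF));
    }
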
-
-// Reduce a vector shuffle to zext.
-static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- // PMOVZX is only available from SSE41.
- if (!Subtarget->hasSSE41())
- return SDValue();
-
- MVT VT = Op.getSimpleValueType();
-
-  // Only AVX2 supports 256-bit vector integer extension.
- if (!Subtarget->hasInt256() && VT.is256BitVector())
- return SDValue();
-
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDLoc DL(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- unsigned NumElems = VT.getVectorNumElements();
-
-  // Extension is a unary operation, and the element type of the source vector
-  // must be smaller than i64.
- if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
- VT.getVectorElementType() == MVT::i64)
- return SDValue();
-
- // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
- unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
- while ((1U << Shift) < NumElems) {
- if (SVOp->getMaskElt(1U << Shift) == 1)
- break;
- Shift += 1;
- // The maximal ratio is 8, i.e. from i8 to i64.
- if (Shift > 3)
- return SDValue();
- }
-
- // Check the shuffle mask.
- unsigned Mask = (1U << Shift) - 1;
- for (unsigned i = 0; i != NumElems; ++i) {
- int EltIdx = SVOp->getMaskElt(i);
- if ((i & Mask) != 0 && EltIdx != -1)
- return SDValue();
- if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
- return SDValue();
- }
-
- unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- MVT NeVT = MVT::getIntegerVT(NBits);
- MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
-
- if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
- return SDValue();
-
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
-}
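
The ratio detection and mask check reduce to bit arithmetic on the indices; a standalone sketch with the same -1 undef convention (the helper name is made up):

    #include <vector>

    // Detect whether Mask is a zero-extension pattern: element i must be undef
    // unless i is a multiple of 2^Shift, in which case it must read source
    // element i >> Shift. Returns the shift (1..3, i.e. ratio 2..8) or 0.
    static unsigned detectZExtShift(const std::vector<int> &Mask) {
      unsigned NumElems = (unsigned)Mask.size();
      unsigned Shift = 1;                     // Start from ratio 2.
      while ((1U << Shift) < NumElems) {
        if (Mask[1U << Shift] == 1)           // Source element 1 lands here.
          break;
        if (++Shift > 3)                      // Max ratio is 8 (i8 -> i64).
          return 0;
      }
      unsigned Low = (1U << Shift) - 1;
      for (unsigned i = 0; i != NumElems; ++i) {
        if ((i & Low) != 0 && Mask[i] != -1)
          return 0;                           // Gap lanes must be undef.
        if ((i & Low) == 0 && Mask[i] != (int)(i >> Shift))
          return 0;                           // Data lanes must be in order.
      }
      return Shift;
    }

For instance, <0, -1, 1, -1> over four elements returns 1, i.e. a ratio-2 extension.
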
-
-static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- MVT VT = Op.getSimpleValueType();
- SDLoc dl(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
-
- if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget, DAG, dl);
-
- // Handle splat operations
- if (SVOp->isSplat()) {
- // Use vbroadcast whenever the splat comes from a foldable load
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
- if (Broadcast.getNode())
- return Broadcast;
- }
-
- // Check integer expanding shuffles.
- SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
- if (NewOp.getNode())
- return NewOp;
-
- // If the shuffle can be profitably rewritten as a narrower shuffle, then
- // do it!
- if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
- VT == MVT::v32i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
- if (NewOp.getNode())
- return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
- } else if (VT.is128BitVector() && Subtarget->hasSSE2()) {
- // FIXME: Figure out a cleaner way to do this.
- if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
- if (NewOp.getNode()) {
- MVT NewVT = NewOp.getSimpleValueType();
- if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
- NewVT, true, false))
- return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
- dl);
- }
- } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
- if (NewOp.getNode()) {
- MVT NewVT = NewOp.getSimpleValueType();
- if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
- return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
- dl);
- }
- }
- }
- return SDValue();
-}
-
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getSimpleValueType();
- SDLoc dl(Op);
- unsigned NumElems = VT.getVectorNumElements();
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
- bool V1IsSplat = false;
- bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2();
- bool HasFp256 = Subtarget->hasFp256();
- bool HasInt256 = Subtarget->hasInt256();
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
-
- // Check if we should use the experimental vector shuffle lowering. If so,
- // delegate completely to that code path.
- if (ExperimentalVectorShuffleLowering)
- return lowerVectorShuffle(Op, Subtarget, DAG);
-
- assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
-
- if (V1IsUndef && V2IsUndef)
- return DAG.getUNDEF(VT);
-
-  // When we create a shuffle node we put the UNDEF node as the second operand,
- // but in some cases the first operand may be transformed to UNDEF.
- // In this case we should just commute the node.
- if (V1IsUndef)
- return DAG.getCommutedVectorShuffle(*SVOp);
-
- // Vector shuffle lowering takes 3 steps:
- //
- // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
- // narrowing and commutation of operands should be handled.
- // 2) Matching of shuffles with known shuffle masks to x86 target specific
- // shuffle nodes.
- // 3) Rewriting of unmatched masks into new generic shuffle operations,
- // so the shuffle can be broken into other shuffles and the legalizer can
- // try the lowering again.
- //
- // The general idea is that no vector_shuffle operation should be left to
- // be matched during isel, all of them must be converted to a target specific
- // node here.
-
- // Normalize the input vectors. Here splats, zeroed vectors, profitable
-  // narrowing and commutation of operands should be handled. The actual code
-  // doesn't include all of those yet; work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
- if (NewOp.getNode())
- return NewOp;
-
- SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
-
- // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
- // unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
-
- if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
- V2IsUndef && MayFoldVectorLoad(V1))
- return getMOVDDup(Op, dl, V1, DAG);
-
- if (isMOVHLPS_v_undef_Mask(M, VT))
- return getMOVHighToLow(Op, dl, DAG);
-
-  // Used to match splats
- if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
- (VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
-
- if (isPSHUFDMask(M, VT)) {
-    // The actual implementation will match the mask in the if above, and then
-    // during isel it can match several different instructions, not only pshufd
-    // as its name says; sad but true. Emulate the behavior for now...
- if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
- return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
-
- unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
-
- if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
- return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
-
- if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1, TargetMask,
- DAG);
-
- return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
- TargetMask, DAG);
- }
-
- if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
- getShufflePALIGNRImmediate(SVOp),
- DAG);
-
- if (isVALIGNMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VALIGN, dl, VT, V1, V2,
- getShuffleVALIGNImmediate(SVOp),
- DAG);
-
- // Check if this can be converted into a logical shift.
- bool isLeft = false;
- unsigned ShAmt = 0;
- SDValue ShVal;
- bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
- if (isShift && ShVal.hasOneUse()) {
- // If the shifted value has multiple uses, it may be cheaper to use
- // v_set0 + movlhps or movhlps, etc.
- MVT EltVT = VT.getVectorElementType();
- ShAmt *= EltVT.getSizeInBits();
- return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
- }
-
- if (isMOVLMask(M, VT)) {
- if (ISD::isBuildVectorAllZeros(V1.getNode()))
- return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMOVLPMask(M, VT)) {
- if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
-
- if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
- }
- }
-
- // FIXME: fold these into legal mask.
- if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
-
- if (isMOVHLPSMask(M, VT))
- return getMOVHighToLow(Op, dl, DAG);
-
- if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
-
- if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
-
- if (isMOVLPMask(M, VT))
- return getMOVLP(Op, dl, DAG, HasSSE2);
-
- if (ShouldXformToMOVHLPS(M, VT) ||
- ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
- return DAG.getCommutedVectorShuffle(*SVOp);
-
- if (isShift) {
- // No better options. Use a vshldq / vsrldq.
- MVT EltVT = VT.getVectorElementType();
- ShAmt *= EltVT.getSizeInBits();
- return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
- }
-
- bool Commuted = false;
- // FIXME: This should also accept a bitcast of a splat? Be careful, not
- // 1,1,1,1 -> v8i16 though.
- BitVector UndefElements;
- if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode()))
- if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
- V1IsSplat = true;
- if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode()))
- if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
- V2IsSplat = true;
-
- // Canonicalize the splat or undef, if present, to be on the RHS.
- if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
- CommuteVectorShuffleMask(M, NumElems);
- std::swap(V1, V2);
- std::swap(V1IsSplat, V2IsSplat);
- Commuted = true;
- }
-
- if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
- // Shuffling low element of v1 into undef, just return v1.
- if (V2IsUndef)
- return V1;
- // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
- // the instruction selector will not match, so get a canonical MOVL with
- // swapped operands to undo the commute.
- return getMOVL(DAG, dl, VT, V2, V1);
- }
-
- if (isUNPCKLMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
-
- if (isUNPCKHMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
-
- if (V2IsSplat) {
-    // Normalize the mask so all entries that point to V2 point to its first
-    // element, then try to match unpck{h|l} again. If they match, return a
-    // new vector_shuffle with the corrected mask.
- SmallVector<int, 8> NewMask(M.begin(), M.end());
- NormalizeMask(NewMask, NumElems);
- if (isUNPCKLMask(NewMask, VT, HasInt256, true))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(NewMask, VT, HasInt256, true))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
- }
-
- if (Commuted) {
-    // Commute it back and try unpck* again.
- // FIXME: this seems wrong.
- CommuteVectorShuffleMask(M, NumElems);
- std::swap(V1, V2);
- std::swap(V1IsSplat, V2IsSplat);
-
- if (isUNPCKLMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
-
- if (isUNPCKHMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
- }
-
- // Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
- return DAG.getCommutedVectorShuffle(*SVOp);
-
- // The checks below are all present in isShuffleMaskLegal, but they are
- // inlined here right now to enable us to directly emit target specific
- // nodes, and remove one by one until they don't return Op anymore.
-
- if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
- SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- }
-
- if (isPSHUFHWMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
- getShufflePSHUFHWImmediate(SVOp),
- DAG);
-
- if (isPSHUFLWMask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
- getShufflePSHUFLWImmediate(SVOp),
- DAG);
-
- unsigned MaskValue;
- if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
- &MaskValue))
- return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
-
- if (isSHUFPMask(M, VT))
- return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
- getShuffleSHUFImmediate(SVOp), DAG);
-
- if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
-
- //===--------------------------------------------------------------------===//
- // Generate target specific nodes for 128 or 256-bit shuffles only
- // supported in the AVX instruction set.
- //
-
- // Handle VMOVDDUPY permutations
- if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
- return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
-
- // Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT)) {
- if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
- return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
- getShuffleSHUFImmediate(SVOp), DAG);
- return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1,
- getShuffleSHUFImmediate(SVOp), DAG);
- }
-
- unsigned Idx;
- if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
- return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
- Idx*(NumElems/2), DAG, dl);
-
- // Handle VPERM2F128/VPERM2I128 permutations
- if (isVPERM2X128Mask(M, VT, HasFp256))
- return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
- V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
-
- if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
- return getINSERTPS(SVOp, dl, DAG);
-
- unsigned Imm8;
- if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
- return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
-
- if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
- VT.is512BitVector()) {
- MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
- MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
- SmallVector<SDValue, 16> permclMask;
- for (unsigned i = 0; i != NumElems; ++i) {
- permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
- }
-
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, permclMask);
- if (V2IsUndef)
- // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
- return DAG.getNode(X86ISD::VPERMV, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
- return DAG.getNode(X86ISD::VPERMV3, dl, VT, V1,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V2);
- }
-
- //===--------------------------------------------------------------------===//
- // Since no target specific shuffle was selected for this generic one,
- // lower it into other known shuffles. FIXME: this isn't true yet, but
- // this is the plan.
- //
-
- // Handle v8i16 specifically since SSE can do byte extraction and insertion.
- if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
- if (NewOp.getNode())
- return NewOp;
- }
-
- if (VT == MVT::v16i16 && Subtarget->hasInt256()) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv16i16(Op, DAG);
- if (NewOp.getNode())
- return NewOp;
- }
-
- if (VT == MVT::v16i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
- if (NewOp.getNode())
- return NewOp;
- }
-
- if (VT == MVT::v32i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG);
- if (NewOp.getNode())
- return NewOp;
- }
-
- // Handle all 128-bit wide vectors with 4 elements, and match them with
- // several different shuffle types.
- if (NumElems == 4 && VT.is128BitVector())
- return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
-
- // Handle general 256-bit shuffles
- if (VT.is256BitVector())
- return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
-
- return SDValue();
-}
-
// This function assumes its argument is a BUILD_VECTOR of constants or
// undef SDNodes, i.e., ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
// true.
@@ -12674,48 +10421,29 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
return true;
}
-/// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
-/// instruction.
-static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+/// \brief Try to lower a VSELECT instruction to a vector shuffle.
+static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- MVT EltVT = VT.getVectorElementType();
- unsigned NumElems = VT.getVectorNumElements();
-
- // There is no blend with immediate in AVX-512.
- if (VT.is512BitVector())
- return SDValue();
-
- if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
- return SDValue();
- if (!Subtarget->hasInt256() && VT == MVT::v16i16)
- return SDValue();
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
+ auto *CondBV = cast<BuildVectorSDNode>(Cond);
- // Check the mask for BLEND and build the value.
- unsigned MaskValue = 0;
- if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
- return SDValue();
-
- // Convert i32 vectors to floating point if it is not AVX2.
- // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
- MVT BlendVT = VT;
- if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
- LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS);
- RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS);
+  // Only non-legal VSELECTs reach this lowering; convert those into generic
+ // shuffles and re-use the shuffle lowering path for blends.
+ SmallVector<int, 32> Mask;
+ for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
+ SDValue CondElt = CondBV->getOperand(i);
+ Mask.push_back(
+ isa<ConstantSDNode>(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1);
}
-
- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
- DAG.getConstant(MaskValue, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+ return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
}
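
The mask construction in lowerVSELECTtoVectorShuffle is a direct per-lane translation; a standalone sketch of the same rule (a true lane selects LHS element i, a false lane selects RHS element Size + i, and a non-constant lane becomes undef; names are illustrative):

    #include <vector>

    // Lane i of the condition: true takes LHS element i, false takes RHS
    // element i (encoded as Size + i in a two-input shuffle mask), unknown
    // (non-constant) becomes undef (-1).
    enum CondLane { CondFalse = 0, CondTrue = 1, CondUnknown = 2 };

    static std::vector<int> vselectToShuffleMask(const std::vector<CondLane> &C) {
      int Size = (int)C.size();
      std::vector<int> Mask;
      Mask.reserve(Size);
      for (int i = 0; i < Size; ++i) {
        if (C[i] == CondUnknown)
          Mask.push_back(-1);                // Non-constant lane: undef.
        else
          Mask.push_back(i + (C[i] == CondFalse ? Size : 0));
      }
      return Mask;
    }
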
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -12726,28 +10454,40 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
return SDValue();
- SDValue BlendOp = lowerVSELECTtoBLENDI(Op, Subtarget, DAG);
- if (BlendOp.getNode())
+ // Try to lower this to a blend-style vector shuffle. This can handle all
+ // constant condition cases.
+ if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
- // Some types for vselect were previously set to Expand, not Legal or
- // Custom. Return an empty SDValue so we fall-through to Expand, after
- // the Custom lowering phase.
- MVT VT = Op.getSimpleValueType();
- switch (VT.SimpleTy) {
+ // Variable blends are only legal from SSE4.1 onward.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ // Only some types will be legal on some subtargets. If we can emit a legal
+  // VSELECT-matching blend, return Op; but if we need to expand, return
+ // a null value.
+ switch (Op.getSimpleValueType().SimpleTy) {
default:
- break;
+ // Most of the vector types have blends past SSE4.1.
+ return Op;
+
+ case MVT::v32i8:
+ // The byte blends for AVX vectors were introduced only in AVX2.
+ if (Subtarget->hasAVX2())
+ return Op;
+
+ return SDValue();
+
case MVT::v8i16:
case MVT::v16i16:
+ // AVX-512 BWI and VLX features support VSELECT with i16 elements.
if (Subtarget->hasBWI() && Subtarget->hasVLX())
- break;
+ return Op;
+
+ // FIXME: We should custom lower this by fixing the condition and using i8
+ // blends.
return SDValue();
}
-
- // We couldn't create a "Blend with immediate" node.
- // This node should still be legal, but we'll have to emit a blendv*
- // instruction.
- return Op;
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
@@ -12823,6 +10563,8 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
MVT EltVT = Op.getSimpleValueType();
assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
+ assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) &&
+ "Unexpected vector type in ExtractBitFromMaskVector");
// Variable index can't be handled in mask registers;
// extend vector to VR512.
@@ -12836,13 +10578,15 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
const TargetRegisterClass* rc = getRegClassFor(VecVT);
+ if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
+ rc = getRegClassFor(MVT::v16i1);
unsigned MaxSift = rc->getSize()*8 - 1;
Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
- DAG.getConstant(MaxSift - IdxVal, MVT::i8));
+ DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
- DAG.getConstant(MaxSift, MVT::i8));
+ DAG.getConstant(MaxSift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
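
Viewed on a scalar mask, the VSHLI/VSRLI pair above isolates a single bit; a standalone sketch with a 16-bit mask width (the rc->getSize()*8 - 1 quantity above):

    #include <cstdint>

    // Extract bit IdxVal from a 16-lane i1 mask: shift it up to the top bit,
    // then shift it back down to bit 0, clearing every other lane on the way.
    static uint16_t extractMaskBit(uint16_t Mask, unsigned IdxVal) {
      const unsigned MaxShift = 16 - 1;
      Mask = (uint16_t)(Mask << (MaxShift - IdxVal));
      return (uint16_t)(Mask >> MaxShift);    // 0 or 1.
    }
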
SDValue
@@ -12869,10 +10613,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
getZeroVector(MaskVT, Subtarget, DAG, dl),
- Idx, DAG.getConstant(0, getPointerTy()));
+ Idx, DAG.getConstant(0, dl, getPointerTy()));
SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
- Perm, DAG.getConstant(0, getPointerTy()));
+ Perm, DAG.getConstant(0, dl, getPointerTy()));
}
return SDValue();
}
@@ -12892,7 +10636,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// IdxVal -= NumElems/2;
IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
- DAG.getConstant(IdxVal, MVT::i32));
+ DAG.getConstant(IdxVal, dl, MVT::i32));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
@@ -12934,7 +10678,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() == 64) {
@@ -12953,7 +10697,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
return SDValue();
@@ -12982,15 +10726,11 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
+ if (IdxVal)
+ EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
- DAG.getConstant(IdxVal, MVT::i8));
- const TargetRegisterClass* rc = getRegClassFor(VecVT);
- unsigned MaxSift = rc->getSize()*8 - 1;
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
- DAG.getConstant(MaxSift, MVT::i8));
- EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
- DAG.getConstant(MaxSift - IdxVal, MVT::i8));
+ return EltInVec;
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
@@ -13014,17 +10754,31 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
- // Get the desired 128-bit vector half.
+ // With a 256-bit vector, we can insert into the zero element efficiently
+ // using a blend if we have AVX or AVX2 and the right data type.
+ if (VT.is256BitVector() && IdxVal == 0) {
+ // TODO: It is worthwhile to cast integer to floating point and back
+ // and incur a domain crossing penalty if that's what we'll end up
+ // doing anyway after extracting to a 128-bit vector.
+ if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+ (Subtarget->hasAVX2() && EltVT == MVT::i32)) {
+ SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+ N2 = DAG.getIntPtrConstant(1, dl);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
+ }
+ }
+
+ // Get the desired 128-bit vector chunk.
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
- // Insert the element into the desired half.
+ // Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
- DAG.getConstant(IdxIn128, MVT::i32));
+ DAG.getConstant(IdxIn128, dl, MVT::i32));
- // Insert the changed part back to the 256-bit vector
+ // Insert the changed part back into the bigger vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
@@ -13044,22 +10798,35 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal);
+ N2 = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
if (EltVT == MVT::f32) {
- // Bits [7:6] of the constant are the source select. This will always be
- // zero here. The DAG Combiner may combine an extract_elt index into
- // these
- // bits. For example (insert (extract, 3), 2) could be matched by
- // putting
- // the '3' into bits [7:6] of X86ISD::INSERTPS.
- // Bits [5:4] of the constant are the destination select. This is the
- // value of the incoming immediate.
- // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into
+ // these bits. For example (insert (extract, 3), 2) could be matched by
+ // putting the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
- N2 = DAG.getIntPtrConstant(IdxVal << 4);
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool MinSize = F->hasFnAttribute(Attribute::MinSize);
+ if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
+ // If this is an insertion of 32-bits into the low 32-bits of
+ // a vector, we prefer to generate a blend with immediate rather
+ // than an insertps. Blends are simpler operations in hardware and so
+ // will always have equal or better performance than insertps.
+ // But if optimizing for size and there's a load folding opportunity,
+ // generate insertps because blendps does not have a 32-bit memory
+ // operand form.
+ N2 = DAG.getIntPtrConstant(1, dl);
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
+ }
+ N2 = DAG.getIntPtrConstant(IdxVal << 4, dl);
// Create this as a scalar to vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
@@ -13080,7 +10847,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal);
+ N2 = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
@@ -13145,25 +10912,76 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasFp256()) {
- SDLoc dl(Op.getNode());
- SDValue Vec = Op.getNode()->getOperand(0);
- SDValue SubVec = Op.getNode()->getOperand(1);
- SDValue Idx = Op.getNode()->getOperand(2);
-
- if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
- Op.getNode()->getSimpleValueType(0).is512BitVector()) &&
- SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
- isa<ConstantSDNode>(Idx)) {
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue SubVec = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ MVT OpVT = Op.getSimpleValueType();
+ MVT SubVecVT = SubVec.getSimpleValueType();
+
+ // Fold two 16-byte subvector loads into one 32-byte load:
+ // (insert_subvector (insert_subvector undef, (load addr), 0),
+ // (load addr + 16), Elts/2)
+ // --> load32 addr
+ if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
+ Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
+ !Subtarget->isUnalignedMem32Slow()) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
+ if (Idx2->getZExtValue() == 0) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ SDValue LD = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false);
+ if (LD.getNode())
+ return LD;
+ }
}
+ }
- if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
- SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
- isa<ConstantSDNode>(Idx)) {
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
+ if ((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
+ SubVecVT.is128BitVector())
+ return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
+
+ if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
+ return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
+
+ if (OpVT.getVectorElementType() == MVT::i1) {
+ if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal
+ return Op;
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(OpVT);
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
+
+ if (IdxVal == OpVT.getVectorNumElements() / 2) {
+ // Zero upper bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
+ }
+ if (IdxVal == 0) {
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ // Zero upper bits of the Vec2
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits);
+ // Zero lower bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ // Merge them together
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
}
}
return SDValue();
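
Both i1 cases above are shift/or manipulations of a bitmask; a standalone sketch with a 16-lane mask and an 8-lane subvector (IdxVal of 0 or NumElems/2, as in the code):

    #include <cstdint>

    // Insert an 8-bit subvector into a 16-lane i1 mask at lane 0 or lane 8,
    // mirroring the VSHLI/VSRLI/OR sequence above (ShiftBits = NumElems/2).
    static uint16_t insertMaskSubvec(uint16_t Vec, uint8_t Sub, unsigned IdxVal) {
      const unsigned ShiftBits = 8;
      if (IdxVal == ShiftBits) {
        // Zero the upper half of Vec, shift Sub into the upper half, merge.
        uint16_t Lo = (uint16_t)((uint16_t)(Vec << ShiftBits) >> ShiftBits);
        return (uint16_t)(Lo | ((uint16_t)Sub << ShiftBits));
      }
      // IdxVal == 0: Sub already has clean upper bits; zero the lower half
      // of Vec, then merge them together.
      uint16_t Hi = (uint16_t)((uint16_t)(Vec >> ShiftBits) << ShiftBits);
      return (uint16_t)(Hi | Sub);
    }
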
@@ -13356,7 +11174,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
- DAG.getConstant(Offset, getPointerTy()));
+ DAG.getConstant(Offset, dl, getPointerTy()));
return Result;
}
@@ -13471,7 +11289,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
is64Bit ? 257 : 256));
SDValue ThreadPointer =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
@@ -13523,7 +11341,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
-
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget->is64Bit())
@@ -13613,30 +11430,36 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue TlsArray =
Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
+ ? DAG.getIntPtrConstant(0x58, dl)
: (Subtarget->isTargetWindowsGNU()
- ? DAG.getIntPtrConstant(0x2C)
+ ? DAG.getIntPtrConstant(0x2C, dl)
: DAG.getExternalSymbol("_tls_array", getPointerTy()));
SDValue ThreadPointer =
DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr), false, false, false, 0);
- // Load the _tls_index variable
- SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
- if (Subtarget->is64Bit())
- IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
- IDX, MachinePointerInfo(), MVT::i32,
- false, false, false, 0);
- else
- IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
- false, false, false, 0);
+ SDValue res;
+ if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
+ res = ThreadPointer;
+ } else {
+ // Load the _tls_index variable
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ if (Subtarget->is64Bit())
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, IDX,
+ MachinePointerInfo(), MVT::i32, false, false,
+ false, 0);
+ else
+ IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
+ false, false, false, 0);
- SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
- getPointerTy());
- IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl,
+ getPointerTy());
+ IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+
+ res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ }
- SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
false, false, false, 0);
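
The address computation in this Windows TLS path is: load ThreadLocalStoragePointer from the TEB (offset 0x58 on x64, 0x2C on x86), index it by _tls_index scaled by the pointer size, and load the module's TLS base. A standalone sketch of just that arithmetic for x64; the offsets come from the code above, while the raw-pointer walk is purely illustrative:

    #include <cstdint>

    // Compute the address of a module's TLS block on Windows x64:
    //   ThreadPointer = *(TEB + 0x58)             // ThreadLocalStoragePointer
    //   TlsBase       = ThreadPointer[_tls_index] // *(TP + idx * ptrsize)
    // The SHL by Log2(pointer size) above is the scaling step here.
    static uintptr_t tlsBlockAddress(uintptr_t Teb, uint32_t TlsIndex) {
      uintptr_t ThreadPointer = *(uintptr_t *)(Teb + 0x58);
      uintptr_t Slot = ThreadPointer + ((uintptr_t)TlsIndex << 3); // log2(8)
      return *(uintptr_t *)Slot;
    }
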
@@ -13669,10 +11492,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
// generic ISD nodes haven't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
- DAG.getConstant(VTBits - 1, MVT::i8));
+ DAG.getConstant(VTBits - 1, dl, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i8))
- : DAG.getConstant(0, VT);
+ DAG.getConstant(VTBits - 1, dl, MVT::i8))
+ : DAG.getConstant(0, dl, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
@@ -13687,12 +11510,12 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
// rely on the results of shld/shrd. Insert a test and select the appropriate
// values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
- DAG.getConstant(VTBits, MVT::i8));
+ DAG.getConstant(VTBits, dl, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- AndNode, DAG.getConstant(0, MVT::i8));
+ AndNode, DAG.getConstant(0, dl, MVT::i8));
SDValue Hi, Lo;
- SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
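
The SHL_PARTS lowering built here computes a double-width shift from two native-width parts plus a large-shift select; a standalone 128-bit shift-left version in plain C++ (the AND and the ShAmt & VTBits test mirror the nodes above):

    #include <cstdint>

    // 128-bit logical shift left from 64-bit parts, mirroring the shld-based
    // lowering: for amt < 64 the high part is (hi << amt) with the carried-in
    // low bits, and the low part is lo << amt; for amt >= 64 the shifted low
    // part moves to the high half and the low half becomes 0.
    static void shl128(uint64_t Lo, uint64_t Hi, unsigned ShAmt,
                       uint64_t &OutLo, uint64_t &OutHi) {
      const unsigned VTBits = 64;
      unsigned Safe = ShAmt & (VTBits - 1);          // The AND inserted above.
      uint64_t Tmp2 = (Hi << Safe) |
                      (Safe ? (Lo >> (VTBits - Safe)) : 0); // shld hi, lo, amt
      uint64_t Tmp3 = Lo << Safe;                           // shl lo, amt
      if (ShAmt & VTBits) {                          // The CMP/select above.
        OutHi = Tmp3;                                // Large shift: hi = lo << amt
        OutLo = 0;                                   //              lo = 0
      } else {
        OutHi = Tmp2;
        OutLo = Tmp3;
      }
    }
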
@@ -13871,7 +11694,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
@@ -13879,7 +11702,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// FP constant to bias correct the final result.
- SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
// Load the 32-bit value into an XMM register.
@@ -13891,7 +11714,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
// Or the load with the bias.
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
@@ -13903,7 +11726,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@@ -13913,7 +11736,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
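
The i32 path rests on the classic exponent-bit trick: OR the 32-bit value into the mantissa of 2^52 (the 0x4330000000000000 Bias above) and subtract 2^52. A standalone scalar sketch:

    #include <cstdint>
    #include <cstring>

    // Convert uint32_t to double without an integer-to-fp instruction:
    // 0x4330000000000000 is 2^52; placing v in the low mantissa bits yields
    // the double 2^52 + v exactly, so subtracting the bias recovers (double)v.
    static double u32ToDouble(uint32_t V) {
      uint64_t Bits = 0x4330000000000000ULL | V;
      double D;
      std::memcpy(&D, &Bits, sizeof(D));
      return D - 4503599627370496.0;          // 2^52
    }
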
@@ -13958,20 +11781,20 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// -- v >> 16
// Create the splat vector for 0x4b000000.
- SDValue CstLow = DAG.getConstant(0x4b000000, MVT::i32);
+ SDValue CstLow = DAG.getConstant(0x4b000000, DL, MVT::i32);
SDValue CstLowArray[] = {CstLow, CstLow, CstLow, CstLow,
CstLow, CstLow, CstLow, CstLow};
SDValue VecCstLow = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
makeArrayRef(&CstLowArray[0], NumElts));
// Create the splat vector for 0x53000000.
- SDValue CstHigh = DAG.getConstant(0x53000000, MVT::i32);
+ SDValue CstHigh = DAG.getConstant(0x53000000, DL, MVT::i32);
SDValue CstHighArray[] = {CstHigh, CstHigh, CstHigh, CstHigh,
CstHigh, CstHigh, CstHigh, CstHigh};
SDValue VecCstHigh = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
makeArrayRef(&CstHighArray[0], NumElts));
// Create the right shift.
- SDValue CstShift = DAG.getConstant(16, MVT::i32);
+ SDValue CstShift = DAG.getConstant(16, DL, MVT::i32);
SDValue CstShiftArray[] = {CstShift, CstShift, CstShift, CstShift,
CstShift, CstShift, CstShift, CstShift};
SDValue VecCstShift = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
@@ -13988,7 +11811,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
- VecCstLowBitcast, DAG.getConstant(0xaa, MVT::i32));
+ VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast =
@@ -13998,9 +11821,9 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
- VecCstHighBitcast, DAG.getConstant(0xaa, MVT::i32));
+ VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
} else {
- SDValue CstMask = DAG.getConstant(0xffff, MVT::i32);
+ SDValue CstMask = DAG.getConstant(0xffff, DL, MVT::i32);
SDValue VecCstMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, CstMask,
CstMask, CstMask, CstMask);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@@ -14013,7 +11836,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
SDValue CstFAdd = DAG.getConstantFP(
- APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), MVT::f32);
+ APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), DL, MVT::f32);
SDValue CstFAddArray[] = {CstFAdd, CstFAdd, CstFAdd, CstFAdd,
CstFAdd, CstFAdd, CstFAdd, CstFAdd};
SDValue VecCstFAdd = DAG.getNode(ISD::BUILD_VECTOR, DL, VecFloatVT,
@@ -14048,6 +11871,11 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
case MVT::v4i32:
case MVT::v8i32:
return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
+ case MVT::v16i8:
+ case MVT::v16i16:
+ if (Subtarget->hasAVX512())
+ return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
llvm_unreachable(nullptr);
}
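// [Editor's sketch, not part of the patch] The new v16i8/v16i16 cases are
// a widen-then-convert; in scalar terms each lane becomes
//   out[i] = (float)(uint32_t)in[i];   // zero-extend to i32, then uitofp
// so under AVX-512 one conversion of the v16i32 intermediate finishes the job.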
@@ -14078,13 +11906,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
- SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue WordOff = DAG.getConstant(4, dl, getPointerTy());
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
- SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo(),
false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
@@ -14116,8 +11944,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(dl,
getSetCCResultType(*DAG.getContext(), MVT::i64),
- Op.getOperand(0), DAG.getConstant(0, MVT::i64),
- ISD::SETLT);
+ Op.getOperand(0),
+ DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
@@ -14125,8 +11953,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
getPointerTy());
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
- SDValue Zero = DAG.getIntPtrConstant(0);
- SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl);
+ SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
@@ -14138,7 +11966,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MVT::f32, false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
- return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
+ DAG.getIntPtrConstant(0, dl));
}
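// [Editor's worked example; assumes the "FF" pool word above encodes 2^64
// as an f32] FILD reads the u64 bit pattern as signed, so an input with
// the top bit set comes back low by exactly 2^64; selecting the fudge by
// sign and adding at f80 precision restores the unsigned value:
//   u = 2^63  ->  FILD gives -2^63  ->  -2^63 + 2^64 == 2^63
//   u = 5     ->  sign clear, fudge is 0.0, value already correct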
std::pair<SDValue,SDValue>
@@ -14241,6 +12070,9 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
+ if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In);
+
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
@@ -14278,34 +12110,29 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
- EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Now we have only mask extension
assert(InVT.getVectorElementType() == MVT::i1);
- SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
- SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
-
- SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+ MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
+ SDValue One =
+ DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
+
+ SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
if (VT.is512BitVector())
- return Brcst;
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+ return V;
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
}
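// [Editor's note, not part of the patch] The rewrite above trades the old
// constant-pool load + VBROADCASTM for pure DAG nodes: the mask extension
//   zext <16 x i1> %m to <16 x i32>
// becomes select(%m, splat(1), splat(0)), with no memory access at all.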
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@@ -14327,7 +12154,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
- return LowerZERO_EXTEND_AVX512(Op, DAG);
+ return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -14357,6 +12184,23 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
+ // Move vector to mask - truncate solution for SKX.
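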
+ if (VT.getVectorElementType() == MVT::i1) {
+ if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI())
+ return Op; // legal, will go to VPMOVB2M, VPMOVW2M
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
+ && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI() && Subtarget->hasVLX())
+ return Op; // legal, will go to VPMOVB2M, VPMOVW2M
+ if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI())
+ return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
+ && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI() && Subtarget->hasVLX())
+ return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
+ }
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
if (VT.getVectorElementType().getSizeInBits() >=8)
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
@@ -14370,14 +12214,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
InVT = ExtVT;
}
- SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
- SDValue CP = DAG.getConstantPool(C, getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+ SDValue OneV =
+ DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT);
SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
}
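// [Editor's note] X86ISD::TESTM sets mask lane i iff (LHS & RHS) is
// non-zero in that lane; with And passed twice this reduces to "lane of
// And is non-zero", i.e. the i1 result reflects the sign bit kept by the
// AND above.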
@@ -14390,13 +12228,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(2));
+ DAG.getIntPtrConstant(2, DL));
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
static const int ShufMask[] = {0, 2, 4, 6};
@@ -14410,16 +12248,16 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue,32> pshufbMask;
for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8));
for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
@@ -14429,15 +12267,15 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
&ShufMask[0]);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::BITCAST, DL, VT, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4, DL));
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
@@ -14476,7 +12314,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::BITCAST, DL, NVT, In),
DAG.getUNDEF(NVT), &MaskVec[0]);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
@@ -14613,7 +12451,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
}
// And if it is bigger, shrink it first.
if (SrcVT.bitsGT(VT)) {
- Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1, dl));
SrcVT = VT;
}
@@ -14672,8 +12510,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
- DAG.getConstant(1, VT));
- return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, dl, VT));
}
// Check whether an OR'd tree is PTEST-able.
@@ -14791,11 +12629,11 @@ static bool hasNonFlagsUse(SDValue Op) {
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
SelectionDAG &DAG) const {
- if (Op.getValueType() == MVT::i1)
- // KORTEST instruction should be selected
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
-
+ if (Op.getValueType() == MVT::i1) {
+ SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
+ DAG.getConstant(0, dl, MVT::i8));
+ }
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
@@ -14817,9 +12655,8 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
case ISD::SUB:
case ISD::MUL:
case ISD::SHL: {
- const BinaryWithFlagsSDNode *BinNode =
- cast<BinaryWithFlagsSDNode>(Op.getNode());
- if (BinNode->hasNoSignedWrap())
+ const auto *BinNode = cast<BinaryWithFlagsSDNode>(Op.getNode());
+ if (BinNode->Flags.hasNoSignedWrap())
break;
}
default:
@@ -14838,7 +12675,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
// return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op,
// DAG.getConstant(0, MVT::i1));
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ DAG.getConstant(0, dl, Op.getValueType()));
}
unsigned Opcode = 0;
unsigned NumOperands = 0;
@@ -14926,7 +12763,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (!Mask.isSignedIntN(32)) // Avoid large immediates.
break;
SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
- DAG.getConstant(Mask, VT));
+ DAG.getConstant(Mask, dl, VT));
DAG.ReplaceAllUsesWith(Op, New);
Op = New;
}
@@ -15012,12 +12849,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ DAG.getConstant(0, dl, Op.getValueType()));
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i != NumOperands; ++i)
- Ops.push_back(Op.getOperand(i));
+ SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);
SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesWith(Op, New);
@@ -15043,8 +12878,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
- !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize) &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::MinSize) &&
!Subtarget->isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
@@ -15079,7 +12914,7 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
- DAG.getConstant(8, MVT::i8));
+ DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
@@ -15144,6 +12979,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
return SDValue();
}
+/// If we have at least two divisions that use the same divisor, convert to
+/// multiplication by a reciprocal. This may need to be adjusted for a given
+/// CPU if a division's cost is not at least twice the cost of a multiplication.
+/// This is because we still need one division to calculate the reciprocal and
+/// then we need two multiplies by that reciprocal as replacements for the
+/// original divisions.
+bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ return NumUsers > 1;
+}
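// [Editor's sketch, not part of the patch] With this hook returning true,
// the generic DAG combine rewrites repeated divisions by one divisor:
//   a = x / d; b = y / d;                // two divides
// into
//   r = 1.0 / d; a = x * r; b = y * r;   // one divide, two multiplies
// which pays off whenever a divide costs at least twice a multiply.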
+
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
@@ -15192,7 +13037,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
// Use BT if the immediate can't be encoded in a TEST instruction.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
- RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
+ RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
}
}
@@ -15214,7 +13059,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, MVT::i8), BT);
+ DAG.getConstant(Cond, dl, MVT::i8), BT);
}
return SDValue();
@@ -15295,6 +13140,49 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
+static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);
+
+ assert(Op0.getValueType().getVectorElementType() == MVT::i1 &&
+ "Unexpected type for boolean compare operation");
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
+ DAG.getConstant(-1, dl, VT));
+ SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1,
+ DAG.getConstant(-1, dl, VT));
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE:
+ // (x != y) -> ~(x ^ y)
+ return DAG.getNode(ISD::XOR, dl, VT,
+ DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
+ DAG.getConstant(-1, dl, VT));
+ case ISD::SETEQ:
+ // (x == y) -> (x ^ y)
+ return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ // (x > y) -> (x & ~y)
+ return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ // (x < y) -> (~x & y)
+ return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1);
+ case ISD::SETULE:
+ case ISD::SETLE:
+ // (x <= y) -> (~x | y)
+ return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1);
+ case ISD::SETUGE:
+ case ISD::SETGE:
+ // (x >= y) -> (x | ~y)
+ return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1);
+ }
+}
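// [Editor's check of the identities above, treating i1 lanes as 0/1
// booleans; the signed and unsigned forms are deliberately handled alike]
// E.g. SETGT holds only for x=1, y=0, which is exactly x & ~y:
//   x=0,y=0: 0&~0=0   x=1,y=0: 1&~0=1   x=0,y=1: 0&~1=0   x=1,y=1: 1&~1=0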
+
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDValue Op0 = Op.getOperand(0);
@@ -15332,7 +13220,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(SSECC, MVT::i8));
+ DAG.getConstant(SSECC, dl, MVT::i8));
}
/// \brief Try to turn a VSETULT into a VSETULE by modifying its second
@@ -15359,7 +13247,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
if (Val == 0)
return SDValue();
- ULTOp1.push_back(DAG.getConstant(Val - 1, EVT));
+ ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
@@ -15399,22 +13287,25 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(CC0, MVT::i8));
+ DAG.getConstant(CC0, dl, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(CC1, MVT::i8));
+ DAG.getConstant(CC1, dl, MVT::i8));
return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, dl, VT, Op0, Op1,
- DAG.getConstant(SSECC, MVT::i8));
+ DAG.getConstant(SSECC, dl, MVT::i8));
}
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
- bool MaskResult = (VT.getVectorElementType() == MVT::i1);
EVT OpVT = Op1.getValueType();
+ if (OpVT.getVectorElementType() == MVT::i1)
+ return LowerBoolVSETCC_AVX512(Op, DAG);
+
+ bool MaskResult = (VT.getVectorElementType() == MVT::i1);
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
@@ -15524,10 +13415,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
- SB = DAG.getConstant(0x80000000U, MVT::v4i32);
+ SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
} else {
- SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
- SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
+ SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
+ SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Sign, Zero, Sign, Zero);
}
@@ -15582,7 +13473,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// bits of the inputs before performing those operations.
if (FlipSigns) {
EVT EltVT = VT.getVectorElementType();
- SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
+ SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
+ VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
}
@@ -15650,7 +13542,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
CCode = X86::GetOppositeBranchCondition(CCode);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8),
+ DAG.getConstant(CCode, dl, MVT::i8),
Op0.getOperand(1));
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
@@ -15662,18 +13554,18 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
- return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC);
+ return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, dl, MVT::i1), NewCC);
}
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ unsigned X86CC = TranslateX86CC(CC, dl, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+ DAG.getConstant(X86CC, dl, MVT::i8), EFLAGS);
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
return SetCC;
@@ -15724,9 +13616,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op1.getValueType();
SDValue CC;
- // Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
- // are available. Otherwise fp cmovs get lowered into a less efficient branch
- // sequence later on.
+ // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
+ // are available, or into VBLENDV when AVX is available.
+ // Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
@@ -15738,17 +13630,85 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SSECC != 8) {
if (Subtarget->hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
- DAG.getConstant(SSECC, MVT::i8));
+ DAG.getConstant(SSECC, DL, MVT::i8));
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
}
+
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
- DAG.getConstant(SSECC, MVT::i8));
+ DAG.getConstant(SSECC, DL, MVT::i8));
+
+ // If we have AVX, we can use a variable vector select (VBLENDV) instead
+ // of 3 logic instructions for size savings and potentially speed.
+ // Unfortunately, there is no scalar form of VBLENDV.
+
+ // If either operand is a constant, don't try this. We can expect to
+ // optimize away at least one of the logic instructions later in that
+ // case, so that sequence would be faster than a variable blend.
+
+ // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+
+ if (Subtarget->hasAVX() &&
+ !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
+
+ // Convert to vectors, do a VSELECT, and convert back to scalar.
+ // All of the conversions should be optimized away.
+
+ EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
+ SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
+ SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
+ SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
+
+ EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
+ VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp);
+
+ SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ VSel, DAG.getIntPtrConstant(0, DL));
+ }
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
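// [Editor's note] The FANDN/FAND/FOR fallback above is the classic
// bitwise select: the compare mask Cmp is all-ones or all-zeros, so
//   result = (Cmp & Op1) | (~Cmp & Op2)
// yields Op1 on true and Op2 on false. A constant operand lets one AND
// term fold away, which is why constants skip the VBLENDV path.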
+ if (VT.isVector() && VT.getScalarType() == MVT::i1) {
+ SDValue Op1Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
+ Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
+ else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
+ Op1Scalar = Op1.getOperand(0);
+ SDValue Op2Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
+ Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
+ else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
+ Op2Scalar = Op2.getOperand(0);
+ if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ Op1Scalar.getValueType(),
+ Cond, Op1Scalar, Op2Scalar);
+ if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ }
+
+ if (VT == MVT::v4i1 || VT == MVT::v2i1) {
+ SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
+ Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
+ DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
+ Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
+ DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1,
+ Cond, Op1, Op2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
+ }
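// [Editor's note] v2i1/v4i1 have no directly usable mask-operation width,
// so the operands are parked in the low lanes of a v8i1 (upper lanes
// undef), the SELECT runs at v8i1, and the low lanes are extracted back;
// the undef lanes never feed a visible result.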
+
if (Cond.getOpcode() == ISD::SETCC) {
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
@@ -15779,21 +13739,22 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
(isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
- DAG.getConstant(0, CmpOp0.getValueType()),
+ DAG.getConstant(0, DL,
+ CmpOp0.getValueType()),
CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8),
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
- CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+ CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+ DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
if (isAllOnes(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
@@ -15865,7 +13826,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
else
Cond = X86Op.getValue(1);
- CC = DAG.getConstant(X86Cond, MVT::i8);
+ CC = DAG.getConstant(X86Cond, DL, MVT::i8);
addTest = false;
}
@@ -15887,7 +13848,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
@@ -15902,7 +13863,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ Cond);
if (isAllOnes(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
@@ -15931,7 +13893,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
@@ -15957,7 +13920,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
@@ -15966,22 +13929,74 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+ MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl,
+ ExtVT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
+
+ SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
+ if (VT.is512BitVector())
+ return V;
+ return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
+}
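// [Editor's note, not part of the patch] Sign-extending an i1 mask uses a
// -1/0 select where zero-extension used 1/0:
//   sext <8 x i1> %m to <8 x i64>  ==  select %m, splat(-1), splat(0)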
+
+static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDValue In = Op->getOperand(0);
+ MVT VT = Op->getSimpleValueType(0);
+ MVT InVT = In.getSimpleValueType();
+ assert(VT.getSizeInBits() == InVT.getSizeInBits());
- MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
- Constant *C = ConstantInt::get(*DAG.getContext(),
- APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+ MVT InSVT = InVT.getScalarType();
+ assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits());
- SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
- if (VT.is512BitVector())
- return Brcst;
- return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+ if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
+ return SDValue();
+
+ SDLoc dl(Op);
+
+ // SSE41 targets can use the pmovsx* instructions directly.
+ if (Subtarget->hasSSE41())
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
+ // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
+ SDValue Curr = In;
+ MVT CurrVT = InVT;
+
+ // As SRAI is only available on i16/i32 types, we expand only up to i32
+ // and handle i64 separately.
+ while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) {
+ Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
+ MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
+ CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
+ Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr);
+ }
+
+ SDValue SignExt = Curr;
+ if (CurrVT != InVT) {
+ unsigned SignExtShift =
+ CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits();
+ SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+ DAG.getConstant(SignExtShift, dl, MVT::i8));
+ }
+
+ if (CurrVT == VT)
+ return SignExt;
+
+ if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
+ SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+ DAG.getConstant(31, dl, MVT::i8));
+ SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ext);
+ }
+
+ return SDValue();
}
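// [Editor's sketch of the pre-SSE4.1 path above; assumes little-endian
// lane numbering] For v16i8 -> v4i32 the loop unpacks the value bytes
// into the high end of ever-wider lanes, and one arithmetic shift then
// completes the extension:
//   [b0..b15]  --unpcklbw-->  b0..b7 in the high byte of eight i16 lanes
//              --unpcklwd-->  b0..b3 in the top byte of four i32 lanes
//              --psrad 24-->  sign-extended b0..b3 as four i32 values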
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@@ -16039,6 +14054,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
+// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
@@ -16137,8 +14153,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
"Can only lower sext loads with a single scalar load!");
unsigned loadRegZize = RegSz;
- if (Ext == ISD::SEXTLOAD && RegSz == 256)
- loadRegZize /= 2;
+ if (Ext == ISD::SEXTLOAD && RegSz >= 256)
+ loadRegZize = 128;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
@@ -16161,7 +14177,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
SDValue Increment =
- DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, TLI.getPointerTy());
+ DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy());
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
@@ -16177,7 +14193,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
else
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
- ScalarLoad, DAG.getIntPtrConstant(i));
+ ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
@@ -16217,7 +14233,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
Shuff =
- DAG.getNode(ISD::SRA, dl, RegVT, Shuff, DAG.getConstant(Amt, RegVT));
+ DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, dl, RegVT));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
@@ -16384,7 +14401,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
else
Cond = X86Op.getValue(1);
- CC = DAG.getConstant(X86Cond, MVT::i8);
+ CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
@@ -16415,7 +14432,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, MVT::i8);
+ CC = DAG.getConstant(CCode, dl, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
@@ -16433,7 +14450,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, MVT::i8);
+ CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
@@ -16446,7 +14463,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getConstant(CCode, MVT::i8);
+ CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
@@ -16472,10 +14489,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_P, MVT::i8);
+ CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
@@ -16502,10 +14519,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_NP, MVT::i8);
+ CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
Cond = Cmp;
addTest = false;
Dest = FalseBB;
@@ -16533,7 +14550,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
- CC = DAG.getConstant(X86Cond, MVT::i8);
+ CC = DAG.getConstant(X86Cond, dl, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
@@ -16570,23 +14587,23 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true),
SDLoc(Node));
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *DAG.getSubtarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, VT));
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue(),
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(),
SDLoc(Node));
SDValue Ops[2] = { Tmp1, Tmp2 };
@@ -16635,15 +14652,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
if (Align) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align, VT));
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
@@ -16678,22 +14694,22 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// Store gp_offset
SDValue Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
- MVT::i32),
+ DL, MVT::i32),
FIN, MachinePointerInfo(SV), false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4));
+ FIN, DAG.getIntPtrConstant(4, DL));
Store = DAG.getStore(Op.getOperand(0), DL,
- DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
+ DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL,
MVT::i32),
FIN, MachinePointerInfo(SV, 4), false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4));
+ FIN, DAG.getIntPtrConstant(4, DL));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
@@ -16703,7 +14719,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(8));
+ FIN, DAG.getIntPtrConstant(8, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
@@ -16745,22 +14761,17 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!DAG.getTarget().Options.UseSoftFloat &&
- !(DAG.getMachineFunction()
- .getFunction()->getAttributes()
- .hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) &&
+ assert(!Subtarget->useSoftFloat() &&
+ !(DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
// Insert VAARG_64 node into the DAG
// VAARG_64 returns two values: Variable Argument Address, Chain
- SmallVector<SDValue, 11> InstOps;
- InstOps.push_back(Chain);
- InstOps.push_back(SrcPtr);
- InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
- InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
- InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
+ DAG.getConstant(ArgMode, dl, MVT::i8),
+ DAG.getConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, InstOps, MVT::i64,
@@ -16791,8 +14802,8 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
- DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false,
+ DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false,
+ false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -16812,7 +14823,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
if (Opc == X86ISD::VSRAI)
ShiftAmt = ElementType.getSizeInBits() - 1;
else
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, dl, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
@@ -16837,7 +14848,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType));
+ Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRLI:
@@ -16849,7 +14860,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType));
+ Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRAI:
@@ -16861,7 +14872,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType));
+ Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
}
break;
}
@@ -16869,7 +14880,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
}
- return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
+ return DAG.getNode(Opc, dl, VT, SrcOp,
+ DAG.getConstant(ShiftAmt, dl, MVT::i8));
}
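// [Editor's note] When SrcOp is a constant BUILD_VECTOR, the loops above
// fold the shift lane by lane: VSRAI by 1 over i32 lanes <4, -4> gives
// <2, -2> (ashr keeps the sign), while VSRLI would give <2, 0x7ffffffe>
// (lshr shifts in zeros).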
// getTargetVShiftNode - Handle vector element shifts where the shift amount
@@ -16894,7 +14906,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
}
const X86Subtarget &Subtarget =
- DAG.getTarget().getSubtarget<X86Subtarget>();
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
// Let the shuffle legalizer expand this shift amount node.
@@ -16907,7 +14919,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
SmallVector<SDValue, 4> ShOps;
ShOps.push_back(ShAmt);
if (SVT == MVT::i32) {
- ShOps.push_back(DAG.getConstant(0, SVT));
+ ShOps.push_back(DAG.getConstant(0, dl, SVT));
ShOps.push_back(DAG.getUNDEF(SVT));
}
ShOps.push_back(DAG.getUNDEF(SVT));
@@ -16948,7 +14960,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
// are extracted by EXTRACT_SUBVECTOR.
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
switch (Op.getOpcode()) {
default: break;
@@ -16987,54 +14999,6 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
-static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_fma_vfmadd_ps:
- case Intrinsic::x86_fma_vfmadd_pd:
- case Intrinsic::x86_fma_vfmadd_ps_256:
- case Intrinsic::x86_fma_vfmadd_pd_256:
- case Intrinsic::x86_fma_mask_vfmadd_ps_512:
- case Intrinsic::x86_fma_mask_vfmadd_pd_512:
- return X86ISD::FMADD;
- case Intrinsic::x86_fma_vfmsub_ps:
- case Intrinsic::x86_fma_vfmsub_pd:
- case Intrinsic::x86_fma_vfmsub_ps_256:
- case Intrinsic::x86_fma_vfmsub_pd_256:
- case Intrinsic::x86_fma_mask_vfmsub_ps_512:
- case Intrinsic::x86_fma_mask_vfmsub_pd_512:
- return X86ISD::FMSUB;
- case Intrinsic::x86_fma_vfnmadd_ps:
- case Intrinsic::x86_fma_vfnmadd_pd:
- case Intrinsic::x86_fma_vfnmadd_ps_256:
- case Intrinsic::x86_fma_vfnmadd_pd_256:
- case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
- case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
- return X86ISD::FNMADD;
- case Intrinsic::x86_fma_vfnmsub_ps:
- case Intrinsic::x86_fma_vfnmsub_pd:
- case Intrinsic::x86_fma_vfnmsub_ps_256:
- case Intrinsic::x86_fma_vfnmsub_pd_256:
- case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
- case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
- return X86ISD::FNMSUB;
- case Intrinsic::x86_fma_vfmaddsub_ps:
- case Intrinsic::x86_fma_vfmaddsub_pd:
- case Intrinsic::x86_fma_vfmaddsub_ps_256:
- case Intrinsic::x86_fma_vfmaddsub_pd_256:
- case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
- return X86ISD::FMADDSUB;
- case Intrinsic::x86_fma_vfmsubadd_ps:
- case Intrinsic::x86_fma_vfmsubadd_pd:
- case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256:
- case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
- return X86ISD::FMSUBADD;
- }
-}
-
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -17065,15 +15029,68 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- SDValue RoundingMode = Op.getOperand(5);
+ // There are two kinds of intrinsics in this group:
+ // (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands;
+ // (2) With rounding mode and sae - 7 operands.
+ if (Op.getNumOperands() == 6) {
+ SDValue Sae = Op.getOperand(5);
+ unsigned Opc = IntrData->Opc1 ? IntrData->Opc1 : IntrData->Opc0;
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2,
+ Sae),
+ Mask, Src0, Subtarget, DAG);
+ }
+ assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
+ SDValue RoundingMode = Op.getOperand(5);
+ SDValue Sae = Op.getOperand(6);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
- RoundingMode),
+ RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
- Op.getOperand(2)),
- Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // We specify two possible opcodes for intrinsics with rounding modes.
+ // First we check whether the intrinsic may have a non-default rounding
+ // mode (IntrData->Opc1 != 0), then we check the rounding mode operand.
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src1, Src2, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1,Src2),
+ Mask, PassThru, Subtarget, DAG);
+ }
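// [Editor's note] The Opc0/Opc1 pairing in these cases means a
// CUR_DIRECTION rounding operand selects the plain node (Opc0), while a
// static rounding immediate routes to the rounding-aware variant (Opc1),
// so the extra operand is carried only where it matters.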
+ case FMA_OP_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // We specify two possible opcodes for intrinsics with rounding modes.
+ // First we check whether the intrinsic may have a non-default rounding
+ // mode (IntrData->Opc1 != 0), then we check the rounding mode operand.
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3, Rnd),
+ Mask, Src1, Subtarget, DAG);
+ }
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3),
+ Mask, Src1, Subtarget, DAG);
}
case CMP_MASK:
case CMP_MASK_CC: {
@@ -17094,30 +15111,46 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask.getValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
- Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
+ SDValue CC = Op.getOperand(3);
+ CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
+ // We specify two possible opcodes for intrinsics with rounding modes.
+ // First we check whether the intrinsic may have a non-default rounding
+ // mode (IntrData->Opc1 != 0), then we check the rounding mode operand.
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC, Rnd);
+ }
+ // Default rounding mode.
+ if (!Cmp.getNode())
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC);
+
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
+ Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
- DAG.getTargetConstant(0, MaskVT),
+ DAG.getTargetConstant(0, dl,
+ MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG);
assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
+ DAG.getConstant(X86CC, dl, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case VSHIFT:
@@ -17144,7 +15177,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
PassThru);
@@ -17159,20 +15192,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
- case FMA_OP_MASK:
- {
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3)),
- Op.getOperand(4), Op.getOperand(1),
- Subtarget, DAG);
- }
default:
break;
}
@@ -17259,7 +15282,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
- SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -17268,7 +15291,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
- SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@@ -17333,7 +15356,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8),
+ DAG.getConstant(X86CC, dl, MVT::i8),
SDValue(PCMP.getNode(), 1));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -17351,57 +15374,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
- case Intrinsic::x86_fma_mask_vfmadd_ps_512:
- case Intrinsic::x86_fma_mask_vfmadd_pd_512:
- case Intrinsic::x86_fma_mask_vfmsub_ps_512:
- case Intrinsic::x86_fma_mask_vfmsub_pd_512:
- case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
- case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
- case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
- case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
- case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
- case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
- auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
- if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
- return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
- dl, Op.getValueType(),
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3)),
- Op.getOperand(4), Op.getOperand(1),
- Subtarget, DAG);
- else
- return SDValue();
- }
+ case Intrinsic::x86_seh_lsda: {
+ // Compute the symbol for the LSDA. We know it'll get emitted later.
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Op1 = Op.getOperand(1);
+ auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
+ MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ StringRef Name = LSDASym->getName();
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
- case Intrinsic::x86_fma_vfmadd_ps:
- case Intrinsic::x86_fma_vfmadd_pd:
- case Intrinsic::x86_fma_vfmsub_ps:
- case Intrinsic::x86_fma_vfmsub_pd:
- case Intrinsic::x86_fma_vfnmadd_ps:
- case Intrinsic::x86_fma_vfnmadd_pd:
- case Intrinsic::x86_fma_vfnmsub_ps:
- case Intrinsic::x86_fma_vfnmsub_pd:
- case Intrinsic::x86_fma_vfmaddsub_ps:
- case Intrinsic::x86_fma_vfmaddsub_pd:
- case Intrinsic::x86_fma_vfmsubadd_ps:
- case Intrinsic::x86_fma_vfmsubadd_pd:
- case Intrinsic::x86_fma_vfmadd_ps_256:
- case Intrinsic::x86_fma_vfmadd_pd_256:
- case Intrinsic::x86_fma_vfmsub_ps_256:
- case Intrinsic::x86_fma_vfmsub_pd_256:
- case Intrinsic::x86_fma_vfnmadd_ps_256:
- case Intrinsic::x86_fma_vfnmadd_pd_256:
- case Intrinsic::x86_fma_vfnmsub_ps_256:
- case Intrinsic::x86_fma_vfnmsub_pd_256:
- case Intrinsic::x86_fma_vfmaddsub_ps_256:
- case Intrinsic::x86_fma_vfmaddsub_pd_256:
- case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256:
- return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ // Generate a simple absolute symbol reference. This intrinsic is only
+ // supported on 32-bit Windows, which isn't PIC.
+ SDValue Result =
+ DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX);
+ return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
+ }
}
}
@@ -17412,17 +15401,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
if (Src.getOpcode() == ISD::UNDEF)
Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
@@ -17438,15 +15427,15 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
- SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
@@ -17461,15 +15450,15 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
- SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
- MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
//SDVTList VTs = DAG.getVTList(MVT::Other);
@@ -17510,7 +15499,7 @@ static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
// The EAX register is loaded with the low-order 32 bits. The EDX register
// is loaded with the supported high-order bits of the counter.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, MVT::i8));
+ DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
@@ -17564,7 +15553,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
// The EDX register is loaded with the high-order 32 bits of the MSR, and
// the EAX register is loaded with the low-order 32 bits.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, MVT::i8));
+ DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
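Both counter lowerings above recombine the EDX:EAX halves with a shift and an OR. A minimal scalar model of that combine (an illustration, assuming the usual 32-bit halves; not part of the patch):

    #include <cstdint>
    // EDX:EAX -> one 64-bit value, exactly the SHL-by-32 + OR built above.
    uint64_t combineHalves(uint32_t eax, uint32_t edx) {
      return (uint64_t(edx) << 32) | eax;
    }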
@@ -17609,8 +15598,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, casted to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
- DAG.getConstant(1, Op->getValueType(1)),
- DAG.getConstant(X86::COND_B, MVT::i32),
+ DAG.getConstant(1, dl, Op->getValueType(1)),
+ DAG.getConstant(X86::COND_B, dl, MVT::i32),
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
@@ -17628,8 +15617,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
- Subtarget);
+ return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+ Chain, Subtarget);
}
case SCATTER: {
//scatter(base, mask, index, v1, scale);
@@ -17639,14 +15628,13 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
+ Scale, Chain);
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
- unsigned HintVal;
- if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
- (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
- llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
+ unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
+ assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
@@ -17658,7 +15646,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
- getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results);
+ getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
+ Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
@@ -17672,7 +15661,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_NE, MVT::i8),
+ DAG.getConstant(X86::COND_NE, dl, MVT::i8),
InTrans);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
@@ -17684,14 +15673,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
- DAG.getConstant(-1, MVT::i8));
+ DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
Op.getOperand(4), GenCF.getValue(1));
SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
Op.getOperand(5), MachinePointerInfo(),
false, false, 0);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_B, MVT::i8),
+ DAG.getConstant(X86::COND_B, dl, MVT::i8),
Res.getValue(1));
Results.push_back(SetCC);
Results.push_back(Store);
@@ -17715,7 +15704,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
DataToCompress, DAG.getUNDEF(VT));
@@ -17739,15 +15728,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
false, false, false, 0);
- SmallVector<SDValue, 2> Results;
- Results.push_back(DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand,
- PathThru));
- Results.push_back(Chain);
+ SDValue Results[] = {
+ DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
+ Chain};
return DAG.getMergeValues(Results, dl);
}
}
@@ -17767,9 +15755,8 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
FrameAddr, Offset),
@@ -17783,16 +15770,33 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ EVT VT = Op.getValueType();
+
MFI->setFrameAddressIsTaken(true);
- EVT VT = Op.getValueType();
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ // Depth > 0 makes no sense on targets which use Windows unwind codes. It
+ // is not possible to crawl up the stack without looking at the unwind codes
+ // simultaneously.
+ int FrameAddrIndex = FuncInfo->getFAIndex();
+ if (!FrameAddrIndex) {
+ // Set up a frame object for the return address.
+ unsigned SlotSize = RegInfo->getSlotSize();
+ FrameAddrIndex = MF.getFrameInfo()->CreateFixedObject(
+ SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
+ FuncInfo->setFAIndex(FrameAddrIndex);
+ }
+ return DAG.getFrameIndex(FrameAddrIndex, VT);
+ }
+
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(
- DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -17819,9 +15823,8 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName,
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
- return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
@@ -17831,8 +15834,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl (Op);
EVT PtrVT = getPointerTy();
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- DAG.getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -17841,7 +15843,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
- DAG.getIntPtrConstant(RegInfo->getSlotSize()));
+ DAG.getIntPtrConstant(RegInfo->getSlotSize(),
+ dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
@@ -17879,7 +15882,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -17896,12 +15899,12 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
- OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(2, MVT::i64));
+ DAG.getConstant(2, dl, MVT::i64));
OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
@@ -17910,13 +15913,13 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(10, MVT::i64));
- OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ DAG.getConstant(10, dl, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(12, MVT::i64));
+ DAG.getConstant(12, dl, MVT::i64));
OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
@@ -17924,16 +15927,16 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(20, MVT::i64));
- OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ DAG.getConstant(20, dl, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20),
false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(22, MVT::i64));
- OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
- MachinePointerInfo(TrmpAddr, 22),
+ DAG.getConstant(22, dl, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
+ Addr, MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
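The six stores above assemble a 23-byte 64-bit trampoline. A sketch of the resulting buffer, reconstructed from the opcode bytes in this hunk and assuming a little-endian host as on x86 (illustrative only, not code from the patch):

    #include <cstdint>
    #include <cstring>
    // Offsets mirror the MachinePointerInfo(TrmpAddr, N) values above.
    void writeTrampoline(uint8_t *T, uint64_t FPtr, uint64_t Nest) {
      const uint8_t MovR11[] = {0x49, 0xBB};       // movabsq $FPtr, %r11
      const uint8_t MovR10[] = {0x49, 0xBA};       // movabsq $Nest, %r10
      const uint8_t JmpR11[] = {0x49, 0xFF, 0xE3}; // jmpq *%r11
      std::memcpy(T + 0,  MovR11, 2); std::memcpy(T + 2,  &FPtr, 8);
      std::memcpy(T + 10, MovR10, 2); std::memcpy(T + 12, &Nest, 8);
      std::memcpy(T + 20, JmpR11, 3);
    }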
@@ -17986,32 +15989,32 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Addr, Disp;
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(10, MVT::i32));
+ DAG.getConstant(10, dl, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] = DAG.getStore(Root, dl,
- DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ DAG.getConstant(MOV32ri|N86Reg, dl, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(5, MVT::i32));
- OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
- MachinePointerInfo(TrmpAddr, 5),
+ DAG.getConstant(5, dl, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8),
+ Addr, MachinePointerInfo(TrmpAddr, 5),
false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(6, MVT::i32));
+ DAG.getConstant(6, dl, MVT::i32));
OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
@@ -18042,8 +16045,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
*/
MachineFunction &MF = DAG.getMachineFunction();
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -18069,20 +16071,20 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue CWD1 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x800, MVT::i16)),
- DAG.getConstant(11, MVT::i8));
+ CWD, DAG.getConstant(0x800, DL, MVT::i16)),
+ DAG.getConstant(11, DL, MVT::i8));
SDValue CWD2 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x400, MVT::i16)),
- DAG.getConstant(9, MVT::i8));
+ CWD, DAG.getConstant(0x400, DL, MVT::i16)),
+ DAG.getConstant(9, DL, MVT::i8));
SDValue RetVal =
DAG.getNode(ISD::AND, DL, MVT::i16,
DAG.getNode(ISD::ADD, DL, MVT::i16,
DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
- DAG.getConstant(1, MVT::i16)),
- DAG.getConstant(3, MVT::i16));
+ DAG.getConstant(1, DL, MVT::i16)),
+ DAG.getConstant(3, DL, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
@@ -18108,14 +16110,15 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
// If src is zero (i.e. bsr sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
- DAG.getConstant(NumBits+NumBits-1, OpVT),
- DAG.getConstant(X86::COND_E, MVT::i8),
+ DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
+ DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
// Finally xor with NumBits-1.
- Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
+ DAG.getConstant(NumBits - 1, dl, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
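The final XOR works because BSR returns a bit index in [0, NumBits-1], and subtracting such an index from NumBits-1 is the same as XORing with it. A scalar model of this CTLZ lowering, assuming a 32-bit operand (an illustration, not the patch):

    // CMOV substitutes 2*NumBits-1 when the input is zero (BSR sets ZF),
    // so the final XOR yields exactly NumBits for a zero input.
    unsigned ctlz32(unsigned x) {
      unsigned idx = 63;                  // CMOV value on zero input
      for (unsigned i = 0; i < 32; ++i)   // BSR: highest set bit index
        if (x >> i) idx = i;
      return idx ^ 31;                    // == 31 - idx for idx <= 31
    }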
@@ -18140,7 +16143,8 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
// And xor with NumBits-1.
- Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
+ DAG.getConstant(NumBits - 1, dl, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
@@ -18160,8 +16164,8 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
- DAG.getConstant(NumBits, VT),
- DAG.getConstant(X86::COND_E, MVT::i8),
+ DAG.getConstant(NumBits, dl, VT),
+ DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
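The CTTZ case mirrors CTLZ: BSF leaves its result undefined on zero input, so the CMOV substitutes the bit width. A scalar model (illustration, 32-bit):

    unsigned cttz32(unsigned x) {
      if (x == 0) return 32;               // the CMOV-on-ZF path
      unsigned i = 0;
      while (!(x & 1u)) { x >>= 1; ++i; }  // BSF: lowest set bit index
      return i;
    }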
@@ -18197,6 +16201,9 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
@@ -18204,6 +16211,9 @@ static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
@@ -18215,6 +16225,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i1)
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
+
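These i1 early-outs in LowerADD, LowerSUB and LowerMUL rely on arithmetic mod 2: addition and subtraction both reduce to XOR, and multiplication reduces to AND. A two-line model (illustration):

    bool add_i1(bool a, bool b) { return a ^ b; } // (a + b) mod 2 == (a - b) mod 2
    bool mul_i1(bool a, bool b) { return a & b; } // (a * b) mod 2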
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
@@ -18222,6 +16235,79 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
+ // Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector
+ // pairs, multiply and truncate.
+ if (VT == MVT::v16i8 || VT == MVT::v32i8) {
+ if (Subtarget->hasInt256()) {
+ if (VT == MVT::v32i8) {
+ MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2);
+ SDValue Lo = DAG.getIntPtrConstant(0, dl);
+ SDValue Hi = DAG.getIntPtrConstant(VT.getVectorNumElements() / 2, dl);
+ SDValue ALo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Lo);
+ SDValue BLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Lo);
+ SDValue AHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Hi);
+ SDValue BHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(ISD::MUL, dl, SubVT, ALo, BLo),
+ DAG.getNode(ISD::MUL, dl, SubVT, AHi, BHi));
+ }
+
+ MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ return DAG.getNode(
+ ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::MUL, dl, ExVT,
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
+ }
+
+ assert(VT == MVT::v16i8 &&
+ "Pre-AVX2 support only supports v16i8 multiplication");
+ MVT ExVT = MVT::v8i16;
+
+ // Extract the lo parts and sign extend to i16
+ SDValue ALo, BLo;
+ if (Subtarget->hasSSE41()) {
+ ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
+ BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
+ } else {
+ const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
+ -1, 4, -1, 5, -1, 6, -1, 7};
+ ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
+ BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
+ ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo);
+ BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo);
+ ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
+ BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
+ }
+
+ // Extract the hi parts and sign extend to i16
+ SDValue AHi, BHi;
+ if (Subtarget->hasSSE41()) {
+ const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
+ BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
+ AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi);
+ BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi);
+ } else {
+ const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
+ -1, 12, -1, 13, -1, 14, -1, 15};
+ AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
+ BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
+ AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi);
+ BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi);
+ AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
+ BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
+ }
+
+ // Multiply, mask the lower 8 bits of the lo/hi results and pack
+ SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+ SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+ RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
+ RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ }
+
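Per lane, the promotion above computes the full product in 16 bits and keeps the low byte; the AND-with-255 plus PACKUS pair is just the truncation step. A scalar model (illustration, not the patch):

    #include <cstdint>
    int8_t mul_i8(int8_t a, int8_t b) {
      int16_t wide = int16_t(a) * int16_t(b); // sign-extend, multiply in i16
      return int8_t(uint8_t(wide));           // keep the low 8 bits
    }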
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
@@ -18394,7 +16480,8 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
// unsigned multiply.
if (IsSigned && !Subtarget->hasSSE41()) {
SDValue ShAmt =
- DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
+ DAG.getConstant(31, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
@@ -18410,6 +16497,53 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Ops, dl);
}
+// Return true if the required (according to Opcode) shift-imm form is natively
+// supported by the Subtarget.
+static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
+ unsigned Opcode) {
+ if (VT.getScalarSizeInBits() < 16)
+ return false;
+
+ if (VT.is512BitVector() &&
+ (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI()))
+ return true;
+
+ bool LShift = VT.is128BitVector() ||
+ (VT.is256BitVector() && Subtarget->hasInt256());
+
+ bool AShift = LShift && (Subtarget->hasVLX() ||
+ (VT != MVT::v2i64 && VT != MVT::v4i64));
+ return (Opcode == ISD::SRA) ? AShift : LShift;
+}
+
+// The shift amount is a variable, but it is the same for all vector lanes.
+// These instructions are defined together with shift-immediate.
+static
+bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
+ unsigned Opcode) {
+ return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
+}
+
+// Return true if the required (according to Opcode) variable-shift form is
+// natively supported by the Subtarget
+static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
+ unsigned Opcode) {
+
+ if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16)
+ return false;
+
+ // vXi16 supported only on AVX-512, BWI
+ if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI())
+ return false;
+
+ if (VT.is512BitVector() || Subtarget->hasVLX())
+ return true;
+
+ bool LShift = VT.is128BitVector() || VT.is256BitVector();
+ bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
+ return (Opcode == ISD::SRA) ? AShift : LShift;
+}
+
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -18417,97 +16551,44 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
+ unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
+ (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+
// Optimize shl/srl/sra with constant shift amount.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
uint64_t ShiftAmt = ShiftConst->getZExtValue();
- if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- (Subtarget->hasInt256() &&
- (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
- (Subtarget->hasAVX512() &&
- (VT == MVT::v8i64 || VT == MVT::v16i32))) {
- if (Op.getOpcode() == ISD::SHL)
- return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
- DAG);
- if (Op.getOpcode() == ISD::SRL)
- return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
- DAG);
- if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
- return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
- DAG);
- }
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
- if (VT == MVT::v16i8) {
- if (Op.getOpcode() == ISD::SHL) {
- // Make a large shift.
- SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
- MVT::v8i16, R, ShiftAmt,
- DAG);
- SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
- // Zero out the rightmost bits.
- SmallVector<SDValue, 16> V(16,
- DAG.getConstant(uint8_t(-1U << ShiftAmt),
- MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
- }
- if (Op.getOpcode() == ISD::SRL) {
- // Make a large shift.
- SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
- MVT::v8i16, R, ShiftAmt,
- DAG);
- SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
- // Zero out the leftmost bits.
- SmallVector<SDValue, 16> V(16,
- DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
- MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
- }
- if (Op.getOpcode() == ISD::SRA) {
- if (ShiftAmt == 7) {
- // R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
+ if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // R s>> a === ((R u>> a) ^ m) - m
- SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
- MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
- Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
- Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
- return Res;
- }
- llvm_unreachable("Unknown shift opcode.");
- }
-
- if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
+ // Simple i8 add case
+ if (ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
+
// Make a large shift.
- SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
- MVT::v16i16, R, ShiftAmt,
- DAG);
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
+ R, ShiftAmt, DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
- SmallVector<SDValue, 32> V(32,
- DAG.getConstant(uint8_t(-1U << ShiftAmt),
- MVT::i8));
+ SmallVector<SDValue, 32> V(
+ NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
- MVT::v16i16, R, ShiftAmt,
- DAG);
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
+ R, ShiftAmt, DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
- SmallVector<SDValue, 32> V(32,
- DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
- MVT::i8));
+ SmallVector<SDValue, 32> V(
+ NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
@@ -18520,8 +16601,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// R s>> a === ((R u>> a) ^ m) - m
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
- MVT::i8));
+ SmallVector<SDValue, 32> V(NumElts,
+ DAG.getConstant(128 >> ShiftAmt, dl,
+ MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
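The identity used here sign-extends a logical shift result: m = 128 >> a marks where the sign bit lands, and (u ^ m) - m propagates it upward. A per-byte model (illustration):

    #include <cstdint>
    int8_t sra_i8(int8_t x, unsigned a) {   // a in [0, 7]
      uint8_t u = uint8_t(x) >> a;          // logical shift
      uint8_t m = 0x80u >> a;               // shifted-in sign-bit position
      return int8_t(uint8_t((u ^ m) - m));  // flip and subtract to sign-extend
    }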
@@ -18563,19 +16645,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShAmt != ShiftAmt)
return SDValue();
}
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
- DAG);
- case ISD::SRL:
- return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
- DAG);
- case ISD::SRA:
- return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
- DAG);
- }
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
}
return SDValue();
@@ -18588,12 +16658,13 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
- VT == MVT::v4i32 || VT == MVT::v8i16 ||
- (Subtarget->hasInt256() &&
- ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
- VT == MVT::v8i32 || VT == MVT::v16i16)) ||
- (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
+ unsigned X86OpcI = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
+ (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+
+ unsigned X86OpcV = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHL :
+ (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
+
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) {
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
@@ -18626,7 +16697,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (!BaseShAmt)
// Avoid introducing an extract element from a shuffle.
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec,
- DAG.getIntPtrConstant(SplatIdx));
+ DAG.getIntPtrConstant(SplatIdx, dl));
}
}
@@ -18637,54 +16708,12 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- case MVT::v16i32:
- case MVT::v8i64:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- case MVT::v16i32:
- case MVT::v8i64:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- case MVT::v16i32:
- case MVT::v8i64:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
- }
- }
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG);
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
- if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
- (Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
+ if (!Subtarget->is64Bit() && VT == MVT::v2i64 &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
@@ -18698,18 +16727,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
- case ISD::SRL:
- return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
- case ISD::SRA:
- return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
- }
+ return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
-
return SDValue();
}
@@ -18719,33 +16738,28 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- SDValue V;
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
- V = LowerScalarImmediateShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
- V = LowerScalarVariableShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
+ if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
return Op;
- // AVX2 has VPSLLV/VPSRAV/VPSRLV.
- if (Subtarget->hasInt256()) {
- if (Op.getOpcode() == ISD::SRL &&
- (VT == MVT::v2i64 || VT == MVT::v4i32 ||
- VT == MVT::v4i64 || VT == MVT::v8i32))
- return Op;
- if (Op.getOpcode() == ISD::SHL &&
- (VT == MVT::v2i64 || VT == MVT::v4i32 ||
- VT == MVT::v4i64 || VT == MVT::v8i32))
- return Op;
- if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
- return Op;
+
+ // v2i64 vector logical shifts can efficiently avoid scalarization - do the
+ // shifts per-lane and then shuffle the partial results back together.
+ if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ // Splat the shift amounts so the scalar shifts above will catch it.
+ SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
+ SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
+ SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
// If possible, lower this packed shift into a vector multiply instead of
@@ -18775,7 +16789,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Elts.push_back(DAG.getUNDEF(SVT));
continue;
}
- Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
+ Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
return DAG.getNode(ISD::MUL, dl, VT, R, BV);
@@ -18783,9 +16797,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getConstant(0x3f800000U, dl, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
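The v4i32 path builds 2^amt without a variable shift: writing amt into the exponent field of an IEEE single (bias 127, hence the 0x3f800000 added after shifting into bit 23) yields the float 2^amt, and converting back to integer gives the power of two to multiply by. A scalar model (illustration; the amt == 31 case relies on CVTTPS2DQ returning 0x80000000 on overflow):

    #include <cstdint>
    #include <cstring>
    uint32_t shl_var(uint32_t x, uint32_t amt) { // amt in [0, 31]
      uint32_t bits = (amt << 23) + 0x3f800000u; // exponent = 127 + amt
      float p;                                   // p == 2^amt exactly
      std::memcpy(&p, &bits, sizeof p);
      uint32_t pow2 = uint32_t(int64_t(p));      // the FP_TO_SINT step
      return x * pow2;                           // x << amt (mod 2^32)
    }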
@@ -18849,10 +16864,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
// Replace this node with two shifts followed by a MOVSS/MOVSD.
EVT CastVT = MVT::v4i32;
SDValue Splat1 =
- DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
+ DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
SDValue Splat2 =
- DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
+ DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
if (TargetOpcode == X86ISD::MOVSD)
CastVT = MVT::v2i64;
@@ -18865,24 +16880,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
- assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
+ // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, dl, VT));
- // a = a << 5;
- Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
- Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
-
- // Turn 'a' into a mask suitable for VSELECT
- SDValue VSelM = DAG.getConstant(0x80, VT);
+ SDValue VSelM = DAG.getConstant(0x80, dl, VT);
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
- SDValue CM1 = DAG.getConstant(0x0f, VT);
- SDValue CM2 = DAG.getConstant(0x3f, VT);
-
- // r = VSELECT(r, psllw(r & (char16)15, 4), a);
- SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
- M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ // r = VSELECT(r, shl(r, 4), a);
+ SDValue M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(4, dl, VT));
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
@@ -18890,10 +16896,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
- // r = VSELECT(r, psllw(r & (char16)63, 2), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
- M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ // r = VSELECT(r, shl(r, 2), a);
+ M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(2, dl, VT));
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
@@ -18911,14 +16915,32 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
// solution better.
if (Subtarget->hasInt256() && VT == MVT::v8i16) {
- MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
+ MVT ExtVT = MVT::v8i32;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- R = DAG.getNode(ExtOpc, dl, NewVT, R);
- Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
+ R = DAG.getNode(ExtOpc, dl, ExtVT, R);
+ Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
- }
+ DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+ }
+
+ if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+ MVT ExtVT = MVT::v8i32;
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
+ SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
+ SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R);
+ SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R);
+ ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo);
+ AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi);
+ RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo);
+ RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi);
+ SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
+ Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
+ Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
+ }
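The unpack trick widens each i16 lane into an i32 lane so the AVX2 variable 32-bit shifts can be reused; the trailing logical >>16 drops the scratch half before PACKUS narrows back. A per-lane model for the logical-right case (illustration, not the patch):

    #include <cstdint>
    uint16_t srl16_via_i32(uint16_t r, unsigned amt) { // amt in [0, 15]
      uint32_t lane = (uint32_t(r) << 16) | r; // unpack r with itself
      return uint16_t((lane >> amt) >> 16);    // 32-bit shift, keep high half
    }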
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
@@ -18934,12 +16956,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// Constant shift amount
- SmallVector<SDValue, 4> Amt1Csts;
- SmallVector<SDValue, 4> Amt2Csts;
- for (unsigned i = 0; i != NumElems/2; ++i)
- Amt1Csts.push_back(Amt->getOperand(i));
- for (unsigned i = NumElems/2; i != NumElems; ++i)
- Amt2Csts.push_back(Amt->getOperand(i));
+ SmallVector<SDValue, 8> Ops(Amt->op_begin(), Amt->op_begin() + NumElems);
+ ArrayRef<SDValue> Amt1Csts = makeArrayRef(Ops).slice(0, NumElems / 2);
+ ArrayRef<SDValue> Amt2Csts = makeArrayRef(Ops).slice(NumElems / 2);
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
@@ -19021,7 +17040,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(X86::COND_O, MVT::i32),
+ DAG.getConstant(X86::COND_O, DL, MVT::i32),
SDValue(Sum.getNode(), 2));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
@@ -19034,87 +17053,23 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
- DAG.getConstant(Cond, MVT::i32),
+ DAG.getConstant(Cond, DL, MVT::i32),
SDValue(Sum.getNode(), 1));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
-// Sign extension of the low part of vector elements. This may be used either
-// when sign extend instructions are not available or if the vector element
-// sizes already match the sign-extended size. If the vector elements are in
-// their pre-extended size and sign extend instructions are available, that will
-// be handled by LowerSIGN_EXTEND.
-SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- MVT VT = Op.getSimpleValueType();
-
- if (!Subtarget->hasSSE2() || !VT.isVector())
- return SDValue();
-
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
-
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v8i32:
- case MVT::v16i16:
- if (!Subtarget->hasFp256())
- return SDValue();
- if (!Subtarget->hasInt256()) {
- // needs to be split
- unsigned NumElems = VT.getVectorNumElements();
-
- // Extract the LHS vectors
- SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
- SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
-
- MVT EltVT = VT.getVectorElementType();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- EVT ExtraEltVT = ExtraVT.getVectorElementType();
- unsigned ExtraNumElems = ExtraVT.getVectorNumElements();
- ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
- ExtraNumElems/2);
- SDValue Extra = DAG.getValueType(ExtraVT);
-
- LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
- LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
- }
- // fall through
- case MVT::v4i32:
- case MVT::v8i16: {
- SDValue Op0 = Op.getOperand(0);
-
- // This is a sign extension of some low part of vector elements without
- // changing the size of the vector elements themselves:
- // Shift-Left + Shift-Right-Algebraic.
- SDValue Shl = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0,
- BitsDiff, DAG);
- return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff,
- DAG);
- }
- }
-}
-
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
- const X86Subtarget &Subtarget =
- getTargetMachine().getSubtarget<X86Subtarget>();
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
- return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+ return !Subtarget->is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
else if (OpWidth == 128)
- return Subtarget.hasCmpxchg16b();
+ return Subtarget->hasCmpxchg16b();
else
return false;
}
@@ -19130,16 +17085,17 @@ bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return needsCmpXchgNb(PTy->getElementType());
}
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
- const X86Subtarget &Subtarget =
- getTargetMachine().getSubtarget<X86Subtarget>();
- unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
const Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
- if (MemType->getPrimitiveSizeInBits() > NativeWidth)
- return needsCmpXchgNb(MemType);
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+ return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
+ }
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
@@ -19149,13 +17105,14 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
- return false;
+ return AtomicRMWExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
- return !AI->use_empty();
+ return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -19163,7 +17120,7 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
- return true;
+ return AtomicRMWExpansionKind::CmpXChg;
}
}
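The practical effect of the Or/And/Xor case: a dead result lets the backend emit one lock-prefixed instruction, while a live result forces a compare-exchange loop. A usage illustration with standard <atomic> (added for clarity, not from the patch):

    #include <atomic>
    void or_dead(std::atomic<int> &a) { a.fetch_or(1); }        // lock orl
    int  or_live(std::atomic<int> &a) { return a.fetch_or(1); } // cmpxchg loop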
@@ -19176,9 +17133,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) {
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- const X86Subtarget &Subtarget =
- getTargetMachine().getSubtarget<X86Subtarget>();
- unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+ unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
const Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
@@ -19210,21 +17165,21 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
- if (SynchScope == SingleThread) {
+ if (SynchScope == SingleThread)
// FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
- } else if (hasMFENCE(Subtarget)) {
- Function *MFence = llvm::Intrinsic::getDeclaration(M,
- Intrinsic::x86_sse2_mfence);
- Builder.CreateCall(MFence);
- } else {
+
+ if (!hasMFENCE(*Subtarget))
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
// enough that we do not bother.
return nullptr;
- }
+
+ Function *MFence =
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
+ Builder.CreateCall(MFence, {});
// Finally we can emit the atomic load.
LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
@@ -19250,13 +17205,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = {
- DAG.getRegister(X86::ESP, MVT::i32), // Base
- DAG.getTargetConstant(1, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i32), // Index
- DAG.getTargetConstant(0, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i32), // Segment.
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, dl, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, dl, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
Zero,
Chain
};
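Without MFENCE, the operand list above encodes a locked no-op read-modify-write of the top of stack, which acts as a full barrier on x86. Roughly, in GCC-style inline assembly (a sketch of the emitted idiom, not code from the patch):

    static inline void fence_pre_sse2() {
      __asm__ __volatile__("lock; orl $0, (%%esp)" ::: "memory", "cc");
    }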
@@ -19289,7 +17244,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
- DAG.getTargetConstant(size, MVT::i8),
+ DAG.getTargetConstant(size, DL, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
@@ -19301,7 +17256,8 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
- DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+ DAG.getConstant(X86::COND_E, DL, MVT::i8),
+ EFLAGS);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
@@ -19330,18 +17286,16 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
- DAG.getIntPtrConstant(i)));
+ DAG.getIntPtrConstant(i, dl)));
// Explicitly mark the extra elements as Undef.
- SDValue Undef = DAG.getUNDEF(SVT);
- for (unsigned i = NumElts, e = NumElts * 2; i != e; ++i)
- Elts.push_back(Undef);
+ Elts.append(NumElts, DAG.getUNDEF(SVT));
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts);
SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
}
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
@@ -19396,12 +17350,15 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
bool NeedsBitcast = EltVT == MVT::i32;
MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64;
- SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), EltVT);
- SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), EltVT);
- SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), EltVT);
+ SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl,
+ EltVT);
+ SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl,
+ EltVT);
+ SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl,
+ EltVT);
// v = v - ((v >> 1) & 0x55555555...)
- SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, EltVT));
+ SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, dl, EltVT));
SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones);
SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV);
if (NeedsBitcast)
@@ -19420,7 +17377,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
SmallVector<SDValue, 8> Mask33(NumElts, Cst33);
SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33);
- SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, EltVT));
+ SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, dl, EltVT));
SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos);
Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV);
@@ -19439,7 +17396,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS);
// v = (v + (v >> 4)) & 0x0F0F0F0F...
- SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, EltVT));
+ SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, dl, EltVT));
SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours);
Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV);
Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
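The build_vector masks above implement, lane by lane, the classic SWAR population count. A 32-bit scalar model of the whole sequence (illustration):

    #include <cstdint>
    uint32_t popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555);                // 2-bit sums
      v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 4-bit sums
      v = (v + (v >> 4)) & 0x0F0F0F0F;                // 8-bit sums
      v += v >> 8; v += v >> 16;                      // horizontal add
      return v & 0x3F;                                // fits in 6 bits
    }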
@@ -19472,7 +17429,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
Add = And;
SmallVector<SDValue, 8> Csts;
for (unsigned i = 8; i <= Len/2; i *= 2) {
- Csts.assign(NumElts, DAG.getConstant(i, EltVT));
+ Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT));
SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts);
Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV);
Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
@@ -19480,7 +17437,8 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
}
// The result is in the least significant 6 bits on i32 and 7 bits on i64.
- SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), EltVT);
+ SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl,
+ EltVT);
SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F);
SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV);
if (NeedsBitcast) {
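The three masks above implement the classic parallel bit-count: each step folds adjacent bit groups into wider partial sums. A scalar sketch of the same sequence for one 32-bit lane (illustrative helper, not part of this file):

    #include <cstdint>

    // Parallel popcount mirroring the DAG sequence above.
    static uint32_t PopCount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555);                // 1-bit -> 2-bit sums
      v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 2-bit -> 4-bit sums
      v = (v + (v >> 4)) & 0x0F0F0F0F;                // 4-bit -> byte sums
      v += v >> 8;                                    // fold bytes
      v += v >> 16;                                   // fold halfwords
      return v & 0x3F; // the result fits in the low 6 bits for Len == 32
    }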
@@ -19499,7 +17457,7 @@ static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Node);
EVT T = Node->getValueType(0);
SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
- DAG.getConstant(0, T), Node->getOperand(2));
+ DAG.getConstant(0, dl, T), Node->getOperand(2));
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
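LowerLOAD_SUB leans on the identity x - y == x + (-y): the atomic subtract is rebuilt as an atomic add of the negated operand. A minimal scalar sketch of the equivalence being relied on (assumed, not code from this file):

    #include <atomic>
    #include <cstdint>

    // fetch_sub(V) and fetch_add(-V) produce the same result in
    // two's-complement arithmetic.
    int32_t SubViaAdd(std::atomic<int32_t> &A, int32_t V) {
      return A.fetch_add(-V);
    }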
@@ -19605,19 +17563,110 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
  // Returned in bits 0:31 and 32:63 of xmm0.
SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
- CallResult.first, DAG.getIntPtrConstant(0));
+ CallResult.first, DAG.getIntPtrConstant(0, dl));
SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
- CallResult.first, DAG.getIntPtrConstant(1));
+ CallResult.first, DAG.getIntPtrConstant(1, dl));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
+static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Subtarget->hasAVX512() &&
+ "MGATHER/MSCATTER are supported on AVX-512 arch only");
+
+ MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
+ EVT VT = N->getValue().getValueType();
+ assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
+ SDLoc dl(Op);
+
+  // X86 scatter kills the mask register, so its type should be added to
+  // the list of return values.
+ if (N->getNumValues() == 1) {
+ SDValue Index = N->getIndex();
+ if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ !Index.getValueType().is512BitVector())
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+ SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), Index };
+
+ SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
+ DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
+ return SDValue(NewScatter.getNode(), 0);
+ }
+ return Op;
+}
+
+static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Subtarget->hasAVX512() &&
+ "MGATHER/MSCATTER are supported on AVX-512 arch only");
+
+ MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
+ EVT VT = Op.getValueType();
+ assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
+ SDLoc dl(Op);
+
+ SDValue Index = N->getIndex();
+ if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ !Index.getValueType().is512BitVector()) {
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), Index };
+ DAG.UpdateNodeOperands(N, Ops);
+ }
+ return Op;
+}
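For reference, the per-lane semantics that the MGATHER/MSCATTER nodes carry can be modelled in scalar code roughly as below; this is a sketch of the assumed semantics, not an implementation from this file:

    #include <cstdint>

    // Masked gather: inactive lanes keep the pass-through value.
    void MaskedGather(int32_t *Result, int32_t *const *Ptrs, const bool *Mask,
                      const int32_t *PassThru, int N) {
      for (int i = 0; i < N; ++i)
        Result[i] = Mask[i] ? *Ptrs[i] : PassThru[i];
    }

    // Masked scatter: inactive lanes write nothing.
    void MaskedScatter(int32_t *const *Ptrs, const bool *Mask,
                       const int32_t *Src, int N) {
      for (int i = 0; i < N; ++i)
        if (Mask[i])
          *Ptrs[i] = Src[i];
    }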
+
+SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TODO: Eventually, the lowering of these nodes should be informed by or
+ // deferred to the GC strategy for the function in which they appear. For
+ // now, however, they must be lowered to something. Since they are logically
+ // no-ops in the case of a null GC strategy (or a GC strategy which does not
+ // require special handling for these nodes), lower them as literal NOOPs for
+ // the time being.
+ SmallVector<SDValue, 2> Ops;
+
+ Ops.push_back(Op.getOperand(0));
+ if (Op->getGluedNode())
+ Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
+
+ SDLoc OpDL(Op);
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
+
+ return NOOP;
+}
+
+SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TODO: Eventually, the lowering of these nodes should be informed by or
+ // deferred to the GC strategy for the function in which they appear. For
+ // now, however, they must be lowered to something. Since they are logically
+ // no-ops in the case of a null GC strategy (or a GC strategy which does not
+ // require special handling for these nodes), lower them as literal NOOPs for
+ // the time being.
+ SmallVector<SDValue, 2> Ops;
+
+ Ops.push_back(Op.getOperand(0));
+ if (Op->getGluedNode())
+ Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
+
+ SDLoc OpDL(Op);
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
+
+ return NOOP;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
@@ -19625,8 +17674,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
+ case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -19647,6 +17696,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
@@ -19700,6 +17751,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
+ case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
+ case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
+ case ISD::GC_TRANSITION_START:
+ return LowerGC_TRANSITION_START(Op, DAG);
+ case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG);
}
}
@@ -19747,6 +17803,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
+ // (FP_TO_SINT (load f16)) to FP_TO_INT*.
+ if (N->getOperand(0).getValueType() == MVT::f16)
+ break;
+ // fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -19775,7 +17836,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
N->getOperand(0));
- SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
@@ -19792,6 +17853,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::FP_EXTEND: {
+ // Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
+ // No other ValueType for FP_EXTEND should reach this point.
+ assert(N->getValueType(0) == MVT::v2f32 &&
+ "Do not know how to legalize this Node");
+ return;
+ }
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -19818,9 +17886,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
- DAG.getConstant(0, HalfT));
+ DAG.getConstant(0, dl, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
- DAG.getConstant(1, HalfT));
+ DAG.getConstant(1, dl, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
@@ -19829,9 +17897,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
- DAG.getConstant(0, HalfT));
+ DAG.getConstant(0, dl, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
- DAG.getConstant(1, HalfT));
+ DAG.getConstant(1, dl, HalfT));
swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
@@ -19858,7 +17926,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MVT::i32, cpOutH.getValue(2));
SDValue Success =
DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+ DAG.getConstant(X86::COND_E, dl, MVT::i8), EFLAGS);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
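The EXTRACT_ELEMENT pairs above split the wide compare-exchange operand into register-sized halves (EAX:EDX / RAX:RDX style). In scalar terms, for the 64-bit case (a sketch; little-endian half ordering assumed):

    #include <cstdint>

    // Split a 64-bit value into the low/high halves fed to CMPXCHG8B.
    void SplitHalves(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(V);       // EXTRACT_ELEMENT index 0
      Hi = static_cast<uint32_t>(V >> 32); // EXTRACT_ELEMENT index 1
    }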
@@ -19908,7 +17976,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
- ToVecInt, DAG.getIntPtrConstant(i)));
+ ToVecInt, DAG.getIntPtrConstant(i, dl)));
Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts));
}
@@ -19916,8 +17984,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return nullptr;
+ switch ((X86ISD::NodeType)Opcode) {
+ case X86ISD::FIRST_NUMBER: break;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
@@ -19944,9 +18012,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
case X86ISD::CMPMU: return "X86ISD::CMPMU";
+ case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCC: return "X86ISD::FSETCC";
+ case X86ISD::FGETSIGNx86: return "X86ISD::FGETSIGNx86";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -19955,16 +18025,21 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
+ case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
+ case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
+ case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
+ case X86ISD::ADDUS: return "X86ISD::ADDUS";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
@@ -19975,7 +18050,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
@@ -20057,8 +18134,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
+ case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
+ case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
@@ -20071,6 +18149,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
+ case X86ISD::MFENCE: return "X86ISD::MFENCE";
+ case X86ISD::SFENCE: return "X86ISD::SFENCE";
+ case X86ISD::LFENCE: return "X86ISD::LFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
@@ -20082,13 +18163,31 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
+ case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";
+ case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";
+ case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND";
+ case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
+ case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
+ case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
+ case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
case X86ISD::SELECT: return "X86ISD::SELECT";
+ case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
+ case X86ISD::RCP28: return "X86ISD::RCP28";
+ case X86ISD::EXP2: return "X86ISD::EXP2";
+ case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
+ case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
+ case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
+ case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
+ case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::ADDS: return "X86ISD::ADDS";
+ case X86ISD::SUBS: return "X86ISD::SUBS";
}
+ return nullptr;
}
// isLegalAddressingMode - Return true if the addressing mode represented
@@ -20236,6 +18335,8 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
+
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
@@ -20272,85 +18373,24 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
if (!VT.isSimple())
return false;
- MVT SVT = VT.getSimpleVT();
+ // Not for i1 vectors
+ if (VT.getScalarType() == MVT::i1)
+ return false;
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
return false;
- // This is an experimental legality test that is tailored to match the
- // legality test of the experimental lowering more closely. They are gated
- // separately to ease testing of performance differences.
- if (ExperimentalVectorShuffleLegality)
- // We only care that the types being shuffled are legal. The lowering can
- // handle any possible shuffle mask that results.
- return isTypeLegal(SVT);
-
- // If this is a single-input shuffle with no 128 bit lane crossings we can
- // lower it into pshufb.
- if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
- (SVT.is256BitVector() && Subtarget->hasInt256())) {
- bool isLegal = true;
- for (unsigned I = 0, E = M.size(); I != E; ++I) {
- if (M[I] >= (int)SVT.getVectorNumElements() ||
- ShuffleCrosses128bitLane(SVT, I, M[I])) {
- isLegal = false;
- break;
- }
- }
- if (isLegal)
- return true;
- }
-
- // FIXME: blends, shifts.
- return (SVT.getVectorNumElements() == 2 ||
- ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isMOVLMask(M, SVT) ||
- isCommutedMOVLMask(M, SVT) ||
- isMOVHLPSMask(M, SVT) ||
- isSHUFPMask(M, SVT) ||
- isSHUFPMask(M, SVT, /* Commuted */ true) ||
- isPSHUFDMask(M, SVT) ||
- isPSHUFDMask(M, SVT, /* SecondOperand */ true) ||
- isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
- isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
- isPALIGNRMask(M, SVT, Subtarget) ||
- isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
- isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
- isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) ||
- (Subtarget->hasSSE41() && isINSERTPSMask(M, SVT)));
+ // We only care that the types being shuffled are legal. The lowering can
+ // handle any possible shuffle mask that results.
+ return isTypeLegal(VT.getSimpleVT());
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
- if (!VT.isSimple())
- return false;
-
- MVT SVT = VT.getSimpleVT();
-
- // This is an experimental legality test that is tailored to match the
- // legality test of the experimental lowering more closely. They are gated
- // separately to ease testing of performance differences.
- if (ExperimentalVectorShuffleLegality)
- // The new vector shuffle lowering is very good at managing zero-inputs.
- return isShuffleMaskLegal(Mask, VT);
-
- unsigned NumElts = SVT.getVectorNumElements();
- // FIXME: This collection of masks seems suspect.
- if (NumElts == 2)
- return true;
- if (NumElts == 4 && SVT.is128BitVector()) {
- return (isMOVLMask(Mask, SVT) ||
- isCommutedMOVLMask(Mask, SVT, true) ||
- isSHUFPMask(Mask, SVT) ||
- isSHUFPMask(Mask, SVT, /* Commuted */ true) ||
- isBlendMask(Mask, SVT, Subtarget->hasSSE41(),
- Subtarget->hasInt256()));
- }
- return false;
+  // Just delegate to the generic legality; clear masks aren't special.
+ return isShuffleMaskLegal(Mask, VT);
}
//===----------------------------------------------------------------------===//
@@ -20488,11 +18528,10 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
-static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
- const TargetInstrInfo *TII,
- const X86Subtarget* Subtarget) {
+static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
+ const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
-
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
// Address into RAX/EAX, other two args into ECX, EDX.
unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
@@ -20514,9 +18553,8 @@ static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
}
MachineBasicBlock *
-X86TargetLowering::EmitVAARG64WithCustomInserter(
- MachineInstr *MI,
- MachineBasicBlock *MBB) const {
+X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
@@ -20528,7 +18566,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
// 9 ) EFLAGS (implicit-def)
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
- assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+ static_assert(X86::AddrNumOperands == 5,
+ "VAARG_64 assumes 5 address operands");
unsigned DestReg = MI->getOperand(0).getReg();
MachineOperand &Base = MI->getOperand(1);
@@ -20546,7 +18585,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -20802,7 +18841,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
@@ -20885,7 +18924,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -20904,6 +18943,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
+
+ // We also lower double CMOVs:
+ // (CMOV (CMOV F, T, cc1), T, cc2)
+  // to two successive branches. For that, we look for another CMOV as the
+ // following instruction.
+ //
+ // Without this, we would add a PHI between the two jumps, which ends up
+ // creating a few copies all around. For instance, for
+ //
+ // (sitofp (zext (fcmp une)))
+ //
+ // we would generate:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // movaps %xmm0, %xmm1
+ // jne .LBB5_2
+ // xorps %xmm1, %xmm1
+ // .LBB5_2:
+ // jp .LBB5_4
+ // movaps %xmm1, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ // because this custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both CMOVs in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+ //
+ // Which, in our sitofp/fcmp example, gives us something like:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // jne .LBB5_4
+ // jp .LBB5_4
+ // xorps %xmm0, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ MachineInstr *NextCMOV = nullptr;
+ MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+ if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+ NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
+ NextCMOV = &*NextMIIt;
+
+ MachineBasicBlock *jcc1MBB = nullptr;
+
+ // If we have a double CMOV, we lower it to two successive branches to
+ // the same block. EFLAGS is used by both, so mark it as live in the second.
+ if (NextCMOV) {
+ jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, jcc1MBB);
+ jcc1MBB->addLiveIn(X86::EFLAGS);
+ }
+
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
@@ -20911,10 +19036,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo *TRI =
- BB->getParent()->getSubtarget().getRegisterInfo();
- if (!MI->killsRegister(X86::EFLAGS) &&
- !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+
+ MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -20925,7 +19051,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
+ if (NextCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ BB->addSuccessor(jcc1MBB);
+
+    // In that case, jcc1MBB will itself fall through to copy0MBB, and
+ // jump to the sinkMBB.
+ jcc1MBB->addSuccessor(copy0MBB);
+ jcc1MBB->addSuccessor(sinkMBB);
+ } else {
+ BB->addSuccessor(copy0MBB);
+ }
+
+ // The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
@@ -20933,6 +19071,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ if (NextCMOV) {
+ unsigned Opc2 = X86::GetCondBranchFromCond(
+ (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
+ }
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -20941,10 +19085,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineInstrBuilder MIB =
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ // If we have a double CMOV, the second Jcc provides the same incoming
+ // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
+ if (NextCMOV) {
+ MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
+ // Copy the PHI result to the register defined by the second CMOV.
+ BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
+ DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg());
+ NextCMOV->eraseFromParent();
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
@@ -20954,7 +19110,7 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -21027,10 +19183,8 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
- const uint32_t *RegMask = MF->getTarget()
- .getSubtargetImpl()
- ->getRegisterInfo()
- ->getCallPreservedMask(CallingConv::C);
+ const uint32_t *RegMask =
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -21087,7 +19241,6 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetMachO());
@@ -21106,8 +19259,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
- const X86InstrInfo *TII =
- static_cast<const X86InstrInfo *>(F->getSubtarget().getInstrInfo());
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
@@ -21116,10 +19268,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
- const uint32_t *RegMask = F->getTarget()
- .getSubtargetImpl()
- ->getRegisterInfo()
- ->getCallPreservedMask(CallingConv::C);
+ const uint32_t *RegMask =
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -21164,7 +19314,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -21271,8 +19421,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -21290,8 +19439,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
- const X86Subtarget &STI = MF->getTarget().getSubtarget<X86Subtarget>();
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ const bool Uses64BitFramePtr =
+ Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
unsigned FramePtr = RegInfo->getFrameRegister(*MF);
@@ -21314,7 +19463,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -21329,8 +19478,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -21449,7 +19597,7 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
default: llvm_unreachable("Unrecognized FMA variant.");
}
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
.addOperand(MI->getOperand(0))
@@ -21472,6 +19620,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
+ case X86::TAILJMPd64_REX:
+ case X86::TAILJMPr64_REX:
+ case X86::TAILJMPm64_REX:
llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
@@ -21502,6 +19653,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
+ case X86::CMOV_V8I1:
+ case X86::CMOV_V16I1:
+ case X86::CMOV_V32I1:
+ case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
@@ -21514,7 +19669,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
MachineFunction *F = BB->getParent();
- const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
@@ -21598,7 +19753,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -21611,16 +19766,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB, BB->getParent()->getSubtarget().getInstrInfo(),
- Subtarget);
+ return EmitMonitor(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
+ return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -21948,9 +20102,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Note that even with AVX we prefer the PSHUFD form of shuffle for integer
// vectors because it can have a load folded into it that UNPCK cannot. This
// doesn't preclude something switching to the shorter encoding post-RA.
- if (FloatDomain) {
- if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
- bool Lo = Mask.equals(0, 0);
+ //
+ // FIXME: Should teach these routines about AVX vector widths.
+ if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
+ bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
MVT ShuffleVT;
// Check if we have SSE3 which will let us use MOVDDUP. That instruction
@@ -21979,8 +20135,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
return true;
}
if (Subtarget->hasSSE3() &&
- (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
- bool Lo = Mask.equals(0, 0, 2, 2);
+ (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) {
+ bool Lo = Mask.equals({0, 0, 2, 2});
unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -21993,8 +20149,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
/*AddTo*/ true);
return true;
}
- if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
- bool Lo = Mask.equals(0, 0, 1, 1);
+ if (Mask.equals({0, 0, 1, 1}) || Mask.equals({2, 2, 3, 3})) {
+ bool Lo = Mask.equals({0, 0, 1, 1});
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -22012,12 +20168,12 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain &&
- (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
- Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
- 15))) {
+ if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals(
+ {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}))) {
bool Lo = Mask[0] == 0;
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -22053,9 +20209,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// in practice PSHUFB tends to be *very* fast so we're more aggressive.
if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
SmallVector<SDValue, 16> PSHUFBMask;
- assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
- int Ratio = 16 / Mask.size();
- for (unsigned i = 0; i < 16; ++i) {
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Ratio = NumBytes / Mask.size();
+ for (int i = 0; i < NumBytes; ++i) {
if (Mask[i / Ratio] == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
@@ -22063,14 +20219,15 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
int M = Mask[i / Ratio] != SM_SentinelZero
? Ratio * Mask[i / Ratio] + i % Ratio
: 255;
- PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
+ PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
- Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
DCI.AddToWorklist(Op.getNode());
SDValue PSHUFBMaskOp =
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
- Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
/*AddTo*/ true);
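The loop above widens an element-level shuffle mask into the byte-level mask PSHUFB consumes: each element index is scaled by the byte ratio and offset by the byte position within the element, while zeroed lanes become 255, whose set high bit makes PSHUFB write zero. A simplified scalar sketch (undef handling omitted):

    #include <cstdint>
    #include <vector>

    // Expand an element shuffle mask into a PSHUFB byte mask.
    std::vector<uint8_t> ExpandToByteMask(const std::vector<int> &Mask,
                                          int NumBytes) {
      int Ratio = NumBytes / static_cast<int>(Mask.size());
      std::vector<uint8_t> Bytes(NumBytes);
      for (int i = 0; i < NumBytes; ++i) {
        int M = Mask[i / Ratio];
        Bytes[i] = M < 0 ? 255 // sentinel-zero lane
                         : static_cast<uint8_t>(Ratio * M + i % Ratio);
      }
      return Bytes;
    }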
@@ -22128,10 +20285,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; // Bail if we hit a non-vector.
- // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
- // version should be added.
- if (VT.getSizeInBits() != 128)
- return false;
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
@@ -22234,12 +20387,26 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
/// PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
+  // If we have more than 128 bits, only the low 128 bits of the shuffle mask
+  // matter. Check that the upper masks are repeats and remove them.
+ if (VT.getSizeInBits() > 128) {
+ int LaneElts = 128 / VT.getScalarSizeInBits();
+#ifndef NDEBUG
+ for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
+ for (int j = 0; j < LaneElts; ++j)
+ assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
+ "Mask doesn't repeat in high 128-bit lanes!");
+#endif
+ Mask.resize(LaneElts);
+ }
+
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
@@ -22312,7 +20479,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
- if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
+ if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
+ V.getSimpleValueType().getScalarType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@@ -22357,7 +20525,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Rebuild the chain around this new shuffle.
while (!Chain.empty()) {
@@ -22444,7 +20612,7 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
- getV4X86ShuffleImm8ForMask(Mask, DAG));
+ getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Check that the shuffles didn't cancel each other out. If not, we need to
// combine to the new one.
@@ -22486,8 +20654,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
- assert(VT == MVT::v8i16);
- (void)VT;
+ assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@@ -22495,17 +20662,18 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
- if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+ if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+ V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
DCI.AddToWorklist(V.getNode());
- V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
- getV4X86ShuffleImm8ForMask(DMask, DAG));
+ V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
+ getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
DCI.AddToWorklist(V.getNode());
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
@@ -22533,18 +20701,14 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
- const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
- const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
- if (std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackLoMask)) ||
- std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackHiMask))) {
+ if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
+ V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
- DL, MVT::v8i16, V, V);
+ DL, VT, V, V);
}
}
}
@@ -22602,9 +20766,9 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
- if (!(isShuffleEquivalent(Mask, 0, 3) ||
- isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
- isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
+ if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
+ isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
+ isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
return SDValue();
// Only specific types are legal at this point, assert so we notice if and
@@ -22692,10 +20856,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Only handle 128 wide vector from here on.
- if (!VT.is128BitVector())
- return SDValue();
-
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
@@ -22729,15 +20889,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformTruncateCombine - Converts truncate operation to
-/// a sequence of vector shuffle operations.
-/// It is possible when we truncate 256-bit vector to 128-bit vector
-static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
- return SDValue();
-}
-
/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
/// specific shuffle of a load can be folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
@@ -22760,7 +20911,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
- if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ if (!BCVT.isVector() ||
+ BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
}
@@ -22788,7 +20940,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
: InVec.getOperand(1);
// If inputs to shuffle are the same for both ops, then allow 2 uses
- unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+ unsigned AllowedUses = InVec.getNumOperands() > 1 &&
+ InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
if (LdNode.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
@@ -22833,6 +20986,25 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+/// \brief Detect bitcasts from i32 to the x86mmx low word. Since MMX types
+/// are special and don't usually play with other vector types, it's better to
+/// handle them early to be sure we emit efficient code by avoiding
+/// store-load conversions.
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::x86mmx ||
+ N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
+ N->getOperand(0)->getValueType(0) != MVT::v2i32)
+ return SDValue();
+
+ SDValue V = N->getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
+ N->getValueType(0), V.getOperand(0));
+
+ return SDValue();
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// into a somewhat faster sequence. For i686, the best sequence is apparently
@@ -22845,16 +21017,43 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return NewOp;
SDValue InputVector = N->getOperand(0);
+ SDLoc dl(InputVector);
+ // Detect mmx to i32 conversion through a v2i32 elt extract.
+ if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
+ N->getValueType(0) == MVT::i32 &&
+ InputVector.getValueType() == MVT::v2i32) {
+
+ // The bitcast source is a direct mmx result.
+ SDValue MMXSrc = InputVector.getNode()->getOperand(0);
+ if (MMXSrc.getValueType() == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
+ N->getValueType(0),
+ InputVector.getNode()->getOperand(0));
+
+ // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))).
+ SDValue MMXSrcOp = MMXSrc.getOperand(0);
+ if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
+ MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() &&
+ MMXSrcOp.getOpcode() == ISD::BITCAST &&
+ MMXSrcOp.getValueType() == MVT::v1i64 &&
+ MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
+ N->getValueType(0),
+ MMXSrcOp.getOperand(0));
+ }
- // Detect whether we are trying to convert from mmx to i32 and the bitcast
- // from mmx to v2i32 has a single usage.
- if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
- InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
- InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
- return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
- N->getValueType(0),
- InputVector.getNode()->getOperand(0));
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
+ InputVector.getOpcode() == ISD::BITCAST &&
+ dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
+ uint64_t ExtractedElt =
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t InputValue =
+ cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ uint64_t Res = (InputValue >> ExtractedElt) & 1;
+ return DAG.getConstant(Res, dl, MVT::i1);
+ }
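The i1 path above folds the extract into a single bit test on the constant: the element index simply selects a bit of the source integer. A worked instance (illustrative values, not from the source):

    #include <cstdint>

    uint64_t ExtractBit(uint64_t InputValue, uint64_t ExtractedElt) {
      return (InputValue >> ExtractedElt) & 1;
    }
    // e.g. ExtractBit(0x2A /* 0b00101010 */, 3) == 1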
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
@@ -22900,17 +21099,16 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// otherwise bounce the vector off the cache.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vals[4];
- SDLoc dl(InputVector);
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
- DAG.getConstant(0, VecIdxTy));
+ DAG.getConstant(0, dl, VecIdxTy));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
- DAG.getConstant(1, VecIdxTy));
+ DAG.getConstant(1, dl, VecIdxTy));
- SDValue ShAmt = DAG.getConstant(32,
+ SDValue ShAmt = DAG.getConstant(32, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
@@ -22930,7 +21128,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Replace each use (extract) with a load of the appropriate element.
for (unsigned i = 0; i < 4; ++i) {
uint64_t Offset = EltSize * i;
- SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+ SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
StackPtr, OffsetVal);
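The legal-SRA path above treats the v4i32 as two i64 halves and shifts the odd lane of each half into position before truncating. A scalar model (little-endian lane order assumed):

    #include <cstdint>

    // Recover the four i32 lanes from the two i64 halves of a v4i32.
    void SplitLanes(uint64_t Bottom, uint64_t Top, uint32_t Lane[4]) {
      Lane[0] = static_cast<uint32_t>(Bottom);
      Lane[1] = static_cast<uint32_t>(Bottom >> 32); // the shift-by-32 + trunc
      Lane[2] = static_cast<uint32_t>(Top);
      Lane[3] = static_cast<uint32_t>(Top >> 32);
    }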
@@ -23013,16 +21211,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMIN : 0; break;
+ Opc = hasSigned ? X86ISD::SMIN : 0u; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMAX : 0; break;
+ Opc = hasSigned ? X86ISD::SMAX : 0u; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
@@ -23031,16 +21229,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMAX : 0; break;
+ Opc = hasSigned ? X86ISD::SMAX : 0u; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMIN : 0; break;
+ Opc = hasSigned ? X86ISD::SMIN : 0u; break;
}
}
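The two switches above map select-of-compare patterns onto min/max opcodes, with the second switch covering reversed select arms. In scalar form, the matched shapes are (a sketch):

    #include <cstdint>

    // x CC y ? x : y  -> the matching min/max node directly.
    uint32_t UMinPattern(uint32_t X, uint32_t Y) { return X < Y ? X : Y; } // UMIN
    // x CC y ? y : x  -> the opposite node, because the arms are reversed.
    uint32_t UMaxPattern(uint32_t X, uint32_t Y) { return X < Y ? Y : X; } // UMAX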
@@ -23291,21 +21489,21 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, Cond.getValueType()));
+ DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
- DAG.getConstant(ShAmt, MVT::i8));
+ DAG.getConstant(ShAmt, DL, MVT::i8));
}
  // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, Cond.getValueType()));
+ DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
@@ -23340,7 +21538,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, Cond.getValueType()));
+ DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
@@ -23348,7 +21546,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
- DAG.getConstant(Diff, Cond.getValueType()));
+ DAG.getConstant(Diff, DL,
+ Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
@@ -23436,7 +21635,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
(-OpRHSConst->getAPIntValue() - 1))
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(-OpRHSConst->getAPIntValue(), VT));
+ DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT));
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
@@ -23450,7 +21649,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// don't rely on particular values of undef lanes.
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(OpRHSConst->getAPIntValue(), VT));
+ DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));
}
}
}
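The constant-select folds in this function replace a conditional move with straight-line arithmetic on the zero-extended condition. Worked instances of the three tricks (illustrative constants):

    #include <cstdint>

    uint32_t SelPow2(bool C)  { return uint32_t(C) << 3; }    // C ? 8 : 0
    uint32_t SelAdj(bool C)   { return uint32_t(C) + 4; }     // C ? 5 : 4
    uint32_t SelScale(bool C) { return uint32_t(C) * 4 + 3; } // C ? 7 : 3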
@@ -23528,21 +21727,31 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // If we know that this node is legal then we know that it is going to be
- // matched by one of the SSE/AVX BLEND instructions. These instructions only
- // depend on the highest bit in each word. Try to use SimplifyDemandedBits
- // to simplify previous instructions.
+ // We should generate an X86ISD::BLENDI from a vselect if its argument
+ // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
+ // constants. This specific pattern gets generated when we split a
+ // selector for a 512 bit vector in a machine without AVX512 (but with
+ // 256-bit vectors), during legalization:
+ //
+ // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
+ //
+ // Iff we find this pattern and the build_vectors are built from
+ // constants, we translate the vselect into a shuffle_vector that we
+ // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
+ if ((N->getOpcode() == ISD::VSELECT ||
+ N->getOpcode() == X86ISD::SHRUNKBLEND) &&
+ !DCI.isBeforeLegalize() && !VT.is512BitVector()) {
+ SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
+ // If this is a *dynamic* select (non-constant condition) and we can match
+ // this node with one of the variable blend instructions, restructure the
+ // condition so that the blends can use the high bit of each element and use
+ // SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
- // We explicitly check against v8i16 and v16i16 because, although
- // they're marked as Custom, they might only be legal when Cond is a
- // build_vector of constants. This will be taken care in a later
- // condition.
- (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
- VT != MVT::v8i16) &&
- // Don't optimize vector of constants. Those are handled by
- // the generic code and all the bits must be properly set for
- // the generic optimizer.
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
@@ -23550,6 +21759,31 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (BitWidth == 1)
return SDValue();
+ // We can only handle the cases where VSELECT is directly legal on the
+ // subtarget. We custom lower VSELECT nodes with constant conditions and
+ // this makes it hard to see whether a dynamic VSELECT will correctly
+ // lower, so we both check the operation's status and explicitly handle the
+ // cases where a *dynamic* blend will fail even though a constant-condition
+ // blend could be custom lowered.
+ // FIXME: We should find a better way to handle this class of problems.
+ // Potentially, we should combine constant-condition vselect nodes
+ // pre-legalization into shuffles and not mark as many types as custom
+ // lowered.
+ if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+ // FIXME: We don't support i16-element blends currently. We could and
+ // should support them by making *all* the bits in the condition be set
+ // rather than just the high bit and using an i8-element blend.
+ if (VT.getScalarType() == MVT::i16)
+ return SDValue();
+ // Dynamic blending was only available from SSE4.1 onward.
+ if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+ return SDValue();
+  // Byte blends are only available in AVX2.
+ if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
+ !Subtarget->hasAVX2())
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -23598,25 +21832,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // We should generate an X86ISD::BLENDI from a vselect if its argument
- // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
- // constants. This specific pattern gets generated when we split a
- // selector for a 512 bit vector in a machine without AVX512 (but with
- // 256-bit vectors), during legalization:
- //
- // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
- //
- // Iff we find this pattern and the build_vectors are built from
- // constants, we translate the vselect into a shuffle_vector that we
- // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if ((N->getOpcode() == ISD::VSELECT ||
- N->getOpcode() == X86ISD::SHRUNKBLEND) &&
- !DCI.isBeforeLegalize()) {
- SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
- if (Shuffle.getNode())
- return Shuffle;
- }
-
return SDValue();
}
@@ -23752,6 +21967,49 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
}
+/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
+/// Match:
+/// (X86or (X86setcc) (X86setcc))
+/// (X86cmp (and (X86setcc) (X86setcc)), 0)
+static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
+ X86::CondCode &CC1, SDValue &Flags,
+ bool &isAnd) {
+ if (Cond->getOpcode() == X86ISD::CMP) {
+ ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
+ if (!CondOp1C || !CondOp1C->isNullValue())
+ return false;
+
+ Cond = Cond->getOperand(0);
+ }
+
+ isAnd = false;
+
+ SDValue SetCC0, SetCC1;
+ switch (Cond->getOpcode()) {
+ default: return false;
+ case ISD::AND:
+ case X86ISD::AND:
+ isAnd = true;
+ // fallthru
+ case ISD::OR:
+ case X86ISD::OR:
+ SetCC0 = Cond->getOperand(0);
+ SetCC1 = Cond->getOperand(1);
+ break;
+  }
+
+ // Make sure we have SETCC nodes, using the same flags value.
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
+ SetCC1.getOpcode() != X86ISD::SETCC ||
+ SetCC0->getOperand(1) != SetCC1->getOperand(1))
+ return false;
+
+ CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
+ CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
+ Flags = SetCC0->getOperand(1);
+ return true;
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -23785,7 +22043,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// Extra check as FCMOV only supports a subset of X86 cond.
(FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
SDValue Ops[] = { FalseOp, TrueOp,
- DAG.getConstant(CC, MVT::i8), Flags };
+ DAG.getConstant(CC, DL, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
@@ -23807,14 +22065,14 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, MVT::i8), Cond);
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
- DAG.getConstant(ShAmt, MVT::i8));
+ DAG.getConstant(ShAmt, DL, MVT::i8));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
@@ -23824,7 +22082,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, MVT::i8), Cond);
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
@@ -23862,14 +22120,14 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, MVT::i8), Cond);
+ DAG.getConstant(CC, DL, MVT::i8), Cond);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
- DAG.getConstant(Diff, Cond.getValueType()));
+ DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
@@ -23915,12 +22173,50 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
- DAG.getConstant(CC, MVT::i8), Cond };
+ DAG.getConstant(CC, DL, MVT::i8), Cond };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}
}
+ // Fold and/or of setcc's to double CMOV:
+ // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
+ // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
+ //
+ // This combine lets us generate:
+ // cmovcc1 (jcc1 if we don't have CMOV)
+ // cmovcc2 (same)
+ // instead of:
+ // setcc1
+ // setcc2
+ // and/or
+ // cmovne (jne if we don't have CMOV)
+ // When we can't use the CMOV instruction, it might increase branch
+ // mispredicts.
+ // When we can use CMOV, or when there is no mispredict, this improves
+ // throughput and reduces register pressure.
+ //
+ if (CC == X86::COND_NE) {
+ SDValue Flags;
+ X86::CondCode CC0, CC1;
+ bool isAndSetCC;
+ if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
+ if (isAndSetCC) {
+ std::swap(FalseOp, TrueOp);
+ CC0 = X86::GetOppositeBranchCondition(CC0);
+ CC1 = X86::GetOppositeBranchCondition(CC1);
+ }
+
+ SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8),
+ Flags};
+ SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
+ SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags};
+ SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
+ return CMOV;
+ }
+ }
+
return SDValue();
}
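
The double-CMOV fold above rests on a pure selection identity: a select on
(cc1 | cc2) equals two chained selects, and the AND form follows by swapping
the arms and inverting both conditions. A minimal standalone check of that
identity (plain C++, no LLVM types; cmov() is only a stand-in for X86ISD::CMOV):

    #include <cassert>

    // cmov(cc, t, f) models X86ISD::CMOV: yields t when the condition holds.
    static int cmov(bool cc, int t, int f) { return cc ? t : f; }

    int main() {
      for (int cc1 = 0; cc1 < 2; ++cc1)
        for (int cc2 = 0; cc2 < 2; ++cc2) {
          const int T = 1, F = 0;
          // (CMOV F, T, cc1 | cc2) == (CMOV (CMOV F, T, cc1), T, cc2)
          assert(cmov(cc1 | cc2, T, F) == cmov(cc2, T, cmov(cc1, T, F)));
          // (CMOV F, T, cc1 & cc2) == (CMOV (CMOV T, F, !cc1), F, !cc2)
          assert(cmov(cc1 & cc2, T, F) == cmov(!cc2, F, cmov(!cc1, F, T)));
        }
      return 0;
    }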
@@ -23931,24 +22227,16 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
// SSE/AVX/AVX2 blend intrinsics.
case Intrinsic::x86_avx2_pblendvb:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
// Don't try to simplify this intrinsic if we don't have AVX2.
if (!Subtarget->hasAVX2())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx_blendv_ps_256:
// Don't try to simplify this intrinsic if we don't have AVX.
if (!Subtarget->hasAVX())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_sse41_pblendvb: {
@@ -24020,8 +22308,9 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
// Replace this packed shift intrinsic with a target independent
// shift dag node.
- SDValue Splat = DAG.getConstant(C, VT);
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat);
+ SDLoc DL(N);
+ SDValue Splat = DAG.getConstant(C, DL, VT);
+ return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat);
}
}
}
@@ -24035,7 +22324,7 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = N->getValueType(0);
- if (VT != MVT::i64)
+ if (VT != MVT::i64 && VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -24071,17 +22360,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
SDValue NewMul;
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(MulAmt1, VT));
+ DAG.getConstant(MulAmt1, DL, VT));
if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
- DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
- DAG.getConstant(MulAmt2, VT));
+ DAG.getConstant(MulAmt2, DL, VT));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
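
The decomposition above assumes MulAmt factors as MulAmt1 * MulAmt2, where each
factor is either a power of two (lowered to a shift) or 3, 5, or 9 (lowered to
a single LEA). A self-contained check of one such factoring, 40 = 5 * 8, using
the x + 4*x form an LEA would compute (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t x : {0ull, 1ull, 7ull, 123456789ull}) {
        uint64_t mul = x * 40;       // original multiply
        uint64_t lea = x + (x << 2); // x * 5 as one LEA: base + index*4
        assert(mul == (lea << 3));   // MulAmt1 = 5, MulAmt2 = 8
      }
      return 0;
    }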
@@ -24108,9 +22397,11 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
APInt ShAmt = N1C->getAPIntValue();
Mask = Mask.shl(ShAmt);
- if (Mask != 0)
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- N00, DAG.getConstant(Mask, VT));
+ if (Mask != 0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ N00, DAG.getConstant(Mask, DL, VT));
+ }
}
}
@@ -24240,7 +22531,8 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget->hasAVX512()) {
SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
- CMP01, DAG.getConstant(x86cc, MVT::i8));
+ CMP01,
+ DAG.getConstant(x86cc, DL, MVT::i8));
if (N->getValueType(0) != MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0),
FSetCC);
@@ -24248,7 +22540,8 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
}
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
- DAG.getConstant(x86cc, MVT::i8));
+ DAG.getConstant(x86cc, DL,
+ MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
@@ -24264,14 +22557,16 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
- Vector32, DAG.getIntPtrConstant(0));
+ Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
}
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
+ OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
- DAG.getConstant(1, IntVT));
- SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ DAG.getConstant(1, DL, IntVT));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
+ ANDed);
return OneBitOfTruth;
}
}
@@ -24383,7 +22678,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
APInt Mask = APInt::getAllOnesValue(InBits);
Mask = Mask.zext(VT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
- Op, DAG.getConstant(Mask, VT));
+ Op, DAG.getConstant(Mask, DL, VT));
}
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
@@ -24393,24 +22688,116 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
}
}
+static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ // A vector zext_in_reg may be represented as a shuffle,
+ // feeding into a bitcast (this represents anyext) feeding into
+ // an and with a mask.
+ // We'd like to try to combine that into a shuffle with zero
+ // plus a bitcast, removing the and.
+ if (N0.getOpcode() != ISD::BITCAST ||
+ N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
+ return SDValue();
+
+  // The other side of the AND should be a splat of 2^C - 1, where C
+  // is the number of bits in the source type.
+ if (N1.getOpcode() == ISD::BITCAST)
+ N1 = N1.getOperand(0);
+ if (N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+ BuildVectorSDNode *Vector = cast<BuildVectorSDNode>(N1);
+
+ ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N0.getOperand(0));
+ EVT SrcType = Shuffle->getValueType(0);
+
+ // We expect a single-source shuffle
+ if (Shuffle->getOperand(1)->getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ unsigned SrcSize = SrcType.getScalarSizeInBits();
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs))
+ return SDValue();
+
+ unsigned ResSize = N1.getValueType().getScalarSizeInBits();
+ // Make sure the splat matches the mask we expect
+ if (SplatBitSize > ResSize ||
+ (SplatValue + 1).exactLogBase2() != (int)SrcSize)
+ return SDValue();
+
+ // Make sure the input and output size make sense
+ if (SrcSize >= ResSize || ResSize % SrcSize)
+ return SDValue();
+
+ // We expect a shuffle of the form <0, u, u, u, 1, u, u, u...>
+ // The number of u's between each two values depends on the ratio between
+ // the source and dest type.
+ unsigned ZextRatio = ResSize / SrcSize;
+ bool IsZext = true;
+ for (unsigned i = 0; i < SrcType.getVectorNumElements(); ++i) {
+ if (i % ZextRatio) {
+      if (Shuffle->getMaskElt(i) >= 0) {
+ // Expected undef
+ IsZext = false;
+ break;
+ }
+ } else {
+ if (Shuffle->getMaskElt(i) != (int)(i / ZextRatio)) {
+ // Expected element number
+ IsZext = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsZext)
+ return SDValue();
+
+ // Ok, perform the transformation - replace the shuffle with
+ // a shuffle of the form <0, k, k, k, 1, k, k, k> with zero
+ // (instead of undef) where the k elements come from the zero vector.
+ SmallVector<int, 8> Mask;
+ unsigned NumElems = SrcType.getVectorNumElements();
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (i % ZextRatio)
+ Mask.push_back(NumElems);
+ else
+ Mask.push_back(i / ZextRatio);
+
+ SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
+ Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
+ return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle);
+}
+
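
The mask built above interleaves live source elements with lanes taken from the
zero vector: shuffle indices of NumElems and up refer to the second operand,
which is the all-zero constant. A standalone sketch of the mask construction
for a v4i8 -> v4i32 zext viewed as a v16i8 shuffle (plain C++; assumes the
little-endian lane order the comment describes):

    #include <cassert>
    #include <vector>

    int main() {
      const unsigned NumElems = 16, ZextRatio = 32 / 8; // ResSize / SrcSize
      std::vector<int> Mask;
      for (unsigned i = 0; i < NumElems; ++i)
        Mask.push_back(i % ZextRatio ? NumElems        // a lane of the zero vector
                                     : i / ZextRatio); // source byte i/4
      // Expected form: <0, z, z, z, 1, z, z, z, 2, z, z, z, 3, z, z, z>
      assert(Mask[0] == 0 && Mask[4] == 1 && Mask[8] == 2 && Mask[12] == 3);
      assert(Mask[1] == 16 && Mask[15] == 16);
      return 0;
    }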
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget))
+ return Zext;
+
+ if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
// Create BEXTR instructions
// BEXTR is ((X >> imm) & (2**size-1))
if (VT == MVT::i32 || VT == MVT::i64) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
-
// Check for BEXTR.
if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
(N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
@@ -24420,10 +22807,11 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
uint64_t Mask = MaskNode->getZExtValue();
uint64_t Shift = ShiftNode->getZExtValue();
if (isMask_64(Mask)) {
- uint64_t MaskSize = CountPopulation_64(Mask);
+ uint64_t MaskSize = countPopulation(Mask);
if (Shift + MaskSize <= VT.getSizeInBits())
return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
- DAG.getConstant(Shift | (MaskSize << 8), VT));
+ DAG.getConstant(Shift | (MaskSize << 8), DL,
+ VT));
}
}
} // BEXTR
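
The BEXTR control operand built above packs the start bit in bits 7:0 and the
field length in bits 15:8, i.e. Shift | (MaskSize << 8). A hedged stand-in for
the instruction's semantics (not the real intrinsic; bextr() here is purely a
model):

    #include <cassert>
    #include <cstdint>

    // Models BEXTR: extract Len bits of Src starting at bit Start.
    static uint64_t bextr(uint64_t Src, uint32_t Start, uint32_t Len) {
      uint32_t Control = (Start & 0xff) | ((Len & 0xff) << 8);
      uint32_t S = Control & 0xff, L = (Control >> 8) & 0xff;
      uint64_t Mask = (L >= 64) ? ~0ull : ((1ull << L) - 1); // 2**size - 1
      return (Src >> S) & Mask;
    }

    int main() {
      // ((X >> 4) & 0xfff) is BEXTR with Start = 4, Len = 12.
      uint64_t X = 0x123456789abcdef0ull;
      assert(bextr(X, 4, 12) == ((X >> 4) & 0xfff));
      return 0;
    }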
@@ -24438,10 +22826,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
-
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
//ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
@@ -24553,8 +22937,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptForSize =
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -24642,10 +23026,10 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
// Generate SUB & CMOV.
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
- DAG.getConstant(0, VT), N0.getOperand(0));
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
SDValue Ops[] = { N0.getOperand(0), Neg,
- DAG.getConstant(X86::COND_GE, MVT::i8),
+ DAG.getConstant(X86::COND_GE, DL, MVT::i8),
SDValue(Neg.getNode(), 1) };
return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
}
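
The pattern handled here is the classic branchless abs, (x + (x >> 31)) ^
(x >> 31), and the replacement computes the same value as a flag-setting SUB
followed by CMOVGE. A quick standalone cross-check (plain C++; arithmetic right
shift of signed values is assumed, as on x86):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {5, 0, -7, -123456}) {
        int32_t S = X >> 31;                     // sra x, 31: 0 or -1
        int32_t Abs = (X + S) ^ S;               // the matched xor/add form
        int32_t Cmov = (0 - X >= 0) ? 0 - X : X; // SUB 0, x then CMOVGE
        assert(Abs == (X < 0 ? -X : X) && Cmov == Abs);
      }
      return 0;
    }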
@@ -24690,7 +23074,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy());
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
@@ -24725,7 +23109,6 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = Mld->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
EVT LdVT = Mld->getMemoryVT();
SDLoc dl(Mld);
@@ -24753,7 +23136,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+ assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
+ && "WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
}
@@ -24769,7 +23153,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, WideVecVT),
+ DAG.getConstant(0, dl, WideVecVT),
&ShuffleVec[0]);
}
else {
@@ -24781,14 +23165,14 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+ SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
-
+
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
@@ -24805,7 +23189,6 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = Mst->getValue().getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
@@ -24819,7 +23202,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
"Unexpected size for truncating masked store");
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
- assert (((NumElems * FromSz) % ToSz) == 0 &&
+ assert (((NumElems * FromSz) % ToSz) == 0 &&
"Unexpected ratio for truncating masked store");
unsigned SizeRatio = FromSz / ToSz;
@@ -24837,7 +23220,8 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+ assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
+ && "WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
@@ -24853,7 +23237,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
- DAG.getConstant(0, WideVecVT),
+ DAG.getConstant(0, dl, WideVecVT),
&ShuffleVec[0]);
}
else {
@@ -24865,7 +23249,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+ SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
@@ -24899,7 +23283,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
- SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
+ SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
@@ -24972,7 +23356,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl,
TLI.getPointerTy());
SDValue Ptr = St->getBasePtr();
@@ -24980,7 +23364,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
- DAG.getIntPtrConstant(i));
+ DAG.getIntPtrConstant(i, dl));
SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
@@ -25001,10 +23385,9 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
- bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
- && Subtarget->hasSSE2();
+ bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
+ bool F64IsLegal =
+ !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
@@ -25065,7 +23448,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Otherwise, lower to two pairs of 32-bit loads / stores.
SDValue LoAddr = Ld->getBasePtr();
SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, LdDL, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
@@ -25086,7 +23469,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
LoAddr = St->getBasePtr();
HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
- DAG.getConstant(4, MVT::i32));
+ DAG.getConstant(4, StDL, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
St->getPointerInfo(),
@@ -25099,6 +23482,27 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
MinAlign(St->getAlignment(), 4));
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
+
+ // This is similar to the above case, but here we handle a scalar 64-bit
+ // integer store that is extracted from a vector on a 32-bit target.
+ // If we have SSE2, then we can treat it like a floating-point double
+ // to get past legalization. The execution dependencies fixup pass will
+ // choose the optimal machine instruction for the store if this really is
+ // an integer or v2f32 rather than an f64.
+ if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() &&
+ St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue OldExtract = St->getOperand(1);
+ SDValue ExtOp0 = OldExtract.getOperand(0);
+ unsigned VecSize = ExtOp0.getValueSizeInBits();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ BitCast, OldExtract.getOperand(1));
+ return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ }
+
return SDValue();
}
@@ -25197,7 +23601,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- CommuteVectorShuffleMask(RMask, NumElts);
+ ShuffleVectorSDNode::commuteMask(RMask);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -25261,11 +23665,13 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+
// F[X]OR(0.0, x) -> x
- // F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
+ // F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
@@ -25296,26 +23702,30 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
- // FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
+
+ // FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
return SDValue();
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
- // FANDN(x, 0.0) -> 0.0
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
+ // FANDN(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
return SDValue();
}
@@ -25391,23 +23801,76 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT InVT = N0->getValueType(0);
+ EVT InSVT = InVT.getScalarType();
+ SDLoc DL(N);
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
// This exposes the sext to the sdivrem lowering, so that it directly extends
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
- N0.getValueType() == MVT::i8 && VT == MVT::i32) {
- SDLoc dl(N);
+ InVT == MVT::i8 && VT == MVT::i32) {
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
+ SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys,
N0.getOperand(0), N0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
return R.getValue(1);
}
- if (!DCI.isBeforeLegalizeOps())
+ if (!DCI.isBeforeLegalizeOps()) {
+ if (N0.getValueType() == MVT::i1) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue AllOnes =
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
+ }
return SDValue();
+ }
+
+ if (VT.isVector()) {
+ auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+ EVT InVT = N->getValueType(0);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
+ 128 / InVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+ DAG.getUNDEF(InVT));
+ Opnds[0] = N;
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
+ };
+
+ // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
+ // which ensures lowering to X86ISD::VSEXT (pmovsx*).
+ if (VT.getSizeInBits() == 128 &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ SDValue ExOp = ExtendToVec128(DL, N0);
+ return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
+ }
+
+ // On pre-AVX2 targets, split into 128-bit nodes of
+ // ISD::SIGN_EXTEND_VECTOR_INREG.
+ if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ unsigned NumVecs = VT.getSizeInBits() / 128;
+ unsigned NumSubElts = 128 / SVT.getSizeInBits();
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
+ EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, Offset = 0; i != NumVecs;
+ ++i, Offset += NumSubElts) {
+ SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
+ DAG.getIntPtrConstant(Offset, DL));
+ SrcVec = ExtendToVec128(DL, SrcVec);
+ SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
+ Opnds.push_back(SrcVec);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
+ }
+ }
if (!Subtarget->hasFp256())
return SDValue();
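
For a concrete instance of the pre-AVX2 split above: sign-extending v8i16 to
v8i32 gives NumVecs = 256/128 = 2 subvectors of NumSubElts = 128/32 = 4
elements, each fed through a 128-bit sign_extend_vector_inreg. A scalar model
of that chunked extension (plain C++, purely illustrative):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const unsigned NumVecs = 256 / 128;   // two 128-bit pieces
      const unsigned NumSubElts = 128 / 32; // four i32 lanes per piece
      std::vector<int16_t> In = {1, -1, 2, -2, 3, -3, 4, -4};
      std::vector<int32_t> Out;
      for (unsigned Vec = 0, Offset = 0; Vec != NumVecs;
           ++Vec, Offset += NumSubElts)
        for (unsigned i = 0; i != NumSubElts; ++i)
          Out.push_back(int32_t(In[Offset + i])); // per-chunk sign extend
      assert(Out.size() == 8 && Out[1] == -1 && Out[7] == -4);
      return 0;
    }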
@@ -25483,7 +23946,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
}
}
@@ -25495,7 +23958,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
}
}
if (VT.is256BitVector()) {
@@ -25534,18 +23997,18 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
+ SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
LHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
- DAG.getConstant(0, addV.getValueType()), CC);
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
+ SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
RHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
- DAG.getConstant(0, addV.getValueType()), CC);
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
}
if (VT.getScalarType() == MVT::i1 &&
@@ -25569,12 +24032,12 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
assert(VT == LHS.getOperand(0).getValueType() &&
"Uexpected operand type");
if (CC == ISD::SETGT)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
if (CC == ISD::SETLE)
- return DAG.getConstant(1, VT);
+ return DAG.getConstant(1, DL, VT);
if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
-
+
assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
"Unexpected condition code!");
return LHS.getOperand(0);
@@ -25584,6 +24047,24 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
+ SelectionDAG &DAG) {
+ SDLoc dl(Load);
+ MVT VT = Load->getSimpleValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ SDValue Addr = Load->getOperand(1);
+ SDValue NewAddr = DAG.getNode(
+ ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
+ DAG.getConstant(Index * EVT.getStoreSize(), dl,
+ Addr.getSimpleValueType()));
+
+ SDValue NewLoad =
+ DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
+ DAG.getMachineFunction().getMachineMemOperand(
+ Load->getMemOperand(), 0, EVT.getStoreSize()));
+ return NewLoad;
+}
+
static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
@@ -25595,20 +24076,47 @@ static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
if (MayFoldLoad(Ld)) {
// Extract the countS bits from the immediate so we can get the proper
// address when narrowing the vector load to a specific element.
- // When the second source op is a memory address, interps doesn't use
+ // When the second source op is a memory address, insertps doesn't use
// countS and just gets an f32 from that address.
unsigned DestIndex =
cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
+
Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
- } else
- return SDValue();
- // Create this as a scalar to vector to match the instruction pattern.
- SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
- // countS bits are ignored when loading from memory on insertps, which
- // means we don't need to explicitly set them to 0.
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
- LoadScalarToVector, N->getOperand(2));
+ // Create this as a scalar to vector to match the instruction pattern.
+ SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
+ // countS bits are ignored when loading from memory on insertps, which
+ // means we don't need to explicitly set them to 0.
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
+ LoadScalarToVector, N->getOperand(2));
+ }
+ return SDValue();
+}
+
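
Two details above can be modeled in isolation: countS occupies bits 7:6 of the
insertps immediate (hence the >> 6), and NarrowVectorLoadToElement advances the
base address by Index times the element's store size. A standalone sketch
(plain C++; the values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      // insertps imm8: zero mask [3:0], countD [5:4], countS [7:6].
      uint8_t Imm = 0xC0; // countS = 3
      unsigned DestIndex = Imm >> 6;
      assert(DestIndex == 3);

      // Narrowed load address: base + Index * element store size.
      float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      const char *Base = reinterpret_cast<const char *>(Vec);
      const float *Elt =
          reinterpret_cast<const float *>(Base + DestIndex * sizeof(float));
      assert(*Elt == 4.0f);
      return 0;
    }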
+static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
+ // operands and changing the mask to 1. This saves us a bunch of
+ // pattern-matching possibilities related to scalar math ops in SSE/AVX.
+ // x86InstrInfo knows how to commute this back after instruction selection
+ // if it would help register allocation.
+
+ // TODO: If optimizing for size or a processor that doesn't suffer from
+ // partial register update stalls, this should be transformed into a MOVSD
+ // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
+
+ if (VT == MVT::v2f64)
+ if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
+ SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
+ }
+
+ return SDValue();
}
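
The v2f64 canonicalization above relies on blend masks being per-lane
selectors: mask 2 (binary 10) with operands (V0, V1) takes lane 0 from V0 and
lane 1 from V1, which is exactly mask 1 with the operands swapped. A two-lane
model of that equivalence (plain C++; blend() is a stand-in for BLENDPD):

    #include <cassert>

    struct V2 { double L0, L1; };

    // Mask bit i set selects lane i from B, otherwise from A.
    static V2 blend(V2 A, V2 B, unsigned Mask) {
      return {(Mask & 1) ? B.L0 : A.L0, (Mask & 2) ? B.L1 : A.L1};
    }

    int main() {
      V2 X = {1.0, 2.0}, Y = {3.0, 4.0};
      V2 A = blend(X, Y, 2); // <X.L0, Y.L1>
      V2 B = blend(Y, X, 1); // operands swapped, mask changed to 1
      assert(A.L0 == B.L0 && A.L1 == B.L1);
      return 0;
    }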
// Helper function of PerformSETCCCombine. It materializes "setb reg"
@@ -25619,12 +24127,14 @@ static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG,
if (VT == MVT::i8)
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
- DAG.getConstant(1, VT));
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ EFLAGS),
+ DAG.getConstant(1, DL, VT));
assert (VT == MVT::i1 && "Unexpected type for SETCC node");
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS));
+ DAG.getConstant(X86::COND_B, DL, MVT::i8),
+ EFLAGS));
}
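
MaterializeSETB widens the carry flag to all-ones (SETCC_CARRY, i.e. an sbb of
a register with itself) and masks the result with 1. A scalar model of that
sequence (plain C++, illustrative only):

    #include <cassert>
    #include <cstdint>

    static uint32_t setb(uint32_t X, uint32_t Y) {
      uint32_t Carry = X < Y ? ~0u : 0u; // X86ISD::SETCC_CARRY with COND_B
      return Carry & 1;                  // the trailing 'and $1'
    }

    int main() {
      assert(setb(1, 2) == 1 && setb(2, 1) == 0 && setb(3, 3) == 0);
      return 0;
    }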
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
@@ -25663,7 +24173,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
if (Flags.getNode()) {
- SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
@@ -25685,7 +24195,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
if (Flags.getNode()) {
- SDValue Cond = DAG.getConstant(CC, MVT::i8);
+ SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
Flags);
}
@@ -25740,7 +24250,7 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
- const X86TargetLowering *XTLI) {
+ const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -25764,12 +24274,16 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT VT = Ld->getValueType(0);
+
+ // This transformation is not supported if the result type is f16
+ if (N->getValueType(0) == MVT::f16)
+ return SDValue();
+
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !XTLI->getSubtarget()->is64Bit() &&
- VT == MVT::i64) {
- SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
- Ld->getChain(), Op0, DAG);
+ !Subtarget->is64Bit() && VT == MVT::i64) {
+ SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
+ SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
@@ -25790,12 +24304,13 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
- SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getConstant(X86::COND_B,MVT::i8),
+ DAG.getConstant(X86::COND_B, DL,
+ MVT::i8),
N->getOperand(2)),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
@@ -25830,16 +24345,17 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
SDValue CmpOp0 = Cmp.getOperand(0);
SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
- DAG.getConstant(1, CmpOp0.getValueType()));
+ DAG.getConstant(1, DL, CmpOp0.getValueType()));
SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
if (CC == X86::COND_NE)
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ DAG.getConstant(-1ULL, DL, OtherVal.getValueType()),
+ NewCmp);
return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+ DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp);
}
/// PerformADDCombine - Do target-specific dag combines on integer adds.
@@ -25875,9 +24391,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
Op1.getOperand(0),
- DAG.getConstant(~XorC, VT));
+ DAG.getConstant(~XorC, SDLoc(Op1), VT));
return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
- DAG.getConstant(C->getAPIntValue()+1, VT));
+ DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT));
}
}
@@ -25947,7 +24463,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
OrigVT.getVectorNumElements() / Ratio);
OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, DL));
}
Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
@@ -25968,6 +24484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
@@ -25983,7 +24500,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
- case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -25999,7 +24516,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG:
return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
- case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
@@ -26022,9 +24538,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
- case X86ISD::INSERTPS:
- return PerformINSERTPSCombine(N, DAG, Subtarget);
- case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget);
+ case X86ISD::INSERTPS: {
+ if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
+ return PerformINSERTPSCombine(N, DAG, Subtarget);
+ break;
+ }
+ case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
}
return SDValue();
@@ -26131,27 +24650,23 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-namespace {
- // Helper to match a string separated by whitespace.
- bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
- s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
+// Helper to match a string separated by whitespace.
+static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
+ S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
- for (unsigned i = 0, e = args.size(); i != e; ++i) {
- StringRef piece(*args[i]);
- if (!s.startswith(piece)) // Check if the piece matches.
- return false;
-
- s = s.substr(piece.size());
- StringRef::size_type pos = s.find_first_not_of(" \t");
- if (pos == 0) // We matched a prefix.
- return false;
+ for (StringRef Piece : Pieces) {
+ if (!S.startswith(Piece)) // Check if the piece matches.
+ return false;
- s = s.substr(pos);
- }
+ S = S.substr(Piece.size());
+ StringRef::size_type Pos = S.find_first_not_of(" \t");
+ if (Pos == 0) // We matched a prefix.
+ return false;
- return s.empty();
+ S = S.substr(Pos);
}
- const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+
+ return S.empty();
}
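
The contract of matchAsm is that S consist of exactly the given pieces, in
order, each separated by at least one space or tab. A behavior-equivalent
standalone model (std::string_view standing in for StringRef; remove_prefix is
clamped where StringRef::substr would clamp implicitly):

    #include <algorithm>
    #include <cassert>
    #include <initializer_list>
    #include <string_view>

    static bool matchAsmModel(std::string_view S,
                              std::initializer_list<std::string_view> Pieces) {
      S.remove_prefix(std::min(S.find_first_not_of(" \t"), S.size()));
      for (std::string_view Piece : Pieces) {
        if (S.substr(0, Piece.size()) != Piece) // Check if the piece matches.
          return false;
        S.remove_prefix(Piece.size());
        size_t Pos = S.find_first_not_of(" \t");
        if (Pos == 0) // We matched a prefix: no separating whitespace.
          return false;
        S.remove_prefix(std::min(Pos, S.size()));
      }
      return S.empty();
    }

    int main() {
      assert(matchAsmModel("bswap $0", {"bswap", "$0"}));
      assert(!matchAsmModel("bswapl$0", {"bswapl", "$0"}));
      return 0;
    }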
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
@@ -26191,12 +24706,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
- if (matchAsm(AsmPieces[0], "bswap", "$0") ||
- matchAsm(AsmPieces[0], "bswapl", "$0") ||
- matchAsm(AsmPieces[0], "bswapq", "$0") ||
- matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
+ if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -26205,8 +24720,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
- matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
+ (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
+ matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -26218,9 +24733,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
- matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
- matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
+ matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
+ matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -26235,9 +24750,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
- matchAsm(AsmPieces[1], "bswap", "%edx") &&
- matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
+ matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
+ matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
@@ -26373,7 +24888,7 @@ TargetLowering::ConstraintWeight
break;
case 'G':
case 'C':
- if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
@@ -26428,7 +24943,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26436,7 +24952,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'J':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26444,7 +24961,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26453,7 +24971,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
- Result = DAG.getTargetConstant(C->getSExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26461,7 +24980,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'M':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26469,7 +24989,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26477,7 +24998,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'O':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26488,7 +25010,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
- Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
@@ -26501,7 +25023,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
- Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -26513,7 +25036,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
// Widen to 64 bits here to get it sign extended.
- Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
@@ -26571,8 +25094,9 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-std::pair<unsigned, const TargetRegisterClass*>
-X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
@@ -26678,7 +25202,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index ba077a8..b589ca4 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -26,41 +26,41 @@ namespace llvm {
namespace X86ISD {
// X86 Specific DAG Nodes
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- /// BSF - Bit scan forward.
- /// BSR - Bit scan reverse.
+ /// Bit scan forward.
BSF,
+ /// Bit scan reverse.
BSR,
- /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// Double shift instructions. These correspond to
/// X86::SHLDxx and X86::SHRDxx instructions.
SHLD,
SHRD,
- /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
FAND,
- /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// Bitwise logical OR of floating point values. This corresponds
/// to X86::ORPS or X86::ORPD.
FOR,
- /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// Bitwise logical XOR of floating point values. This corresponds
/// to X86::XORPS or X86::XORPD.
FXOR,
- /// FANDN - Bitwise logical ANDNOT of floating point values. This
+ /// Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
- /// FSRL - Bitwise logical right shift of floating point values. These
+ /// Bitwise logical right shift of floating point values. This
/// corresponds to X86::PSRLDQ.
FSRL,
- /// CALL - These operations represent an abstract X86 call
+ /// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
///
@@ -79,8 +79,7 @@ namespace llvm {
///
CALL,
- /// RDTSC_DAG - This operation implements the lowering for
- /// readcyclecounter
+ /// This operation implements the lowering for readcyclecounter
RDTSC_DAG,
/// X86 Read Time-Stamp Counter and Processor ID.
@@ -131,178 +130,194 @@ namespace llvm {
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
- /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ /// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
- /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ /// Repeat move, corresponds to X86::REP_MOVSx.
REP_MOVS,
- /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// On Darwin, this node represents the result of the popl
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// Wrapper - A wrapper node for TargetConstantPool,
+ /// A wrapper node for TargetConstantPool,
/// TargetExternalSymbol, and TargetGlobalAddress.
Wrapper,
- /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// Special wrapper used under X86-64 PIC mode for RIP
/// relative displacements.
WrapperRIP,
- /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
MOVDQ2Q,
- /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+ /// Copies a 32-bit value from the low word of a MMX
/// vector to a GPR.
MMX_MOVD2W,
- /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// Copies a GPR into the low 32-bit word of a MMX vector
+ /// and zeroes out the high word.
+ MMX_MOVW2D,
+
+ /// Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
- /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// Extract a 16-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRW.
PEXTRW,
- /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
INSERTPS,
- /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// Insert the lower 8 bits of a 32-bit value into a vector,
/// corresponds to X86::PINSRB.
PINSRB,
- /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// Insert the lower 16 bits of a 32-bit value into a vector,
/// corresponds to X86::PINSRW.
PINSRW, MMX_PINSRW,
- /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ /// Shuffle 16 8-bit values within a vector.
PSHUFB,
- /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ /// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGN - Copy integer sign.
+ /// Copy integer sign.
PSIGN,
- /// BLENDI - Blend where the selector is an immediate.
+ /// Blend where the selector is an immediate.
BLENDI,
- /// SHRUNKBLEND - Blend where the condition has been shrunk.
+ /// Blend where the condition has been shrunk.
/// This is used to emphasize that the condition mask is
/// no longer valid for generic VSELECT optimizations.
SHRUNKBLEND,
- /// ADDSUB - Combined add and sub on an FP vector.
+ /// Combined add and sub on an FP vector.
ADDSUB,
-
- // SUBUS - Integer sub with unsigned saturation.
+ // FP vector ops with rounding mode.
+ FADD_RND,
+ FSUB_RND,
+ FMUL_RND,
+ FDIV_RND,
+ FMAX_RND,
+ FMIN_RND,
+
+ // Integer add/sub with unsigned saturation.
+ ADDUS,
SUBUS,
+ // Integer add/sub with signed saturation.
+ ADDS,
+ SUBS,
- /// HADD - Integer horizontal add.
+ /// Integer horizontal add.
HADD,
- /// HSUB - Integer horizontal sub.
+ /// Integer horizontal sub.
HSUB,
- /// FHADD - Floating point horizontal add.
+ /// Floating point horizontal add.
FHADD,
- /// FHSUB - Floating point horizontal sub.
+ /// Floating point horizontal sub.
FHSUB,
- /// UMAX, UMIN - Unsigned integer max and min.
+ /// Unsigned integer max and min.
UMAX, UMIN,
- /// SMAX, SMIN - Signed integer max and min.
+ /// Signed integer max and min.
SMAX, SMIN,
- /// FMAX, FMIN - Floating point max and min.
- ///
+ /// Floating point max and min.
FMAX, FMIN,
- /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ /// Commutative FMIN and FMAX.
FMAXC, FMINC,
- /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
- /// approximation. Note that these typically require refinement
+ /// Floating point reciprocal-sqrt and reciprocal approximation.
+ /// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT, FRCP,
- // TLSADDR - Thread Local Storage.
+ // Thread Local Storage.
TLSADDR,
- // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // Thread Local Storage. A call to get the start address
// of the TLS block for the current module.
TLSBASEADDR,
- // TLSCALL - Thread Local Storage. When calling to an OS provided
+ // Thread Local Storage. When calling an OS-provided
// thunk at the address from an earlier relocation.
TLSCALL,
- // EH_RETURN - Exception Handling helpers.
+ // Exception Handling helpers.
EH_RETURN,
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ // SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ // SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// Tail call return. See X86TargetLowering::LowerCall for
/// the list of operands.
TC_RETURN,
- // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
+ // Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // VZEXT - Vector integer zero-extend.
+ // Vector integer zero-extend.
VZEXT,
- // VSEXT - Vector integer signed-extend.
+ // Vector integer signed-extend.
VSEXT,
- // VTRUNC - Vector integer truncate.
+ // Vector integer truncate.
VTRUNC,
- // VTRUNC - Vector integer truncate with mask.
+ // Vector integer truncate with mask.
VTRUNCM,
- // VFPEXT - Vector FP extend.
+ // Vector FP extend.
VFPEXT,
- // VFPROUND - Vector FP round.
+ // Vector FP round.
VFPROUND,
- // VSHL, VSRL - 128-bit vector logical left / right shift
+ // 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
- // VSHL, VSRL, VSRA - Vector shift elements
+ // Vector shift elements
VSHL, VSRL, VSRA,
- // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ // Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
- // CMPP - Vector packed double/float comparison.
+ // Vector packed double/float comparison.
CMPP,
- // PCMP* - Vector integer comparisons.
+ // Vector integer comparisons.
PCMPEQ, PCMPGT,
- // PCMP*M - Vector integer comparisons, the result is in a mask vector.
+ // Vector integer comparisons, the result is in a mask vector.
PCMPEQM, PCMPGTM,
- /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+ /// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
CMPMU,
+ // Vector comparison with rounding mode for FP values
+ CMPM_RND,
- // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ // Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- BEXTR, // BEXTR - Bit field extract
+ BEXTR, // Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
@@ -313,16 +328,16 @@ namespace llvm {
UDIVREM8_ZEXT_HREG,
SDIVREM8_SEXT_HREG,
- // MUL_IMM - X86 specific multiply by immediate.
+ // X86-specific multiply by immediate.
MUL_IMM,
- // PTEST - Vector bitwise comparisons.
+ // Vector bitwise comparisons.
PTEST,
- // TESTP - Vector packed fp sign bitwise comparisons.
+ // Vector packed fp sign bitwise comparisons.
TESTP,
- // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
+ // Vector "test" in AVX-512, the result is in a mask vector.
TESTM,
TESTNM,
@@ -359,9 +374,10 @@ namespace llvm {
VPERMIV3,
VPERMI,
VPERM2X128,
+ // Broadcast scalar to vector
VBROADCAST,
- // masked broadcast
- VBROADCASTM,
+ // Broadcast subvector to vector
+ SUBV_BROADCAST,
// Insert/Extract vector element
VINSERT,
VEXTRACT,
@@ -378,6 +394,14 @@ namespace llvm {
FNMSUB,
FMADDSUB,
FMSUBADD,
+ // FMA with rounding mode
+ FMADD_RND,
+ FNMADD_RND,
+ FMSUB_RND,
+ FNMSUB_RND,
+ FMADDSUB_RND,
+ FMSUBADD_RND,
+ RNDSCALE,
// Compress and expand
COMPRESS,
@@ -547,9 +571,11 @@ namespace llvm {
// X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
- explicit X86TargetLowering(const X86TargetMachine &TM);
+ explicit X86TargetLowering(const X86TargetMachine &TM,
+ const X86Subtarget &STI);
unsigned getJumpTableEncoding() const override;
+ bool useSoftFloat() const override;
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
@@ -679,13 +705,27 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "i")
+ return InlineAsm::Constraint_i;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ else if (ConstraintCode == "X")
+ return InlineAsm::Constraint_X;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
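
The new getInlineAsmMemConstraint override above is a plain string-to-enum mapping with a base-class fallback. A standalone model of that control flow; the enum values here are stand-ins for LLVM's InlineAsm::Constraint_* constants, not the real ones:

    #include <cassert>
    #include <string>

    enum Constraint { Constraint_m, Constraint_i, Constraint_o,
                      Constraint_v, Constraint_X };

    Constraint getInlineAsmMemConstraint(const std::string &Code) {
      if (Code == "i") return Constraint_i;  // immediate operand
      if (Code == "o") return Constraint_o;  // offsettable memory
      if (Code == "v") return Constraint_v;  // vector register
      if (Code == "X") return Constraint_X;  // anything acceptable
      return Constraint_m;  // stands in for the TargetLowering fallback
    }

    int main() {
      assert(getInlineAsmMemConstraint("o") == Constraint_o);
      assert(getInlineAsmMemConstraint("m") == Constraint_m);  // falls through
    }
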
+
/// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
/// error, this returns a register number of 0.
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
@@ -732,6 +772,10 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ /// Return true if folding a vector load into ExtVal (a sign, zero, or any
+ /// extend node) is profitable.
+ bool isVectorLoadExtDesirable(SDValue) const override;
+
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true, otherwise fmuladd is expanded to fmul + fadd.
@@ -775,10 +819,6 @@ namespace llvm {
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
- const X86Subtarget* getSubtarget() const {
- return Subtarget;
- }
-
/// Return true if the specified scalar FP type is computed in an SSE
/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
@@ -827,16 +867,14 @@ namespace llvm {
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
- /// \brief Reset the operation actions based on target options.
- void resetOperationActions() override;
-
bool useLoadStackGuardNode() const override;
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
protected:
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(MVT VT) const override;
+ std::pair<const TargetRegisterClass *, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const override;
private:
/// Keep a pointer to the X86Subtarget around so that we can
@@ -844,10 +882,6 @@ namespace llvm {
const X86Subtarget *Subtarget;
const DataLayout *TD;
- /// Used to store the TargetOptions so that we don't waste time resetting
- /// the operation actions unless we have to.
- TargetOptions TO;
-
/// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -947,8 +981,9 @@ namespace llvm {
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
SDValue
LowerFormalArguments(SDValue Chain,
@@ -981,7 +1016,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
@@ -1055,6 +1091,9 @@ namespace llvm {
/// Use rcp* to speed up fdiv calculations.
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
+
+ /// Reassociate floating point divisions into multiply by reciprocal.
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
};
namespace X86 {
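
Near the top of this header diff, X86ISD::NodeType gains a fixed underlying type (enum NodeType : unsigned). The diff does not state the motivation; one plausible reading is that pinning the type makes opcode storage and comparison widths well-defined across compilers instead of implementation-chosen. A small demonstration of what the annotation guarantees:

    #include <type_traits>

    enum NodeType : unsigned { FIRST_NUMBER = 1000, BSF, BSR };

    // With a fixed underlying type this holds on every conforming compiler;
    // without it, the compiler may pick any integer type that fits the values.
    static_assert(std::is_same<std::underlying_type<NodeType>::type,
                               unsigned>::value,
                  "opcodes are stored as plain unsigned");

    int main() { return 0; }
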
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index c4ec3df..9d11d3c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1,3 +1,18 @@
+//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 AVX512 instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
@@ -59,17 +74,16 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
VTName)), VTName));
- PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
- // Load patterns used for memory operands. We only have this defined in
- // case of i64 element types for sub-512 integer vectors. For now, keep
- // MemOpFrag undefined in these cases.
- PatFrag MemOpFrag =
- !if (!eq (NumElts#EltTypeName, "1f32"), !cast<PatFrag>("memopfsf32"),
- !if (!eq (NumElts#EltTypeName, "1f64"), !cast<PatFrag>("memopfsf64"),
- !if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
- !if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
- !if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)))));
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ !if (!eq (Size, 512),
+ !if (!eq (EltSize, 64), "v8i64", "v16i32"),
+ VTName))), VTName));
+
+ PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
@@ -96,10 +110,15 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
SSEPackedInt));
+ RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
+
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+
+ string ZSuffix = !if (!eq (Size, 128), "Z128",
+ !if (!eq (Size, 256), "Z256", "Z"));
}
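
Among the X86VectorVTInfo additions above, ZSuffix selects an instruction-name suffix from the vector width. The same !if chain, modeled as plain C++ for readability:

    #include <cassert>
    #include <string>

    // Mirrors: !if (!eq (Size, 128), "Z128",
    //          !if (!eq (Size, 256), "Z256", "Z"))
    std::string zsuffix(int SizeInBits) {
      if (SizeInBits == 128) return "Z128";
      if (SizeInBits == 256) return "Z256";
      return "Z";  // 512-bit vectors use the bare Z suffix
    }

    int main() {
      assert(zsuffix(256) == "Z256");
      assert(zsuffix(512) == "Z");
    }
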
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
@@ -161,21 +180,20 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
list<dag> Pattern,
list<dag> MaskingPattern,
list<dag> ZeroMaskingPattern,
- string Round = "",
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
- "$dst "#Round#", "#IntelSrcAsm#"}",
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
+ "$dst , "#IntelSrcAsm#"}",
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
let AddedComplexity = 20 in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
- "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern, itin>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
@@ -183,8 +201,8 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
}
let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}"#Round#"|"#
- "$dst {${mask}} {z}"#Round#", "#IntelSrcAsm#"}",
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
ZeroMaskingPattern,
itin>,
EVEX_KZ;
@@ -198,7 +216,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
- SDNode Select = vselect, string Round = "",
+ SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
@@ -208,7 +226,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
- Round, MaskingConstraint, NoItinerary, IsCommutable>;
+ MaskingConstraint, NoItinerary, IsCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -216,7 +234,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, string Round = "",
+ dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs, Ins,
@@ -224,14 +242,14 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect,
- Round, "$src0 = $dst", itin, IsCommutable>;
+ "$src0 = $dst", itin, IsCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, string Round = "",
+ dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs, Ins,
@@ -239,7 +257,7 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select,
- Round, "$src0 = $dst", itin, IsCommutable>;
+ "$src0 = $dst", itin, IsCommutable>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
@@ -265,9 +283,65 @@ multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], "",
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
"$src0 = $dst">;
+
+// An instruction with a mask that puts its result in a mask register,
+// like "compare" and "vptest".
+multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
+ dag Outs,
+ dag Ins, dag MaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern,
+ list<dag> MaskingPattern,
+ string Round = "",
+ InstrItinClass itin = NoItinerary> {
+ def NAME: AVX512<O, F, Outs, Ins,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
+ "$dst "#Round#", "#IntelSrcAsm#"}",
+ Pattern, itin>;
+
+ def NAME#k: AVX512<O, F, Outs, MaskingIns,
+ OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
+ MaskingPattern, itin>, EVEX_K;
+}
+
+multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS,
+ string Round = "",
+ InstrItinClass itin = NoItinerary> :
+ AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.KRC:$dst, RHS)],
+ [(set _.KRC:$dst, MaskingRHS)],
+ Round, NoItinerary>;
+
+multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, string Round = "",
+ InstrItinClass itin = NoItinerary> :
+ AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (and _.KRCWM:$mask, RHS),
+ Round, itin>;
+
+multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm> :
+ AVX512_maskable_custom_cmp<O, F, Outs,
+ Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [],[],"", NoItinerary>;
+
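
The _cmp multiclasses above differ from the ordinary maskable ones in two ways: the destination is a mask register, and the masked variant ANDs the incoming mask with the comparison result instead of selecting between old and new destination lanes. A reference model of that semantics, a sketch rather than compiler code:

    #include <cassert>
    #include <cstdint>

    // Masked-off lanes come out as 0 in the result mask, matching the
    // (and _.KRCWM:$mask, RHS) pattern above.
    uint16_t cmpeq_masked(const int32_t a[16], const int32_t b[16],
                          uint16_t mask) {
      uint16_t k = 0;
      for (int lane = 0; lane < 16; ++lane)
        if (a[lane] == b[lane])
          k |= uint16_t(1u << lane);
      return k & mask;
    }

    int main() {
      int32_t a[16] = {1, 2}, b[16] = {1, 3};
      assert(cmpeq_masked(a, b, 0x0001) == 0x0001);  // lane 0 matches
      assert(cmpeq_masked(a, b, 0x0002) == 0x0000);  // lane 1 differs
    }
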
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
@@ -394,7 +468,7 @@ multiclass vinsert_for_size_no_alt<int Opcode,
SDNodeXForm INSERT_get_vinsert_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, From.RC:$src2, i8imm:$src3),
+ (ins VR512:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts #
"\t{$src3, $src2, $src1, $dst|"
"$dst, $src1, $src2, $src3}",
@@ -405,7 +479,7 @@ multiclass vinsert_for_size_no_alt<int Opcode,
let mayLoad = 1 in
def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, From.MemOp:$src2, i8imm:$src3),
+ (ins VR512:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts #
"\t{$src3, $src2, $src1, $dst|"
"$dst, $src1, $src2, $src3}",
@@ -467,12 +541,12 @@ defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
+ (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
+ (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
@@ -489,7 +563,7 @@ multiclass vextract_for_size<int Opcode,
SDNodeXForm EXTRACT_get_vextract_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
- (ins VR512:$src1, i8imm:$idx),
+ (ins VR512:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x4",
"$idx, $src1", "$src1, $idx",
[(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
@@ -497,7 +571,7 @@ multiclass vextract_for_size<int Opcode,
AVX512AIi8Base, EVEX, EVEX_V512;
let mayStore = 1 in
def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
- (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
+ (ins To.MemOp:$dst, VR512:$src1, u8imm:$src2),
"vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
"$dst, $src1, $src2}",
[]>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
@@ -596,13 +670,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
- (ins VR128X:$src1, i32i8imm:$src2),
+ (ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
+ (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
@@ -735,12 +809,8 @@ def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
(VPBROADCASTDrZr GR32:$src)>;
-def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
- (VPBROADCASTDrZrkz VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
(VPBROADCASTQrZr GR64:$src)>;
-def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
- (VPBROADCASTQrZrkz VK8WM:$mask, GR64:$src)>;
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
(VPBROADCASTDrZr GR32:$src)>;
@@ -762,24 +832,33 @@ multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
- def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
+ VR128X:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} |${dst} {${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
VR128X:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
- EVEX, EVEX_KZ;
+ []>, EVEX, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
- def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
+ x86memop:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}} , $src}"),
+ []>, EVEX, EVEX_K;
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
x86memop:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
- (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
+ [(set DstRC:$dst, (OpVT (vselect KRC:$mask,
+ (X86VBroadcast (ld_frag addr:$src)),
+ (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ;
}
}
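
The rewritten rmkz variant above now carries an explicit pattern: broadcast the loaded scalar, then vselect against an all-zeros vector so that masked-off lanes are zeroed. A scalar reference model of that computation:

    #include <cstdint>

    // dst[lane] = mask bit set ? *src : 0, i.e. the vselect/immAllZerosV
    // pattern from the diff.
    void vpbroadcastd_maskz(int32_t dst[16], const int32_t *src,
                            uint16_t mask) {
      for (int lane = 0; lane < 16; ++lane)
        dst[lane] = ((mask >> lane) & 1) ? *src : 0;
    }

    int main() {
      int32_t dst[16], s = 7;
      vpbroadcastd_maskz(dst, &s, 0x0001);
      return (dst[0] == 7 && dst[1] == 0) ? 0 : 1;
    }
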
@@ -790,28 +869,71 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
-multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- RegisterClass KRC> {
+multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
+ def rm : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
- x86memop:$src),
+ [(set _Dst.RC:$dst,
+ (_Dst.VT (X86SubVBroadcast
+ (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))))]>, EVEX;
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
+ _Src.MemOp:$src),
!strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
+ _Src.MemOp:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
}
}
-defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
- i128mem, loadv2i64, VK16WM>,
+defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
- i256mem, loadv4i64, VK16WM>, VEX_W,
+defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
+ v16f32_info, v4f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+ v8i64_info, v4i64x_info>, VEX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
+ v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
+let Predicates = [HasVLX] in {
+defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ v8i32x_info, v4i32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
+ v8f32x_info, v4f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VT4>;
+}
+let Predicates = [HasVLX, HasDQI] in {
+defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
+ v4i64x_info, v2i64x_info>, VEX_W,
+ EVEX_V256, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
+ v4f64x_info, v2f64x_info>, VEX_W,
+ EVEX_V256, EVEX_CD8<64, CD8VT2>;
+}
+let Predicates = [HasDQI] in {
+defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
+ v8i64_info, v2i64x_info>, VEX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
+ v16i32_info, v8i32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
+ v8f64_info, v2f64x_info>, VEX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
+ v16f32_info, v8f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+}
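
All the VBROADCAST{I,F}<size>X<count> variants defined above share one semantics, now expressed through the X86SubVBroadcast node: tile a narrow subvector across the wider destination. For the 32x4-into-512-bit case, a reference model:

    #include <cassert>
    #include <cstdint>

    // vbroadcasti32x4: repeat a 4 x i32 block four times across 16 lanes.
    void broadcast_i32x4(int32_t dst[16], const int32_t src[4]) {
      for (int lane = 0; lane < 16; ++lane)
        dst[lane] = src[lane % 4];
    }

    int main() {
      int32_t src[4] = {10, 11, 12, 13}, dst[16];
      broadcast_i32x4(dst, src);
      assert(dst[5] == 11 && dst[14] == 12);
    }
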
+
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
(VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
@@ -819,13 +941,23 @@ def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
+ (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
+
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
+ (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
(VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
+def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))),
+ (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>;
+
def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
(VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
+def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))),
+ (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
(VBROADCASTSSZr VR128X:$src)>;
@@ -840,12 +972,6 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
(VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
-let Predicates = [HasAVX512] in {
-def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
- (EXTRACT_SUBREG
- (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- addr:$src)), sub_ymm)>;
-}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
@@ -882,18 +1008,18 @@ multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, i8imm:$src2),
+ (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
EVEX;
def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.MemOp:$src1, i8imm:$src2),
+ (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
- (_.VT (OpNode (_.MemOpFrag addr:$src1),
+ (_.VT (OpNode (_.LdFrag addr:$src1),
(i8 imm:$src2))))]>,
EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
@@ -917,7 +1043,7 @@ multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (X86VPermilpv _.RC:$src1,
- (Ctrl.VT (Ctrl.MemOpFrag addr:$src2)))))]>,
+ (Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>,
EVEX_4V;
}
}
@@ -957,15 +1083,15 @@ multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
EVEX_4V;
}
-defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
+defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem,
v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem,
v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
+defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem,
v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem,
v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// -- VPERM2I - 3 source operands form --
@@ -1040,16 +1166,16 @@ let Constraints = "$src1 = $dst" in {
EVEX_4V, EVEX_KZ;
}
}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, loadv16i32,
i512mem, X86VPermiv3, v16i32, VK16WM>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
+defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, loadv8i64,
i512mem, X86VPermiv3, v8i64, VK8WM>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, loadv16f32,
i512mem, X86VPermiv3, v16f32, VK16WM>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, loadv8f64,
i512mem, X86VPermiv3, v8f64, VK8WM>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -1069,16 +1195,16 @@ multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
(MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
}
-defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
+defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, loadv16i32, i512mem,
X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
+defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, loadv8i64, i512mem,
X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
+defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, loadv16f32, i512mem,
X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
+defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, loadv8f64, i512mem,
X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -1198,35 +1324,40 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, SDNode OpNode, ValueType VT,
- PatFrag ld_frag, string asm, string asm_alt> {
+ SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string Suffix> {
def rr : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VK1:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
+ (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
+ !strconcat("vcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
+ !strconcat("vcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
}
}
let Predicates = [HasAVX512] in {
-defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
- "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- XS;
-defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
- "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- XD, VEX_W;
+defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
+ XS;
+defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
+ XD, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -1361,7 +1492,7 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
@@ -1369,7 +1500,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
let mayLoad = 1 in
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
@@ -1378,7 +1509,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
- AVXCC:$cc),
+ AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
@@ -1389,7 +1520,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
let mayLoad = 1 in
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
- AVXCC:$cc),
+ AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
@@ -1402,25 +1533,27 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
- i8imm:$cc),
+ u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ let mayLoad = 1 in
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
- i8imm:$cc),
+ u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
@@ -1431,10 +1564,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> :
avx512_icmp_cc<opc, Suffix, OpNode, _> {
- let mayLoad = 1 in {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
- AVXCC:$cc),
+ AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
@@ -1444,7 +1576,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
- _.ScalarMemOp:$src2, AVXCC:$cc),
+ _.ScalarMemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
@@ -1453,20 +1585,19 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
- }
// Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
- i8imm:$cc),
+ u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
- _.ScalarMemOp:$src2, i8imm:$cc),
+ _.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
@@ -1519,46 +1650,97 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-// avx512_cmp_packed - compare packed instructions
-multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
- X86MemOperand x86memop, ValueType vt,
- string suffix, Domain d> {
- def rri : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
- def rrib: AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
- [], d>, EVEX_B;
- def rmi : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [(set KRC:$dst,
- (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
+multiclass avx512_vcmp_common<X86VectorVTInfo _> {
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)>;
+
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>,EVEX_B;
+ }
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
- def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- !strconcat("vcmp", suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ let mayLoad = 1 in {
+ defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">;
+
+ defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
+ }
+ }
+}
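
The rmbi form added above is the EVEX embedded-broadcast variant: one scalar is loaded, broadcast to every lane, and compared against the first operand. A reference model under that reading, with the predicate fixed to less-than purely for illustration:

    #include <cstdint>

    // vcmpps with a {1to16} memory operand: every lane compares against *b.
    uint16_t cmplt_broadcast(const float a[16], const float *b) {
      uint16_t k = 0;
      for (int lane = 0; lane < 16; ++lane)
        if (a[lane] < *b)
          k |= uint16_t(1u << lane);
      return k;
    }

    int main() {
      float a[16] = {0.5f, 2.0f}, b = 1.0f;
      return (cmplt_broadcast(a, &b) & 0x3) == 0x1 ? 0 : 1;
    }
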
+
+multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
+ // Comparison-code form (VCMP[EQ/LT/LE/...]).
+ defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc,{sae}, $src2, $src1",
+ "$src1, $src2,{sae}, $cc">, EVEX_B;
+ }
+}
+
+multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcmp_common<_.info512>,
+ avx512_vcmp_sae<_.info512>, EVEX_V512;
+
+ }
+ let Predicates = [HasAVX512,HasVLX] in {
+ defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
+ defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
-defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
- "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
- "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
- EVEX_CD8<64, CD8VF>;
+defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
+ AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
+ AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
@@ -1576,30 +1758,7 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
- FROUND_NO_EXC)),
- (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
-def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR16)>;
-
-def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
- FROUND_CURRENT)),
- (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
- (I8Imm imm:$cc)), GR8)>;
-
+//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
@@ -1607,17 +1766,18 @@ def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
- ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
+ ValueType vvt, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
let mayLoad = 1 in
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
+ [(set KRC:$dst, (vvt (load addr:$src)))]>;
let mayStore = 1 in
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store KRC:$src, addr:$dst)]>;
}
}
@@ -1633,27 +1793,25 @@ multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
}
let Predicates = [HasDQI] in
- defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
- i8mem>,
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasAVX512] in
- defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
- i16mem>,
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasBWI] in {
- defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
- i32mem>, VEX, PD, VEX_W;
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
+ VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
}
let Predicates = [HasBWI] in {
- defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
- i64mem>, VEX, PS, VEX_W;
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
+ VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, VEX_W;
}
@@ -1684,24 +1842,36 @@ let Predicates = [HasBWI] in {
let Predicates = [HasDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVBmk addr:$dst, VK8:$src)>;
+ def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
+ (KMOVBkm addr:$src)>;
}
-let Predicates = [HasAVX512] in {
- def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
- (KMOVWmk addr:$dst, VK16:$src)>;
+let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
- def : Pat<(i1 (load addr:$src)),
- (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}
+let Predicates = [HasAVX512] in {
+ def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
+ (KMOVWmk addr:$dst, VK16:$src)>;
+ def : Pat<(i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0),
+ (MOV8rm addr:$src), sub_8bit)),
+ (i16 1)), VK1)>;
+ def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
+ (KMOVWkm addr:$src)>;
+}
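
The replacement (i1 (load addr)) pattern above no longer goes through a k-register load; it loads a byte, widens it, and masks to bit 0 before the copy into VK1. The equivalent scalar computation:

    #include <cstdint>

    // MOV8rm + SUBREG_TO_REG + AND16ri ..., (i16 1) from the pattern above.
    bool load_i1(const uint8_t *p) {
      uint16_t widened = *p;      // byte load, widened to 16 bits
      return (widened & 1) != 0;  // only bit 0 carries the i1 value
    }

    int main() {
      uint8_t byte = 0xFE;           // stale high bits must not leak through
      return load_i1(&byte) ? 1 : 0; // 0xFE & 1 == 0, so this returns 0
    }
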
let Predicates = [HasBWI] in {
def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
(KMOVDmk addr:$dst, VK32:$src)>;
+ def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
+ (KMOVDkm addr:$src)>;
}
let Predicates = [HasBWI] in {
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
+ def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
+ (KMOVQkm addr:$src)>;
}
let Predicates = [HasAVX512] in {
@@ -1723,6 +1893,8 @@ let Predicates = [HasAVX512] in {
def : Pat<(i32 (zext VK1:$src)),
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+ def : Pat<(i32 (anyext VK1:$src)),
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
(AND32ri (KMOVWrk
@@ -1748,17 +1920,18 @@ let Predicates = [HasBWI] in {
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512, NoDQI] in {
// GR from/to 8-bit mask without native support
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
- VK8)>;
+ (KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit)>;
+}
+let Predicates = [HasAVX512] in {
def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK1)>;
def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
@@ -1815,21 +1988,24 @@ let Predicates = [HasBWI] in
def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-
def : Pat<(not VK8:$src),
(COPY_TO_REGCLASS
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}
+def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
+def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd> {
- let Predicates = [prd] in
+ Predicate prd, bit IsCommutable> {
+ let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -1837,40 +2013,25 @@ multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode, bit IsCommutable> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI>, VEX_4V, VEX_L, PD;
+ HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512>, VEX_4V, VEX_L, PS;
+ HasAVX512, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
+ HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
+ HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
-let isCommutable = 1 in {
- defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
- defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
- defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
- defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
-}
-let isCommutable = 0 in
- defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
-
-def : Pat<(xor VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
-
-def : Pat<(or VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
-
-def : Pat<(and VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
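// A rough expansion sketch of the defms above, assuming the usual defm name
// concatenation: each yields four instructions gated by the predicate
// threaded through avx512_mask_binop_all, e.g. for KAND these are
// KANDBrr (VK8, HasDQI), KANDWrr (VK16, HasAVX512), and KANDDrr/KANDQrr
// (VK32/VK64, HasBWI), with the W form approximately
//
//   def KANDWrr : I<0x41, MRMSrcReg, (outs VK16:$dst),
//                   (ins VK16:$src1, VK16:$src2),
//                   "kandw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
//                   [(set VK16:$dst, (and VK16:$src1, VK16:$src2))]>,
//                   VEX_4V, VEX_L, PS;
//
// The new IsCommutable bit replaces the former outer "let isCommutable"
// blocks, so commutativity is now set per instantiation.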
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -1887,13 +2048,28 @@ defm : avx512_mask_binop_int<"kor", "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor", "KXOR">;
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
- let Predicates = [HasAVX512] in
- def : Pat<(OpNode VK8:$src1, VK8:$src2),
- (COPY_TO_REGCLASS
- (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
- (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+  // With AVX512F, an 8-bit mask is promoted to a 16-bit mask;
+  // with the DQI extension this type is legal and the KxxxB
+  // instructions are used directly.
+ let Predicates = [NoDQI] in
+ def : Pat<(OpNode VK8:$src1, VK8:$src2),
+ (COPY_TO_REGCLASS
+ (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
+ (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+
+ // All types smaller than 8 bits require conversion anyway
+ def : Pat<(OpNode VK1:$src1, VK1:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+ def : Pat<(OpNode VK2:$src1, VK2:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK2:$src1, VK16),
+                              (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
+ def : Pat<(OpNode VK4:$src1, VK4:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK4:$src1, VK16),
+                              (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
defm : avx512_binop_pat<and, KANDWrr>;
@@ -1902,6 +2078,32 @@ defm : avx512_binop_pat<or, KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, KXORWrr>;
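// Illustration of what the avx512_binop_pat instances select: for a DAG
// such as (and (v2i1 VK2:$a), (v2i1 VK2:$b)), both operands are widened
// to VK16, the 16-bit mask instruction runs, and the result is narrowed
// again, roughly
//
//   (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK2:$a, VK16),
//                              (COPY_TO_REGCLASS VK2:$b, VK16)), VK2)
//
// The upper bits of the wide mask are don't-care for the narrow types.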
+def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
+ (KXNORWrr VK16:$src1, VK16:$src2)>;
+def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
+ (KXNORBrr VK8:$src1, VK8:$src2)>;
+def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
+ (KXNORDrr VK32:$src1, VK32:$src2)>;
+def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
+ (KXNORQrr VK64:$src1, VK64:$src2)>;
+
+let Predicates = [NoDQI] in
+def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
+ (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+
+def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
+ (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
+
+def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
+ (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
+
+def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+
// Mask unpacking
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
RegisterClass KRC> {
@@ -1944,19 +2146,24 @@ multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
VEX, PS;
+ let Predicates = [HasDQI] in
+ defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
+ VEX, PD;
+ let Predicates = [HasBWI] in {
+ defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
+ VEX, PS, VEX_W;
+ defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
+ VEX, PD, VEX_W;
+ }
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-def : Pat<(X86cmp VK1:$src1, (i1 0)),
- (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src1, VK16))>;
-
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> {
let Predicates = [HasAVX512] in
- def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
+ def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
@@ -1965,7 +2172,17 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, TAPD, VEX_W;
+ VEX, TAPD, VEX_W;
+ let Predicates = [HasDQI] in
+ defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
+ VEX, TAPD;
+ let Predicates = [HasBWI] in {
+ defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
+ VEX, TAPD, VEX_W;
+ let Predicates = [HasDQI] in
+ defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
+ VEX, TAPD;
+ }
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
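// The i8imm -> u8imm operand change above only affects assembly parsing and
// printing (the shift count is validated as an unsigned 8-bit immediate);
// the selection pattern is unchanged. A sketch of the W form this produces,
// assuming standard suffix concatenation:
//
//   KSHIFTLWri: "kshiftlw\t{$imm, $src, $dst|$dst, $src, $imm}"
//               [(set VK16:$dst, (X86vshli VK16:$src, (i8 imm:$imm)))]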
@@ -1982,6 +2199,8 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
multiclass avx512_mask_setop_w<PatFrag Val> {
defm B : avx512_mask_setop<VK8, v8i1, Val>;
defm W : avx512_mask_setop<VK16, v16i1, Val>;
+ defm D : avx512_mask_setop<VK32, v32i1, Val>;
+ defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
@@ -1991,9 +2210,11 @@ defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
+ def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
+ def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
- def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
- def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
+ def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
}
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
@@ -2004,11 +2225,19 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;
+
+def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
+ (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
+
let Predicates = [HasVLX] in {
def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
(v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
@@ -2016,181 +2245,201 @@ let Predicates = [HasVLX] in {
}
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
- (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
+ (v8i1 (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
+ (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
- (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
+ (v8i1 (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
+ (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
+def : Pat<(v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
-multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
- RegisterClass KRC, RegisterClass RC,
- ValueType vt, ValueType zvt, X86MemOperand memop,
- Domain d, bit IsReMaterializable = 1> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+
+multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag ld_frag, PatFrag mload,
+ bit IsReMaterializable = 1> {
+ let hasSideEffects = 0 in {
+ def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
- d>, EVEX;
- def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
+ _.ExeDomain>, EVEX;
+ def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
- }
+ "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>,
+ EVEX, EVEX_KZ;
+
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
SchedRW = [WriteLoad] in
- def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
+ def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
- d>, EVEX;
-
- let AddedComplexity = 20 in {
- let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
- let hasSideEffects = 0 in
- def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1),
- !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt (vselect KRC:$mask,
- (vt RC:$src1),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
+ _.ExeDomain>, EVEX;
+
+ let Constraints = "$src0 = $dst" in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT _.RC:$src1),
+ (_.VT _.RC:$src0))))], _.ExeDomain>,
+ EVEX, EVEX_K;
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, memop:$src1),
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src1))),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT
+ (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
}
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, memop:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src))),
- (vt (bitconvert (zvt immAllZerosV))))))],
- d>, EVEX, EVEX_KZ;
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src),
+ OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
+ "${dst} {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ _.ExeDomain>, EVEX, EVEX_KZ;
}
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
+ _.KRCWM:$mask, addr:$ptr)>;
}
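// Reading aid for the X86VectorVTInfo rewrite above (field meanings are
// inferred from their uses here, not new definitions): for _ = v16f32_info,
//
//   _.RC    = VR512        _.VT    = v16f32
//   _.MemOp = f512mem      _.KRCWM = VK16WM
//   _.ExeDomain = SSEPackedSingle
//
// and _.ZSuffix feeds the !cast<Instruction>(NAME#...) lookups, so the
// trailing masked_load patterns resolve to the rmk/rmkz forms defined
// just above.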
-multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
- string elty, string elsz, string vsz512,
- string vsz256, string vsz128, Domain d,
- Predicate prd, bit IsReMaterializable = 1> {
+multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
let Predicates = [prd] in
- defm Z : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
- !cast<X86MemOperand>(elty##"512mem"), d,
- IsReMaterializable>, EVEX_V512;
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
+ masked_load_aligned512, IsReMaterializable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz256##elty##elsz, "v4i64")),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
- !cast<X86MemOperand>(elty##"256mem"), d,
- IsReMaterializable>, EVEX_V256;
-
- defm Z128 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz128##elty##elsz, "v2i64")),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
- !cast<X86MemOperand>(elty##"128mem"), d,
- IsReMaterializable>, EVEX_V128;
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
+ masked_load_aligned256, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
+ masked_load_aligned128, IsReMaterializable>, EVEX_V128;
}
}
+multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
+ let Predicates = [prd] in
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V512;
-multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
- ValueType OpVT, RegisterClass KRC, RegisterClass RC,
- X86MemOperand memop, Domain d> {
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
- EVEX;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V128;
+ }
+}
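// A sketch of the instantiation idiom the *_vl multiclasses follow,
// assuming the usual Z/Z256/Z128 suffixing: the 512-bit form is gated on
// the base predicate alone, while the 256/128-bit EVEX forms additionally
// require VLX, e.g.
//
//   defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info,
//                                 HasAVX512>;
//   // -> VMOVUPSZrm (HasAVX512),
//   //    VMOVUPSZ256rm / VMOVUPSZ128rm (HasAVX512 and HasVLX)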
+
+multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag st_frag, PatFrag mstore> {
+ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+ def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
+ _.ExeDomain>, EVEX;
let Constraints = "$src1 = $dst" in
- def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
- EVEX, EVEX_K;
- def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
+ def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2),
+ OpcodeStr #
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
+ def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr #
+ "\t{$src, ${dst} {${mask}} {z}|" #
+ "${dst} {${mask}} {z}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_KZ;
}
let mayStore = 1 in {
- def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
+ [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
- (ins memop:$dst, KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [], d>, EVEX, EVEX_K;
+ (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
}
+
+ def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
+ _.KRCWM:$mask, _.RC:$src)>;
}
-multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
- string st_suff_512, string st_suff_256,
- string st_suff_128, string elty, string elsz,
- string vsz512, string vsz256, string vsz128,
- Domain d, Predicate prd> {
+multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
- !cast<ValueType>("v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
+ masked_store_unaligned>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
- !cast<ValueType>("v"##vsz256##elty##elsz),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
-
- defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
- !cast<ValueType>("v"##vsz128##elty##elsz),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
+ masked_store_unaligned>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
+ masked_store_unaligned>, EVEX_V128;
}
}
-defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x29, "vmovaps", "alignedstore",
- "512", "256", "", "f", "32", "16", "8", "4",
- SSEPackedSingle, HasAVX512>,
- PS, EVEX_CD8<32, CD8VF>;
+multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
+ masked_store_aligned512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
+ masked_store_aligned256>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
+ masked_store_aligned128>, EVEX_V128;
+ }
+}
-defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- avx512_store_vl<0x29, "vmovapd", "alignedstore",
- "512", "256", "", "f", "64", "8", "4", "2",
- SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
+defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
PS, EVEX_CD8<32, CD8VF>;
-defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
- avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
+ avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
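// With the masked_load/masked_store patterns now generated inside
// avx512_load/avx512_store, a generic masked load such as
//
//   (v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef))
//
// selects directly to the zero-masking VMOVUPSZrmkz form, replacing the
// hand-written per-type patterns deleted further down in this diff.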
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
(bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
@@ -2200,6 +2449,22 @@ def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
(bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
(VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
+ (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+
+def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
+ (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
+ (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+
+def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
+ (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
+ (VMOVAPDZrm addr:$ptr)>;
+
+def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
+ (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
+ (VMOVAPSZrm addr:$ptr)>;
+
def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
GR16:$mask),
(VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
@@ -2209,6 +2474,16 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
+def: Pat<(int_x86_avx512_mask_store_ps_512 addr:$ptr, (v16f32 VR512:$src),
+ GR16:$mask),
+ (VMOVAPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
+ VR512:$src)>;
+def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
+ GR8:$mask),
+ (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
+ VR512:$src)>;
+
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
(VMOVUPSZmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2218,73 +2493,36 @@ def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)),
- (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)),
- (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask,
- (bc_v16f32 (v16i32 immAllZerosV)))),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))),
- (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8f64 (v16i32 immAllZerosV)))),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))),
- (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+}
+
+defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>, PD, EVEX_CD8<32, CD8VF>;
-defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
- "512", "256", "", "i", "32", "16", "8", "4",
- SSEPackedInt, HasAVX512>,
- PD, EVEX_CD8<32, CD8VF>;
-
-defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
- "512", "256", "", "i", "64", "8", "4", "2",
- SSEPackedInt, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
- "64", "32", "16", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
- "i", "8", "64", "32", "16", SSEPackedInt,
+defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI>, XD, EVEX_CD8<8, CD8VF>;
-defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
- "32", "16", "8", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
- "i", "16", "32", "16", "8", SSEPackedInt,
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
- "i", "32", "16", "8", "4", SSEPackedInt,
+defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
- "i", "64", "8", "4", "2", SSEPackedInt,
+defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
@@ -2322,37 +2560,8 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))),
- (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))),
- (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)),
- (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)),
- (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-// SKX replacement
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
- (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>;
-
-// KNL replacement
+// NoVLX patterns
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
(VMOVDQU32Zmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2361,7 +2570,7 @@ def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-
+}
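// The NoVLX fallback idiom above, sketched: a 256-bit masked op on a
// KNL-class target is routed through the 512-bit instruction by widening
// the mask (VK8WM -> VK16WM) and operating on the ymm subregister of a
// zmm value, e.g. for the masked load case
//
//   (EXTRACT_SUBREG (VMOVDQU32Zrmkz
//                     (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
//                     addr:$ptr), sub_ymm)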
// Move Int Doubleword to Packed Double Int
//
@@ -2816,7 +3025,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- "", itins.rr, IsCommutable>,
+ itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
let mayLoad = 1 in
@@ -2825,7 +3034,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))),
- "", itins.rm>,
+ itins.rm>,
AVX512BIBase, EVEX_4V;
}
@@ -2841,7 +3050,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src1,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
- "", itins.rm>,
+ itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
@@ -2934,60 +3143,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
itins, HasBWI, IsCommutable>;
}
-multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
- ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
- PatFrag memop_frag, X86MemOperand x86memop,
- PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
- string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- {
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_KZ;
- }
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
+                            SDNode OpNode, X86VectorVTInfo _Src,
+ X86VectorVTInfo _Dst, bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2))),
+ itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_KZ;
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B;
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (bitconvert (_Src.LdFrag addr:$src2)))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V;
+
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ OpcodeStr,
+ "${src2}"##_Dst.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_Dst.BroadcastStr,
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Dst.VT (X86VBroadcast
+ (_Dst.ScalarLdFrag addr:$src2)))))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
}
}
@@ -2995,6 +3180,14 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SSE_INTALU_ITINS_P, 0>;
+defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
+ SSE_INTALU_ITINS_P, HasBWI, 0>;
+defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
+ SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
@@ -3002,24 +3195,97 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
+ SDNode OpNode, bit IsCommutable = 0> {
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+ defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v16i32_info, v8i64_info, IsCommutable>,
+ EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v8i32x_info, v4i64x_info, IsCommutable>,
+ EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
+ defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
+ v4i32x_info, v2i64x_info, IsCommutable>,
+ EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
+ }
+}
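// A rough expansion sketch, assuming the usual NAME concatenation: the
// defms below, e.g.
//
//   defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", ..., X86pmuldq, 1>;
//
// yield VPMULDQZrr/rm/rmb (plus masked variants via AVX512_maskable)
// taking v16i32 sources and producing v8i64, with Z256/Z128 forms under
// HasVLX; this subsumes the hand-written VR512 patterns removed here.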
-def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
+defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
+ X86pmuldq, 1>,T8PD;
+defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
+ X86pmuludq, 1>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULDQZrr VR512:$src1, VR512:$src2)>;
+multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
+ let mayLoad = 1 in {
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
+ OpcodeStr,
+ "${src2}"##_Src.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_Src.BroadcastStr,
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Src.VT (X86VBroadcast
+ (_Src.ScalarLdFrag addr:$src2))))))>,
+ EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
+ }
+}
+
+multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
+                           SDNode OpNode, X86VectorVTInfo _Src,
+ X86VectorVTInfo _Dst> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2)))>,
+ EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (bitconvert (_Src.LdFrag addr:$src2))))>,
+ EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
+ }
+}
+
+multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
+ v32i16_info>,
+ avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
+ v32i16_info>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
+ v16i16x_info>,
+ avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
+ v16i16x_info>, EVEX_V256;
+ defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
+ v8i16x_info>,
+ avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
+ v8i16x_info>, EVEX_V128;
+ }
+}
+multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
+ v64i8_info>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
+ v32i8x_info>, EVEX_V256;
+ defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
+ v16i8x_info>, EVEX_V128;
+ }
+}
+let Predicates = [HasBWI] in {
+ defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
+ defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
+ defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
+ defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
+}
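// A worked instance of the pack multiclasses above (illustration only):
// at 512 bits, vpackssdw takes two v16i32 sources and produces v32i16
// with signed saturation, so the VPACKSSDWZrr pattern is roughly
//
//   (set VR512:$dst, (v32i16 (X86Packss (v16i32 VR512:$src1),
//                                       (v16i32 VR512:$src2))))
//
// Only the i32->i16 forms get the embedded-broadcast (rmb) variant, since
// EVEX embedded broadcast exists only at 32/64-bit granularity and the
// i16->i8 packs would need a word-sized one.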
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
@@ -3094,16 +3360,16 @@ multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
d>, EVEX_4V;
}
-defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
+defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64,
VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
+defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64,
VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
+defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64,
VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
+defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64,
VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -3123,16 +3389,16 @@ multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
IIC_SSE_UNPCK>, EVEX_4V;
}
defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
- VR512, memopv16i32, i512mem>, EVEX_V512,
+ VR512, loadv16i32, i512mem>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
- VR512, memopv8i64, i512mem>, EVEX_V512,
+ VR512, loadv8i64, i512mem>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
- VR512, memopv16i32, i512mem>, EVEX_V512,
+ VR512, loadv16i32, i512mem>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
- VR512, memopv8i64, i512mem>, EVEX_V512,
+ VR512, loadv8i64, i512mem>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - PSHUFD
@@ -3142,14 +3408,14 @@ multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT> {
def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+ (ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
EVEX;
def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ (ins x86memop:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -3157,7 +3423,7 @@ multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
(i8 imm:$src2))))]>, EVEX;
}
-defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
+defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32,
i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
//===----------------------------------------------------------------------===//
@@ -3171,32 +3437,99 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
+ SSE_INTALU_ITINS_P, HasAVX512, 0>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
+multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode, SDNode VecNode, OpndItins itins,
+ bit IsCommutable> {
-multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins> {
- defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
- f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
- defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
- f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
-}
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT)),
+ itins.rr, IsCommutable>;
-let isCommutable = 1 in {
-defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
-defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
-defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
-defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT)),
+ itins.rm, IsCommutable>;
+ let isCodeGenOnly = 1, isCommutable = IsCommutable,
+ Predicates = [HasAVX512] in {
+  def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+ itins.rr>;
+  def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2)))], itins.rr>;
+ }
}
-let isCommutable = 0 in {
-defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
-defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+
+multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
+                                  X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$rc)), itins.rr, IsCommutable>,
+ EVEX_B, EVEX_RC;
}
+multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,
+                                X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+}
+
+multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+
+multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>;
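// Usage sketch for the new rounding/sae scalar forms (illustrative
// assembly): the rrb variants add an AVX512RC operand encoded via EVEX.b,
// e.g.
//
//   vaddss {rd-sae}, %xmm2, %xmm1, %xmm0   // X86faddRnd with imm:$rc
//   vminss {sae}, %xmm2, %xmm1, %xmm0      // X86fminRnd, FROUND_NO_EXC
//
// min/max only get the {sae} form because they perform no rounding, which
// is why they route through avx512_binop_s_sae rather than
// avx512_binop_s_round.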
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> {
@@ -3219,7 +3552,26 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}//let mayLoad = 1
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
+ EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+
+multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
+ EVEX_4V, EVEX_B;
+}
+
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
IsCommutable>, EVEX_V512, PS,
@@ -3245,67 +3597,121 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
- (i16 -1), FROUND_CURRENT)),
- (VMAXPSZrr VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
- (i8 -1), FROUND_CURRENT)),
- (VMAXPDZrr VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
- (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
- (i16 -1), FROUND_CURRENT)),
- (VMINPSZrr VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
- (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
- (i8 -1), FROUND_CURRENT)),
- (VMINPDZrr VR512:$src1, VR512:$src2)>;
+multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+ defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+                  EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
+ avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
+ avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
+ avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
+ avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>,
+ avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>,
+ avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
+let Predicates = [HasDQI] in {
+ defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>;
+ defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>;
+ defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>;
+ defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt> {
- def rr : AVX512PI<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
- SSEPackedInt>, EVEX_4V;
- def rm : AVX512PI<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1),
- (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+ EVEX_4V;
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))))>,
+ EVEX_4V,
+ EVEX_CD8<_.EltSize, CD8VF>;
}
-defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
- memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
- memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))>,
+ EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+}
+multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
-let Predicates = [HasCDI] in {
-defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
- memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
- memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ }
+}
+
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
+ avx512vl_i32_info>;
+ defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
+ avx512vl_i64_info>, VEX_W;
+}
+
+multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Predicates = [HasBWI] in {
+ defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
+ EVEX_V512, VEX_W;
+ defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
+ EVEX_V512;
+ }
+ let Predicates = [HasVLX, HasBWI] in {
+
+ defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
+ EVEX_V256, VEX_W;
+ defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
+ EVEX_V128, VEX_W;
+ defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
+ EVEX_V256;
+ defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
+ EVEX_V128;
+ }
}
+multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
+ SDNode OpNode> :
+ avx512_vptest_wb <opc_wb, OpcodeStr, OpNode>,
+ avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
+
+defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD;
+defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS;
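// Semantics note with a small selection sketch: vptestm sets mask bit i
// to ((src1[i] & src2[i]) != 0) and vptestnm to its complement, so a DAG
// like
//
//   (v16i1 (X86testm (v16i32 VR512:$a), (v16i32 VR512:$b)))
//
// now selects to VPTESTMDZrr through the rewritten multiclasses, with
// byte/word element sizes under HasBWI and 128/256-bit forms under VLX.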
+
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (i16 -1))),
(COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
@@ -3320,69 +3726,130 @@ def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
- (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
+ (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ let mayLoad = 1 in
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
- (ins _.MemOp:$src1, i8imm:$src2), OpcodeStr,
+ (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.MemOpFrag addr:$src1), (i8 imm:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
+ (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i8 imm:$src2))),
+ SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
+}
+
+multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
+ "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
+ (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
+ SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
}
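The i8imm to u8imm switch here (and repeated throughout the hunks below) affects assembler range checking, not code generation. A sketch of the distinction, assuming the operand definitions of this vintage in X86InstrInfo.td:

// i8imm : parser accepts a signed 8-bit immediate, -128..127
// u8imm : parser accepts an unsigned 8-bit immediate, 0..255
// Shift and rotate counts are architecturally unsigned, so
// "vpsrld $0xff, %zmm0, %zmm1" should assemble under u8imm.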
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
+ SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (bc_frag (memopv2i64 addr:$src2)))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+ (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
+ SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
+ EVEX_4V;
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
- defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
+ ValueType SrcVT, PatFrag bc_frag,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info512>, EVEX_V512,
+ EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info256>, EVEX_V256,
+ EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
+ defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info128>, EVEX_V128,
+ EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
+ string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
- v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
- v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
+ defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
+ avx512vl_i16_info, HasBWI>;
+}
+
+multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>, EVEX_V256;
+ defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>, EVEX_V128;
+ }
}
-defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_w<bits<8> opcw,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v32i16_info>, EVEX_V512;
+ let Predicates = [HasVLX, HasBWI] in {
+ defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v16i16x_info>, EVEX_V256;
+ defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v8i16x_info>, EVEX_V128;
+ }
+}
-defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- v16i32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
+ avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
+}
+
+defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
+ avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>;
+
+defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
+ avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>;
+
+defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
+ avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>;
-defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>;
-defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>;
-defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>;
-defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
+defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
+defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
+defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
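Although VPSLL, VPSRA and VPSRL each now appear in two defm lines (immediate forms above, xmm-count forms here), the suffix chains keep every generated record name distinct; roughly:

// avx512_shift_rmi_dq:  VPSLL ## D ## Z... ## {ri,mi,mbi}  (imm forms)
// avx512_shift_types:   VPSLL ## D ## Z... ## {rr,rm}      (xmm count)
// W variants are gated on HasBWI; Z256/Z128 variants on HasVLX.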
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
@@ -3393,30 +3860,72 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
+ SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
+ let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V;
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
+ SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
+ EVEX_CD8<_.EltSize, CD8VF>;
}
+multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))),
+ SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
- defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ }
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
- avx512vl_i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info>, VEX_W;
+}
+
+multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
+ EVEX_V512, VEX_W;
+ let Predicates = [HasVLX, HasBWI] in {
+
+ defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
+ EVEX_V256, VEX_W;
+ defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
+ EVEX_V128, VEX_W;
+ }
}
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>;
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>;
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>;
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
+defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
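For reference, each variable-shift defm now yields masked register, memory and embedded-broadcast forms per vector length; a sketch for one instance:

// VPSLLVD -> VPSLLVDZ{rr,rrk,rrkz,rm...,rmb...}   (EVEX_V512)
//            VPSLLVDZ256*, VPSLLVDZ128* under HasVLX
// The W forms (VPSLLVW etc.) exist only under HasBWI via avx512_var_shift_w.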
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
@@ -3433,7 +3942,7 @@ def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
(VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
}
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
+defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
(VMOVDDUPZrm addr:$src)>;
@@ -3454,17 +3963,17 @@ multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
}
defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
+def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
(VMOVSHDUPZrm addr:$src)>;
def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
+def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
(VMOVSLDUPZrm addr:$src)>;
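The pervasive memopvN to loadvN changes in this patch swap an alignment-checking PatFrag for a plain load: EVEX-encoded memory operands do not require alignment, so the aligned-only fragment was blocking legitimate load folds. From memory, the fragments are defined in X86InstrFragmentsSIMD.td roughly as follows (treat as illustrative):

// def loadv16f32  : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
// def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
// where "memop" additionally requires a sufficiently aligned load (or a
// target with fast unaligned SSE memory access).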
//===----------------------------------------------------------------------===//
@@ -3516,28 +4025,51 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ),
- (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
} // Constraints = "$src1 = $dst"
+let Constraints = "$src1 = $dst" in {
+// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
+multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _,
+ SDPatternOperator OpNode> {
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+ AVX512FMA3Base, EVEX_B, EVEX_RC;
+ }
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
+ X86VectorVTInfo VTI, SDPatternOperator OpNode> {
+ defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
+ VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+}
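The null_frag comment above is the key to the v231r line further down: a minimal illustration, assuming AVX512_maskable only emits a (set ...) pattern when given a real node:

// defm v231r : avx512_fma3p_rm<opc231, ..., VTI>;  // OpNode omitted
// OpNode defaults to null_frag, so TableGen still generates the encodings
// and asm strings for the 231 form, but only 213 gets an ISel pattern.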
+
multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
string OpcodeStr, X86VectorVTInfo VTI,
SDPatternOperator OpNode> {
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
-
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
}
multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
string OpcodeStr,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode,
+ SDPatternOperator OpNodeRnd> {
let ExeDomain = SSEPackedSingle in {
defm NAME##PSZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v16f32_info, OpNode>, EVEX_V512;
+ v16f32_info, OpNode>,
+ avx512_fma3_round_forms<opc213, OpcodeStr,
+ v16f32_info, OpNodeRnd>, EVEX_V512;
defm NAME##PSZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v8f32x_info, OpNode>, EVEX_V256;
defm NAME##PSZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
@@ -3545,20 +4077,24 @@ let ExeDomain = SSEPackedSingle in {
}
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v8f64_info, OpNode>, EVEX_V512, VEX_W;
+ v8f64_info, OpNode>,
+ avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
+ OpNodeRnd>, EVEX_V512, VEX_W;
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>, EVEX_V256, VEX_W;
+ v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W;
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>, EVEX_V128, VEX_W;
+ v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W;
}
}
-defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd>;
-defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub>;
-defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub>;
-defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd>;
-defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd>;
-defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub>;
+defm VFMADD : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
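Note the asymmetry above: avx512_fma3_round_forms is attached only to the 512-bit PSZ/PDZ instantiations, which matches the ISA, since EVEX embedded rounding is only encodable on 512-bit (and scalar) forms. Hand-written example, not generated output:

// vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0  // valid: EVEX_RC at 512 bits
// vfmadd213ps {rd-sae}, %ymm2, %ymm1, %ymm0  // invalid: no RC at 256 bits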
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3567,7 +4103,7 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
_.RC:$src3)))]>;
def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
@@ -3580,26 +4116,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // Constraints = "$src1 = $dst"
-
-multiclass avx512_fma3p_m132_f<bits<8> opc,
- string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ OpNode,v16f32_info>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f32x_info>, EVEX_V256,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f64_info>, EVEX_V512,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f64x_info>, EVEX_V256,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v2f64x_info>, EVEX_V128,
+ VEX_W, EVEX_CD8<32, CD8VF>;
}
}
@@ -3610,7 +4149,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
-
// Scalar FMA
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3633,7 +4171,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
}
-
} // Constraints = "$src1 = $dst"
defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
@@ -3670,6 +4207,7 @@ let hasSideEffects = 0 in {
EVEX_4V;
} // hasSideEffects = 0
}
+
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
@@ -3951,12 +4489,12 @@ let hasSideEffects = 0 in {
}
defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
- memopv8f64, f512mem, v8f32, v8f64,
+ loadv8f64, f512mem, v8f32, v8f64,
SSEPackedSingle>, EVEX_V512, VEX_W, PD,
EVEX_CD8<64, CD8VF>;
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
- memopv4f64, f256mem, v8f64, v8f32,
+ loadv4f64, f256mem, v8f64, v8f32,
SSEPackedDouble>, EVEX_V512, PS,
EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
@@ -3975,27 +4513,27 @@ def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
//===----------------------------------------------------------------------===//
defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
- memopv8i64, i512mem, v16f32, v16i32,
+ loadv8i64, i512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
- memopv4i64, i256mem, v8f64, v8i32,
+ loadv4i64, i256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
- memopv16f32, f512mem, v16i32, v16f32,
+ loadv16f32, f512mem, v16i32, v16f32,
SSEPackedSingle>, EVEX_V512, XS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
- memopv8f64, f512mem, v8i32, v8f64,
+ loadv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
- memopv16f32, f512mem, v16i32, v16f32,
+ loadv16f32, f512mem, v16i32, v16f32,
SSEPackedSingle>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
@@ -4005,7 +4543,7 @@ def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
(VCVTTPS2UDQZrr VR512:$src)>;
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
- memopv8f64, f512mem, v8i32, v8f64,
+ loadv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
@@ -4015,12 +4553,12 @@ def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
(VCVTTPD2UDQZrr VR512:$src)>;
defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
- memopv4i64, f256mem, v8f64, v8i32,
+ loadv4i64, f256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
- memopv16i32, f512mem, v16f32, v16i32,
+ loadv16i32, f512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, XD,
EVEX_CD8<32, CD8VF>;
@@ -4075,10 +4613,10 @@ let hasSideEffects = 0 in {
}
defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
- memopv16f32, f512mem, SSEPackedSingle>, PD,
+ loadv16f32, f512mem, SSEPackedSingle>, PD,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
- memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
+ loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VF>;
def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
@@ -4090,10 +4628,10 @@ def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
(VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
- memopv16f32, f512mem, SSEPackedSingle>,
+ loadv16f32, f512mem, SSEPackedSingle>,
PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
- memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
+ loadv8f64, f512mem, SSEPackedDouble>, VEX_W,
PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
@@ -4127,12 +4665,12 @@ multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
X86MemOperand x86memop> {
def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
- (ins srcRC:$src1, i32i8imm:$src2),
+ (ins srcRC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, EVEX;
let hasSideEffects = 0, mayStore = 1 in
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
+ (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
@@ -4299,9 +4837,9 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
+ (i32 FROUND_NO_EXC))>, EVEX_B;
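The {sae} marker moves out of the old trailing round-string parameter and into the AT&T and Intel operand strings themselves, so it now prints and parses positionally. Assuming this multiclass backs vrcp28ss as in the rest of the ERI section, the printed forms would be:

// AT&T:  vrcp28ss {sae}, %xmm2, %xmm1, %xmm0 {%k1}
// Intel: vrcp28ss xmm0 {k1}, xmm1, xmm2, {sae}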
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
@@ -4333,9 +4871,8 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
- "$src", "$src",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
- "{sae}">, EVEX_B;
+ "{sae}, $src", "$src, {sae}",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
@@ -4523,107 +5060,6 @@ let Predicates = [HasAVX512] in {
}
-multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
- X86MemOperand x86memop, RegisterClass RC,
- PatFrag mem_frag32, PatFrag mem_frag64,
- Intrinsic V4F32Int, Intrinsic V2F64Int,
- CD8VForm VForm> {
-let ExeDomain = SSEPackedSingle in {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr : AVX512AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
-
- // Vector intrinsic operation, mem
- def PSm : AVX512AIi8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
- EVEX_CD8<32, VForm>;
-} // ExeDomain = SSEPackedSingle
-
-let ExeDomain = SSEPackedDouble in {
- // Vector intrinsic operation, reg
- def PDr : AVX512AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
-
- // Vector intrinsic operation, mem
- def PDm : AVX512AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
- EVEX_CD8<64, VForm>;
-} // ExeDomain = SSEPackedDouble
-}
-
-multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr,
- Intrinsic F32Int,
- Intrinsic F64Int> {
-let ExeDomain = GenericDomain in {
- // Operation, reg.
- let hasSideEffects = 0 in
- def SSr : AVX512AIi8<opcss, MRMSrcReg,
- (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>;
-
- // Intrinsic operation, reg.
- let isCodeGenOnly = 1 in
- def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
-
- // Intrinsic operation, mem.
- def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst, (F32Int VR128X:$src1,
- sse_load_f32:$src2, imm:$src3))]>,
- EVEX_CD8<32, CD8VT1>;
-
- // Operation, reg.
- let hasSideEffects = 0 in
- def SDr : AVX512AIi8<opcsd, MRMSrcReg,
- (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_W;
-
- // Intrinsic operation, reg.
- let isCodeGenOnly = 1 in
- def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
- VEX_W;
-
- // Intrinsic operation, mem.
- def SDm : AVX512AIi8<opcsd, MRMSrcMem,
- (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-} // ExeDomain = GenericDomain
-}
-
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag, Domain d> {
@@ -4631,23 +5067,22 @@ let ExeDomain = d in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def r : AVX512AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
// Vector intrinsic operation, mem
def m : AVX512AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
} // ExeDomain
}
-
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
- memopv16f32, SSEPackedSingle>, EVEX_V512,
+ loadv16f32, SSEPackedSingle>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
@@ -4657,7 +5092,7 @@ def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
- memopv8f64, SSEPackedDouble>, EVEX_V512,
+ loadv8f64, SSEPackedDouble>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
@@ -4665,50 +5100,72 @@ def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
FROUND_CURRENT)),
(VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
-multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
- Operand x86memop, RegisterClass RC, Domain d> {
-let ExeDomain = d in {
- def r : AVX512AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
+multiclass
+avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
- def m : AVX512AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
-} // ExeDomain
+ let ExeDomain = _.ExeDomain in {
+ defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
+
+ defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
+ "{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
+ (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
+
+ let mayLoad = 1 in
+ defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (X86RndScale (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>;
+ def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>;
+ def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>;
+ def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
+ def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
+
+ def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
+ addr:$src, (i32 0x1))), _.FRC)>;
+ def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
+ addr:$src, (i32 0x2))), _.FRC)>;
+ def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
+ addr:$src, (i32 0x3))), _.FRC)>;
+ def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
+ addr:$src, (i32 0x4))), _.FRC)>;
+ def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
+ (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
+ addr:$src, (i32 0xc))), _.FRC)>;
+ }
}
-defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
- SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
-
-defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
- SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
-
-def : Pat<(ffloor FR32X:$src),
- (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
-def : Pat<(f64 (ffloor FR64X:$src)),
- (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
-def : Pat<(f32 (fnearbyint FR32X:$src)),
- (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
-def : Pat<(f64 (fnearbyint FR64X:$src)),
- (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
-def : Pat<(f32 (fceil FR32X:$src)),
- (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
-def : Pat<(f64 (fceil FR64X:$src)),
- (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
-def : Pat<(f32 (frint FR32X:$src)),
- (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
-def : Pat<(f64 (frint FR64X:$src)),
- (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
-def : Pat<(f32 (ftrunc FR32X:$src)),
- (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
-def : Pat<(f64 (ftrunc FR64X:$src)),
- (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
+defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
+
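The scalar patterns below hard-code the VRNDSCALE immediate for each libm-style node; the same mapping is used by the packed VRNDSCALEP patterns later in the file:

// imm 0x1 = ffloor     (round toward -infinity)
// imm 0x2 = fceil      (round toward +infinity)
// imm 0x3 = ftrunc     (round toward zero)
// imm 0x4 = frint      (current rounding mode, precision exception allowed)
// imm 0xC = fnearbyint (current mode, bit 3 suppresses the precision trap)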
+let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
@@ -4730,7 +5187,7 @@ def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
-
+}
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
@@ -4812,151 +5269,224 @@ def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
(VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
-multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
- PatFrag mem_frag, X86MemOperand x86memop,
- ValueType OpVT, ValueType InVT> {
+multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
+ X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
- def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
+ defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
+ EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
+ let mayLoad = 1 in {
+ defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins x86memop:$src), OpcodeStr ,"$src", "$src",
+ (DestInfo.VT (LdFrag addr:$src))>,
+ EVEX;
+ }
+}
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
+multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasBWI] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
- EVEX;
-
- def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
- []>,
- EVEX, EVEX_K;
-
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>,
- EVEX, EVEX_KZ;
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
+ v16i8x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasBWI] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
+ v32i8x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+ v16i8x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
+ v16i8x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v16i8x_info, i16mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v16i8x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+ v8i16x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+ v8i16x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
+ v16i16x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v8i16x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v8i16x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v8i16x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
}
}
-defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
- memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VQ>;
-defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
- memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VO>;
-defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
- memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
- EVEX_CD8<16, CD8VH>;
-defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
- memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
- EVEX_CD8<16, CD8VQ>;
-defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
- memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
-
-defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
- memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VQ>;
-defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
- memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VO>;
-defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
- memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
- EVEX_CD8<16, CD8VH>;
-defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
- memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
- EVEX_CD8<16, CD8VQ>;
-defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
- memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
+multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
+
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v4i32x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v4i32x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v8i32x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
+
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
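Each of these defm lines expands through avx512_extend_common into masked rr and rm forms at up to three vector lengths, e.g.:

// VPMOVZXBW -> VPMOVZXBWZ{rr,rrk,rrkz,rm,rmk,rmkz} under HasBWI,
//              plus Z256/Z128 variants under HasVLX+HasBWI;
// the "z"/"s" string argument selects zextloadvi8 vs. sextloadvi8 (etc.)
// as the default LdFrag via the !cast in each multiclass header.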
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
-multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand memop> {
-let mayLoad = 1,
- Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
- (ins RC:$src1, KRC:$mask, memop:$src2),
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86MemOperand memop, PatFrag GatherNode> {
+ let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+ def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
+ (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
+ [(set _.RC:$dst, _.KRCWM:$mask_wb,
+ (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
+ vectoraddr:$src2))]>, EVEX, EVEX_K,
+ EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
+ mgatherv8i32>, EVEX_V512, VEX_W;
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
+ mgatherv8i64>, EVEX_V512, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
+ mgatherv16i32>, EVEX_V512;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
+ mgatherv8i64>, EVEX_V512;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
+ mgatherv8i32>, EVEX_V512, VEX_W;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
+ mgatherv16i32>, EVEX_V512;
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
+ mgatherv8i64>, EVEX_V512, VEX_W;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
+ mgatherv8i64>, EVEX_V512;
+
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86MemOperand memop, PatFrag ScatterNode> {
-multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand memop> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
- def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
- (ins memop:$dst, KRC:$mask, RC:$src2),
+
+ def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
+ (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
+ _.KRCWM:$mask, vectoraddr:$dst))]>,
+ EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
+ mscatterv8i32>, EVEX_V512, VEX_W;
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
+ mscatterv8i64>, EVEX_V512, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
+ mscatterv16i32>, EVEX_V512;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
+ mscatterv8i64>, EVEX_V512;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
+ mscatterv8i32>, EVEX_V512, VEX_W;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
+ mscatterv16i32>, EVEX_V512;
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
+ mscatterv8i64>, EVEX_V512, VEX_W;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
+ mscatterv8i64>, EVEX_V512;
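A note on the tied operands in both multiclasses: hardware gathers and scatters retire one element at a time and clear the corresponding mask bit as they go, hence the mask is modeled as an output ($mask = $mask_wb) and the gather destination is @earlyclobber. The new gather pattern binds both results, restating the code above:

// (set _.RC:$dst, _.KRCWM:$mask_wb,
//      (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask, vectoraddr:$src2))
// i.e. the gathered vector plus the (normally all-zero) final mask.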
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
@@ -5021,14 +5551,14 @@ multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string OpcodeStr, PatFrag mem_frag,
Domain d> {
def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -5036,26 +5566,26 @@ multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
EVEX_4V, Sched<[WriteShuffle]>;
}
-defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
+defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", loadv16f32,
SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
+defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", loadv8f64,
SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
(VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1,
- (memopv16i32 addr:$src2), (i8 imm:$imm))),
+ (loadv16i32 addr:$src2), (i8 imm:$imm))),
(VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
(VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1,
- (memopv8i64 addr:$src2), (i8 imm:$imm))),
+ (loadv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
multiclass avx512_valign<X86VectorVTInfo _> {
defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
"valign"##_.Suffix,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
@@ -5068,7 +5598,7 @@ multiclass avx512_valign<X86VectorVTInfo _> {
let mayLoad = 1 in
def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
!strconcat("valign"##_.Suffix,
"\t{$src3, $src2, $src1, $dst|"
"$dst, $src1, $src2, $src3}"),
@@ -5156,14 +5686,17 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
X86MemOperand x86scalar_mop, string BrdcstStr> {
+ let hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
[]>, EVEX;
+ let mayLoad = 1 in
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
[]>, EVEX;
+ let mayLoad = 1 in
def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@@ -5174,11 +5707,13 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
+ let mayLoad = 1 in
def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_KZ;
+ let mayLoad = 1 in
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@@ -5192,17 +5727,20 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
+ let mayLoad = 1 in
def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[]>, EVEX, EVEX_K;
+ let mayLoad = 1 in
def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
[]>, EVEX, EVEX_K, EVEX_B;
- }
+ }
+ }
}
let Predicates = [HasCDI] in {
@@ -5249,11 +5787,11 @@ def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
(VPLZCNTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
+def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))),
(VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
(VPLZCNTDrr VR512:$src)>;
-def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
+def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))),
(VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
(VPLZCNTQrr VR512:$src)>;
@@ -5263,7 +5801,14 @@ def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
def : Pat<(store VK1:$src, addr:$dst),
- (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+ sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(store VK8:$src, addr:$dst),
+ (MOV8mr addr:$dst,
+ (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+ sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr), [{
@@ -5274,7 +5819,7 @@ def : Pat<(truncstorei1 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
-def rr : AVX512XS8I<opc, MRMDestReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
@@ -5303,6 +5848,35 @@ multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
+multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
+def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set _.KRC:$dst, (trunc (_.VT _.RC:$src)))]>, EVEX;
+}
+
+multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+let Predicates = [prd] in
+ defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
+ EVEX_V256;
+ defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
+ EVEX_V128;
+ }
+}
+
+defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
+ avx512vl_i8_info, HasBWI>;
+defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
+ avx512vl_i16_info, HasBWI>, VEX_W;
+defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
+ avx512vl_i32_info, HasDQI>;
+defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
+ avx512vl_i64_info, HasDQI>, VEX_W;
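These truncate-to-mask conversions split by feature flag: byte/word element forms require BWI, dword/qword forms require DQI. Usage sketch:

// vpmovb2m %zmm0, %k0   // 64 mask bits from 64 bytes  (HasBWI)
// vpmovq2m %zmm0, %k0   // 8 mask bits from 8 qwords   (HasDQI)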
+
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index 78efc4d..5e19ad4 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
} // isCommutable
def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 880c982..912a0fb 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -17,12 +17,12 @@
def GetLo32XForm : SDNodeXForm<imm, [{
// Transformation function: get the low 32 bits.
- return getI32Imm((unsigned)N->getZExtValue());
+ return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));
}]>;
def GetLo8XForm : SDNodeXForm<imm, [{
// Transformation function: get the low 8 bits.
- return getI8Imm((uint8_t)N->getZExtValue());
+ return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}]>;
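
These SDNodeXForms only compute a new immediate from the matched one; the
change here just threads the node's debug location (SDLoc) into the newly
created constant. Stripped of the SelectionDAG plumbing, the value
transformation is a plain truncation, as in this C++ sketch:

#include <cstdint>

// The arithmetic behind GetLo32XForm / GetLo8XForm: keep the low bits.
uint32_t getLo32(uint64_t imm) { return static_cast<uint32_t>(imm); }
uint8_t  getLo8 (uint64_t imm) { return static_cast<uint8_t>(imm); }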
@@ -475,59 +475,54 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure.
-let usesCustomInserter = 1, Uses = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
- "#CMOV_GR8 PSEUDO!",
- [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
- imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
- "#CMOV_GR32* PSEUDO!",
- [(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
- "#CMOV_GR16* PSEUDO!",
- [(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-} // Predicates = [NoCMov]
-
-// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
-// SSE1.
-let Predicates = [FPStackf32] in
-def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst),
- (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
- "#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst,
- (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
- EFLAGS))]>;
-// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
-// SSE2.
-let Predicates = [FPStackf64] in
-def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst),
- (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
- "#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst,
- (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst),
- (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
- "#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst,
- (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
- EFLAGS))]>;
-} // UsesCustomInserter = 1, Uses = [EFLAGS]
+// CMOV* - Used to implement the SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
+ def CMOV#NAME : I<0, Pseudo,
+ (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
+ "#CMOV_"#NAME#" PSEUDO!",
+ [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+let usesCustomInserter = 1, Uses = [EFLAGS] in {
+  // X86 doesn't have 8-bit conditional moves. Use a customInserter to
+  // emit control flow. An alternative to this is to mark i8 SELECT as
+  // Promote; however, that requires promoting the operands and can induce
+  // additional i8 register pressure.
+ defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;
+
+ let Predicates = [NoCMov] in {
+ defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
+ defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
+ } // Predicates = [NoCMov]
+
+ // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
+ // SSE1/SSE2.
+ let Predicates = [FPStackf32] in
+ defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
+
+ let Predicates = [FPStackf64] in
+ defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
+
+ defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
+
+ defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
+ defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
+ defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
+ defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
+ defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
+ defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
+ defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
+ defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
+ defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
+ defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
+ defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
+ defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
+ defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
+ defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
+} // usesCustomInserter = 1, Uses = [EFLAGS]
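
Because these pseudos use a custom inserter, each SELECT survives instruction
selection and is later expanded into a compare-and-branch diamond joined by a
PHI. A minimal C++ sketch of the control flow the expansion produces
(MachineBasicBlock construction elided):

// Shape of the branch sequence a CMOV_* pseudo expands to: branch on the
// EFLAGS condition, pick one value in each arm, join with a PHI.
template <typename T>
T expandCmovPseudo(bool eflagsCond, T tval, T fval) {
  T dst;
  if (eflagsCond)  // JCC over the "false" block
    dst = tval;
  else
    dst = fval;
  return dst;      // join block: PHI of the two assignments
}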
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
@@ -863,79 +858,6 @@ def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
"#ACQUIRE_MOV PSEUDO!",
[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
-//===----------------------------------------------------------------------===//
-// Conditional Move Pseudo Instructions.
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_FR32 : I<0, Pseudo,
- (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
- "#CMOV_FR32 PSEUDO!",
- [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_FR64 : I<0, Pseudo,
- (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
- "#CMOV_FR64 PSEUDO!",
- [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_V4F32 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V4F32 PSEUDO!",
- [(set VR128:$dst,
- (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2F64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2F64 PSEUDO!",
- [(set VR128:$dst,
- (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2I64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2I64 PSEUDO!",
- [(set VR128:$dst,
- (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V8F32 : I<0, Pseudo,
- (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
- "#CMOV_V8F32 PSEUDO!",
- [(set VR256:$dst,
- (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V4F64 : I<0, Pseudo,
- (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
- "#CMOV_V4F64 PSEUDO!",
- [(set VR256:$dst,
- (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V4I64 : I<0, Pseudo,
- (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond),
- "#CMOV_V4I64 PSEUDO!",
- [(set VR256:$dst,
- (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V8I64 : I<0, Pseudo,
- (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
- "#CMOV_V8I64 PSEUDO!",
- [(set VR512:$dst,
- (v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V8F64 : I<0, Pseudo,
- (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
- "#CMOV_V8F64 PSEUDO!",
- [(set VR512:$dst,
- (v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V16F32 : I<0, Pseudo,
- (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
- "#CMOV_V16F32 PSEUDO!",
- [(set VR512:$dst,
- (v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
- EFLAGS)))]>;
-}
-
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
@@ -1065,12 +987,12 @@ def : Pat<(X86tcret (load addr:$dst), imm:$off),
Requires<[Not64BitMode, IsNotPIC]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[Not64BitMode]>;
+ (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
+ Requires<[NotLP64]>;
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
@@ -1084,11 +1006,11 @@ def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
@@ -1142,11 +1064,12 @@ defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
+def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
+def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
def : Pat<(zextloadi64i1 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
+ (SUBREG_TO_REG (i64 0),
+ (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
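
An i1 in memory occupies a whole byte of which only bit 0 is defined, so a
zero-extending i1 load can no longer assume the upper seven bits are clear;
the patterns now mask the loaded byte with 1. A minimal C++ sketch of the new
lowering:

#include <cstdint>
#include <cstring>

// zextloadi32i1 after this change: load the byte, then force bits 1-7 to
// zero, since a stored i1 does not guarantee them.
uint32_t zextLoadI1(const uint8_t *src) {
  uint8_t byte;
  std::memcpy(&byte, src, 1);               // MOVZX32rm8
  return static_cast<uint32_t>(byte) & 1u;  // AND32ri ..., 1
}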
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
@@ -1314,7 +1237,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
// with implicit zero-extension instead of a 64-bit AND if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
// represented with a sign extension of an 8-bit constant, use that.
+// This can also reduce instruction size by eliminating the need for the REX
+// prefix.
+// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
+let AddedComplexity = 1 in {
def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(SUBREG_TO_REG
(i64 0),
@@ -1330,8 +1257,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
sub_32bit)>;
+} // AddedComplexity = 1
+// AddedComplexity is needed due to the increased complexity of the
+// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
+// the MOVZX patterns keeps them together in DAGIsel tables.
+let AddedComplexity = 1 in {
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
@@ -1354,6 +1286,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
@@ -1372,6 +1305,7 @@ def : Pat<(and GR16:$src1, 0xff),
(EXTRACT_SUBREG (MOVZX32rr8 (i8
(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
+} // AddedComplexity = 1
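
The "r & (2^N-1) ==> movz" rewrites rely on the identity that masking with a
low all-ones constant equals zero-extending the low N bits. A C++ check of
that identity:

#include <cassert>
#include <cstdint>

// Masking with 2^N-1 is exactly a zero-extension of the low N bits.
void checkMovzxIdentity(uint64_t x) {
  assert((x & 0xffu)       == uint64_t(uint8_t(x)));
  assert((x & 0xffffu)     == uint64_t(uint16_t(x)));
  assert((x & 0xffffffffu) == uint64_t(uint32_t(x)));
}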
// sext_inreg patterns
@@ -1563,8 +1497,12 @@ def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// Helper imms that check if a mask doesn't change significant shift bits.
-def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
-def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
+def immShift32 : ImmLeaf<i8, [{
+ return countTrailingOnes<uint64_t>(Imm) >= 5;
+}]>;
+def immShift64 : ImmLeaf<i8, [{
+ return countTrailingOnes<uint64_t>(Imm) >= 6;
+}]>;
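
Hardware shifts read only the low 5 (32-bit) or 6 (64-bit) bits of the count,
so an AND on the shift amount is dead as long as the mask's trailing run of
ones covers those bits; that is what the countTrailingOnes tests check. A
minimal C++ sketch of the predicate:

#include <cstdint>

// A shift-amount mask is droppable when its trailing ones cover every bit
// the shifter actually reads (5 bits for 32-bit ops, 6 for 64-bit ops).
static int countTrailingOnes64(uint64_t v) {
  int n = 0;
  while (v & 1) { ++n; v >>= 1; }
  return n;
}
bool maskKeepsShift32(uint8_t mask) { return countTrailingOnes64(mask) >= 5; }
bool maskKeepsShift64(uint8_t mask) { return countTrailingOnes64(mask) >= 6; }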
// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 7baff19..6ab961f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -240,13 +240,13 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// mcinst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst),
- "jmp\t$dst # TAILCALL",
+ "jmp\t$dst",
[], IIC_JMP_REL>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
- "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+ "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
@@ -278,18 +278,6 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
"lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
}
-let isCall = 1, isCodeGenOnly = 1 in
- // __chkstk(MSVC): clobber R10, R11 and EFLAGS
- // ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS
- // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
- let Defs = [RAX, R10, R11, RSP, EFLAGS],
- Uses = [RSP] in {
- def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst),
- "call{q}\t$dst", [], IIC_CALL_RI>,
- Requires<[IsWin64]>, Sched<[WriteJump]>;
- }
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
SchedRW = [WriteJump] in {
@@ -302,13 +290,25 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset), []>;
- def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst),
- "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>;
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
+ "jmp\t$dst", [], IIC_JMP_REL>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
- "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+ "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
- "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
+ "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
+
+ // Win64 wants jumps leaving the function to have a REX_W prefix.
+ let hasREX_WPrefix = 1 in {
+ def TAILJMPd64_REX : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst),
+ "rex64 jmp\t$dst", [], IIC_JMP_REL>;
+ def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
+ "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
+ "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
+ }
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index eea4f17..c4b2d6d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -139,11 +139,11 @@ def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
- Sched<[WriteALU]>;
+ Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index 2993e42..7cc3b59 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -183,19 +183,24 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+// These patterns pass the sources in 1,2,3 order rather than 2,1,3, even
+// though they match the intrinsic to the 213 version of the instruction.
+// This is because src1 is tied to dest, and the scalar intrinsics
+// require the pass-through values to come from the first source
+// operand, not the second.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SSr213r")
- (COPY_TO_REGCLASS $src2, FR32),
(COPY_TO_REGCLASS $src1, FR32),
+ (COPY_TO_REGCLASS $src2, FR32),
(COPY_TO_REGCLASS $src3, FR32)),
VR128)>;
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
(COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"SDr213r")
- (COPY_TO_REGCLASS $src2, FR64),
(COPY_TO_REGCLASS $src1, FR64),
+ (COPY_TO_REGCLASS $src2, FR64),
(COPY_TO_REGCLASS $src3, FR64)),
VR128)>;
}
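
The operand swap matters because only lane 0 is computed; lanes 1..N-1 are
passed through from whichever source is tied to the destination, and the
intrinsics define the pass-through to come from the first argument. A C++
model of the scalar-FMA semantics (4-lane float vector, illustrative names;
the fused rounding of real FMA is not modeled):

#include <array>

// Scalar fmadd on a 4-lane vector: only lane 0 is computed; the upper
// lanes come from the first source, which is why $src1 must be the
// destination-tied operand of the 213 instruction form.
std::array<float, 4> fmaddSs(const std::array<float, 4> &a,  // $src1
                             const std::array<float, 4> &b,  // $src2
                             const std::array<float, 4> &c)  // $src3
{
  std::array<float, 4> r = a;    // pass-through lanes 1-3 from src1
  r[0] = a[0] * b[0] + c[0];     // lane 0: the multiply-add
  return r;
}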
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index e1abf26..0dd05d8 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -350,21 +350,21 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
-def CMOVB_F : FPI<0xDA, MRM0r, (outs RST:$op), (ins),
+def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
"fcmovb\t{$op, %st(0)|st(0), $op}">;
-def CMOVBE_F : FPI<0xDA, MRM2r, (outs RST:$op), (ins),
+def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
"fcmovbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVE_F : FPI<0xDA, MRM1r, (outs RST:$op), (ins),
+def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
"fcmove\t{$op, %st(0)|st(0), $op}">;
-def CMOVP_F : FPI<0xDA, MRM3r, (outs RST:$op), (ins),
+def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
"fcmovu\t{$op, %st(0)|st(0), $op}">;
-def CMOVNB_F : FPI<0xDB, MRM0r, (outs RST:$op), (ins),
+def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
"fcmovnb\t{$op, %st(0)|st(0), $op}">;
-def CMOVNBE_F: FPI<0xDB, MRM2r, (outs RST:$op), (ins),
+def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
"fcmovnbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVNE_F : FPI<0xDB, MRM1r, (outs RST:$op), (ins),
+def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
"fcmovne\t{$op, %st(0)|st(0), $op}">;
-def CMOVNP_F : FPI<0xDB, MRM3r, (outs RST:$op), (ins),
+def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
"fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
@@ -636,12 +636,12 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", [], IIC_FXSAVE>, TB;
def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
+ "fxsave64\t$dst", [], IIC_FXSAVE>, TB,
Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", [], IIC_FXRSTOR>, TB;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor{q|64}\t$src", [], IIC_FXRSTOR>, TB,
+ "fxrstor64\t$src", [], IIC_FXRSTOR>, TB,
Requires<[In64BitMode]>;
} // SchedRW
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index ba23878..331faf2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -34,23 +34,27 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
def MRM6m : Format<30>; def MRM7m : Format<31>;
def MRM_C0 : Format<32>; def MRM_C1 : Format<33>; def MRM_C2 : Format<34>;
-def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>;
-def MRM_C9 : Format<38>; def MRM_CA : Format<39>; def MRM_CB : Format<40>;
-def MRM_CF : Format<41>; def MRM_D0 : Format<42>; def MRM_D1 : Format<43>;
-def MRM_D4 : Format<44>; def MRM_D5 : Format<45>; def MRM_D6 : Format<46>;
-def MRM_D7 : Format<47>; def MRM_D8 : Format<48>; def MRM_D9 : Format<49>;
-def MRM_DA : Format<50>; def MRM_DB : Format<51>; def MRM_DC : Format<52>;
-def MRM_DD : Format<53>; def MRM_DE : Format<54>; def MRM_DF : Format<55>;
-def MRM_E0 : Format<56>; def MRM_E1 : Format<57>; def MRM_E2 : Format<58>;
-def MRM_E3 : Format<59>; def MRM_E4 : Format<60>; def MRM_E5 : Format<61>;
-def MRM_E8 : Format<62>; def MRM_E9 : Format<63>; def MRM_EA : Format<64>;
-def MRM_EB : Format<65>; def MRM_EC : Format<66>; def MRM_ED : Format<67>;
-def MRM_EE : Format<68>; def MRM_F0 : Format<69>; def MRM_F1 : Format<70>;
-def MRM_F2 : Format<71>; def MRM_F3 : Format<72>; def MRM_F4 : Format<73>;
-def MRM_F5 : Format<74>; def MRM_F6 : Format<75>; def MRM_F7 : Format<76>;
-def MRM_F8 : Format<77>; def MRM_F9 : Format<78>; def MRM_FA : Format<79>;
-def MRM_FB : Format<80>; def MRM_FC : Format<81>; def MRM_FD : Format<82>;
-def MRM_FE : Format<83>; def MRM_FF : Format<84>;
+def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C5 : Format<37>;
+def MRM_C6 : Format<38>; def MRM_C7 : Format<39>; def MRM_C8 : Format<40>;
+def MRM_C9 : Format<41>; def MRM_CA : Format<42>; def MRM_CB : Format<43>;
+def MRM_CC : Format<44>; def MRM_CD : Format<45>; def MRM_CE : Format<46>;
+def MRM_CF : Format<47>; def MRM_D0 : Format<48>; def MRM_D1 : Format<49>;
+def MRM_D2 : Format<50>; def MRM_D3 : Format<51>; def MRM_D4 : Format<52>;
+def MRM_D5 : Format<53>; def MRM_D6 : Format<54>; def MRM_D7 : Format<55>;
+def MRM_D8 : Format<56>; def MRM_D9 : Format<57>; def MRM_DA : Format<58>;
+def MRM_DB : Format<59>; def MRM_DC : Format<60>; def MRM_DD : Format<61>;
+def MRM_DE : Format<62>; def MRM_DF : Format<63>; def MRM_E0 : Format<64>;
+def MRM_E1 : Format<65>; def MRM_E2 : Format<66>; def MRM_E3 : Format<67>;
+def MRM_E4 : Format<68>; def MRM_E5 : Format<69>; def MRM_E6 : Format<70>;
+def MRM_E7 : Format<71>; def MRM_E8 : Format<72>; def MRM_E9 : Format<73>;
+def MRM_EA : Format<74>; def MRM_EB : Format<75>; def MRM_EC : Format<76>;
+def MRM_ED : Format<77>; def MRM_EE : Format<78>; def MRM_EF : Format<79>;
+def MRM_F0 : Format<80>; def MRM_F1 : Format<81>; def MRM_F2 : Format<82>;
+def MRM_F3 : Format<83>; def MRM_F4 : Format<84>; def MRM_F5 : Format<85>;
+def MRM_F6 : Format<86>; def MRM_F7 : Format<87>; def MRM_F8 : Format<88>;
+def MRM_F9 : Format<89>; def MRM_FA : Format<90>; def MRM_FB : Format<91>;
+def MRM_FC : Format<92>; def MRM_FD : Format<93>; def MRM_FE : Format<94>;
+def MRM_FF : Format<95>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -422,8 +426,9 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = NoItinerary>
- : I<o, F, outs, ins, asm, pattern, itin> {
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
@@ -437,12 +442,29 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
asm));
}
-// SIi8 - SSE 1 & 2 scalar instructions
+// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
+class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
+ let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
+ !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
+ !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
+ [UseSSE1])))));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
+ !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
+ asm));
+}
+// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
[UseSSE2])));
@@ -742,6 +764,14 @@ class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
+class AVX512PSIi8Base : PS {
+ Domain ExeDomain = SSEPackedSingle;
+ ImmType ImmT = Imm8;
+}
+class AVX512PDIi8Base : PD {
+ Domain ExeDomain = SSEPackedDouble;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7069bd6..79d213c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -12,10 +12,23 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// MMX specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+// GPR to low word of MMX.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
+
+//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def load_mvmmx : PatFrag<(ops node:$ptr),
+ (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
@@ -134,14 +147,21 @@ def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
def X86CmpMaskCC :
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>,
- SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
+def X86CmpMaskCCRound :
+ SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
+ SDTCisInt<4>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -160,16 +180,21 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
+def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
+def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisVec<1>,
- SDTCisSameAs<2, 1>]>>;
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCVecEltisVT<0, i1>,
+ SDTCisSameNumEltsAs<0, 1>]>>;
def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisVec<1>,
- SDTCisSameAs<2, 1>]>>;
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCVecEltisVT<0, i1>,
+ SDTCisSameNumEltsAs<0, 1>]>>;
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
@@ -201,12 +226,19 @@ def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
SDTCisVec<0>, SDTCisInt<2>]>;
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisVec<0>, SDTCisInt<3>]>;
+def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
@@ -247,8 +279,10 @@ def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
+def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSubVecOfVec<1, 0>]>, []>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
@@ -258,6 +292,13 @@ def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
@@ -265,12 +306,20 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
+def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
+def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>;
+def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
+def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
+def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
+
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
+def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -346,6 +395,15 @@ def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+// These are needed to match a scalar load that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar.
+def loadf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
+def loadf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;
+
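
There is no scalar form of the FP logical instructions, so a scalar operand
coming from memory must be matched as a full 16-byte load whose element 0 is
then extracted; the operation itself is just a bitwise op on the float's
representation. A C++ sketch of what a scalar "andps" computes once the
fragment has supplied element 0:

#include <cstdint>
#include <cstring>

// Bitwise AND of two floats' bit patterns, the scalar payload of andps.
float scalarFloatAnd(float a, float b) {
  uint32_t ua, ub, ur;
  std::memcpy(&ua, &a, sizeof ua);
  std::memcpy(&ub, &b, sizeof ub);
  ur = ua & ub;
  float r;
  std::memcpy(&r, &ur, sizeof r);
  return r;
}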
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
@@ -437,17 +495,15 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-// 256-bit memop pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
-def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
-def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+// These are needed to match a scalar memop that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar.
+def memopfsf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
+def memopfsf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;
-// 512-bit memop pattern fragments
-def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
-def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>;
-def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -484,6 +540,52 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
+ Mgt->getBasePtr().getValueType() == MVT::v8i32);
+ return false;
+}]>;
+
+def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v8i64 ||
+ Mgt->getBasePtr().getValueType() == MVT::v8i64);
+ return false;
+}]>;
+def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ return (Mgt->getIndex().getValueType() == MVT::v16i32 ||
+ Mgt->getBasePtr().getValueType() == MVT::v16i32);
+ return false;
+}]>;
+
+def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v8i32 ||
+ Sc->getBasePtr().getValueType() == MVT::v8i32);
+ return false;
+}]>;
+
+def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v8i64 ||
+ Sc->getBasePtr().getValueType() == MVT::v8i64);
+ return false;
+}]>;
+def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ return (Sc->getIndex().getValueType() == MVT::v16i32 ||
+ Sc->getBasePtr().getValueType() == MVT::v16i32);
+ return false;
+}]>;
+
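
These fragments only classify the node by its index (or base pointer) vector
type so that ISel can pick the right gather/scatter encoding; the underlying
operation loads or stores one lane per set mask bit. A minimal C++ model of
the v8i32-indexed gather, with illustrative names:

#include <cstdint>

// Masked gather, 8 lanes with 32-bit indices: active lanes load from
// base[idx[lane]], inactive lanes keep their pass-through value.
void maskedGatherV8I32(int32_t dst[8], const int32_t passthru[8],
                       const int32_t *base, const int32_t idx[8],
                       uint8_t mask) {
  for (int lane = 0; lane < 8; ++lane)
    dst[lane] = ((mask >> lane) & 1) ? base[idx[lane]] : passthru[lane];
}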
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -522,7 +624,7 @@ def fp32imm0 : PatLeaf<(f32 fpimm), [{
def I8Imm : SDNodeXForm<imm, [{
// Transformation function: get the low 8 bits.
- return getI8Imm((uint8_t)N->getZExtValue());
+ return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}]>;
def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
@@ -533,31 +635,31 @@ def FROUND_CURRENT : ImmLeaf<i32, [{
// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm<imm, [{
// Transformation function: imm >> 3
- return getI32Imm(N->getZExtValue() >> 3);
+ return getI32Imm(N->getZExtValue() >> 3, SDLoc(N));
}]>;
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
- return getI8Imm(X86::getExtractVEXTRACT128Immediate(N));
+ return getI8Imm(X86::getExtractVEXTRACT128Immediate(N), SDLoc(N));
}]>;
// INSERT_get_vinsert128_imm xform function: convert insert_subvector index to
// VINSERTF128/VINSERTI128 imm.
def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
- return getI8Imm(X86::getInsertVINSERT128Immediate(N));
+ return getI8Imm(X86::getInsertVINSERT128Immediate(N), SDLoc(N));
}]>;
// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
// to VEXTRACTF64x4 imm.
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
- return getI8Imm(X86::getExtractVEXTRACT256Immediate(N));
+ return getI8Imm(X86::getExtractVEXTRACT256Immediate(N), SDLoc(N));
}]>;
// INSERT_get_vinsert256_imm xform function: convert insert_subvector index to
// VINSERTF64x4 imm.
def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
- return getI8Imm(X86::getInsertVINSERT256Immediate(N));
+ return getI8Imm(X86::getInsertVINSERT256Immediate(N), SDLoc(N));
}]>;
def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
@@ -587,3 +689,55 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;
+def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ return isa<MaskedLoadSDNode>(N);
+}]>;
+
+def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return isa<MaskedStoreSDNode>(N);
+}]>;
+
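
The aligned and unaligned fragments match the same masked operation and differ
only in the minimum alignment they demand of the memory operand (16/32/64
bytes for 128/256/512-bit vectors). A C++ model of a masked load plus the
alignment predicate, with illustrative names:

#include <cstdint>

// Masked load, 4 lanes: inactive lanes take the pass-through value and
// their memory is never touched.
void maskedLoadV4I32(int32_t dst[4], const int32_t *src,
                     const int32_t passthru[4], uint8_t mask) {
  for (int lane = 0; lane < 4; ++lane)
    dst[lane] = ((mask >> lane) & 1) ? src[lane] : passthru[lane];
}
// The extra test the masked_*_aligned<N> fragments add.
bool qualifiesAsAligned(uint64_t alignment, unsigned vectorBytes) {
  return alignment >= vectorBytes;
}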
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 6b6b8ae..43decf7 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -91,7 +91,7 @@ enum {
TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
-struct X86OpTblEntry {
+struct X86MemoryFoldTableEntry {
uint16_t RegOp;
uint16_t MemOp;
uint16_t Flags;
@@ -104,9 +104,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
- Subtarget(STI), RI(STI) {
+ Subtarget(STI), RI(STI.getTargetTriple()) {
- static const X86OpTblEntry OpTbl2Addr[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -269,17 +269,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::XOR8rr, X86::XOR8mr, 0 }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i].RegOp;
- unsigned MemOp = OpTbl2Addr[i].MemOp;
- unsigned Flags = OpTbl2Addr[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable2Addr[i].RegOp;
+ unsigned MemOp = MemoryFoldTable2Addr[i].MemOp;
+ unsigned Flags = MemoryFoldTable2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
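
Each table row pairs a register-form opcode with its memory-folded form and
flags (operand index, folded load/store, alignment), and AddTableEntry
populates both the fold and unfold maps. A hypothetical C++ sketch of the
lookup structure these loops build (names are illustrative, not the actual
X86InstrInfo API):

#include <cstdint>
#include <unordered_map>

// Table-driven folding: map a register-form opcode to its memory form and
// flags so "op reg, reg" can be rewritten as "op reg, [mem]" and back.
struct FoldInfo { uint16_t MemOp; uint16_t Flags; };

class FoldTable {
  std::unordered_map<uint16_t, FoldInfo> RegToMem;
public:
  void add(uint16_t regOp, uint16_t memOp, uint16_t flags) {
    RegToMem[regOp] = {memOp, flags};
  }
  const FoldInfo *lookup(uint16_t regOp) const {
    auto it = RegToMem.find(regOp);
    return it == RegToMem.end() ? nullptr : &it->second;
  }
};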
- static const X86OpTblEntry OpTbl0[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
@@ -333,6 +333,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
{ X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
{ X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
+ { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
+ { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
{ X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
{ X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
{ X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
@@ -351,10 +353,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
{ X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
{ X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
+ { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
{ X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
{ X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
{ X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
+
// AVX 128-bit versions of foldable instructions
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -367,6 +371,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
{ X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
{ X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
+ { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
+ { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
+
// AVX 256-bit foldable instructions
{ X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -374,6 +381,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
+
// AVX-512 foldable instructions
{ X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
{ X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
@@ -386,6 +394,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+
// AVX-512 foldable instructions (256-bit versions)
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -397,6 +406,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+
// AVX-512 foldable instructions (128-bit versions)
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -407,18 +417,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
{ X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
+
+ // F16C foldable instructions
+ { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE },
+ { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i].RegOp;
- unsigned MemOp = OpTbl0[i].MemOp;
- unsigned Flags = OpTbl0[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable0[i].RegOp;
+ unsigned MemOp = MemoryFoldTable0[i].MemOp;
+ unsigned Flags = MemoryFoldTable0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const X86OpTblEntry OpTbl1[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -445,10 +459,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
+ { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_ALIGN_16 },
{ X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
{ X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
+ { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_ALIGN_16 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -488,13 +504,33 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
{ X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
{ X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
+ { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
+ { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
+ { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
+ { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
+ { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
+ { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_ALIGN_16 },
+ { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_ALIGN_16 },
+ { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_ALIGN_16 },
+ { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_ALIGN_16 },
+ { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_ALIGN_16 },
+ { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_ALIGN_16 },
+ { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_ALIGN_16 },
+ { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_ALIGN_16 },
+ { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_ALIGN_16 },
+ { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_ALIGN_16 },
+ { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_ALIGN_16 },
+ { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_ALIGN_16 },
{ X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
{ X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
{ X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
+ { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
{ X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
- { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 },
+ { X86::RCPSSr, X86::RCPSSm, 0 },
+ { X86::RCPSSr_Int, X86::RCPSSm_Int, 0 },
+ { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
+ { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
{ X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
- { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 },
{ X86::RSQRTSSr, X86::RSQRTSSm, 0 },
{ X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
{ X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
@@ -510,6 +546,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
{ X86::UCOMISDrr, X86::UCOMISDrm, 0 },
{ X86::UCOMISSrr, X86::UCOMISSrm, 0 },
+
+  // MMX versions of foldable instructions
+ { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
+ { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
+ { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
+ { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
+ { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
+ { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
+ { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
+ { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
+ { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
+ { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
+
+  // 3DNow! versions of foldable instructions
+ { X86::PF2IDrr, X86::PF2IDrm, 0 },
+ { X86::PF2IWrr, X86::PF2IWrm, 0 },
+ { X86::PFRCPrr, X86::PFRCPrm, 0 },
+ { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
+ { X86::PI2FDrr, X86::PI2FDrm, 0 },
+ { X86::PI2FWrr, X86::PI2FWrm, 0 },
+ { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
@@ -527,10 +585,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+ { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, 0 },
{ X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
{ X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
{ X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
{ X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
+ { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, 0 },
{ X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
@@ -541,8 +601,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
- { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
- { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
+ { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
+ { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
@@ -550,51 +610,147 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
{ X86::VPABSDrr128, X86::VPABSDrm128, 0 },
{ X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
+ { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
+ { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
+ { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 },
+ { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
{ X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
{ X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, 0 },
+ { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, 0 },
+ { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, 0 },
+ { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, 0 },
+ { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, 0 },
+ { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, 0 },
+ { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, 0 },
+ { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, 0 },
+ { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, 0 },
+ { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, 0 },
+ { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, 0 },
+ { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, 0 },
{ X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VPTESTrr, X86::VPTESTrm, 0 },
{ X86::VRCPPSr, X86::VRCPPSm, 0 },
- { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 },
+ { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
+ { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
{ X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
- { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 },
{ X86::VSQRTPDr, X86::VSQRTPDm, 0 },
{ X86::VSQRTPSr, X86::VSQRTPSm, 0 },
+ { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
+ { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
{ X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
- { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
// AVX 256-bit foldable instructions
+ { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 },
{ X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
{ X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
{ X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
{ X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
+ { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, 0 },
{ X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
{ X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
+ { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
+ { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
+ { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
{ X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
+ { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
{ X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
{ X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
{ X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+ { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
+ { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
// AVX2 foldable instructions
+
+ // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
+ // VBROADCASTS{SD}rm memory instructions were available from AVX1.
+ // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
+ // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
+ // so they don't need an equivalent limitation.
+ { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
{ X86::VPABSBrr256, X86::VPABSBrm256, 0 },
{ X86::VPABSDrr256, X86::VPABSDrm256, 0 },
{ X86::VPABSWrr256, X86::VPABSWrm256, 0 },
+ { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, 0 },
+ { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, 0 },
+ { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, 0 },
+ { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, 0 },
+ { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, 0 },
+ { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, 0 },
+ { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, 0 },
+ { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, 0 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, 0 },
+ { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, 0 },
+ { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
+ { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
+ { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
+ { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, 0 },
+ { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, 0 },
+ { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, 0 },
+ { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
+ { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
+ { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
+ { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, 0 },
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+ // XOP foldable instructions
+ { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
+ { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 },
+ { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
+ { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 },
+ { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 },
+ { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 },
+ { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
+ { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
+ { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
+ { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
+ { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
+ { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
+ { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
+ { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
+ { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
+ { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
+ { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
+ { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
+ { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
+ { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
+ { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
+ { X86::VPROTBri, X86::VPROTBmi, 0 },
+ { X86::VPROTBrr, X86::VPROTBmr, 0 },
+ { X86::VPROTDri, X86::VPROTDmi, 0 },
+ { X86::VPROTDrr, X86::VPROTDmr, 0 },
+ { X86::VPROTQri, X86::VPROTQmi, 0 },
+ { X86::VPROTQrr, X86::VPROTQmr, 0 },
+ { X86::VPROTWri, X86::VPROTWmi, 0 },
+ { X86::VPROTWrr, X86::VPROTWmr, 0 },
+ { X86::VPSHABrr, X86::VPSHABmr, 0 },
+ { X86::VPSHADrr, X86::VPSHADmr, 0 },
+ { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
+ { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
+ { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
+ { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
+ { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
+ { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
+
// BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
{ X86::BEXTR64rr, X86::BEXTR64rm, 0 },
@@ -661,6 +817,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+
// AVX-512 foldable instructions (256-bit versions)
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
@@ -674,6 +831,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+
    // AVX-512 foldable instructions (128-bit versions)
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
@@ -687,24 +845,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
+ // F16C foldable instructions
+ { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
+ { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
+
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
{ X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
- { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
- { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }
+ { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i].RegOp;
- unsigned MemOp = OpTbl1[i].MemOp;
- unsigned Flags = OpTbl1[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable1[i].RegOp;
+ unsigned MemOp = MemoryFoldTable1[i].MemOp;
+ unsigned Flags = MemoryFoldTable1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const X86OpTblEntry OpTbl2[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -717,7 +879,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
{ X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
{ X86::ADDSDrr, X86::ADDSDrm, 0 },
+ { X86::ADDSDrr_Int, X86::ADDSDrm_Int, 0 },
{ X86::ADDSSrr, X86::ADDSSrm, 0 },
+ { X86::ADDSSrr_Int, X86::ADDSSrm_Int, 0 },
{ X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
{ X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
{ X86::AND16rr, X86::AND16rm, 0 },
@@ -784,10 +948,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
{ X86::CMPSDrr, X86::CMPSDrm, 0 },
{ X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
+ { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
{ X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
{ X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
{ X86::DIVSDrr, X86::DIVSDrm, 0 },
+ { X86::DIVSDrr_Int, X86::DIVSDrm_Int, 0 },
{ X86::DIVSSrr, X86::DIVSSrm, 0 },
+ { X86::DIVSSrr_Int, X86::DIVSSrm_Int, 0 },
+ { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
+ { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
+
+ // FIXME: We should not be folding Fs* scalar loads into vector
+ // instructions because the vector instructions require vector-sized
+ // loads. Lowering should create vector-sized instructions (the Fv*
+ // variants below) to allow load folding.
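+    // (For example, folding a 4-byte scalar load into FsANDPSrm widens the
+    // access to a 16-byte read, which may extend past the loaded object.)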
{ X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
{ X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
{ X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
@@ -796,6 +971,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
{ X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
{ X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
+
+ { X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
+ { X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },
+ { X86::FvANDPDrr, X86::FvANDPDrm, TB_ALIGN_16 },
+ { X86::FvANDPSrr, X86::FvANDPSrm, TB_ALIGN_16 },
+ { X86::FvORPDrr, X86::FvORPDrm, TB_ALIGN_16 },
+ { X86::FvORPSrr, X86::FvORPSrm, TB_ALIGN_16 },
+ { X86::FvXORPDrr, X86::FvXORPDrm, TB_ALIGN_16 },
+ { X86::FvXORPSrr, X86::FvXORPSrm, TB_ALIGN_16 },
{ X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
{ X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
{ X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
@@ -814,16 +998,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
{ X86::MULSDrr, X86::MULSDrm, 0 },
+ { X86::MULSDrr_Int, X86::MULSDrm_Int, 0 },
{ X86::MULSSrr, X86::MULSSrm, 0 },
+ { X86::MULSSrr_Int, X86::MULSSrm_Int, 0 },
{ X86::OR16rr, X86::OR16rm, 0 },
{ X86::OR32rr, X86::OR32rm, 0 },
{ X86::OR64rr, X86::OR64rm, 0 },
@@ -847,7 +1037,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
{ X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
{ X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
{ X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
+ { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
{ X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
{ X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
{ X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
@@ -862,7 +1054,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
{ X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
{ X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
- { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PINSRBrr, X86::PINSRBrm, 0 },
+ { X86::PINSRDrr, X86::PINSRDrm, 0 },
+ { X86::PINSRQrr, X86::PINSRQrm, 0 },
+ { X86::PINSRWrri, X86::PINSRWrmi, 0 },
{ X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
@@ -900,8 +1095,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
{ X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
{ X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
+ { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
{ X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
{ X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
+ { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
+ { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
{ X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
{ X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
{ X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
@@ -923,7 +1121,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
{ X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
{ X86::SUBSDrr, X86::SUBSDrm, 0 },
+ { X86::SUBSDrr_Int, X86::SUBSDrm_Int, 0 },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
+ { X86::SUBSSrr_Int, X86::SUBSSrm_Int, 0 },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
@@ -935,6 +1135,98 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::XOR8rr, X86::XOR8rm, 0 },
{ X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
{ X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
+
+ // MMX version of foldable instructions
+ { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
+ { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
+ { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
+ { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
+ { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
+ { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
+ { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
+ { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
+ { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
+ { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
+ { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
+ { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
+ { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
+ { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
+ { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
+ { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
+ { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
+ { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
+ { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
+ { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
+ { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
+ { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
+ { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
+ { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
+ { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
+ { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
+ { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
+ { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
+ { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
+ { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
+ { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+ { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
+ { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
+ { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
+ { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
+ { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
+ { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
+ { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
+ { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
+ { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
+ { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
+ { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
+ { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
+ { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
+ { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
+ { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
+ { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
+ { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
+ { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
+ { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
+ { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
+ { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
+ { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
+ { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
+ { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
+ { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
+ { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
+ { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
+ { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
+ { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
+ { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
+ { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
+ { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
+ { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
+ { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
+ { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
+ { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
+ { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
+ { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
+ { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+
+ // 3DNow! version of foldable instructions
+ { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
+ { X86::PFACCrr, X86::PFACCrm, 0 },
+ { X86::PFADDrr, X86::PFADDrm, 0 },
+ { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
+ { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
+ { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
+ { X86::PFMAXrr, X86::PFMAXrm, 0 },
+ { X86::PFMINrr, X86::PFMINrm, 0 },
+ { X86::PFMULrr, X86::PFMULrm, 0 },
+ { X86::PFNACCrr, X86::PFNACCrm, 0 },
+ { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
+ { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
+ { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
+ { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
+ { X86::PFSUBrr, X86::PFSUBrm, 0 },
+ { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
+ { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
{ X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
@@ -948,13 +1240,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
+ { X86::VRCPSSr, X86::VRCPSSm, 0 },
+ { X86::VRCPSSr_Int, X86::VRCPSSm_Int, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, 0 },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
{ X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
+ { X86::VADDSDrr_Int, X86::VADDSDrm_Int, 0 },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
+ { X86::VADDSSrr_Int, X86::VADDSSrm_Int, 0 },
{ X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
{ X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
{ X86::VANDNPDrr, X86::VANDNPDrm, 0 },
@@ -972,15 +1271,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VDIVPDrr, X86::VDIVPDrm, 0 },
{ X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
+ { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, 0 },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
- { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
- { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 },
- { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 },
- { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 },
- { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 },
- { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
- { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
- { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
+ { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, 0 },
+ { X86::VDPPDrri, X86::VDPPDrmi, 0 },
+ { X86::VDPPSrri, X86::VDPPSrmi, 0 },
+ // Do not fold VFs* loads because there are no scalar load variants for
+ // these instructions. When folded, the load is required to be 128-bits, so
+ // the load size would not match.
+ { X86::VFvANDNPDrr, X86::VFvANDNPDrm, 0 },
+ { X86::VFvANDNPSrr, X86::VFvANDNPSrm, 0 },
+ { X86::VFvANDPDrr, X86::VFvANDPDrm, 0 },
+ { X86::VFvANDPSrr, X86::VFvANDPSrm, 0 },
+ { X86::VFvORPDrr, X86::VFvORPDrm, 0 },
+ { X86::VFvORPSrr, X86::VFvORPSrm, 0 },
+ { X86::VFvXORPDrr, X86::VFvXORPDrm, 0 },
+ { X86::VFvXORPSrr, X86::VFvXORPSrm, 0 },
{ X86::VHADDPDrr, X86::VHADDPDrm, 0 },
{ X86::VHADDPSrr, X86::VHADDPSrm, 0 },
{ X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
@@ -990,16 +1296,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
{ X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
{ X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
{ X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
{ X86::VMULPSrr, X86::VMULPSrm, 0 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
+ { X86::VMULSDrr_Int, X86::VMULSDrm_Int, 0 },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
+ { X86::VMULSSrr_Int, X86::VMULSSrm_Int, 0 },
{ X86::VORPDrr, X86::VORPDrm, 0 },
{ X86::VORPSrr, X86::VORPSrm, 0 },
{ X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
@@ -1019,7 +1331,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDrr, X86::VPANDrm, 0 },
{ X86::VPAVGBrr, X86::VPAVGBrm, 0 },
{ X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
{ X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
{ X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
{ X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
{ X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
@@ -1036,6 +1350,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
{ X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
{ X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRBrr, X86::VPINSRBrm, 0 },
+ { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
+ { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
{ X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
@@ -1074,8 +1391,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRLWrr, X86::VPSRLWrm, 0 },
{ X86::VPSUBBrr, X86::VPSUBBrm, 0 },
{ X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
{ X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
{ X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
+ { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
{ X86::VPSUBWrr, X86::VPSUBWrm, 0 },
{ X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
{ X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
@@ -1091,13 +1411,16 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
{ X86::VSUBPSrr, X86::VSUBPSrm, 0 },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
+ { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, 0 },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
+ { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, 0 },
{ X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
{ X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
{ X86::VXORPDrr, X86::VXORPDrm, 0 },
{ X86::VXORPSrr, X86::VXORPSrm, 0 },
+
// AVX 256-bit foldable instructions
{ X86::VADDPDYrr, X86::VADDPDYrm, 0 },
{ X86::VADDPSYrr, X86::VADDPSYrm, 0 },
@@ -1115,6 +1438,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
{ X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
{ X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
{ X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
{ X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
{ X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
@@ -1141,6 +1465,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
{ X86::VXORPDYrr, X86::VXORPDYrm, 0 },
{ X86::VXORPSYrr, X86::VXORPSYrm, 0 },
+
// AVX2 foldable instructions
{ X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
{ X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
@@ -1162,6 +1487,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
{ X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
{ X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
{ X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
{ X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
{ X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
@@ -1173,9 +1499,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
{ X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
{ X86::VPERMDYrr, X86::VPERMDYrm, 0 },
- { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
{ X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
- { X86::VPERMQYri, X86::VPERMQYmi, 0 },
{ X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
{ X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
{ X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
@@ -1230,8 +1554,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
{ X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
{ X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
{ X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
{ X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
+ { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
{ X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
{ X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
{ X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
@@ -1242,41 +1569,81 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
{ X86::VPXORYrr, X86::VPXORYrm, 0 },
- // FIXME: add AVX 256-bit foldable instructions
// FMA4 foldable patterns
- { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, 0 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, 0 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, 0 },
+ { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
+ { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, 0 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, 0 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, 0 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, 0 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, 0 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, 0 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, 0 },
+ { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
+ { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, 0 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, 0 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, 0 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, 0 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, 0 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, 0 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, 0 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, 0 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, 0 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, 0 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, 0 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, 0 },
+
+ // XOP foldable instructions
+ { X86::VPCMOVrr, X86::VPCMOVmr, 0 },
+ { X86::VPCMOVrrY, X86::VPCMOVmrY, 0 },
+ { X86::VPCOMBri, X86::VPCOMBmi, 0 },
+ { X86::VPCOMDri, X86::VPCOMDmi, 0 },
+ { X86::VPCOMQri, X86::VPCOMQmi, 0 },
+ { X86::VPCOMWri, X86::VPCOMWmi, 0 },
+ { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
+ { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
+ { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
+ { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
+ { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
+ { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
+ { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
+ { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
+ { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
+ { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
+ { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
+ { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
+ { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
+ { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
+ { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
+ { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
+ { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
+ { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
+ { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
+ { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
+ { X86::VPPERMrr, X86::VPPERMmr, 0 },
+ { X86::VPROTBrr, X86::VPROTBrm, 0 },
+ { X86::VPROTDrr, X86::VPROTDrm, 0 },
+ { X86::VPROTQrr, X86::VPROTQrm, 0 },
+ { X86::VPROTWrr, X86::VPROTWrm, 0 },
+ { X86::VPSHABrr, X86::VPSHABrm, 0 },
+ { X86::VPSHADrr, X86::VPSHADrm, 0 },
+ { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
+ { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
+ { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
+ { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
+ { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
+ { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
// BMI/BMI2 foldable instructions
{ X86::ANDN32rr, X86::ANDN32rm, 0 },
@@ -1345,10 +1712,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
{ X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
{ X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
- { X86::VAESDECLASTrr, X86::VAESDECLASTrm, TB_ALIGN_16 },
- { X86::VAESDECrr, X86::VAESDECrm, TB_ALIGN_16 },
- { X86::VAESENCLASTrr, X86::VAESENCLASTrm, TB_ALIGN_16 },
- { X86::VAESENCrr, X86::VAESENCrm, TB_ALIGN_16 },
+ { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
+ { X86::VAESDECrr, X86::VAESDECrm, 0 },
+ { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
+ { X86::VAESENCrr, X86::VAESENCrm, 0 },
// SHA foldable instructions
{ X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
@@ -1357,20 +1724,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
{ X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
{ X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
- { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 },
+ { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i].RegOp;
- unsigned MemOp = OpTbl2[i].MemOp;
- unsigned Flags = OpTbl2[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable2[i].RegOp;
+ unsigned MemOp = MemoryFoldTable2[i].MemOp;
+ unsigned Flags = MemoryFoldTable2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
- static const X86OpTblEntry OpTbl3[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
// FMA foldable instructions
{ X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
{ X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
@@ -1511,6 +1878,16 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+
+ // XOP foldable instructions
+ { X86::VPCMOVrr, X86::VPCMOVrm, 0 },
+ { X86::VPCMOVrrY, X86::VPCMOVrmY, 0 },
+ { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
+ { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
+ { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
+ { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
+ { X86::VPPERMrr, X86::VPPERMrm, 0 },
+
// AVX-512 VPERMI instructions with 3 source operands.
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
@@ -1566,17 +1943,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
- unsigned RegOp = OpTbl3[i].RegOp;
- unsigned MemOp = OpTbl3[i].MemOp;
- unsigned Flags = OpTbl3[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable3[i].RegOp;
+ unsigned MemOp = MemoryFoldTable3[i].MemOp;
+ unsigned Flags = MemoryFoldTable3[i].Flags;
AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
RegOp, MemOp,
// Index 3, folded load
Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
- static const X86OpTblEntry OpTbl4[] = {
+ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
// AVX-512 foldable instructions
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
@@ -1618,10 +1995,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
};
- for (unsigned i = 0, e = array_lengthof(OpTbl4); i != e; ++i) {
- unsigned RegOp = OpTbl4[i].RegOp;
- unsigned MemOp = OpTbl4[i].MemOp;
- unsigned Flags = OpTbl4[i].Flags;
+ for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) {
+ unsigned RegOp = MemoryFoldTable4[i].RegOp;
+ unsigned MemOp = MemoryFoldTable4[i].MemOp;
+ unsigned Flags = MemoryFoldTable4[i].Flags;
AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
RegOp, MemOp,
// Index 4, folded load
@@ -1699,7 +2076,7 @@ int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
if (MI->getOpcode() == getCallFrameSetupOpcode() ||
MI->getOpcode() == getCallFrameDestroyOpcode()) {
unsigned StackAlign = TFI->getStackAlignment();
- int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
+ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
StackAlign;
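+    // This rounds the frame-setup amount up to the stack alignment, e.g.
+    // an immediate of 20 with a 16-byte stack alignment rounds up to 32.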
SPAdj -= MI->getOperand(1).getImm();
@@ -1709,8 +2086,8 @@ int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
else
return -SPAdj;
}
-
- // To know whether a call adjusts the stack, we need information
+
+ // To know whether a call adjusts the stack, we need information
// that is bound to the following ADJCALLSTACKUP pseudo.
// Look for the next ADJCALLSTACKUP that follows the call.
if (MI->isCall()) {
@@ -1733,7 +2110,7 @@ int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
// Currently handle only PUSHes we can reasonably expect to see
// in call sequences
switch (MI->getOpcode()) {
- default:
+ default:
return 0;
case X86::PUSH32i8:
case X86::PUSH32r:
@@ -1744,7 +2121,7 @@ int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
}
}
-/// isFrameOperand - Return true and the FrameIndex if the specified
+/// Return true and the FrameIndex if the specified
/// operand and following operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const {
@@ -1871,8 +2248,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
return 0;
}
-/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
-/// X86::MOVPC32r.
+/// Return true if register is PIC base, e.g. defined by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
// Don't waste compile time scanning use-def chains of physregs.
if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
@@ -2068,8 +2444,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
-/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
-/// is not marked dead.
+/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -2081,8 +2456,7 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
return false;
}
-/// getTruncatedShiftCount - check whether the shift count for a machine operand
-/// is non-zero.
+/// Check whether the shift count for a machine operand is non-zero.
inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
unsigned ShiftAmtOperandIdx) {
// The shift count is six bits with the REX.W prefix and five bits without.
@@ -2091,7 +2465,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
return Imm & ShiftCountMask;
}
-/// isTruncatedShiftCountForLEA - check whether the given shift count is appropriate
+/// Check whether the given shift count
/// can be represented by a LEA instruction.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
// Left shift instructions can be transformed into load-effective-address
@@ -2173,10 +2547,9 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
return true;
}
-/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
-/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
-/// to a 32-bit superregister and then truncating back down to a 16-bit
-/// subregister.
+/// Helper for convertToThreeAddress when 16-bit LEA is disabled: use 32-bit
+/// LEA to form 3-address code by promoting to a 32-bit superregister and then
+/// truncating back down to a 16-bit subregister.
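+/// Roughly: the 16-bit operands are inserted into undef 32-bit virtual
+/// registers, a 32-bit LEA performs the arithmetic, and the sub_16bit
+/// subregister of the result is copied back out (a sketch of the shape,
+/// not the exact instruction sequence).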
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
@@ -2283,7 +2656,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
return ExtMI;
}
-/// convertToThreeAddress - This method must be implemented by targets that
+/// This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
@@ -2558,8 +2931,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMI;
}
-/// commuteInstruction - We have a few instructions that must be hacked on to
-/// commute them.
+/// We have a few instructions that must be hacked on to commute them.
///
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
@@ -2627,6 +2999,71 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Mask ^ Imm);
return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
+ case X86::PCLMULQDQrr:
+  case X86::VPCLMULQDQrr: {
+ // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
+ // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
+ unsigned Imm = MI->getOperand(3).getImm();
+ unsigned Src1Hi = Imm & 0x01;
+ unsigned Src2Hi = Imm & 0x10;
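+    // Commuting the sources just exchanges the two selector bits, e.g.
+    // Imm = 0x01 (SRC1 high half, SRC2 low half) becomes Imm = 0x10.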
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMPPDrri:
+ case X86::CMPPSrri:
+ case X86::VCMPPDrri:
+ case X86::VCMPPSrri:
+ case X86::VCMPPDYrri:
+ case X86::VCMPPSYrri: {
+ // Float comparison can be safely commuted for
+ // Ordered/Unordered/Equal/NotEqual tests
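+    // since those predicates are symmetric: swapping the source operands
+    // leaves the comparison mask unchanged.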
+ unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ default:
+ return nullptr;
+ }
+ }
+ case X86::VPCOMBri: case X86::VPCOMUBri:
+ case X86::VPCOMDri: case X86::VPCOMUDri:
+ case X86::VPCOMQri: case X86::VPCOMUQri:
+ case X86::VPCOMWri: case X86::VPCOMUWri: {
+ // Flip comparison mode immediate (if necessary).
+ unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ case 0x00: Imm = 0x02; break; // LT -> GT
+ case 0x01: Imm = 0x03; break; // LE -> GE
+ case 0x02: Imm = 0x00; break; // GT -> LT
+ case 0x03: Imm = 0x01; break; // GE -> LE
+ case 0x04: // EQ
+ case 0x05: // NE
+ case 0x06: // FALSE
+ case 0x07: // TRUE
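+      // EQ/NE are symmetric and FALSE/TRUE ignore the operands, so the
+      // immediate needs no change for these four predicates.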
+ default:
+ break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->getOperand(3).setImm(Imm);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
@@ -2711,20 +3148,26 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
- case X86::BLENDPDrri:
- case X86::BLENDPSrri:
- case X86::PBLENDWrri:
- case X86::VBLENDPDrri:
- case X86::VBLENDPSrri:
- case X86::VBLENDPDYrri:
- case X86::VBLENDPSYrri:
- case X86::VPBLENDDrri:
- case X86::VPBLENDDYrri:
- case X86::VPBLENDWrri:
- case X86::VPBLENDWYrri:
- SrcOpIdx1 = 1;
- SrcOpIdx2 = 2;
- return true;
+ case X86::CMPPDrri:
+ case X86::CMPPSrri:
+ case X86::VCMPPDrri:
+ case X86::VCMPPSrri:
+ case X86::VCMPPDYrri:
+ case X86::VCMPPSYrri: {
+ // Float comparison can be safely commuted for
+ // Ordered/Unordered/Equal/NotEqual tests
+ unsigned Imm = MI->getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ SrcOpIdx1 = 1;
+ SrcOpIdx2 = 2;
+ return true;
+ }
+ return false;
+ }
case X86::VFMADDPDr231r:
case X86::VFMADDPSr231r:
case X86::VFMADDSDr231r:
@@ -2779,7 +3222,7 @@ static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
}
}
-/// getCondFromSETOpc - return condition code of a SET opcode.
+/// Return condition code of a SET opcode.
static X86::CondCode getCondFromSETOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
@@ -2802,7 +3245,7 @@ static X86::CondCode getCondFromSETOpc(unsigned Opc) {
}
}
-/// getCondFromCmovOpc - return condition code of a CMov opcode.
+/// Return condition code of a CMov opcode.
X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
@@ -2879,7 +3322,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
}
}
-/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
switch (CC) {
@@ -2903,9 +3346,8 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
}
-/// getSwappedCondition - assume the flags are set by MI(a,b), return
-/// the condition code if we modify the instructions such that flags are
-/// set by MI(b,a).
+/// Assuming the flags are set by MI(a,b), return the condition code if we
+/// modify the instructions such that flags are set by MI(b,a).
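+/// For example, COND_L for flags from CMP(a, b) maps to COND_G for flags
+/// from CMP(b, a).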
static X86::CondCode getSwappedCondition(X86::CondCode CC) {
switch (CC) {
default: return X86::COND_INVALID;
@@ -2922,7 +3364,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
}
}
-/// getSETFromCond - Return a set opcode for the given condition and
+/// Return a set opcode for the given condition and
/// whether it has a memory operand.
unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
static const uint16_t Opc[16][2] = {
@@ -2948,7 +3390,7 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
return Opc[CC][HasMemoryOperand ? 1 : 0];
}
-/// getCMovFromCond - Return a cmov opcode for the given condition,
+/// Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
bool HasMemoryOperand) {
@@ -3271,7 +3713,7 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
}
-/// isHReg - Test if the given register is a physical h register.
+/// Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
@@ -3543,11 +3985,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned =
+ (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -3582,11 +4022,9 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned =
+ (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -3682,7 +4120,7 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
return false;
}
-/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// Check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// This function can be extended later on.
@@ -3725,7 +4163,7 @@ inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
return false;
}
-/// isDefConvertible - check whether the definition can be converted
+/// Check whether the definition can be converted
/// to remove a comparison against zero.
inline static bool isDefConvertible(MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -3811,8 +4249,7 @@ inline static bool isDefConvertible(MachineInstr *MI) {
}
}
-/// isUseDefConvertible - check whether the use can be converted
-/// to remove a comparison against zero.
+/// Check whether the use can be converted to remove a comparison against zero.
static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return X86::COND_INVALID;
@@ -3831,7 +4268,7 @@ static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
}
}
-/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
bool X86InstrInfo::
@@ -4122,7 +4559,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
return true;
}
-/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// Try to remove the load by folding it to a register
/// operand at the use. We fold the load instructions if load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
@@ -4142,7 +4579,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
assert(DefMI);
bool SawStore = false;
- if (!DefMI->isSafeToMove(this, nullptr, SawStore))
+ if (!DefMI->isSafeToMove(nullptr, SawStore))
return nullptr;
// Collect information about virtual register operands of MI.
@@ -4166,9 +4603,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
if (FoldMI) {
FoldAsLoadDefReg = 0;
return FoldMI;
@@ -4177,9 +4612,9 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
}
-/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
-/// instruction with two undef reads of the register being defined. This is
-/// used for mapping:
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined.
+/// This is used for mapping:
/// %xmm4 = V_SET0
/// to:
/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
@@ -4263,7 +4698,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
}
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
@@ -4290,9 +4725,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
return MIB;
}
-static MachineInstr *FuseInst(MachineFunction &MF,
- unsigned Opcode, unsigned OpNo,
- const SmallVectorImpl<MachineOperand> &MOs,
+static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
+ unsigned OpNo, ArrayRef<MachineOperand> MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4316,7 +4750,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
}
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI) {
MachineFunction &MF = *MI->getParent()->getParent();
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
@@ -4329,23 +4763,22 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
return MIB.addImm(0);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI, unsigned i,
- const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align,
- bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned OpNum,
+ ArrayRef<MachineOperand> MOs,
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
- // Atom favors register form of call. So, we do not fold loads into calls
- // when X86Subtarget is Atom.
+ // For CPUs that favor the register form of a call,
+ // do not fold loads into calls.
if (isCallRegIndirect &&
- (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
return nullptr;
- }
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
@@ -4361,13 +4794,13 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2 &&
MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() &&
MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
- } else if (i == 0) { // If operand 0
+ } else if (OpNum == 0) {
if (MI->getOpcode() == X86::MOV32r0) {
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
if (NewMI)
@@ -4375,13 +4808,13 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
OpcodeTablePtr = &RegOp2MemOpTable0;
- } else if (i == 1) {
+ } else if (OpNum == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
- } else if (i == 2) {
+ } else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
- } else if (i == 3) {
+ } else if (OpNum == 3) {
OpcodeTablePtr = &RegOp2MemOpTable3;
- } else if (i == 4) {
+ } else if (OpNum == 4) {
OpcodeTablePtr = &RegOp2MemOpTable4;
}
@@ -4397,7 +4830,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
- unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
+ unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize();
if (Size < RCSize) {
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -4416,7 +4849,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (isTwoAddrFold)
NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
else
- NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+ NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this);
if (NarrowToMOV32rm) {
// If this is the special case where we use a MOV32rm to load a 32-bit
@@ -4435,7 +4868,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If the instruction and target operand are commutable, commute the
// instruction and try again.
if (AllowCommute) {
- unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
+ unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2;
if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
bool HasDef = MI->getDesc().getNumDefs();
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
@@ -4493,11 +4926,11 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// No fusion
if (PrintFailedFusing && !MI->isCopy())
- dbgs() << "We failed to fuse operand " << i << " in " << *MI;
+ dbgs() << "We failed to fuse operand " << OpNum << " in " << *MI;
return nullptr;
}
-/// hasPartialRegUpdate - Return true for all instructions that only update
+/// Return true for all instructions that only update
/// the first 32 or 64-bits of the destination register and leave the rest
/// unmodified. This can be used to avoid folding loads if the instructions
/// only update part of the destination register, and the non-updated part is
@@ -4559,7 +4992,7 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
-/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// Inform the ExeDepsFix pass how many idle
/// instructions we would like before a partial register update.
unsigned X86InstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
@@ -4698,17 +5131,16 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// Check switch flag
if (NoFusing) return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
@@ -4718,10 +5150,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(Alignment, MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment());
+ Alignment =
+ std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4742,10 +5172,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
} else if (Ops.size() != 1)
return nullptr;
- SmallVector<MachineOperand,4> MOs;
- MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
- Size, Alignment, /*AllowCommute=*/true);
+ return foldMemoryOperandImpl(MF, MI, Ops[0],
+ MachineOperand::CreateFI(FrameIndex), Size,
+ Alignment, /*AllowCommute=*/true);
}
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
@@ -4767,9 +5196,9 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
return false;
}
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const {
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
@@ -4785,8 +5214,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
@@ -4893,8 +5321,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
// Folding a normal load. Just copy the load's address operands.
- for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
- MOs.push_back(LoadMI->getOperand(i));
+ MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands,
+ LoadMI->operands_begin() + NumOps);
break;
}
}
@@ -4902,9 +5330,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
-
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+ ArrayRef<unsigned> Ops) const {
// Check switch flag
if (NoFusing) return 0;
@@ -4941,7 +5368,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
- } else if (OpNum == 0) { // If operand 0
+ } else if (OpNum == 0) {
if (Opc == X86::MOV32r0)
return true;
@@ -5157,7 +5584,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
- std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
NewNodes.push_back(NewNode);
@@ -5184,7 +5611,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(Store);
// Preserve memory reference information.
- cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
}
return true;
@@ -5539,7 +5966,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}
-/// getGlobalBaseReg - Return a virtual register initialized with the
+/// Return a virtual register initialized with
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
@@ -5572,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
@@ -5587,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
@@ -5672,7 +6101,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
MI->setDesc(get(table[Domain-1]));
}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+/// Return the noop instruction to use for a noop.
void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
@@ -5684,7 +6113,7 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
void X86InstrInfo::getUnconditionalBranch(
MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
Branch.setOpcode(X86::JMP_1);
- Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+ Branch.addOperand(MCOperand::createExpr(BranchTarget));
}
// This code must remain in sync with getJumpInstrTableEntryBound in this class!
@@ -5789,7 +6218,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
}
namespace {
- /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
struct CGBR : public MachineFunctionPass {
static char ID;
@@ -5798,10 +6227,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
// Don't do anything if this is 64-bit as 64-bit PIC
// uses RIP relative addressing.
- if (TM->getSubtarget<X86Subtarget>().is64Bit())
+ if (STI.is64Bit())
return false;
// Only emit a global base reg in PIC mode.
@@ -5820,10 +6250,10 @@ namespace {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const X86InstrInfo *TII = STI.getInstrInfo();
unsigned PC;
- if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ if (STI.isPICStyleGOT())
PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
else
PC = GlobalBaseReg;
@@ -5834,7 +6264,7 @@ namespace {
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ if (STI.isPICStyleGOT()) {
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
.addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
@@ -5915,10 +6345,9 @@ namespace {
MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF->getTarget());
- const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
+ const bool is64Bit = STI.is64Bit();
+ const X86InstrInfo *TII = STI.getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
@@ -5936,10 +6365,9 @@ namespace {
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF->getTarget());
- const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
+ const bool is64Bit = STI.is64Bit();
+ const X86InstrInfo *TII = STI.getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
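
The CGBR and TLS hunks above all perform the same migration: subtarget queries move off the TargetMachine cast and onto the MachineFunction, so per-function subtargets are respected. A sketch of the resulting idiom:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "X86Subtarget.h"
    using namespace llvm;

    // Query the per-function subtarget directly; previously this went through
    // static_cast<const X86TargetMachine *>(&MF.getTarget())
    //     ->getSubtarget<X86Subtarget>().
    static bool runsIn64BitMode(MachineFunction &MF) {
      const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
      return STI.is64Bit();
    }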
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index 4d15467..0dd8101 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -305,23 +305,21 @@ public:
/// folding and return true, otherwise it should return false. If it folds
/// the instruction, it is likely that the MachineInstruction the iterator
/// references has been changed.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
/// canFoldMemoryOperand - Returns true if folding of the specified
/// load / store is possible.
- bool canFoldMemoryOperand(const MachineInstr*,
- const SmallVectorImpl<unsigned> &) const override;
+ bool canFoldMemoryOperand(const MachineInstr *,
+ ArrayRef<unsigned>) const override;
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
@@ -406,10 +404,9 @@ public:
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 53715dc..70c2027520 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -383,25 +383,19 @@ def brtarget8 : Operand<OtherVT>;
}
-// Special parsers to detect mode to disambiguate.
+// Special parser to detect 16-bit mode to select 16-bit displacement.
def X86AbsMem16AsmOperand : AsmOperandClass {
let Name = "AbsMem16";
let RenderMethod = "addAbsMemOperands";
let SuperClasses = [X86AbsMemAsmOperand];
}
-def X86AbsMem32AsmOperand : AsmOperandClass {
- let Name = "AbsMem32";
- let RenderMethod = "addAbsMemOperands";
- let SuperClasses = [X86AbsMemAsmOperand];
-}
-
// Branch targets have OtherVT type and print as pc-relative values.
let OperandType = "OPERAND_PCREL",
PrintMethod = "printPCRelImm" in {
let ParserMatchClass = X86AbsMem16AsmOperand in
def brtarget16 : Operand<OtherVT>;
-let ParserMatchClass = X86AbsMem32AsmOperand in
+let ParserMatchClass = X86AbsMemAsmOperand in
def brtarget32 : Operand<OtherVT>;
}
@@ -539,7 +533,7 @@ def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
X86MemOffs64_64AsmOperand>;
def SSECC : Operand<i8> {
- let PrintMethod = "printSSECC";
+ let PrintMethod = "printSSEAVXCC";
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -548,17 +542,23 @@ def i8immZExt3 : ImmLeaf<i8, [{
}]>;
def AVXCC : Operand<i8> {
- let PrintMethod = "printAVXCC";
+ let PrintMethod = "printSSEAVXCC";
let OperandType = "OPERAND_IMMEDIATE";
}
def i8immZExt5 : ImmLeaf<i8, [{
return Imm >= 0 && Imm < 32;
}]>;
-// AVX-512 uses a 32-bit immediate in their intrinsics
-def i32immZExt5 : ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
-}]>;
+
+def AVX512ICC : Operand<i8> {
+ let PrintMethod = "printSSEAVXCC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def XOPCC : Operand<i8> {
+ let PrintMethod = "printXOPCC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
class ImmSExtAsmOperandClass : AsmOperandClass {
let SuperClasses = [ImmAsmOperand];
@@ -572,10 +572,13 @@ def X86GR32orGR64AsmOperand : AsmOperandClass {
def GR32orGR64 : RegisterOperand<GR32> {
let ParserMatchClass = X86GR32orGR64AsmOperand;
}
-
+def AVX512RCOperand : AsmOperandClass {
+ let Name = "AVX512RC";
+}
def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
+ let ParserMatchClass = AVX512RCOperand;
}
// Sign-extended immediate classes. We don't need to define the full lattice
@@ -613,6 +616,14 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
ImmSExti64i32AsmOperand];
}
+// Unsigned immediate used by SSE/AVX instructions. Accepted ranges:
+// [0, 0xFF]
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] (an i8 sign-extended to 64 bits)
+def ImmUnsignedi8AsmOperand : AsmOperandClass {
+ let Name = "ImmUnsignedi8";
+ let RenderMethod = "addImmOperands";
+}
+
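
A sketch of the acceptance test the two ranges above describe; the helper name here is invented for illustration and need not match the parser's actual hook:

    #include <cstdint>

    // Accept a plain unsigned 8-bit value, or an i8 that was sign-extended
    // to 64 bits (0xFFFFFFFFFFFFFF80 .. 0xFFFFFFFFFFFFFFFF).
    static bool isUnsignedI8Imm(uint64_t Value) {
      return Value <= 0xFFu || Value >= 0xFFFFFFFFFFFFFF80ULL;
    }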
// A couple of more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
@@ -631,6 +642,27 @@ def i64i32imm : Operand<i64> {
let OperandType = "OPERAND_IMMEDIATE";
}
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Unsigned 8-bit immediate used by SSE/AVX instructions.
+def u8imm : Operand<i8> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// 32-bit immediate, but only 8 bits are significant, and they are unsigned.
+// Used by some SSE/AVX instructions that use intrinsics.
+def i32u8imm : Operand<i32> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
def i64i32imm_pcrel : Operand<i64> {
@@ -639,12 +671,6 @@ def i64i32imm_pcrel : Operand<i64> {
let OperandType = "OPERAND_PCREL";
}
-// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
def lea64_32mem : Operand<i32> {
let PrintMethod = "printanymem";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
@@ -690,6 +716,8 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
@@ -720,14 +748,19 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">,
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
-def HasCDI : Predicate<"Subtarget->hasCDI()">;
-def HasPFI : Predicate<"Subtarget->hasPFI()">;
-def HasERI : Predicate<"Subtarget->hasERI()">;
-def HasDQI : Predicate<"Subtarget->hasDQI()">;
+def HasCDI : Predicate<"Subtarget->hasCDI()">,
+ AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">;
+def HasPFI : Predicate<"Subtarget->hasPFI()">,
+ AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">;
+def HasERI : Predicate<"Subtarget->hasERI()">,
+ AssemblerPredicate<"FeatureERI", "AVX-512 ER ISA">;
+def HasDQI : Predicate<"Subtarget->hasDQI()">,
+ AssemblerPredicate<"FeatureDQI", "AVX-512 DQ ISA">;
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
-def HasBWI : Predicate<"Subtarget->hasBWI()">;
+def HasBWI : Predicate<"Subtarget->hasBWI()">,
+ AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
- AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
+ AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -750,10 +783,8 @@ def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
-def HasSGX : Predicate<"Subtarget->hasSGX()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
-def HasSMAP : Predicate<"Subtarget->hasSMAP()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
@@ -771,6 +802,9 @@ def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
def In32BitMode : Predicate<"Subtarget->is32Bit()">,
AssemblerPredicate<"Mode32Bit", "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
+def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -823,11 +857,11 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
return (Imm == X86::COND_E) || (Imm == X86::COND_NE);
}]>;
-let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
- def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
- def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
- def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
-}
+
+def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+
def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
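
The ImmLeaf predicates above share one truncate-and-compare idiom, sketched here in C++:

    #include <cstdint>

    // An immediate fits a sign-extended 8-bit field iff narrowing to int8_t
    // and widening back is lossless.
    static bool fitsSExt8(int64_t Imm) { return Imm == (int8_t)Imm; }
    // fitsSExt8(127) and fitsSExt8(-128) hold; fitsSExt8(128) does not.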
@@ -2191,11 +2225,11 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
def CountTrailingOnes : SDNodeXForm<imm, [{
// Count the trailing ones in the immediate.
- return getI8Imm(CountTrailingOnes_64(N->getZExtValue()));
+ return getI8Imm(countTrailingOnes(N->getZExtValue()), SDLoc(N));
}]>;
def BZHIMask : ImmLeaf<i64, [{
- return isMask_64(Imm) && (CountTrailingOnes_64(Imm) > 32);
+ return isMask_64(Imm) && (countTrailingOnes<uint64_t>(Imm) > 32);
}]>;
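
isMask_64 and countTrailingOnes live in llvm/Support/MathExtras.h; a self-contained sketch of what the BZHIMask leaf accepts:

    #include <cstdint>

    // Imm must be a contiguous run of low ones (0...01...1) more than 32 bits
    // wide, e.g. 0x3FFFFFFFFF (38 ones).
    static bool isWideBZHIMask(uint64_t Imm) {
      bool IsMask = Imm != 0 && ((Imm + 1) & Imm) == 0; // isMask_64 equivalent
      unsigned Ones = 0;
      for (uint64_t V = Imm; V & 1; V >>= 1)            // countTrailingOnes
        ++Ones;
      return IsMask && Ones > 32;
    }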
let Predicates = [HasBMI2] in {
@@ -2385,6 +2419,16 @@ let Predicates = [HasTBM] in {
} // HasTBM
//===----------------------------------------------------------------------===//
+// Memory Instructions
+//
+
+def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflushopt\t$src", []>, PD;
+def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", []>, PD;
+def PCOMMIT : I<0xAE, MRM_F8, (outs), (ins), "pcommit", []>, PD;
+
+
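
The three definitions above only add assembler encodings (their pattern lists are empty). As a hedged usage sketch, the matching compiler intrinsics from immintrin.h look like this; availability depends on the host compiler and on enabling the features (e.g. -mclwb, -mclflushopt):

    #include <immintrin.h>

    // Hypothetical helper: write back the cache line containing p.
    static void writeback_line(const void *p) {
      _mm_clwb(p);   // CLWB: write the line back, may leave it cached
                     // (_mm_clflushopt would also invalidate it)
      _mm_sfence();  // order the writeback before subsequent stores
    }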
+//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
@@ -2537,6 +2581,12 @@ def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
def : MnemonicAlias<"fucomip", "fucompi", "att">;
def : MnemonicAlias<"fwait", "wait">;
+def : MnemonicAlias<"fxsaveq", "fxsave64", "att">;
+def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">;
+def : MnemonicAlias<"xsaveq", "xsave64", "att">;
+def : MnemonicAlias<"xrstorq", "xrstor64", "att">;
+def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">;
+
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
string VariantName>
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index 5b36427..eaa7894 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -125,9 +125,9 @@ let Constraints = "$src1 = $dst" in {
(bitconvert (load_mmx addr:$src2))))],
itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
- (ins VR64:$src1, i32i8imm:$src2),
+ (ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+ [(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))], itins.ri>,
Sched<[WriteVecShift]>;
}
}
@@ -170,12 +170,12 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ (ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[WriteShuffle]>;
def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ (ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
@@ -223,20 +223,26 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
-let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+
+let Predicates = [HasMMX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
+ (MMX_MOVD64rr GR32:$src)>;
+ let AddedComplexity = 20 in
+ def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
+ (MMX_MOVD64rm addr:$src)>;
+}
+
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
Sched<[WriteStore]>;
-// Low word of MMX to GPR.
-def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
- [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
@@ -248,6 +254,11 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
[(set VR64:$dst, (bitconvert GR64:$src))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
+ (ins i64mem:$src), "movd\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOVQ_RM>, Sched<[WriteLoad]>;
+
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
@@ -268,6 +279,12 @@ def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
}
} // SchedRW
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
+ (outs i64mem:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [], IIC_MMX_MOV_REG_MM>, Sched<[WriteStore]>;
+
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
@@ -453,6 +470,13 @@ defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
MMX_SHIFT_ITINS>;
+def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
+def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
+def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
+
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
MMX_SHIFT_ITINS>;
@@ -463,6 +487,13 @@ defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
MMX_SHIFT_ITINS>;
+def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
+def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
+def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
+
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
MMX_SHIFT_ITINS>;
@@ -470,6 +501,11 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
MMX_SHIFT_ITINS>;
+def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
+def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
+ (MMX_PSRADrm VR64:$src1, addr:$src2)>;
+
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
MMX_INTALU_ITINS>;
@@ -518,13 +554,13 @@ defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
MMX_PSHUF_ITINS>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
- (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
@@ -559,27 +595,27 @@ let Constraints = "$src1 = $dst" in {
// Extract / Insert
def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
- (outs GR32orGR64:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
- (iPTR imm:$src2)))],
+ imm:$src2))],
IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
+ (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
- GR32orGR64:$src2, (iPTR imm:$src3)))],
+ GR32orGR64:$src2, imm:$src3))],
IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
- (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
+ (ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
- (iPTR imm:$src3)))],
+ imm:$src3))],
IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSGX.td b/contrib/llvm/lib/Target/X86/X86InstrSGX.td
index 47c5dc5..84119ad 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSGX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSGX.td
@@ -17,8 +17,8 @@
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
- "encls", []>, TB, Requires<[HasSGX]>;
+ "encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
- "enclu", []>, TB, Requires<[HasSGX]>;
+ "enclu", []>, TB;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index fca0c44..d3b401e 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -241,21 +241,20 @@ def SSE_INTALU_ITINS_BLEND_P : OpndItins<
/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
- OpndItins itins,
- bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -263,24 +262,23 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
Operand memopr, ComplexPattern mem_cpat,
- OpndItins itins,
- bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
- def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>,
+ RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
- def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>,
+ RC:$src1, mem_cpat:$src2))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -548,13 +546,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
- string asm_opr> {
+ string asm_opr, Domain d = GenericDomain> {
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
+ IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
@@ -565,49 +563,55 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, string OpcodeStr> {
+ X86MemOperand x86memop, string OpcodeStr,
+ Domain d = GenericDomain> {
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
VEX_4V, VEX_LIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}">;
+ "\t{$src2, $dst|$dst, $src2}", d>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> {
+ PatFrag mem_pat, string OpcodeStr,
+ Domain d = GenericDomain> {
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
}
-defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
-defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
+ SSEPackedSingle>, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
+ SSEPackedDouble>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
+ SSEPackedSingle>, XS;
let AddedComplexity = 20 in
- defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
+ SSEPackedDouble>, XD;
}
// Patterns
@@ -637,9 +641,6 @@ let Predicates = [UseAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
@@ -647,9 +648,6 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -787,7 +785,7 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
+ // is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
@@ -1333,7 +1331,7 @@ let Predicates = [HasAVX] in {
(VMOVHPSrm VR128:$src1, addr:$src2)>;
// VMOVHPD patterns
-
+
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold
// cause it has two uses through a bitcast. One use disappears at isel time
@@ -1853,14 +1851,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
@@ -1938,14 +1936,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
@@ -2348,11 +2346,11 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
+ (ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, [],
IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
+ (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, [],
IIC_SSE_ALU_F32S_RM>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -2487,7 +2485,8 @@ let Defs = [EFLAGS] in {
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
string asm_alt, Domain d, ImmLeaf immLeaf,
- OpndItins itins = SSE_ALU_F32P> {
+ PatFrag ld_frag, OpndItins itins = SSE_ALU_F32P> {
+ let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
@@ -2495,17 +2494,18 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), immLeaf:$cc))],
+ [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2), immLeaf:$cc))],
itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
+ let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
asm_alt, [], itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
@@ -2514,61 +2514,61 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle, i8immZExt5>, PS, VEX_4V;
+ SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble, i8immZExt5>, PD, VEX_4V;
+ SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle, i8immZExt5>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle, i8immZExt5, loadv8f32>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble, i8immZExt5>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble, i8immZExt5, loadv4f64>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle, i8immZExt5, SSE_ALU_F32P>, PS;
+ SSEPackedSingle, i8immZExt5, memopv4f32, SSE_ALU_F32P>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble, i8immZExt5, SSE_ALU_F64P>, PD;
+ SSEPackedDouble, i8immZExt5, memopv2f64, SSE_ALU_F64P>, PD;
}
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)),
(VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 addr:$src2), imm:$cc)),
(VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}
let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}
@@ -2581,12 +2581,12 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
(i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
Sched<[WriteFShuffle]>;
@@ -2742,24 +2742,6 @@ let Predicates = [HasAVX1Only] in {
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
-let Predicates = [HasAVX] in {
- // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
- // problem is during lowering, where it's not possible to recognize the load
- // fold cause it has two uses through a bitcast. One use disappears at isel
- // time and the fold opportunity reappears.
- def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
- // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
- // problem is during lowering, where it's not possible to recognize the load
- // fold cause it has two uses through a bitcast. One use disappears at isel
- // time and the fold opportunity reappears.
- def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//
@@ -2880,40 +2862,73 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
-/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
-///
-multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, OpndItins itins> {
+// Multiclass for scalars using the X86 logical operation aliases for FP.
+multiclass sse12_fp_packed_scalar_logical_alias<
+ bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, loadf32_128, SSEPackedSingle, itins, 0>,
+ PS, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, loadf64_128, SSEPackedDouble, itins, 0>,
+ PD, VEX_4V;
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32_128, SSEPackedSingle, itins>, PS;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64_128, SSEPackedDouble, itins>, PD;
+ }
+}
+
+let isCodeGenOnly = 1 in {
+ defm FsAND : sse12_fp_packed_scalar_logical_alias<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+ defm FsOR : sse12_fp_packed_scalar_logical_alias<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+ defm FsXOR : sse12_fp_packed_scalar_logical_alias<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
+
+ let isCommutable = 0 in
+ defm FsANDN : sse12_fp_packed_scalar_logical_alias<0x55, "andn", X86fandn,
+ SSE_BIT_ITINS_P>;
+}
+
+// Multiclass for vectors using the X86 logical operation aliases for FP.
+multiclass sse12_fp_packed_vector_logical_alias<
+ bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
+ let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
+ VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
+ VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V;
+ }
let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, itins>,
PS;
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, itins>,
PD;
}
}
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
let isCodeGenOnly = 1 in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ defm FvAND : sse12_fp_packed_vector_logical_alias<0x54, "and", X86fand,
SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ defm FvOR : sse12_fp_packed_vector_logical_alias<0x56, "or", X86for,
SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ defm FvXOR : sse12_fp_packed_vector_logical_alias<0x57, "xor", X86fxor,
SSE_BIT_ITINS_P>;
let isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
+ defm FvANDN : sse12_fp_packed_vector_logical_alias<0x55, "andn", X86fandn,
SSE_BIT_ITINS_P>;
}
@@ -3037,15 +3052,19 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ OpNode, FR32, f32mem, SSEPackedSingle, itins.s, 0>,
+ XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ OpNode, FR64, f64mem, SSEPackedDouble, itins.d, 0>,
+ XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, itins.s>, XS;
+ OpNode, FR32, f32mem, SSEPackedSingle,
+ itins.s>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, itins.d>, XD;
+ OpNode, FR64, f64mem, SSEPackedDouble,
+ itins.d>, XD;
}
}
@@ -3053,18 +3072,18 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s>, XS;
+ SSEPackedSingle, itins.s>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d>, XD;
+ SSEPackedDouble, itins.d>, XD;
}
}
@@ -3098,10 +3117,9 @@ let isCodeGenOnly = 1 in {
}
// Patterns used to select SSE scalar fp arithmetic instructions from
-// a scalar fp operation followed by a blend.
+// either:
//
-// These patterns know, for example, how to select an ADDSS from a
-// float add plus vector insert.
+// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
@@ -3113,218 +3131,14 @@ let isCodeGenOnly = 1 in {
// return A;
// }
//
-// previously we generated:
+// Previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
//
-// we now generate:
+// We now generate:
// addss %xmm1, %xmm0
-
-let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))))),
- (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))))),
- (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))))),
- (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))))),
- (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
-}
-
-let Predicates = [UseSSE2] in {
- // SSE2 patterns to select scalar double-precision fp arithmetic instructions
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
-}
-
-let Predicates = [UseSSE41] in {
- // If the subtarget has SSE4.1 but not AVX, the vector insert instruction is
- // lowered into a X86insertps or a X86Blendi rather than a X86Movss. When
- // selecting SSE scalar single-precision fp arithmetic instructions, make
- // sure that we correctly match them.
-
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
-}
-
-let Predicates = [HasAVX] in {
- // The following patterns select AVX Scalar single/double precision fp
- // arithmetic instructions.
-
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))))),
- (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (iPTR 0))),
- (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
- (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
- FR32:$src))), (i8 1))),
- (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (i8 1))),
- (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
- (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
-}
-
-// Patterns used to select SSE scalar fp arithmetic instructions from
-// a vector packed single/double fp operation followed by a vector insert.
+//
+// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
@@ -3335,160 +3149,137 @@ let Predicates = [HasAVX] in {
// return (__m128) {c[0], a[1], a[2], a[3]};
// }
//
-// previously we generated:
+// Previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
//
-// we now generate:
+// We now generate:
// addss %xmm1, %xmm0
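//
// For reference, a plausible sketch of the C source (only its return
// statement is quoted above; the function name is illustrative):
//   __m128 foo(__m128 a, __m128 b) {
//     __m128 c = a + b;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }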
-let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
-}
+// TODO: Some canonicalization in lowering would simplify the number of
+// patterns we have to try to match.
+multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
+ let Predicates = [UseSSE1] in {
+ // extracted scalar math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))))),
+ (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ // vector math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
+ (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
+ }
-let Predicates = [UseSSE2] in {
- // SSE2 patterns to select scalar double-precision fp arithmetic instructions
- // from a packed double-precision fp instruction plus movsd.
-
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
-}
+  // With SSE 4.1, blendi is preferred to movss, so match that too.
+ let Predicates = [UseSSE41] in {
+ // extracted scalar math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32:$src, VR128))>;
-let Predicates = [UseSSE41] in {
- // With SSE4.1 we may see these operations using X86Blendi rather than
- // X86Movs{s,d}.
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
-
- def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+ // vector math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
+  }
+
+  // Repeat everything for AVX, except for the movss + scalar combo, which
+  // is not expected to occur with AVX codegen.
+ let Predicates = [HasAVX] in {
+ // extracted scalar math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ // vector math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
+ (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
+
+ // vector math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
+ }
}
-let Predicates = [HasAVX] in {
- // The following patterns select AVX Scalar single/double precision fp
- // arithmetic instructions from a packed single precision fp instruction
- // plus movss/movsd.
-
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
- (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
- (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
- (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
- (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
-
- // Also handle X86Blendi-based patterns.
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
- (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
- (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
- (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
- (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
-
- def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
- def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+defm : scalar_math_f32_patterns<fadd, "ADD">;
+defm : scalar_math_f32_patterns<fsub, "SUB">;
+defm : scalar_math_f32_patterns<fmul, "MUL">;
+defm : scalar_math_f32_patterns<fdiv, "DIV">;
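+// e.g. the fadd instantiation above expands (among others) to the pattern
+//   (v4f32 (X86Movss VR128:$dst, (fadd VR128:$dst, VR128:$src)))
+//     -> (ADDSSrr_Int v4f32:$dst, v4f32:$src)
+// with a "V"-prefixed instruction selected under the AVX predicates.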
+
+multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
+ let Predicates = [UseSSE2] in {
+ // extracted scalar math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))))),
+ (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ // vector math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
+ (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
+ (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
+ }
+
+ // With SSE 4.1, blendi is preferred to movsd, so match those too.
+ let Predicates = [UseSSE41] in {
+ // extracted scalar math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ // vector math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
+ }
+
+ // Repeat everything for AVX.
+ let Predicates = [HasAVX] in {
+ // extracted scalar math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))))),
+ (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ // extracted scalar math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ // vector math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
+ (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
+ (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
+
+ // vector math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
+ }
}
+defm : scalar_math_f64_patterns<fadd, "ADD">;
+defm : scalar_math_f64_patterns<fsub, "SUB">;
+defm : scalar_math_f64_patterns<fmul, "MUL">;
+defm : scalar_math_f64_patterns<fdiv, "DIV">;
+
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@@ -3535,56 +3326,104 @@ def SSE_RCPS : OpndItins<
>;
}
-/// sse1_fp_unop_s - SSE1 unops in scalar form
+/// sse_fp_unop_s - SSE1/SSE2 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
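/// For example, with SSE "sqrtss %xmm1, %xmm0" writes only the low lane of
/// %xmm0 and leaves its upper lanes unchanged, so the destination is also a
/// source; the AVX form "vsqrtss %xmm2, %xmm1, %xmm0" takes the pass-through
/// upper lanes from a separate source operand instead.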
-multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
-let Predicates = [HasAVX], hasSideEffects = 0 in {
- def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
- let mayLoad = 1 in {
- def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1,f32mem:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
- let isCodeGenOnly = 1 in
- def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat("v", OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
+multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ ValueType vt, ValueType ScalarVT,
+ X86MemOperand x86memop, Operand vec_memop,
+ ComplexPattern mem_cpat, Intrinsic Intr,
+ SDNode OpNode, Domain d, OpndItins itins,
+ Predicate target, string Suffix> {
+ let hasSideEffects = 0 in {
+ def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
+ [(set RC:$dst, (OpNode RC:$src1))], itins.rr, d>, Sched<[itins.Sched]>,
+ Requires<[target]>;
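+  // Fold a load into the scalar op only under OptForSize: folding eliminates
+  // an instruction, but it also eliminates a whole-register clobber (the
+  // load), so it introduces a partial register update hazard.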
+ let mayLoad = 1 in
+ def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
+ [(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>,
+ Requires<[target, OptForSize]>;
+
+ let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+ def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ let mayLoad = 1 in
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+ }
+
+ let Predicates = [target] in {
+ def : Pat<(vt (OpNode mem_cpat:$src)),
+ (vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
+ // These are unary operations, but they are modeled as having 2 source operands
+ // because the high elements of the destination are unchanged in SSE.
+ def : Pat<(Intr VR128:$src),
+ (!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
+ def : Pat<(Intr (load addr:$src)),
+              (vt (COPY_TO_REGCLASS (!cast<Instruction>(NAME#Suffix##m)
+ addr:$src), VR128))>;
+ def : Pat<(Intr mem_cpat:$src),
+ (!cast<Instruction>(NAME#Suffix##m_Int)
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
}
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>, Sched<[itins.Sched]>;
- let mayLoad = 1, hasSideEffects = 0 in
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ ValueType vt, ValueType ScalarVT,
+ X86MemOperand x86memop, Operand vec_memop,
+ ComplexPattern mem_cpat,
+ Intrinsic Intr, SDNode OpNode, Domain d,
+ OpndItins itins, string Suffix> {
+ let hasSideEffects = 0 in {
+ def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins.rr, d>, Sched<[itins.Sched]>;
+ let mayLoad = 1 in
+ def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ let isCodeGenOnly = 1 in {
+ def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[itins.Sched.Folded]>;
+ let mayLoad = 1 in
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, vec_memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+ }
+
+ let Predicates = [UseAVX] in {
+ def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
+ (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
+
+ def : Pat<(vt (OpNode mem_cpat:$src)),
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
+ mem_cpat:$src)>;
+  }
+ let Predicates = [HasAVX] in {
+ def : Pat<(Intr VR128:$src),
+ (!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
+ VR128:$src)>;
+
+ def : Pat<(Intr mem_cpat:$src),
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int)
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
+ }
+ let Predicates = [UseAVX, OptForSize] in
+ def : Pat<(ScalarVT (OpNode (load addr:$src))),
+ (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
+ addr:$src)>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -3623,93 +3462,6 @@ let Predicates = [HasAVX] in {
Sched<[itins.Sched.Folded]>;
}
-/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
-multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, Intrinsic V8F32Int,
- OpndItins itins> {
-let isCodeGenOnly = 1 in {
-let Predicates = [HasAVX] in {
- def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, VEX, Sched<[itins.Sched]>;
- def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
- itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
- def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V8F32Int VR256:$src))],
- itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
- def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
- (ins f256mem:$src),
- !strconcat("v", OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
-}
-
- def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, Sched<[itins.Sched]>;
- def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>, Sched<[itins.Sched.Folded]>;
-} // isCodeGenOnly = 1
-}
-
-/// sse2_fp_unop_s - SSE2 unops in scalar form.
-multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
-let Predicates = [HasAVX], hasSideEffects = 0 in {
- def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- !strconcat("v", OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
- let mayLoad = 1 in {
- def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1,f64mem:$src2),
- !strconcat("v", OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
- let isCodeGenOnly = 1 in
- def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat("v", OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG,
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
- }
-}
-
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
- Sched<[itins.Sched]>;
- // See the comments in sse1_fp_unop_s for why this is OptForSize.
- def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
-let isCodeGenOnly = 1 in {
- def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
- Sched<[itins.Sched]>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
- Sched<[itins.Sched.Folded]>;
-}
-}
-
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
@@ -3746,92 +3498,83 @@ let Predicates = [HasAVX] in {
Sched<[itins.Sched.Folded]>;
}
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+ defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
+ ssmem, sse_load_f32,
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
+ SSEPackedSingle, itins, UseSSE1, "SS">, XS;
+ defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
+ f32mem, ssmem, sse_load_f32,
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
+ SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
+}
+
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+ defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
+ sdmem, sse_load_f64,
+ !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
+ OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
+ defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
+ f64mem, sdmem, sse_load_f64,
+ !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
+ OpNode, SSEPackedDouble, itins, "SD">,
+ XD, VEX_4V, VEX_LIG;
+}
+
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
- SSE_SQRTSD>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
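// For reference, given an estimate x0 ~= 1/sqrt(a), one Newton-Raphson step
//   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
// roughly doubles the number of accurate bits; a reciprocal estimate is
// refined analogously with x1 = x0 * (2 - a * x0).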
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>;
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
- int_x86_avx_rcp_ps_256, SSE_RCPP>;
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>;
-let Predicates = [UseAVX] in {
- def : Pat<(f32 (fsqrt FR32:$src)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
- def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
- def : Pat<(f64 (fsqrt FR64:$src)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
- def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
- def : Pat<(f32 (X86frsqrt FR32:$src)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
- def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
- def : Pat<(f32 (X86frcp FR32:$src)),
- (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
- def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-}
-let Predicates = [UseAVX] in {
- def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
- (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128:$src, FR32)),
- VR128)>;
- def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
- (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+// There is no f64 version of the reciprocal approximation instructions.
- def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
- (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128:$src, FR64)),
- VR128)>;
- def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
- (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
-}
+// TODO: We should add *scalar* op patterns for these just like we have for
+// the binops above. If the binop and unop patterns could all be unified
+// that would be even better.
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
- (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128:$src, FR32)),
- VR128)>;
- def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
- (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
-
- def : Pat<(int_x86_sse_rcp_ss VR128:$src),
- (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128:$src, FR32)),
- VR128)>;
- def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
- (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
-}
-
-// These are unary operations, but they are modeled as having 2 source operands
-// because the high elements of the destination are unchanged in SSE.
-let Predicates = [UseSSE1] in {
- def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
- (RSQRTSSr_Int VR128:$src, VR128:$src)>;
- def : Pat<(int_x86_sse_rcp_ss VR128:$src),
- (RCPSSr_Int VR128:$src, VR128:$src)>;
- def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
- (SQRTSSr_Int VR128:$src, VR128:$src)>;
+multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix,
+ SDNode Move, ValueType VT,
+ Predicate BasePredicate> {
+ let Predicates = [BasePredicate] in {
+ def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
+ (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+ }
+
+ // With SSE 4.1, blendi is preferred to movs*, so match that too.
+ let Predicates = [UseSSE41] in {
+ def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
+ (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+ }
+
+ // Repeat for AVX versions of the instructions.
+ let Predicates = [HasAVX] in {
+ def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
+ (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+
+ def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+ }
}
-// There is no f64 version of the reciprocal approximation instructions.
+defm : scalar_unary_math_patterns<int_x86_sse_rcp_ss, "RCPSS", X86Movss,
+ v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS", X86Movss,
+ v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<int_x86_sse_sqrt_ss, "SQRTSS", X86Movss,
+ v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd,
+ v2f64, UseSSE2>;
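+// e.g. the int_x86_sse_rcp_ss instantiation selects RCPSSr_Int (VRCPSSr_Int
+// under AVX) when the intrinsic's result is re-inserted via movss or blendi.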
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
@@ -3910,13 +3653,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
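+
+// Nontemporal integer-vector stores select (V)MOVNTDQ rather than (V)MOVNTPS
+// so that the store executes in the integer domain.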
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+}
+
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (VMOVNTPSmr addr:$dst, VR128:$src)>;
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
}
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (MOVNTPSmr addr:$dst, VR128:$src)>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
} // AddedComplexity
@@ -3945,7 +3705,7 @@ let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
- IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
+ IIC_SSE_PREFETCH>, PS, Requires<[HasSSE2]>;
}
let SchedRW = [WriteNop] in {
@@ -3960,7 +3720,7 @@ let SchedRW = [WriteFence] in {
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
"sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
- TB, Requires<[HasSSE1]>;
+ PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
"lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
TB, Requires<[HasSSE2]>;
@@ -4184,7 +3944,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, SDNode OpNode,
SDNode OpNode2, RegisterClass RC,
ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
- ShiftOpndItins itins,
+ PatFrag ld_frag, ShiftOpndItins itins,
bit Is2Addr = 1> {
// src2 is always 128-bit
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
@@ -4200,10 +3960,10 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ (bc_frag (ld_frag addr:$src2)))))], itins.rm>,
Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+ (ins RC:$src1, u8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
@@ -4309,93 +4069,93 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
- VR128, v2i64, v2i64, bc_v2i64,
+ VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
- VR128, v2i64, v2i64, bc_v2i64,
+ VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
+ (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
def VPSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
+ (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX, NoVLX]
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR256, v16i16, v8i16, bc_v8i16,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
- VR256, v8i32, v4i32, bc_v4i32,
+ VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
- VR256, v4i64, v2i64, bc_v2i64,
+ VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR256, v16i16, v8i16, bc_v8i16,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
- VR256, v8i32, v4i32, bc_v4i32,
+ VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
- VR256, v4i64, v2i64, bc_v2i64,
+ VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR256, v16i16, v8i16, bc_v8i16,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
- VR256, v8i32, v4i32, bc_v4i32,
+ VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
- (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
- (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
+ (v4i64 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
def VPSRLDQYri : PDIi8<0x73, MRM3r,
- (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR256:$dst,
- (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
+ (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
}
@@ -4403,85 +4163,58 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
- VR128, v2i64, v2i64, bc_v2i64,
+ VR128, v2i64, v2i64, bc_v2i64, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
- VR128, v2i64, v2i64, bc_v2i64,
+ VR128, v2i64, v2i64, bc_v2i64, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, bc_v8i16,
+ VR128, v8i16, v8i16, bc_v8i16, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
- VR128, v4i32, v4i32, bc_v4i32,
+ VR128, v4i32, v4i32, bc_v4i32, memopv2i64,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
- IIC_SSE_INTSHDQ_P_RI>;
+ (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
- IIC_SSE_INTSHDQ_P_RI>;
+ (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-
- // Shift up / down and insert zero's.
- def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
- (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
- def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
- (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
-}
-
-let Predicates = [HasAVX2] in {
- def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
- (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
- def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
- (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-
- // Shift up / down and insert zero's.
- def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
- (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
- def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
- (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
//===---------------------------------------------------------------------===//
@@ -4510,14 +4243,14 @@ multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
SDNode OpNode> {
let Predicates = [HasAVX] in {
def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i8imm:$src2),
+ (ins i128mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
@@ -4528,14 +4261,14 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, i8imm:$src2),
+ (ins VR256:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src1, i8imm:$src2),
+ (ins i256mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -4546,14 +4279,14 @@ let Predicates = [HasAVX2] in {
let Predicates = [UseSSE2] in {
def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
@@ -4589,7 +4322,7 @@ let Predicates = [UseSSE2] in {
let ExeDomain = SSEPackedInt in {
multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
- bit Is2Addr = 1> {
+ PatFrag ld_frag, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -4607,7 +4340,7 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode VR128:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))]>,
+ (bc_frag (ld_frag addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -4626,13 +4359,13 @@ multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OutVT (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2)))))]>,
+ (bc_frag (loadv4i64 addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
- bit Is2Addr = 1> {
+ PatFrag ld_frag, bit Is2Addr = 1> {
def rr : SS48I<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -4650,7 +4383,7 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode VR128:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))]>,
+ (bc_frag (ld_frag addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -4669,20 +4402,20 @@ multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OutVT (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2)))))]>,
+ (bc_frag (loadv4i64 addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
- bc_v8i16, 0>, VEX_4V;
+ bc_v8i16, loadv2i64, 0>, VEX_4V;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
- bc_v4i32, 0>, VEX_4V;
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
- bc_v8i16, 0>, VEX_4V;
+ bc_v8i16, loadv2i64, 0>, VEX_4V;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
- bc_v4i32, 0>, VEX_4V;
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
@@ -4699,16 +4432,16 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
- bc_v8i16>;
+ bc_v8i16, memopv2i64>;
defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
- bc_v4i32>;
+ bc_v4i32, memopv2i64>;
defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
- bc_v8i16>;
+ bc_v8i16, memopv2i64>;
let Predicates = [HasSSE41] in
defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
- bc_v4i32>;
+ bc_v4i32, memopv2i64>;
}
} // ExeDomain = SSEPackedInt
@@ -4718,7 +4451,8 @@ let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
- SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
+ SDNode OpNode, PatFrag bc_frag, PatFrag ld_frag,
+ bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -4732,8 +4466,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpNode VR128:$src1,
- (bc_frag (memopv2i64
- addr:$src2))))],
+ (bc_frag (ld_frag addr:$src2))))],
IIC_SSE_UNPCK>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -4749,28 +4482,28 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>,
+ (bc_frag (loadv4i64 addr:$src2))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
- bc_v16i8, 0>, VEX_4V;
+ bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
- bc_v8i16, 0>, VEX_4V;
+ bc_v8i16, loadv2i64, 0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- bc_v4i32, 0>, VEX_4V;
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- bc_v2i64, 0>, VEX_4V;
+ bc_v2i64, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
- bc_v16i8, 0>, VEX_4V;
+ bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
- bc_v8i16, 0>, VEX_4V;
+ bc_v8i16, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
- bc_v4i32, 0>, VEX_4V;
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
- bc_v2i64, 0>, VEX_4V;
+ bc_v2i64, loadv2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
@@ -4795,22 +4528,22 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
- bc_v16i8>;
+ bc_v16i8, memopv2i64>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
- bc_v8i16>;
+ bc_v8i16, memopv2i64>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
- bc_v4i32>;
+ bc_v4i32, memopv2i64>;
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
- bc_v2i64>;
+ bc_v2i64, memopv2i64>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
- bc_v16i8>;
+ bc_v16i8, memopv2i64>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
- bc_v8i16>;
+ bc_v8i16, memopv2i64>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
- bc_v4i32>;
+ bc_v4i32, memopv2i64>;
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
- bc_v2i64>;
+ bc_v2i64, memopv2i64>;
}
} // ExeDomain = SSEPackedInt
@@ -4822,7 +4555,7 @@ let ExeDomain = SSEPackedInt in {
multiclass sse2_pinsrw<bit Is2Addr = 1> {
def rri : Ii8<0xC4, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
- GR32orGR64:$src2, i32i8imm:$src3),
+ GR32orGR64:$src2, u8imm:$src3),
!if(Is2Addr,
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4831,7 +4564,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
IIC_SSE_PINSRW>, Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
- i16mem:$src2, i32i8imm:$src3),
+ i16mem:$src2, u8imm:$src3),
!if(Is2Addr,
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4844,13 +4577,13 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
// Extract
let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
- (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>, PD, VEX,
Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
- (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))], IIC_SSE_PEXTRW>,
@@ -4947,6 +4680,10 @@ def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteLoad]>;
let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4968,6 +4705,10 @@ def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
@@ -5054,6 +4795,15 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
IIC_SSE_MOVD_ToGP>;
} //SchedRW
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst),
+ (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
@@ -5132,7 +4882,8 @@ let Predicates = [UseAVX] in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
- // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+  // AVX 128-bit movd/movq instructions write zeros in the high bits of the
+  // 128-bit register. They also zero the high part of a 256-bit register.
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
@@ -5140,6 +4891,9 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -5186,7 +4940,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
// Move Quadword Int to Packed Quadword Int
//
-let SchedRW = [WriteLoad] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -5198,12 +4952,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
-} // SchedRW
+} // ExeDomain, SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
-let SchedRW = [WriteStore] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -5214,7 +4968,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
-} // SchedRW
+} // ExeDomain, SchedRW
// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
@@ -5228,14 +4982,14 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
-let Predicates = [UseAVX] in
+let Predicates = [HasAVX] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
(VMOVPQI2QImr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
(MOVPQI2QImr addr:$dst, VR128:$src)>;
-let isCodeGenOnly = 1, AddedComplexity = 20 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -5251,13 +5005,16 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
-}
+} // ExeDomain, isCodeGenOnly, AddedComplexity
let Predicates = [UseAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
@@ -5277,7 +5034,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM, clearing the upper 64 bits. Note that there is a bug
// in the IA-32 documentation: movq xmm1, xmm2 does clear the high bits.
//
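// For illustration, the register form's semantics:
//   movq %xmm2, %xmm1   ; xmm1[63:0] = xmm2[63:0], xmm1[127:64] = 0
// which is exactly the (v2i64 (X86vzmovl (v2i64 VR128:$src))) node that the
// defs below select.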
-let SchedRW = [WriteVecLogic] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -5290,9 +5047,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
-} // SchedRW
+} // ExeDomain, SchedRW
-let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -5308,7 +5065,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
-} // isCodeGenOnly, SchedRW
+} // ExeDomain, isCodeGenOnly, SchedRW
let AddedComplexity = 20 in {
let Predicates = [UseAVX] in {
@@ -5387,10 +5144,10 @@ let Predicates = [UseSSE3] in {
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_dfp<string OpcodeStr> {
-let hasSideEffects = 0 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
+ [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
@@ -5444,9 +5201,9 @@ let Predicates = [HasAVX] in {
let Predicates = [UseAVX, OptForSize] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
- (VMOVDDUPrm addr:$src)>;
+ (VMOVDDUPrm addr:$src)>;
def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
- (VMOVDDUPrm addr:$src)>;
+ (VMOVDDUPrm addr:$src)>;
}
let Predicates = [UseSSE3] in {
@@ -5487,7 +5244,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, OpndItins itins,
- bit Is2Addr = 1> {
+ PatFrag ld_frag, bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -5500,62 +5257,62 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))], itins.rr>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
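// The new ld_frag parameter lets each instantiation choose its load fragment:
// the AVX forms below pass the unaligned loadv* fragments while the SSE forms
// pass the alignment-checking memopv* ones. A hypothetical instantiation
// (names invented for illustration):
//   defm MYADDSUB : sse3_addsub<int_x86_sse3_addsub_ps, "myaddsub", VR128,
//                               f128mem, SSE_ALU_F32P, memopv4f32>;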
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, SSE_ALU_F32P, 0>, XD, VEX_4V;
+ f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, SSE_ALU_F32P, 0>, XD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, SSE_ALU_F64P, 0>, PD, VEX_4V;
+ f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, SSE_ALU_F64P, 0>, PD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
- f128mem, SSE_ALU_F32P>, XD;
+ f128mem, SSE_ALU_F32P, memopv4f32>, XD;
let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
- f128mem, SSE_ALU_F64P>, PD;
+ f128mem, SSE_ALU_F64P, memopv2f64>, PD;
}
// Patterns used to select 'addsub' instructions.
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (loadv4f32 addr:$rhs))),
(VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (loadv2f64 addr:$rhs))),
(VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))),
+ def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (loadv8f32 addr:$rhs))),
(VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
+ def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (loadv4f64 addr:$rhs))),
(VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
}
let Predicates = [UseSSE3] in {
def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (memopv4f32 addr:$rhs))),
(ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (memopv2f64 addr:$rhs))),
(ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
}
@@ -5565,7 +5322,8 @@ let Predicates = [UseSSE3] in {
// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag,
+ bit Is2Addr = 1> {
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -5577,11 +5335,12 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
+ X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag,
+ bit Is2Addr = 1> {
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -5593,41 +5352,45 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
+ X86fhadd, loadv4f32, 0>, VEX_4V;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
+ X86fhsub, loadv4f32, 0>, VEX_4V;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
+ X86fhadd, loadv2f64, 0>, VEX_4V;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
+ X86fhsub, loadv2f64, 0>, VEX_4V;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L;
}
}
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
+ memopv4f32>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
+ memopv4f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
+ memopv2f64>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
+ memopv2f64>;
}
}
@@ -5637,8 +5400,8 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128> {
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ PatFrag ld_frag> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5650,7 +5413,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
+ (bitconvert (ld_frag addr:$src))))], IIC_SSE_PABS_RM>,
Sched<[WriteVecALULd]>;
}
@@ -5668,7 +5431,7 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>,
+ (bitconvert (loadv4i64 addr:$src))))]>,
Sched<[WriteVecALULd]>;
}
@@ -5683,12 +5446,12 @@ def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
- int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
- int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
- int_x86_ssse3_pabs_d_128>, VEX;
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128,
+ loadv2i64>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", int_x86_ssse3_pabs_w_128,
+ loadv2i64>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128,
+ loadv2i64>, VEX;
def : Pat<(xor
(bc_v2i64 (v16i1sextv16i8)),
@@ -5726,12 +5489,12 @@ let Predicates = [HasAVX2] in {
(VPABSDrr256 VR256:$src)>;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
- int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", int_x86_ssse3_pabs_b_128,
+ memopv2i64>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", int_x86_ssse3_pabs_w_128,
+ memopv2i64>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128,
+ memopv2i64>;
let Predicates = [HasSSSE3] in {
def : Pat<(xor
@@ -5803,7 +5566,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, OpndItins itins,
- bit Is2Addr = 1> {
+ PatFrag ld_frag, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -5819,7 +5582,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>,
+ (bitconvert (ld_frag addr:$src2))))]>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -5868,17 +5631,17 @@ let isCommutable = 0 in {
SSE_PSHUFB, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SSE_PHADDSUBSW, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SSE_PHADDSUBSW, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128,
- SSE_PMADD, 0>, VEX_4V;
+ SSE_PMADD, loadv2i64, 0>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128,
- SSE_PMULHRSW, 0>, VEX_4V;
+ SSE_PMULHRSW, loadv2i64, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
@@ -5943,16 +5706,17 @@ let isCommutable = 0 in {
memopv2i64, i128mem, SSE_PSHUFB>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- SSE_PHADDSUBSW>;
+ SSE_PHADDSUBSW, memopv2i64>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- SSE_PHADDSUBSW>;
+ SSE_PHADDSUBSW, memopv2i64>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
+ int_x86_ssse3_pmadd_ub_sw_128,
+ SSE_PMADD, memopv2i64>;
}
defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128,
- SSE_PMULHRSW>;
+ SSE_PMULHRSW, memopv2i64>;
}
//===---------------------------------------------------------------------===//
@@ -5962,7 +5726,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -5970,7 +5734,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
[], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -5982,13 +5746,13 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
+ (ins VR256:$src1, i256mem:$src2, u8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
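// A sketch of the i8imm -> u8imm change's intent (an assumption based on the
// operand name): u8imm parses and prints its value as unsigned, so an
// immediate such as 0x8f in
//   palignr $0x8f, %xmm1, %xmm0
// round-trips through the assembler instead of printing as a negative number.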
@@ -6086,10 +5850,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
OpndItins SSEItins, OpndItins AVXItins,
OpndItins AVX2Itins> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
- let Predicates = [HasAVX] in
+ let Predicates = [HasAVX, NoVLX] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, AVXItins>, VEX;
- let Predicates = [HasAVX2] in
+ let Predicates = [HasAVX2, NoVLX] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, AVX2Itins>, VEX, VEX_L;
}
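// The added NoVLX guard presumably keeps these VEX-encoded patterns from
// competing with the EVEX forms that AVX-512VL supplies for the same 128-bit
// and 256-bit operations.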
@@ -6119,7 +5883,7 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
// Register-Register patterns
def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
@@ -6137,7 +5901,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
(!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
// On AVX2, we also support 256-bit inputs.
- // FIXME: remove these patterns when the old shuffle lowering goes away.
def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
(!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
@@ -6153,6 +5916,22 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
(!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ // Simple Register-Memory patterns
+ def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
// AVX2 Register-Memory patterns
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
@@ -6209,14 +5988,14 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
-let Predicates = [HasAVX2] in {
- defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
- defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
+let Predicates = [HasAVX2, NoVLX] in {
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
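// How the ExtTy splice resolves, for one concrete case: with ExtTy = "z",
// !cast<PatFrag>(ExtTy#"extloadvi8") names the zextloadvi8 fragment, so the
// multiclass instantiates, in effect:
//   def : Pat<(v16i16 (zextloadvi8 addr:$src)), (VPMOVZXBWYrm addr:$src)>;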
// SSE4.1/AVX patterns.
-multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- PatFrag ExtLoad16> {
+multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, PatFrag ExtLoad16> {
def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
@@ -6232,6 +6011,21 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
(!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
+ def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
@@ -6293,14 +6087,14 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
-let Predicates = [HasAVX] in {
- defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
+let Predicates = [HasAVX, NoVLX] in {
+ defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}
let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
}
//===----------------------------------------------------------------------===//
@@ -6310,7 +6104,7 @@ let Predicates = [UseSSE41] in {
/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
@@ -6319,11 +6113,11 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1),
- imm:$src2)))), addr:$dst)]>;
+ imm:$src2)))), addr:$dst)]>;
}
let Predicates = [HasAVX] in
@@ -6336,7 +6130,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[WriteShuffle]>;
@@ -6344,11 +6138,11 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1),
- imm:$src2)))), addr:$dst)]>;
+ imm:$src2)))), addr:$dst)]>;
}
let Predicates = [HasAVX] in
@@ -6360,7 +6154,7 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
@@ -6368,7 +6162,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
Sched<[WriteShuffle]>;
let SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
@@ -6383,7 +6177,7 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR64:$dst,
@@ -6391,7 +6185,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
Sched<[WriteShuffle]>, REX_W;
let SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
@@ -6408,7 +6202,7 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst,
@@ -6416,7 +6210,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
itins.rr>, Sched<[WriteFBlend]>;
let SchedRW = [WriteFBlendLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
@@ -6447,7 +6241,7 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6456,7 +6250,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6473,7 +6267,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, GR32:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6482,7 +6276,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
Sched<[WriteShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6499,7 +6293,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, GR64:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6508,7 +6302,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
Sched<[WriteShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6530,7 +6324,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6539,7 +6333,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6593,7 +6387,7 @@ let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
@@ -6601,7 +6395,7 @@ let ExeDomain = SSEPackedSingle in {
// Vector intrinsic operation, mem
def PSm : SS4AIi8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -6612,7 +6406,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
@@ -6620,7 +6414,7 @@ let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
@@ -6637,7 +6431,7 @@ let ExeDomain = GenericDomain in {
// Operation, reg.
let hasSideEffects = 0 in
def SSr : SS4AIi8<opcss, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6648,7 +6442,7 @@ let ExeDomain = GenericDomain in {
// Intrinsic operation, reg.
let isCodeGenOnly = 1 in
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6659,7 +6453,7 @@ let ExeDomain = GenericDomain in {
// Intrinsic operation, mem.
def SSm : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6672,7 +6466,7 @@ let ExeDomain = GenericDomain in {
// Operation, reg.
let hasSideEffects = 0 in
def SDr : SS4AIi8<opcsd, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6683,7 +6477,7 @@ let ExeDomain = GenericDomain in {
// Intrinsic operation, reg.
let isCodeGenOnly = 1 in
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6694,7 +6488,7 @@ let ExeDomain = GenericDomain in {
// Intrinsic operation, mem.
def SDm : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -6720,7 +6514,9 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+}
+let Predicates = [UseAVX] in {
def : Pat<(ffloor FR32:$src),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64:$src)),
@@ -6741,7 +6537,9 @@ let Predicates = [HasAVX] in {
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+}
+let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x1))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
@@ -6945,7 +6743,7 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128,
+ Intrinsic IntId128, PatFrag ld_frag,
X86FoldableSchedWrite Sched> {
def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
@@ -6956,7 +6754,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId128 (bitconvert (memopv2i64 addr:$src))))]>,
+ (IntId128 (bitconvert (ld_frag addr:$src))))]>,
Sched<[Sched.Folded]>;
}
@@ -6964,53 +6762,12 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
- int_x86_sse41_phminposuw,
+ int_x86_sse41_phminposuw, loadv2i64,
WriteVecIMul>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
- int_x86_sse41_phminposuw,
+ int_x86_sse41_phminposuw, memopv2i64,
WriteVecIMul>;
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1,
- OpndItins itins = DEFAULT_ITINS> {
- let isCommutable = 1 in
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
- itins.rr>, Sched<[itins.Sched]>;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))],
- itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
-}
-
-/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
-multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- Intrinsic IntId256,
- X86FoldableSchedWrite Sched> {
- let isCommutable = 1 in
- def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
- Sched<[Sched]>;
- def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>;
-}
-
-
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
@@ -7154,10 +6911,10 @@ let Predicates = [HasAVX, NoVLX] in {
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- memopv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
+ loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
VEX_4V, VEX_L;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
+ loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
}
@@ -7175,7 +6932,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7184,7 +6941,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
Sched<[itins.Sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7196,6 +6953,34 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
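// With SS41I_binop_rmi, a node such as (v4f32 (X86Blendi V1, V2, imm)) now
// selects directly to the blend instructions; this replaces both the
// intrinsic-based SS41I_binop_rmi_int instantiations and the standalone
// X86Blendi Pat<> definitions removed further down.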
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -7204,26 +6989,24 @@ let Predicates = [HasAVX] in {
}
let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
+ VR256, loadv8f32, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
+ VR256, loadv4f64, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
@@ -7245,9 +7028,9 @@ let Predicates = [HasAVX2] in {
VR256, loadv4i64, i256mem, 0,
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
+ VR256, loadv4i64, i256mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -7257,16 +7040,16 @@ let Constraints = "$src1 = $dst" in {
1, SSE_MPSADBW_ITINS>;
}
let ExeDomain = SSEPackedSingle in
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
let ExeDomain = SSEPackedDouble in
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_BLEND_P>;
+ defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
@@ -7357,35 +7140,19 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-
- def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
- (imm:$mask))),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
- (imm:$mask))),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
- (imm:$mask))),
- (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
// Patterns
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
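// Concretely, (v4f32 (X86vzmovl (v4f32 VR128:$src))) now lowers to roughly:
//   vxorps   %xmm2, %xmm2, %xmm2        ; zero vector
//   vblendps $1, %xmm1, %xmm2, %xmm0    ; element 0 from $src, rest zero
// (register numbers illustrative) instead of a shorter vmovss.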
let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
@@ -7402,8 +7169,10 @@ let Predicates = [UseAVX] in {
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
@@ -7417,14 +7186,19 @@ let Predicates = [UseAVX] in {
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
- // Move low f64 and clear high bits.
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
-
+ // These will incur an FP/int domain crossing penalty, but it may be the only
+ // way without AVX2. Do not add any complexity because we may be able to match
+ // more optimal patterns defined earlier in this file.
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41] in {
// With SSE41 we can use blends for these patterns.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
@@ -7501,17 +7275,6 @@ let Predicates = [UseSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-
}
let SchedRW = [WriteLoad] in {
@@ -7570,30 +7333,32 @@ let Constraints = "$src1 = $dst" in
//===----------------------------------------------------------------------===//
// Packed Compare Implicit Length Strings, Return Mask
-multiclass pseudo_pcmpistrm<string asm> {
+multiclass pseudo_pcmpistrm<string asm, PatFrag ld_frag> {
def REG : PseudoI<(outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>;
def MEM : PseudoI<(outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
+ (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>;
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
- defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
- defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>,
+ Requires<[HasAVX]>;
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>,
+ Requires<[UseSSE42]>;
}
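// Because of usesCustomInserter, these REG/MEM pseudos are expanded after
// instruction selection (via the target's EmitInstrWithCustomInserter hook)
// into the real pcmpistrm encodings below, which implicitly define XMM0.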
multiclass pcmpistrm_SS42AI<string asm> {
def rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
[]>, Sched<[WritePCmpIStrM]>;
let mayLoad = 1 in
def rm :SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
[]>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>;
}
@@ -7605,30 +7370,32 @@ let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
}
// Packed Compare Explicit Length Strings, Return Mask
-multiclass pseudo_pcmpestrm<string asm> {
+multiclass pseudo_pcmpestrm<string asm, PatFrag ld_frag> {
def REG : PseudoI<(outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ (ins VR128:$src1, VR128:$src3, u8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
def MEM : PseudoI<(outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
- (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>;
+ (bc_v16i8 (ld_frag addr:$src3)), EDX, imm:$src5))]>;
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
- defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128", loadv2i64>,
+ Requires<[HasAVX]>;
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128", memopv2i64>,
+ Requires<[UseSSE42]>;
}
multiclass SS42AI_pcmpestrm<string asm> {
def rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ (ins VR128:$src1, VR128:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
[]>, Sched<[WritePCmpEStrM]>;
let mayLoad = 1 in
def rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
[]>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>;
}
@@ -7640,30 +7407,32 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
}
// Packed Compare Implicit Length Strings, Return Index
-multiclass pseudo_pcmpistri<string asm> {
+multiclass pseudo_pcmpistri<string asm, PatFrag ld_frag> {
def REG : PseudoI<(outs GR32:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
[(set GR32:$dst, EFLAGS,
(X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
def MEM : PseudoI<(outs GR32:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
[(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
+ (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>;
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
- defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
- defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
+ defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>,
+ Requires<[HasAVX]>;
+ defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>,
+ Requires<[UseSSE42]>;
}
multiclass SS42AI_pcmpistri<string asm> {
def rr : SS42AI<0x63, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
[]>, Sched<[WritePCmpIStrI]>;
let mayLoad = 1 in
def rm : SS42AI<0x63, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
[]>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}
@@ -7675,31 +7444,33 @@ let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
}
// Packed Compare Explicit Length Strings, Return Index
-multiclass pseudo_pcmpestri<string asm> {
+multiclass pseudo_pcmpestri<string asm, PatFrag ld_frag> {
def REG : PseudoI<(outs GR32:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ (ins VR128:$src1, VR128:$src3, u8imm:$src5),
[(set GR32:$dst, EFLAGS,
(X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
def MEM : PseudoI<(outs GR32:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
[(set GR32:$dst, EFLAGS,
- (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
+ (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (ld_frag addr:$src3)), EDX,
imm:$src5))]>;
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
- defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
+ defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI", loadv2i64>,
+ Requires<[HasAVX]>;
+ defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI", memopv2i64>,
+ Requires<[UseSSE42]>;
}
multiclass SS42AI_pcmpestri<string asm> {
def rr : SS42AI<0x61, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ (ins VR128:$src1, VR128:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
[]>, Sched<[WritePCmpEStrI]>;
let mayLoad = 1 in
def rm : SS42AI<0x61, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
[]>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}
@@ -7784,13 +7555,13 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
(i8 imm:$src3)))]>, TA;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
@@ -7818,8 +7589,8 @@ def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
// AES-NI Instructions
//===----------------------------------------------------------------------===//
-multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ PatFrag ld_frag, bit Is2Addr = 1> {
def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -7833,31 +7604,31 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
Sched<[WriteAESDecEncLd, ReadAfterLd]>;
}
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, 0>, VEX_4V;
+ int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, 0>, VEX_4V;
+ int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, 0>, VEX_4V;
+ int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, 0>, VEX_4V;
+ int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc>;
+ int_x86_aesni_aesenc, memopv2i64>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast>;
+ int_x86_aesni_aesenclast, memopv2i64>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec>;
+ int_x86_aesni_aesdec, memopv2i64>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast>;
+ int_x86_aesni_aesdeclast, memopv2i64>;
}
// Perform the AES InvMixColumn Transformation
@@ -7888,26 +7659,26 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
Sched<[WriteAESKeyGen]>, VEX;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i8imm:$src2),
+ (ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGenLd]>, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i8imm:$src2),
+ (ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
@@ -7918,15 +7689,16 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
//===----------------------------------------------------------------------===//
// AVX carry-less Multiplication instructions
+let isCommutable = 1 in
def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
Sched<[WriteCLMul]>;
def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
(loadv2i64 addr:$src2), imm:$src3))]>,
@@ -7934,15 +7706,16 @@ def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in
def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
(memopv2i64 addr:$src2), imm:$src3))],
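Carry-less multiplication is symmetric in its two sources once the qword-selector bits of the immediate are swapped to match, which is why the register forms can now be marked isCommutable. Usage sketch with the corresponding intrinsic (compile with -mpclmul); bits 0 and 4 of the immediate pick a qword from each source:

#include <wmmintrin.h>

// GF(2)[x] multiply of the low qwords of a and b; 0x11 would multiply
// the high qwords instead.
__m128i clmul_lo(__m128i a, __m128i b) {
  return _mm_clmulepi64_si128(a, b, 0x00);
}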
@@ -7981,7 +7754,7 @@ let Predicates = [HasSSE4A] in {
let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
- (ins VR128:$src, i8imm:$len, i8imm:$idx),
+ (ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
[(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
imm:$idx))]>, PD;
@@ -7992,7 +7765,7 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
VR128:$mask))]>, PD;
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
+ (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
[(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
VR128:$src2, imm:$len, imm:$idx))]>, XD;
@@ -8071,9 +7844,9 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
WriteFShuffle256>, VEX_L;
let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
- int_x86_avx2_vbroadcasti128, WriteLoad>,
- VEX_L;
+def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
+ i128mem, v4i64, loadv2i64,
+ WriteLoad>, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
@@ -8085,12 +7858,12 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR256:$src1, VR128:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[WriteFShuffle]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ (ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
@@ -8115,49 +7888,6 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(INSERT_get_vinsert128_imm VR256:$ins))>;
}
-// Combine two consecutive 16-byte loads with a common destination register into
-// one 32-byte load to that register.
-let Predicates = [HasAVX, HasFastMem32] in {
- def : Pat<(insert_subvector
- (v8f32 (insert_subvector undef, (loadv4f32 addr:$src), (iPTR 0))),
- (loadv4f32 (add addr:$src, (iPTR 16))),
- (iPTR 4)),
- (VMOVUPSYrm addr:$src)>;
-
- def : Pat<(insert_subvector
- (v4f64 (insert_subvector undef, (loadv2f64 addr:$src), (iPTR 0))),
- (loadv2f64 (add addr:$src, (iPTR 16))),
- (iPTR 2)),
- (VMOVUPDYrm addr:$src)>;
-
- def : Pat<(insert_subvector
- (v32i8 (insert_subvector
- undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
- (bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
- (iPTR 16)),
- (VMOVDQUYrm addr:$src)>;
-
- def : Pat<(insert_subvector
- (v16i16 (insert_subvector
- undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
- (bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
- (iPTR 8)),
- (VMOVDQUYrm addr:$src)>;
-
- def : Pat<(insert_subvector
- (v8i32 (insert_subvector
- undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),
- (bc_v4i32 (loadv2i64 (add addr:$src, (iPTR 16)))),
- (iPTR 4)),
- (VMOVDQUYrm addr:$src)>;
-
- def : Pat<(insert_subvector
- (v4i64 (insert_subvector undef, (loadv2i64 addr:$src), (iPTR 0))),
- (loadv2i64 (add addr:$src, (iPTR 16))),
- (iPTR 2)),
- (VMOVDQUYrm addr:$src)>;
-}
-
let Predicates = [HasAVX1Only] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(iPTR imm)),
@@ -8202,12 +7932,12 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
- (ins VR256:$src1, i8imm:$src2),
+ (ins VR256:$src1, u8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, Sched<[WriteFShuffle]>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
- (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ (ins f128mem:$dst, VR256:$src1, u8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, Sched<[WriteStore]>, VEX, VEX_L;
}
@@ -8328,15 +8058,15 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+ (ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
- (ins x86memop_f:$src1, i8imm:$src2),
+ (ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (vt (X86VPermilpi (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
+ (vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
}
@@ -8393,13 +8123,13 @@ def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
//
let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 imm:$src3))))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
@@ -8468,14 +8198,14 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins RC:$src1, i32i8imm:$src2),
+ (ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
TAPD, VEX, Sched<[WriteCvtF2F]>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteCvtF2FLd, WriteRMW] in
def mr : Ii8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
+ (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
TAPD, VEX;
}
@@ -8491,6 +8221,18 @@ let Predicates = [HasF16C] in {
(VCVTPH2PSrm addr:$src)>;
def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
(VCVTPH2PSrm addr:$src)>;
+
+ def : Pat<(store (f64 (vector_extract (bc_v2f64 (v8i16
+ (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
+ addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (i64 (vector_extract (bc_v2i64 (v8i16
+ (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
+ addr:$dst),
+ (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+ def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
+ addr:$dst),
+ (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
}
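The three new patterns let a cvtps2ph result that is immediately stored be selected as the memory form of the instruction. In intrinsics terms the matched sequence looks like this sketch (F16C, -mf16c assumed):

#include <immintrin.h>
#include <stdint.h>

// Convert four floats to half precision and store the low 64 bits;
// with the patterns above this can become a single vcvtps2ph to memory.
static void store4_half(uint16_t *dst, __m128 v) {
  __m128i h = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  _mm_storel_epi64((__m128i *)dst, h);
}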
// Patterns for matching conversions from float to half-float and vice versa.
@@ -8512,38 +8254,31 @@ let Predicates = [HasF16C] in {
// AVX2 Instructions
//===----------------------------------------------------------------------===//
-/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
-multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
+/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, loadv2i64, i128mem>;
-defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, loadv4i64, i256mem>, VEX_L;
-
-def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
- imm:$mask)),
- (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
- imm:$mask)),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
+ VR128, loadv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
+ VR256, loadv4i64, i256mem>, VEX_L;
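VPBLENDD is now selected through the generic X86Blendi node rather than the pblendd intrinsics; at the source level it still corresponds to the dword-blend intrinsics (AVX2):

#include <immintrin.h>

// Compile-time 4-bit mask: bit i set takes element i from b, else from a.
__m128i blend_even_from_b(__m128i a, __m128i b) {
  return _mm_blend_epi32(a, b, 0x5);  // elements 0 and 2 from b
}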
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
@@ -8628,7 +8363,7 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
(VBROADCASTSDYrr VR128:$src)>;
- // Provide aliases for broadcast from the same regitser class that
+ // Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
(VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
@@ -8733,6 +8468,8 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2i64 (X86VBroadcast i64:$src)),
+ (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
//===----------------------------------------------------------------------===//
@@ -8765,14 +8502,14 @@ defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT, X86FoldableSchedWrite Sched> {
def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, i8imm:$src2),
+ (ins VR256:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src1, i8imm:$src2),
+ (ins i256mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -8791,13 +8528,13 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
@@ -8828,12 +8565,12 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
//
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR256:$src1, VR128:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}
@@ -8881,14 +8618,12 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
// VEXTRACTI128 - Extract packed integer values
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
- (ins VR256:$src1, i8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ (ins VR256:$src1, u8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
- (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteStore]>, VEX, VEX_L;
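The register form of VEXTRACTI128 loses its intrinsic-specific selection pattern here; presumably the generic 128-bit subvector-extract patterns cover it instead, and the user-visible intrinsic is unchanged (AVX2):

#include <immintrin.h>

// Extract the upper 128-bit lane; the immediate must be a constant
// 0 (low lane) or 1 (high lane).
__m128i upper_lane(__m256i v) {
  return _mm256_extracti128_si256(v, 1);
}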
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index c706d43..caecf70 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -850,12 +850,12 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
// Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
- return getI8Imm(32 - N->getZExtValue());
+ return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
}]>;
def ROT64L2R_imm8 : SDNodeXForm<imm, [{
// Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
- return getI8Imm(64 - N->getZExtValue());
+ return getI8Imm(64 - N->getZExtValue(), SDLoc(N));
}]>;
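Both transforms rely on the identity rotl(x, n) == rotr(x, W - n) for a W-bit value, letting a left-rotate immediate be re-encoded for the right-rotating BMI2 patterns below. A quick check of the identity (valid for n in 1..31):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

int main() {
  assert(rotl32(0x80000001u, 8) == rotr32(0x80000001u, 32 - 8));  // 0x00000180
  return 0;
}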
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 848eeeb..0350566 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -38,9 +38,6 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
} // SchedRW
-def : Pat<(debugtrap),
- (INT3)>;
-
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
@@ -71,6 +68,10 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
} // SchedRW
+def : Pat<(debugtrap),
+ (INT3)>, Requires<[NotPS4]>;
+def : Pat<(debugtrap),
+ (INT (i8 0x41))>, Requires<[IsPS4]>;
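After this change @llvm.debugtrap still selects int3 everywhere except the PS4 target, which uses "int $0x41". From C the intrinsic is reachable through Clang's builtin:

// Compiled with clang, this emits @llvm.debugtrap, which the patterns
// above select as int3 (or "int $0x41" when targeting the PS4).
void stop_here(void) {
  __builtin_debugtrap();
}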
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
@@ -437,7 +438,9 @@ def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
+let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
+let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
let Defs = [RAX, RDX], Uses = [ECX] in
@@ -485,15 +488,28 @@ let Uses = [RDX, RAX] in {
def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
"xsave\t$dst", []>, TB;
def XSAVE64 : RI<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsave{q|64}\t$dst", []>, TB, Requires<[In64BitMode]>;
+ "xsave64\t$dst", []>, TB, Requires<[In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
"xrstor\t$dst", []>, TB;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstor{q|64}\t$dst", []>, TB, Requires<[In64BitMode]>;
+ "xrstor64\t$dst", []>, TB, Requires<[In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt\t$dst", []>, TB;
+ "xsaveopt\t$dst", []>, PS;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt{q|64}\t$dst", []>, TB, Requires<[In64BitMode]>;
+ "xsaveopt64\t$dst", []>, PS, Requires<[In64BitMode]>;
+
+ def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
+ "xrstors\t$dst", []>, TB;
+ def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
+ "xrstors64\t$dst", []>, TB, Requires<[In64BitMode]>;
+ def XSAVEC : I<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsavec\t$dst", []>, TB;
+ def XSAVEC64 : RI<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
+ "xsavec64\t$dst", []>, TB, Requires<[In64BitMode]>;
+ def XSAVES : I<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
+ "xsaves\t$dst", []>, TB;
+ def XSAVES64 : RI<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
+ "xsaves64\t$dst", []>, TB, Requires<[In64BitMode]>;
}
} // SchedRW
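The save mnemonics drop the {q|64} alias forms in favor of plain xsave64/xrstor64 spellings, and the compacted (XSAVEC) and supervisor (XSAVES/XRSTORS) variants are added. A hedged userland sketch of the plain forms via the intrinsics; the real save-area size must come from CPUID leaf 0Dh, and 4096 here is only an illustrative bound (-mxsave, 64-bit only):

#include <immintrin.h>

// Save/restore x87+SSE+AVX state (component mask 0x7) to a
// 64-byte-aligned area; requires the OS to have enabled the XCR0 bits.
alignas(64) static char xsave_area[4096];

void checkpoint(void) { _xsave64(xsave_area, 0x7ULL); }
void rollback(void)   { _xrstor64(xsave_area, 0x7ULL); }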
@@ -559,7 +575,13 @@ def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
//===----------------------------------------------------------------------===//
// SMAP Instruction
-let Predicates = [HasSMAP], Defs = [EFLAGS] in {
+let Defs = [EFLAGS] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
+
+//===----------------------------------------------------------------------===//
+// SMX Instruction
+let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
+ def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index 45e2ff0..8455b8d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -20,21 +20,23 @@ multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
}
-defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
-defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
-defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
-defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
-defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
-defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
-defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
-defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
-defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
-defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
-defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
-defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
-defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
-defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
-defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
+let ExeDomain = SSEPackedInt in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
+}
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
@@ -47,11 +49,6 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP;
}
-defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
- ssmem, sse_load_f32>;
-defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
- sdmem, sse_load_f64>;
-
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -62,9 +59,6 @@ multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP;
}
-defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
-defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
-
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
@@ -75,8 +69,19 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L;
}
-defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, memopv8f32>;
-defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, memopv4f64>;
+let ExeDomain = SSEPackedSingle in {
+ defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32>;
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64>;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>;
+}
multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -87,28 +92,30 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2))))]>,
XOP_4V, VEX_W;
def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
+ (Int (bitconvert (loadv2i64 addr:$src1)), VR128:$src2))]>,
XOP_4VOp3;
}
-defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
-defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
-defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
-defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
-defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
-defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
-defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
-defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
-defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
-defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
-defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
-defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+let ExeDomain = SSEPackedInt in {
+ defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+ defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+ defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+ defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+ defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+ defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+ defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+}
multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
@@ -119,16 +126,19 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
(ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, XOP;
+ (Int (bitconvert (loadv2i64 addr:$src1)), imm:$src2))]>, XOP;
}
-defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
-defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
-defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
-defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
+let ExeDomain = SSEPackedInt in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
+}
// Instruction where second source can be memory, but third must be register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ let isCommutable = 1 in
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -140,48 +150,66 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>, XOP_4V, VEX_I8IMM;
}
-defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
-defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
-defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
-defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
-defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
-defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
-defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
-defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
-defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
-defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
-defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
-defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
+let ExeDomain = SSEPackedInt in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
+}
// Instruction where second source can be memory, third must be imm8
-multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
+ let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (ins VR128:$src1, VR128:$src2, XOPCC:$cc),
+ !strconcat("vpcom${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, i8immZExt3:$cc))]>,
XOP_4V;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ (ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
+ !strconcat("vpcom${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
- imm:$src3))]>, XOP_4V;
+ (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
+ i8immZExt3:$cc))]>, XOP_4V;
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat("vpcom", Suffix,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_4V;
+ let mayLoad = 1 in
+ def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat("vpcom", Suffix,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_4V;
+ }
}
-defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
-defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
-defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
-defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
-defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
-defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
-defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
-defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+ defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>;
+ defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>;
+ defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>;
+ defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>;
+ defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>;
+ defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>;
+ defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>;
+ defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>;
+}
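The rewrite makes the condition part of the mnemonic through the XOPCC operand (vpcomltb, vpcomequw, and so on), while the _alt forms keep accepting an explicit immediate for the assembler. Assuming the commonly documented 3-bit XOP predicate encoding, the mapping is:

// Illustrative only; i8immZExt3 above restricts the operand to 0..7.
enum XopCond {
  XOP_LT = 0, XOP_LE = 1, XOP_GT = 2, XOP_GE = 3,
  XOP_EQ = 4, XOP_NEQ = 5, XOP_FALSE = 6, XOP_TRUE = 7
};
// e.g. "vpcomltb %xmm2, %xmm1, %xmm0" == "vpcomb $0, %xmm2, %xmm1, %xmm0"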
// Instruction where either second or third source can be memory
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -197,20 +225,22 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2,
- (bitconvert (memopv2i64 addr:$src3))))]>,
+ (bitconvert (loadv2i64 addr:$src3))))]>,
XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>,
XOP_4V, VEX_I8IMM;
}
-defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
-defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+let ExeDomain = SSEPackedInt in {
+ defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+ defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+}
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
@@ -225,19 +255,20 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(Int VR256:$src1, VR256:$src2,
- (bitconvert (memopv4i64 addr:$src3))))]>,
+ (bitconvert (loadv4i64 addr:$src3))))]>,
XOP_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
- (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
+ (Int VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
VR256:$src3))]>,
XOP_4V, VEX_I8IMM, VEX_L;
}
-defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+let ExeDomain = SSEPackedInt in
+ defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
@@ -282,8 +313,11 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
VEX_L;
}
-defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
- int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>;
-defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
- int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>;
+let ExeDomain = SSEPackedDouble in
+ defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
+ int_x86_xop_vpermil2pd_256, loadv2f64, loadv4f64>;
+
+let ExeDomain = SSEPackedSingle in
+ defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
+ int_x86_xop_vpermil2ps_256, loadv4f32, loadv8f32>;
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index b411d07..4af514a 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1,4 +1,4 @@
-//===-- X86IntinsicsInfo.h - X86 Instrinsics ------------*- C++ -*-===//
+//===-- X86IntrinsicsInfo.h - X86 Intrinsics ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,8 +21,9 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM,
- COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND
+ INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
+ EXPAND_FROM_MEM, BLEND
};
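Each table entry pairs an intrinsic number with one of these type tags and up to two selection opcodes; keeping the tables sorted (the alphabetical order the comment below insists on) is what lets lookups binary-search on the intrinsic number. A self-contained sketch of that scheme, not the in-tree helper itself:

#include <algorithm>
#include <cstdint>

struct Entry { uint16_t Id; uint8_t Type; uint16_t Opc0, Opc1; };

// Must stay sorted by Id, or lower_bound silently returns wrong entries.
static const Entry Table[] = { {3, 1, 10, 0}, {7, 2, 20, 0}, {9, 1, 30, 5} };

static const Entry *lookup(uint16_t Id) {
  const Entry *I = std::lower_bound(
      std::begin(Table), std::end(Table), Id,
      [](const Entry &E, uint16_t Key) { return E.Id < Key; });
  return (I != std::end(Table) && I->Id == Id) ? I : nullptr;
}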
struct IntrinsicData {
@@ -171,77 +172,101 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
- X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
- X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
- X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
- X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
- X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
- X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psign_w, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_add_ps_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_ps_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_and_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
@@ -260,18 +285,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM,
+ X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
@@ -297,6 +330,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
+ X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_ps_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_ps_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
+ X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
+ X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
+ X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -321,6 +366,96 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
+ X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
+ X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
+ X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
+ X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
+ X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
+ X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
+ X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
+ X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
+ X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ps_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ps_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
+ X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FMUL,
+ X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FMUL,
+ X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packsswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packsswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packsswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packusdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packusdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packusdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_d_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_d_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_d_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_q_128, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_q_256, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pandn_q_512, INTR_TYPE_2OP_MASK, X86ISD::ANDNP, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -345,6 +480,33 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -363,6 +525,52 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
+ X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ps_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ps_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
+ X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FSUB,
+ X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FSUB,
+ X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
@@ -375,10 +583,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
@@ -391,35 +605,85 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_max_ps_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx_rcp_ps_256, INTR_TYPE_1OP, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
+ X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512, FMA_OP_MASK, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256, FMA_OP_MASK, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512, FMA_OP_MASK, X86ISD::FMSUB,
+ X86ISD::FMSUB_RND),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB , 0),
- X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
+ X86ISD::FMSUBADD_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
+ X86ISD::FNMADD_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
+ X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_pd, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_pd, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
@@ -501,6 +765,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),
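Note: the entries above extend the sorted IntrinsicsWithoutChain table, which maps an intrinsic ID to its lowering kind and its target opcodes (plus an optional rounding-mode opcode). A minimal standalone sketch of how such a sorted table can be queried with a binary search; the IDs, type codes, and opcodes below are toy placeholders, not the real enum values:

#include <algorithm>
#include <cstdio>
#include <iterator>

struct IntrinsicData {
  unsigned Id;   // stands in for the Intrinsic::* enum value
  unsigned Type; // stands in for INTR_TYPE_2OP_MASK and friends
  unsigned Opc0; // primary ISD/X86ISD opcode
  unsigned Opc1; // optional rounding-mode opcode, 0 if unused
  bool operator<(const IntrinsicData &RHS) const { return Id < RHS.Id; }
};

// Must stay sorted by Id, just like the real table.
static const IntrinsicData Table[] = {
    {10, 1, 100, 0}, {17, 2, 205, 206}, {42, 1, 300, 0},
};

static const IntrinsicData *lookup(unsigned IntNo) {
  const IntrinsicData Key = {IntNo, 0, 0, 0};
  const IntrinsicData *I =
      std::lower_bound(std::begin(Table), std::end(Table), Key);
  return (I != std::end(Table) && I->Id == IntNo) ? I : nullptr;
}

int main() {
  if (const IntrinsicData *D = lookup(17))
    std::printf("type=%u opc0=%u opc1=%u\n", D->Type, D->Opc0, D->Opc1);
  return 0;
}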
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 63ccded..556b518 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
@@ -74,11 +75,11 @@ namespace llvm {
X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {}
void
- X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &MF) {
+ X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) {
+ MF = &F;
CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
- *TM.getSubtargetImpl()->getInstrInfo(),
- *TM.getSubtargetImpl()->getRegisterInfo(), *TM.getSubtargetImpl(),
- MF.getContext()));
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo(), MF->getContext()));
}
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
@@ -87,7 +88,7 @@ namespace llvm {
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
- CodeEmitter->EncodeInstruction(Inst, VecOS, Fixups, STI);
+ CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
VecOS.flush();
CurrentShadowSize += Code.size();
if (CurrentShadowSize >= RequiredShadowSize)
@@ -100,20 +101,20 @@ namespace llvm {
if (InShadow && CurrentShadowSize < RequiredShadowSize) {
InShadow = false;
EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
- TM.getSubtarget<X86Subtarget>().is64Bit(), STI);
+ MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
}
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer.EmitInstruction(Inst, getSubtargetInfo());
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo());
}
} // end llvm namespace
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
-: Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
- MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
+ : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
+ AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
@@ -124,7 +125,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
SmallString<128> Name;
@@ -154,14 +155,17 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
} else if (MO.isSymbol()) {
- getMang()->getNameWithPrefix(Name, MO.getSymbolName());
+ if (MO.getTargetFlags() == X86II::MO_NOPREFIX)
+ Name += MO.getSymbolName();
+ else
+ getMang()->getNameWithPrefix(Name, MO.getSymbolName());
} else if (MO.isMBB()) {
Name += MO.getMBB()->getSymbol()->getName();
}
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
@@ -208,7 +212,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
} else {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
+ StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
}
break;
}
@@ -231,6 +235,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
case X86II::MO_DARWIN_STUB:
+ case X86II::MO_NOPREFIX:
break;
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -270,8 +275,8 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
// section so we are restricting it to jumptable references.
- MCSymbol *Label = Ctx.CreateTempSymbol();
- AsmPrinter.OutStreamer.EmitAssignment(Label, Expr);
+ MCSymbol *Label = Ctx.createTempSymbol();
+ AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
Expr = MCSymbolRefExpr::Create(Label, Ctx);
}
break;
@@ -284,7 +289,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
@@ -408,10 +413,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
- MCOp = MCOperand::CreateReg(MO.getReg());
+ MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
@@ -509,6 +514,7 @@ ReSimplify:
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
+ case X86::TAILJMPr64_REX:
case X86::CALL64r:
case X86::CALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
@@ -682,7 +688,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
- MCContext &context = OutStreamer.getContext();
+ MCContext &context = OutStreamer->getContext();
if (needsPadding)
EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
@@ -709,28 +715,28 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
MCInst LEA;
if (is64Bits) {
LEA.setOpcode(X86::LEA64r);
- LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
- LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
- LEA.addOperand(MCOperand::CreateImm(1)); // scale
- LEA.addOperand(MCOperand::CreateReg(0)); // index
- LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
- LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::createImm(1)); // scale
+ LEA.addOperand(MCOperand::createReg(0)); // index
+ LEA.addOperand(MCOperand::createExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::createReg(0)); // seg
} else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
LEA.setOpcode(X86::LEA32r);
- LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
- LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base
- LEA.addOperand(MCOperand::CreateImm(1)); // scale
- LEA.addOperand(MCOperand::CreateReg(0)); // index
- LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
- LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
+ LEA.addOperand(MCOperand::createImm(1)); // scale
+ LEA.addOperand(MCOperand::createReg(0)); // index
+ LEA.addOperand(MCOperand::createExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::createReg(0)); // seg
} else {
LEA.setOpcode(X86::LEA32r);
- LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
- LEA.addOperand(MCOperand::CreateReg(0)); // base
- LEA.addOperand(MCOperand::CreateImm(1)); // scale
- LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
- LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
- LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::createReg(0)); // base
+ LEA.addOperand(MCOperand::createImm(1)); // scale
+ LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::createExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::createReg(0)); // seg
}
EmitAndCountInstruction(LEA);
@@ -741,7 +747,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
}
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
- MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
MCSymbolRefExpr::Create(tlsGetAddr,
MCSymbolRefExpr::VK_PLT,
@@ -803,51 +809,53 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu
} // while (NumBytes)
}
-static void LowerSTATEPOINT(MCStreamer &OS, StackMaps &SM,
- const MachineInstr &MI, bool Is64Bit,
- const TargetMachine& TM,
- const MCSubtargetInfo& STI,
- X86MCInstLower &MCInstLowering) {
- assert(Is64Bit && "Statepoint currently only supports X86-64");
-
- // Lower call target and choose correct opcode
- const MachineOperand &call_target = StatepointOpers(&MI).getCallTarget();
- MCOperand call_target_mcop;
- unsigned call_opcode;
- switch (call_target.getType()) {
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- call_target_mcop = MCInstLowering.LowerSymbolOperand(
- call_target,
- MCInstLowering.GetSymbolFromOperand(call_target));
- call_opcode = X86::CALL64pcrel32;
- // Currently, we only support relative addressing with statepoints.
- // Otherwise, we'll need a scratch register to hold the target
- // address. You'll fail asserts during load & relocation if this
- // symbol is to far away. (TODO: support non-relative addressing)
- break;
- case MachineOperand::MO_Immediate:
- call_target_mcop = MCOperand::CreateImm(call_target.getImm());
- call_opcode = X86::CALL64pcrel32;
- // Currently, we only support relative addressing with statepoints.
- // Otherwise, we'll need a scratch register to hold the target
- // immediate. You'll fail asserts during load & relocation if this
- // address is to far away. (TODO: support non-relative addressing)
- break;
- case MachineOperand::MO_Register:
- call_target_mcop = MCOperand::CreateReg(call_target.getReg());
- call_opcode = X86::CALL64r;
- break;
- default:
- llvm_unreachable("Unsupported operand type in statepoint call target");
- break;
- }
+void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
+
+ StatepointOpers SOpers(&MI);
+ if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
+ EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
+ getSubtargetInfo());
+ } else {
+ // Lower call target and choose correct opcode
+ const MachineOperand &CallTarget = SOpers.getCallTarget();
+ MCOperand CallTargetMCOp;
+ unsigned CallOpcode;
+ switch (CallTarget.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ CallTargetMCOp = MCIL.LowerSymbolOperand(
+ CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
+ CallOpcode = X86::CALL64pcrel32;
+ // Currently, we only support relative addressing with statepoints.
+ // Otherwise, we'll need a scratch register to hold the target
+ // address. You'll fail asserts during load & relocation if this
+      // symbol is too far away. (TODO: support non-relative addressing)
+ break;
+ case MachineOperand::MO_Immediate:
+ CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
+ CallOpcode = X86::CALL64pcrel32;
+ // Currently, we only support relative addressing with statepoints.
+ // Otherwise, we'll need a scratch register to hold the target
+ // immediate. You'll fail asserts during load & relocation if this
+      // address is too far away. (TODO: support non-relative addressing)
+ break;
+ case MachineOperand::MO_Register:
+ CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
+ CallOpcode = X86::CALL64r;
+ break;
+ default:
+ llvm_unreachable("Unsupported operand type in statepoint call target");
+ break;
+ }
- // Emit call
- MCInst call_inst;
- call_inst.setOpcode(call_opcode);
- call_inst.addOperand(call_target_mcop);
- OS.EmitInstruction(call_inst, STI);
+ // Emit call
+ MCInst CallInst;
+ CallInst.setOpcode(CallOpcode);
+ CallInst.addOperand(CallTargetMCOp);
+ OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
+ }
// Record our statepoint node in the same section used by STACKMAP
// and PATCHPOINT
@@ -858,7 +866,7 @@ static void LowerSTATEPOINT(MCStreamer &OS, StackMaps &SM,
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
- SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
SM.recordStackMap(MI);
unsigned NumShadowBytes = MI.getOperand(1).getImm();
SMShadowTracker.reset(NumShadowBytes);
@@ -866,18 +874,40 @@ void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
-void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI) {
+void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
- SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
SM.recordPatchPoint(MI);
PatchPointOpers opers(&MI);
unsigned ScratchIdx = opers.getNextScratchIdx();
unsigned EncodedBytes = 0;
- int64_t CallTarget = opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
- if (CallTarget) {
+ const MachineOperand &CalleeMO =
+ opers.getMetaOper(PatchPointOpers::TargetPos);
+
+ // Check for null target. If target is non-null (i.e. is non-zero or is
+ // symbolic) then emit a call.
+ if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
+ MCOperand CalleeMCOp;
+ switch (CalleeMO.getType()) {
+ default:
+ /// FIXME: Add a verifier check for bad callee types.
+ llvm_unreachable("Unrecognized callee operand type.");
+ case MachineOperand::MO_Immediate:
+ if (CalleeMO.getImm())
+ CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ CalleeMCOp =
+ MCIL.LowerSymbolOperand(CalleeMO,
+ MCIL.GetSymbolFromOperand(CalleeMO));
+ break;
+ }
+
// Emit MOV to materialize the target address and the CALL to target.
// This is encoded with 12-13 bytes, depending on which register is used.
unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
@@ -885,16 +915,18 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI) {
EncodedBytes = 13;
else
EncodedBytes = 12;
- EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg)
- .addImm(CallTarget));
+
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
+
// Emit padding.
unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
- EmitNops(OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
+ EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
getSubtargetInfo());
}
@@ -988,8 +1020,7 @@ static std::string getShuffleComment(const MachineOperand &DstOp,
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
- const X86RegisterInfo *RI = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -997,7 +1028,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Emit nothing here but a comment if we can.
case X86::Int_MemBarrier:
- OutStreamer.emitRawComment("MEMBARRIER");
+ OutStreamer->emitRawComment("MEMBARRIER");
return;
@@ -1005,15 +1036,21 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::EH_RETURN64: {
// Lower these as normal, but add some comments.
unsigned Reg = MI->getOperand(0).getReg();
- OutStreamer.AddComment(StringRef("eh_return, addr: %") +
- X86ATTInstPrinter::getRegisterName(Reg));
+ OutStreamer->AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
break;
}
case X86::TAILJMPr:
+ case X86::TAILJMPm:
case X86::TAILJMPd:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
case X86::TAILJMPd64:
+ case X86::TAILJMPr64_REX:
+ case X86::TAILJMPm64_REX:
+ case X86::TAILJMPd64_REX:
// Lower these as normal, but add some comments.
- OutStreamer.AddComment("TAILCALL");
+ OutStreamer->AddComment("TAILCALL");
break;
case X86::TLS_addr32:
@@ -1037,7 +1074,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
- OutStreamer.EmitLabel(PICBase);
+ OutStreamer->EmitLabel(PICBase);
// popl $reg
EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
@@ -1057,8 +1094,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// MYGLOBAL + (. - PICBASE)
// However, we can't generate a ".", so just emit a new label here and refer
// to it.
- MCSymbol *DotSym = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(DotSym);
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(DotSym);
// Now that we have emitted the label, lower the complex operand expression.
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
@@ -1078,14 +1115,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case TargetOpcode::STATEPOINT:
- return LowerSTATEPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), TM,
- getSubtargetInfo(), MCInstLowering);
+ return LowerSTATEPOINT(*MI, MCInstLowering);
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(*MI);
+ return LowerPATCHPOINT(*MI, MCInstLowering);
case X86::MORESTACK_RET:
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
@@ -1100,34 +1136,34 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
case X86::SEH_PushReg:
- OutStreamer.EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
+ OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
return;
case X86::SEH_SaveReg:
- OutStreamer.EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
- MI->getOperand(1).getImm());
+ OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
return;
case X86::SEH_SaveXMM:
- OutStreamer.EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
- MI->getOperand(1).getImm());
+ OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
return;
case X86::SEH_StackAlloc:
- OutStreamer.EmitWinCFIAllocStack(MI->getOperand(0).getImm());
+ OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
return;
case X86::SEH_SetFrame:
- OutStreamer.EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
- MI->getOperand(1).getImm());
+ OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
return;
case X86::SEH_PushFrame:
- OutStreamer.EmitWinCFIPushFrame(MI->getOperand(0).getImm());
+ OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
return;
case X86::SEH_EndPrologue:
- OutStreamer.EmitWinCFIEndProlog();
+ OutStreamer->EmitWinCFIEndProlog();
return;
case X86::SEH_Epilogue: {
@@ -1151,7 +1187,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::PSHUFBrm:
case X86::VPSHUFBrm:
case X86::VPSHUFBYrm: {
- if (!OutStreamer.isVerboseAsm())
+ if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() > 5 &&
"We should always have at least 5 operands!");
@@ -1163,7 +1199,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
}
break;
}
@@ -1171,7 +1207,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMILPDrm:
case X86::VPERMILPSYrm:
case X86::VPERMILPDYrm: {
- if (!OutStreamer.isVerboseAsm())
+ if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() > 5 &&
"We should always have at least 5 operands!");
@@ -1183,7 +1219,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, Mask);
if (!Mask.empty())
- OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
}
break;
}
@@ -1208,7 +1244,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::MOVDQUrm:
case X86::VMOVDQUrm:
case X86::VMOVDQUYrm:
- if (!OutStreamer.isVerboseAsm())
+ if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() > 4)
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
@@ -1231,7 +1267,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << "?";
}
CS << "]";
- OutStreamer.AddComment(CS.str());
+ OutStreamer->AddComment(CS.str());
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
@@ -1251,7 +1287,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer.AddComment(CS.str());
+ OutStreamer->AddComment(CS.str());
}
}
break;
@@ -1269,9 +1305,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SMShadowTracker.count(TmpInst, getSubtargetInfo());
// Then flush the shadow so that we fill with nops before the call, not
// after it.
- SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
// Then emit the call
- OutStreamer.EmitInstruction(TmpInst, getSubtargetInfo());
+ OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
return;
}
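Note: the X86MCInstLower.cpp hunks above are mostly mechanical API renames (MCOperand::CreateReg to createReg, OutStreamer becoming a pointer), but they also show the stackmap shadow bookkeeping. A self-contained sketch of that idea, with assumed names rather than LLVM's:

#include <cstdio>

// Toy version of the shadow tracking: after a STACKMAP requests a shadow of
// N bytes, count the bytes of each following instruction and pad with NOPs
// if the next stackmap arrives before real code has covered the shadow.
class ShadowTracker {
  unsigned RequiredSize = 0, CurrentSize = 0;
  bool InShadow = false;

public:
  void reset(unsigned RequiredBytes) { // a STACKMAP opened a shadow
    RequiredSize = RequiredBytes;
    CurrentSize = 0;
    InShadow = true;
  }
  void count(unsigned EncodedBytes) { // called for each emitted instruction
    CurrentSize += EncodedBytes;
    if (CurrentSize >= RequiredSize)
      InShadow = false; // enough real code covers the shadow
  }
  void emitPadding() { // called before the next stackmap
    if (InShadow && CurrentSize < RequiredSize) {
      std::printf("emit %u NOP bytes\n", RequiredSize - CurrentSize);
      InShadow = false;
    }
  }
};

int main() {
  ShadowTracker T;
  T.reset(8);      // stackmap asks for an 8-byte shadow
  T.count(5);      // one 5-byte instruction follows
  T.emitPadding(); // 3 bytes short, so 3 NOP bytes are emitted
  return 0;
}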
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 9fd03a7..d598b55 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -50,6 +50,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
+  /// \brief FrameIndex for the frame address slot.
+ int FrameAddrIndex;
+
/// TailCallReturnAddrDelta - The number of bytes by which return address
/// stack slot is moved as the result of tail call optimization.
int TailCallReturnAddrDelta;
@@ -92,6 +95,7 @@ public:
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
ReturnAddrIndex(0),
+ FrameAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
GlobalBaseReg(0),
@@ -109,6 +113,7 @@ public:
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
ReturnAddrIndex(0),
+ FrameAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
GlobalBaseReg(0),
@@ -139,6 +144,9 @@ public:
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+ int getFAIndex() const { return FrameAddrIndex; }
+ void setFAIndex(int Index) { FrameAddrIndex = Index; }
+
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
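Note: FrameAddrIndex follows the same convention as the existing ReturnAddrIndex it sits next to: zero means "no slot created yet", and the first lowering caches the created frame index so later queries reuse it. A toy illustration of that pattern, with assumed helper names:

#include <cstdio>

struct FunctionInfo {
  int FrameAddrIndex = 0; // 0 = not created yet, as in the ctors above
};

static int createFixedSlot() {
  static int NextIndex = -1; // fixed objects get negative indexes in LLVM
  return NextIndex--;
}

static int getOrCreateFrameAddrSlot(FunctionInfo &FI) {
  if (FI.FrameAddrIndex == 0)
    FI.FrameAddrIndex = createFixedSlot(); // create once, cache the index
  return FI.FrameAddrIndex;
}

int main() {
  FunctionInfo FI;
  std::printf("%d %d\n", getOrCreateFrameAddrSlot(FI),
              getOrCreateFrameAddrSlot(FI)); // prints the same index twice
  return 0;
}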
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
index adc05b2..143e70b 100644
--- a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -51,7 +51,7 @@ namespace {
struct PadShortFunc : public MachineFunctionPass {
static char ID;
PadShortFunc() : MachineFunctionPass(ID)
- , Threshold(4), TM(nullptr), TII(nullptr) {}
+ , Threshold(4), STI(nullptr), TII(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -79,7 +79,7 @@ namespace {
// VisitedBBs - Cache of previously visited BBs.
DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
- const TargetMachine *TM;
+ const X86Subtarget *STI;
const TargetInstrInfo *TII;
};
@@ -93,19 +93,16 @@ FunctionPass *llvm::createX86PadShortFunctions() {
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// NOOP instructions before early exits.
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
- const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize)) {
+ if (MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+ MF.getFunction()->hasFnAttribute(Attribute::MinSize)) {
return false;
}
- TM = &MF.getTarget();
- if (!TM->getSubtarget<X86Subtarget>().padShortFunctions())
+ STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->padShortFunctions())
return false;
- TII = TM->getSubtargetImpl()->getInstrInfo();
+ TII = STI->getInstrInfo();
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
@@ -195,8 +192,7 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
return true;
}
- CyclesToEnd += TII->getInstrLatency(
- TM->getSubtargetImpl()->getInstrItineraryData(), MI);
+ CyclesToEnd += TII->getInstrLatency(STI->getInstrItineraryData(), MI);
}
VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
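Note: the X86PadShortFunction.cpp change only swaps the cached TargetMachine for a cached X86Subtarget; the padding rule itself is untouched. Roughly, with toy latencies and assumed names: sum instruction latencies on the path to a return and insert NOOPs when the total falls below the threshold.

#include <cstdio>
#include <vector>

int main() {
  const unsigned Threshold = 4;             // the pass's default, as above
  std::vector<unsigned> Latencies = {1, 1}; // two cheap ops before a return
  unsigned CyclesToReturn = 0;
  for (unsigned L : Latencies)
    CyclesToReturn += L;
  if (CyclesToReturn < Threshold) // 2 < 4: the early exit is too fast
    std::printf("pad with %u cycles of NOOPs before the return\n",
                Threshold - CyclesToReturn);
  return 0;
}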
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 0fa38f4..1f36163 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -53,34 +54,35 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
- : X86GenRegisterInfo(
- (STI.is64Bit() ? X86::RIP : X86::EIP),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true),
- (STI.is64Bit() ? X86::RIP : X86::EIP)),
- Subtarget(STI) {
+X86RegisterInfo::X86RegisterInfo(const Triple &TT)
+ : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true),
+ (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
// Cache some information.
- Is64Bit = Subtarget.is64Bit();
- IsWin64 = Subtarget.isTargetWin64();
+ Is64Bit = TT.isArch64Bit();
+ IsWin64 = Is64Bit && TT.isOSWindows();
+ // Use a callee-saved register as the base pointer. These registers must
+ // not conflict with any ABI requirements. For example, in 32-bit mode PIC
+  // requires the GOT pointer to be in EBX before any function call via the PLT.
if (Is64Bit) {
SlotSize = 8;
- StackPtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
- X86::RSP : X86::ESP;
- FramePtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
- X86::RBP : X86::EBP;
+ // This matches the simplified 32-bit pointer code in the data layout
+ // computation.
+ // FIXME: Should use the data layout?
+ bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32;
+ StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
+ FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
+ BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
+ BasePtr = X86::ESI;
}
- // Use a callee-saved register as the base pointer. These registers must
- // not conflict with any ABI requirements. For example, in 32-bit mode PIC
- // requires GOT in the EBX register before function calls via PLT GOT pointer.
- BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
bool
@@ -119,8 +121,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
-const TargetRegisterClass*
-X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+const TargetRegisterClass *
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
// Don't allow super-classes of GR8_NOREX. This class is only used after
// extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
// to the full GR8 register class in 64-bit mode, so we cannot allow the
@@ -160,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -171,9 +175,9 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (Subtarget.isTargetWin64())
+ if (IsWin64)
return &X86::GR64_TCW64RegClass;
- else if (Subtarget.is64Bit())
+ else if (Is64Bit)
return &X86::GR64_TCRegClass;
const Function *F = MF.getFunction();
@@ -209,7 +213,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case X86::GR64RegClassID:
return 12 - FPDiff;
case X86::VR128RegClassID:
- return Subtarget.is64Bit() ? 10 : 4;
+ return Is64Bit ? 10 : 4;
case X86::VR64RegClassID:
return 4;
}
@@ -217,8 +221,10 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
@@ -252,11 +258,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_SaveList;
+ case CallingConv::X86_64_SysV:
+ if (CallsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
default:
break;
}
- bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -269,8 +280,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_32_SaveList;
}
-const uint32_t*
-X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
@@ -307,6 +320,10 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
break;
default:
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_RegMask;
+ case CallingConv::X86_64_SysV:
+ return CSR_64_RegMask;
}
// Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
@@ -348,13 +365,15 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Set the base-pointer register and its aliases as reserved if needed.
if (hasBasePointer(MF)) {
CallingConv::ID CC = MF.getFunction()->getCallingConv();
- const uint32_t* RegMask = getCallPreservedMask(CC);
+ const uint32_t *RegMask = getCallPreservedMask(MF, CC);
if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
report_fatal_error(
"Stack realignment in presence of dynamic allocas is not supported with"
"this calling convention.");
- for (MCSubRegIterator I(getBaseRegister(), this, /*IncludeSelf=*/true);
+ unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
+ false);
+ for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
I.isValid(); ++I)
Reserved.set(*I);
}
@@ -390,7 +409,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
}
- if (!Is64Bit || !Subtarget.hasAVX512()) {
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
for (unsigned n = 16; n != 32; ++n) {
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
Reserved.set(*AI);
@@ -445,10 +464,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const Function *F = MF.getFunction();
unsigned StackAlign =
MF.getSubtarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
@@ -475,7 +492,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned BasePtr;
unsigned Opc = MI.getOpcode();
- bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
+ bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm ||
+ Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64;
if (hasBasePointer(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
else if (needsStackRealignment(MF))
@@ -485,6 +503,24 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+ // FRAME_ALLOC uses a single offset, with no register. It only works in the
+ // simple FP case, and doesn't work with stack realignment. On 32-bit, the
+ // offset is from the traditional base pointer location. On 64-bit, the
+ // offset is from the SP at the end of the prologue, not the FP location. This
+ // matches the behavior of llvm.frameaddress.
+ if (Opc == TargetOpcode::FRAME_ALLOC) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ int Offset;
+ if (IsWinEH)
+ Offset = static_cast<const X86FrameLowering *>(TFI)
+ ->getFrameIndexOffsetFromSP(MF, FrameIndex);
+ else
+ Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ FI.ChangeToImmediate(Offset);
+ return;
+ }
+
// For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
@@ -536,8 +572,9 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
-unsigned X86RegisterInfo::getPtrSizedFrameRegister(
- const MachineFunction &MF) const {
+unsigned
+X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
unsigned FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
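Note: the rewritten X86RegisterInfo constructor derives everything from the Triple instead of an X86Subtarget, including the x32 (GNUX32) special case. A simplified mirror of the new register selection; the enum below is a toy stand-in, not the real X86:: register values:

#include <cstdio>

enum Reg { RSP, RBP, RBX, ESP, EBP, EBX, ESI };

struct TripleLike {
  bool Arch64Bit;
  bool GNUX32Env; // stands in for getEnvironment() == Triple::GNUX32
};

int main() {
  TripleLike TT = {true, false};
  Reg StackPtr, FramePtr, BasePtr;
  unsigned SlotSize;
  if (TT.Arch64Bit) {
    SlotSize = 8;
    bool Use64BitReg = !TT.GNUX32Env; // x32 keeps 32-bit pointer registers
    StackPtr = Use64BitReg ? RSP : ESP;
    FramePtr = Use64BitReg ? RBP : EBP;
    BasePtr  = Use64BitReg ? RBX : EBX;
  } else {
    SlotSize = 4;
    StackPtr = ESP;
    FramePtr = EBP;
    BasePtr  = ESI;
  }
  std::printf("slot=%u regs=%d/%d/%d\n", SlotSize, StackPtr, FramePtr,
              BasePtr);
  return 0;
}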
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 406b1fc..a714c2a 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -20,14 +20,9 @@
#include "X86GenRegisterInfo.inc"
namespace llvm {
- class Type;
- class TargetInstrInfo;
- class X86Subtarget;
+ class Triple;
class X86RegisterInfo final : public X86GenRegisterInfo {
-public:
- const X86Subtarget &Subtarget;
-
private:
/// Is64Bit - Is the target 64-bits.
///
@@ -55,7 +50,7 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(const X86Subtarget &STI);
+ X86RegisterInfo(const Triple &TT);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -76,8 +71,9 @@ public:
getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
@@ -98,7 +94,8 @@ public:
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
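Note: the header change drops the public Subtarget member, so a single X86RegisterInfo instance can serve functions compiled for different subtargets; every hook that needs subtarget facts now receives the MachineFunction. The shape of that refactor in toy form (assumed types), reusing the 10-versus-4 pressure limit visible in the .cpp hunk above:

#include <cstdio>

struct Subtarget { bool Is64Bit; };
struct MachineFunctionLike {
  Subtarget ST;
  const Subtarget &getSubtarget() const { return ST; }
};

struct RegisterInfoLike {
  // No cached Subtarget member; each query goes through the function.
  unsigned getRegPressureLimit(const MachineFunctionLike &MF) const {
    return MF.getSubtarget().Is64Bit ? 10 : 4; // per-function answer
  }
};

int main() {
  RegisterInfoLike RI;
  MachineFunctionLike A{{true}}, B{{false}};
  std::printf("%u %u\n", RI.getRegPressureLimit(A),
              RI.getRegPressureLimit(B)); // prints "10 4"
  return 0;
}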
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 30cd09a..2e735fa 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -325,7 +325,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
- return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
@@ -377,7 +377,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
let AltOrderSelect = [{
- return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
+ return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
@@ -469,18 +469,18 @@ def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
-def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
-def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
-def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
-def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
+def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
+def VK2 : RegisterClass<"X86", [v2i1], 8, (add VK1)> {let Size = 8;}
+def VK4 : RegisterClass<"X86", [v4i1], 8, (add VK2)> {let Size = 8;}
+def VK8 : RegisterClass<"X86", [v8i1], 8, (add VK4)> {let Size = 8;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
-def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
-def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
-def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
-def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
+def VK1WM : RegisterClass<"X86", [i1], 8, (sub VK1, K0)> {let Size = 8;}
+def VK2WM : RegisterClass<"X86", [v2i1], 8, (sub VK2, K0)> {let Size = 8;}
+def VK4WM : RegisterClass<"X86", [v4i1], 8, (sub VK4, K0)> {let Size = 8;}
+def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 73a3230..677e824 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -1895,7 +1895,7 @@ def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
// x,m / v,v,m.
def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
- let Latency = 4;
+ let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1, 1];
}
@@ -2014,7 +2014,7 @@ def : InstRW<[WriteFMADDr],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
// 4p forms.
@@ -2031,7 +2031,7 @@ def : InstRW<[WriteFMADDm],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
// 4p forms.
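The regex fix in the two FMA hunks above matters because the alternation does not share a leading prefix across alternatives; a quick worked check, derivable from the patterns themselves:

// Old: "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r"
//   VFMADDSSr132r -> "r132" matches, then the trailing "r": OK
//   VFMADDSSr231r -> remaining "r231r" matches none of r132/231/213: missed
// New: "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r"
//   VFMADDSSr231r and VFMADDSSr213r now match as intended.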
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 821044f..5ca40bc 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -57,12 +57,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<X86Subtarget>();
#ifndef NDEBUG
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
- X86::ECX, X86::EAX, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
+ X86::ECX, X86::EAX, X86::EDI};
assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif
@@ -137,22 +138,22 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
default: // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
- Count = DAG.getIntPtrConstant(SizeVal);
+ Count = DAG.getIntPtrConstant(SizeVal, dl);
break;
}
if (AVT.bitsGT(MVT::i8)) {
unsigned UBytes = AVT.getSizeInBits() / 8;
- Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
BytesLeft = SizeVal % UBytes;
}
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
InFlag);
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
- Count = DAG.getIntPtrConstant(SizeVal);
+ Count = DAG.getIntPtrConstant(SizeVal, dl);
Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
InFlag = Chain.getValue(1);
}
@@ -173,7 +174,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
Count = Size;
EVT CVT = Count.getValueType();
SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl,
+ CVT));
Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
X86::ECX,
Left, InFlag);
@@ -189,27 +191,26 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
Chain = DAG.getMemset(Chain, dl,
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, AddrVT)),
+ DAG.getConstant(Offset, dl, AddrVT)),
Src,
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ DAG.getConstant(BytesLeft, dl, SizeVT),
+ Align, isVolatile, false,
+ DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
return Chain;
}
-SDValue
-X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const {
+SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<X86Subtarget>();
if (!ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
@@ -229,8 +230,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
return SDValue();
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
- X86::ECX, X86::ESI, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
+ X86::ECX, X86::ESI, X86::EDI};
if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
@@ -248,7 +249,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
- SDValue Count = DAG.getIntPtrConstant(CountVal);
+ SDValue Count = DAG.getIntPtrConstant(CountVal, dl);
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag;
@@ -279,11 +280,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
EVT SizeVT = Size.getValueType();
Results.push_back(DAG.getMemcpy(Chain, dl,
DAG.getNode(ISD::ADD, dl, DstVT, Dst,
- DAG.getConstant(Offset, DstVT)),
+ DAG.getConstant(Offset, dl,
+ DstVT)),
DAG.getNode(ISD::ADD, dl, SrcVT, Src,
- DAG.getConstant(Offset, SrcVT)),
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
+ DAG.getConstant(Offset, dl,
+ SrcVT)),
+ DAG.getConstant(BytesLeft, dl, SizeVT),
+ Align, isVolatile, AlwaysInline, false,
DstPtrInfo.getWithOffset(Offset),
SrcPtrInfo.getWithOffset(Offset)));
}
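All of the constant-creation churn above comes from SelectionDAG::getConstant and getIntPtrConstant growing a debug-location parameter. A hypothetical helper showing the new call shape (emitAddOffset is an invented name; the signatures mirror the hunks above):

static SDValue emitAddOffset(SelectionDAG &DAG, SDLoc dl, SDValue Ptr,
                             EVT VT, uint64_t Offset) {
  // The SDLoc now sits between the value and the type.
  return DAG.getNode(ISD::ADD, dl, VT, Ptr,
                     DAG.getConstant(Offset, dl, VT));
}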
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 9831698..1cdab14 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -257,10 +257,8 @@ void X86Subtarget::initializeEnvironment() {
HasVLX = false;
HasADX = false;
HasSHA = false;
- HasSGX = false;
HasPRFCHW = false;
HasRDSEED = false;
- HasSMAP = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemFast = false;
@@ -280,46 +278,7 @@ void X86Subtarget::initializeEnvironment() {
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
-}
-
-static std::string computeDataLayout(const Triple &TT) {
- // X86 is little endian
- std::string Ret = "e";
-
- Ret += DataLayout::getManglingComponent(TT);
- // X86 and x32 have 32 bit pointers.
- if ((TT.isArch64Bit() &&
- (TT.getEnvironment() == Triple::GNUX32 || TT.isOSNaCl())) ||
- !TT.isArch64Bit())
- Ret += "-p:32:32";
-
- // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
- if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // Some ABIs align long double to 128 bits, others to 32.
- if (TT.isOSNaCl())
- ; // No f80
- else if (TT.isArch64Bit() || TT.isOSDarwin())
- Ret += "-f80:128";
- else
- Ret += "-f80:32";
-
- // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
- if (TT.isArch64Bit())
- Ret += "-n8:16:32:64";
- else
- Ret += "-n8:16:32";
-
- // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
- if (!TT.isArch64Bit() && TT.isOSWindows())
- Ret += "-S32";
- else
- Ret += "-S128";
-
- return Ret;
+ UseSoftFloat = false;
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -334,16 +293,16 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TargetTriple(TT),
- DL(computeDataLayout(TargetTriple)),
StackAlignOverride(StackAlignOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(DL), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo(TM), FrameLowering(TargetFrameLowering::StackGrowsDown,
- getStackAlignment(), is64Bit() ? -8 : -4) {
+ TSInfo(*TM.getDataLayout()),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
+ is64Bit() ? -8 : -4) {
// Determine the PICStyle based on the target selected.
if (TM.getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index e0263d6..455dd77 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -31,7 +31,7 @@ class GlobalValue;
class StringRef;
class TargetMachine;
-/// PICStyles - The X86 backend supports a number of different styles of PIC.
+/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
enum Style {
@@ -58,105 +58,95 @@ protected:
Others, IntelAtom, IntelSLM
};
- /// X86ProcFamily - X86 processor family: Intel Atom, and others
+ /// X86 processor family: Intel Atom, and others
X86ProcFamilyEnum X86ProcFamily;
- /// PICStyle - Which PIC style to use
- ///
+ /// Which PIC style to use
PICStyles::Style PICStyle;
- /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
- /// none supported.
+ /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
- /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
- ///
+ /// 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
- /// HasCMov - True if this processor has conditional move instructions
+ /// True if this processor has conditional move instructions
/// (generally pentium pro+).
bool HasCMov;
- /// HasX86_64 - True if the processor supports X86-64 instructions.
- ///
+ /// True if the processor supports X86-64 instructions.
bool HasX86_64;
- /// HasPOPCNT - True if the processor supports POPCNT.
+ /// True if the processor supports POPCNT.
bool HasPOPCNT;
- /// HasSSE4A - True if the processor supports SSE4A instructions.
+ /// True if the processor supports SSE4A instructions.
bool HasSSE4A;
- /// HasAES - Target has AES instructions
+ /// Target has AES instructions
bool HasAES;
- /// HasPCLMUL - Target has carry-less multiplication
+ /// Target has carry-less multiplication
bool HasPCLMUL;
- /// HasFMA - Target has 3-operand fused multiply-add
+ /// Target has 3-operand fused multiply-add
bool HasFMA;
- /// HasFMA4 - Target has 4-operand fused multiply-add
+ /// Target has 4-operand fused multiply-add
bool HasFMA4;
- /// HasXOP - Target has XOP instructions
+ /// Target has XOP instructions
bool HasXOP;
- /// HasTBM - Target has TBM instructions.
+ /// Target has TBM instructions.
bool HasTBM;
- /// HasMOVBE - True if the processor has the MOVBE instruction.
+ /// True if the processor has the MOVBE instruction.
bool HasMOVBE;
- /// HasRDRAND - True if the processor has the RDRAND instruction.
+ /// True if the processor has the RDRAND instruction.
bool HasRDRAND;
- /// HasF16C - Processor has 16-bit floating point conversion instructions.
+ /// Processor has 16-bit floating point conversion instructions.
bool HasF16C;
- /// HasFSGSBase - Processor has FS/GS base insturctions.
+ /// Processor has FS/GS base instructions.
bool HasFSGSBase;
- /// HasLZCNT - Processor has LZCNT instruction.
+ /// Processor has LZCNT instruction.
bool HasLZCNT;
- /// HasBMI - Processor has BMI1 instructions.
+ /// Processor has BMI1 instructions.
bool HasBMI;
- /// HasBMI2 - Processor has BMI2 instructions.
+ /// Processor has BMI2 instructions.
bool HasBMI2;
- /// HasRTM - Processor has RTM instructions.
+ /// Processor has RTM instructions.
bool HasRTM;
- /// HasHLE - Processor has HLE.
+ /// Processor has HLE.
bool HasHLE;
- /// HasADX - Processor has ADX instructions.
+ /// Processor has ADX instructions.
bool HasADX;
- /// HasSHA - Processor has SHA instructions.
+ /// Processor has SHA instructions.
bool HasSHA;
- /// HasSGX - Processor has SGX instructions.
- bool HasSGX;
-
- /// HasPRFCHW - Processor has PRFCHW instructions.
+ /// Processor has PRFCHW instructions.
bool HasPRFCHW;
- /// HasRDSEED - Processor has RDSEED instructions.
+ /// Processor has RDSEED instructions.
bool HasRDSEED;
- /// HasSMAP - Processor has SMAP instructions.
- bool HasSMAP;
-
- /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ /// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
- /// IsSHLDSlow - True if SHLD instructions are slow.
+ /// True if SHLD instructions are slow.
bool IsSHLDSlow;
- /// IsUAMemFast - True if unaligned memory access is fast.
+ /// True if unaligned memory access is fast.
bool IsUAMemFast;
/// True if unaligned 32-byte memory accesses are slow.
@@ -166,37 +156,38 @@ protected:
/// This may require setting a configuration bit in the processor.
bool HasSSEUnalignedMem;
- /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
+ /// True if this processor has the CMPXCHG16B instruction;
/// this is true for most x86-64 chips, but not the first AMD chips.
bool HasCmpxchg16b;
- /// UseLeaForSP - True if the LEA instruction should be used for adjusting
+ /// True if the LEA instruction should be used for adjusting
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
- /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+ /// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
- /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+ /// True if 16-bit divides are significantly faster than
/// 64-bit divisions and should be used when possible.
bool HasSlowDivide64;
- /// PadShortFunctions - True if the short functions should be padded to prevent
+ /// True if short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
- /// CallRegIndirect - True if the Calls with memory reference should be converted
+ /// True if calls with a memory reference should be converted
/// to a register-based indirect call.
bool CallRegIndirect;
- /// LEAUsesAG - True if the LEA instruction inputs have to be ready at
- /// address generation (AG) time.
+
+ /// True if the LEA instruction inputs have to be ready at address generation
+ /// (AG) time.
bool LEAUsesAG;
- /// SlowLEA - True if the LEA instruction with certain arguments is slow
+ /// True if the LEA instruction with certain arguments is slow
bool SlowLEA;
- /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
+ /// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
/// Use the RSQRT* instructions to optimize square root calculations.
@@ -227,7 +218,10 @@ protected:
/// Processor has AVX-512 Vector Length eXtensions
bool HasVLX;
- /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// Use software floating point for code generation.
+ bool UseSoftFloat;
+
+ /// The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -235,26 +229,24 @@ protected:
///
unsigned MaxInlineSizeThreshold;
- /// TargetTriple - What processor and OS we're targeting.
+ /// What processor and OS we're targeting.
Triple TargetTriple;
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
private:
- // Calculates type size & alignment
- const DataLayout DL;
- /// StackAlignOverride - Override the stack alignment.
+ /// Override the stack alignment.
unsigned StackAlignOverride;
- /// In64BitMode - True if compiling for 64-bit, false for 16-bit or 32-bit.
+ /// True if compiling for 64-bit, false for 16-bit or 32-bit.
bool In64BitMode;
- /// In32BitMode - True if compiling for 32-bit, false for 16-bit or 64-bit.
+ /// True if compiling for 32-bit, false for 16-bit or 64-bit.
bool In32BitMode;
- /// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit.
+ /// True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
X86SelectionDAGInfo TSInfo;
@@ -276,7 +268,6 @@ public:
return &TLInfo;
}
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
@@ -287,12 +278,12 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
- /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// Returns the minimum alignment known to hold for the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
@@ -301,7 +292,7 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
private:
- /// \brief Initialize the full set of dependencies so we can use an initializer
+ /// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
@@ -370,10 +361,8 @@ public:
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasSGX() const { return HasSGX; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
- bool hasSMAP() const { return HasSMAP; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
@@ -399,6 +388,7 @@ public:
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
+ bool useSoftFloat() const { return UseSoftFloat; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -406,6 +396,7 @@ public:
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
+ bool isTargetPS4() const { return TargetTriple.isPS4(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
@@ -475,13 +466,11 @@ public:
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM)const;
- /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
- /// current subtarget according to how we should reference it in a non-pcrel
- /// context.
+ /// Classify a blockaddress reference for the current subtarget according to
+ /// how we should reference it in a non-pcrel context.
unsigned char ClassifyBlockAddressReference() const;
- /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
- /// to immediate address.
+ /// Return true if the subtarget allows calls to immediate address.
bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
/// This function returns the name of a function which has an interface
@@ -500,8 +489,7 @@ public:
bool enableEarlyIfConversion() const override;
- /// getInstrItins = Return the instruction itineraries based on the
- /// subtarget selection.
+ /// Return the instruction itineraries based on the subtarget selection.
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
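With the new UseSoftFloat flag, lowering code can make the soft-float decision per function instead of per target machine. A hedged sketch of a query site (shouldUseFPLibCalls is an invented name):

static bool shouldUseFPLibCalls(const MachineFunction &MF) {
  // Derived from the function's "use-soft-float" attribute via the
  // +soft-float subtarget feature (see X86TargetMachine below).
  return MF.getSubtarget<X86Subtarget>().useSoftFloat();
}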
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 1fc6b20..3e5f1d8 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -14,9 +14,10 @@
#include "X86TargetMachine.h"
#include "X86.h"
#include "X86TargetObjectFile.h"
+#include "X86TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -36,10 +37,10 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<TargetLoweringObjectFileMachO>();
}
- if (TT.isOSLinux())
- return make_unique<X86LinuxTargetObjectFile>();
+ if (TT.isOSLinux() || TT.isOSNaCl())
+ return make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return make_unique<TargetLoweringObjectFileELF>();
+ return make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment())
return make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
@@ -47,19 +48,56 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
llvm_unreachable("unknown subtarget type");
}
+static std::string computeDataLayout(const Triple &TT) {
+ // X86 is little endian
+ std::string Ret = "e";
+
+ Ret += DataLayout::getManglingComponent(TT);
+ // X86 and x32 have 32 bit pointers.
+ if ((TT.isArch64Bit() &&
+ (TT.getEnvironment() == Triple::GNUX32 || TT.isOSNaCl())) ||
+ !TT.isArch64Bit())
+ Ret += "-p:32:32";
+
+ // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
+ if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // Some ABIs align long double to 128 bits, others to 32.
+ if (TT.isOSNaCl())
+ ; // No f80
+ else if (TT.isArch64Bit() || TT.isOSDarwin())
+ Ret += "-f80:128";
+ else
+ Ret += "-f80:32";
+
+ // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
+ if (TT.isArch64Bit())
+ Ret += "-n8:16:32:64";
+ else
+ Ret += "-n8:16:32";
+
+ // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
+ if (!TT.isArch64Bit() && TT.isOSWindows())
+ Ret += "-a:0:32-S32";
+ else
+ Ret += "-S128";
+
+ return Ret;
+}
+
/// X86TargetMachine ctor - Create an X86 target.
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options,
+ RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
- // default to hard float ABI
- if (Options.FloatABIType == FloatABI::Default)
- this->Options.FloatABIType = FloatABI::Hard;
-
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
// To prevent that, we emit a trap for 'unreachable' IR instructions.
@@ -74,11 +112,8 @@ X86TargetMachine::~X86TargetMachine() {}
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -92,14 +127,15 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
// function before we can generate a subtarget. We also need to use
// it as a key for the subtarget since that can be the only difference
// between two functions.
- Attribute SFAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
- bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
- ? SFAttr.getValueAsString() == "true"
- : Options.UseSoftFloat;
-
- auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
- : "use-soft-float=false")];
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function, turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
+ auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
@@ -120,15 +156,12 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
cl::init(true));
//===----------------------------------------------------------------------===//
-// X86 Analysis Pass Setup
+// X86 TTI query.
//===----------------------------------------------------------------------===//
-void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our X86 pass. This
- // allows the X86 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createX86TargetTransformInfoPass(this));
+TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); });
}
@@ -147,16 +180,14 @@ public:
return getTM<X86TargetMachine>();
}
- const X86Subtarget &getX86Subtarget() const {
- return *getX86TargetMachine().getSubtargetImpl();
- }
-
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
+ bool addPreISel() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addPreEmitPass() override;
+ void addPreSched2() override;
};
} // namespace
@@ -175,7 +206,8 @@ bool X86PassConfig::addInstSelector() {
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
- if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
+ getOptLevel() != CodeGenOpt::None)
addPass(createCleanupLocalDynamicTLSPass());
addPass(createX86GlobalBaseRegPass());
@@ -188,6 +220,14 @@ bool X86PassConfig::addILPOpts() {
return true;
}
+bool X86PassConfig::addPreISel() {
+ // Only add this pass for 32-bit x86.
+ Triple TT(TM->getTargetTriple());
+ if (TT.getArch() == Triple::x86)
+ addPass(createX86WinEHStatePass());
+ return true;
+}
+
void X86PassConfig::addPreRegAlloc() {
addPass(createX86CallFrameOptimization());
}
@@ -196,8 +236,10 @@ void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
}
+void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
+
void X86PassConfig::addPreEmitPass() {
- if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
if (UseVZeroUpper)
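As a worked example of computeDataLayout above: for an x86_64-unknown-linux-gnu triple, the branches compose the usual 64-bit ELF layout string (assuming DataLayout::getManglingComponent(TT) yields "-m:e" for ELF triples):

//   "e"             little endian
// + "-m:e"          ELF name mangling
//   (64-bit, not x32/NaCl: no "-p:32:32")
// + "-i64:64"       i64 aligned to 64 bits
// + "-f80:128"      long double aligned to 128 bits
// + "-n8:16:32:64"  native integer widths
// + "-S128"         128-bit stack alignment
// => "e-m:e-i64:64-f80:128-n8:16:32:64-S128"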
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 916278c..c9833ed 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -24,22 +24,18 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- X86Subtarget Subtarget;
+ X86Subtarget Subtarget;
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
public:
- X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
-
- const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
- /// \brief Register X86 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index f8bcd61..6bf45c3 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
@@ -46,18 +47,31 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
return TM.getSymbol(GV, Mang);
}
-void
-X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
+const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the GOT entry
+ // from a data section. If there is an additional offset, use
+ // foo@GOTPCREL+4+<offset>.
+ unsigned FinalOff = Offset+MV.getConstant()+4;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
+ const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Off, getContext());
}
-const MCExpr *
-X86LinuxTargetObjectFile::getDebugThreadLocalSymbol(
+const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
const MCSymbol *Sym) const {
return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
}
+void
+X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
const ConstantExpr *CE, Mangler &Mang, const TargetMachine &TM) const {
// We are looking for the difference of two symbols, need a subtraction
@@ -81,14 +95,12 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
SubRHS->getPointerAddressSpace() != 0)
return nullptr;
- // Both ptrtoint instructions must wrap global variables:
+ // Both ptrtoint instructions must wrap global objects:
// - Only global variables are eligible for image relative relocations.
- // - The subtrahend refers to the special symbol __ImageBase, a global.
- const GlobalVariable *GVLHS =
- dyn_cast<GlobalVariable>(SubLHS->getPointerOperand());
- const GlobalVariable *GVRHS =
- dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
- if (!GVLHS || !GVRHS)
+ // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable.
+ const auto *GOLHS = dyn_cast<GlobalObject>(SubLHS->getPointerOperand());
+ const auto *GVRHS = dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
+ if (!GOLHS || !GVRHS)
return nullptr;
// We expect __ImageBase to be a global variable without a section, externally
@@ -101,10 +113,10 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
return nullptr;
// An image-relative, thread-local symbol makes no sense.
- if (GVLHS->isThreadLocal())
+ if (GOLHS->isThreadLocal())
return nullptr;
- return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang),
+ return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
MCSymbolRefExpr::VK_COFF_IMGREL32,
getContext());
}
@@ -137,7 +149,7 @@ static std::string scalarConstantToHexString(const Constant *C) {
return APIntToHexString(AI);
}
-const MCSection *
+MCSection *
X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
if (Kind.isReadOnly()) {
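To make the +4 bias in getIndirectSymViaGOTPCRel above concrete, a worked instance of its arithmetic:

// Accessing foo's GOT entry from a data section with an extra offset of 8:
//   Offset = 8, MV.getConstant() = 0
//   FinalOff = 8 + 0 + 4 = 12
// giving the expression foo@GOTPCREL + 12, built as
// MCBinaryExpr::CreateAdd(SymbolRef(foo, VK_GOTPCREL), Const(12)).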
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index 6a6988a..66366b2 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -30,17 +30,26 @@ namespace llvm {
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
- };
- /// X86LinuxTargetObjectFile - This implementation is used for linux x86
- /// and x86-64.
- class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
+ };
+ /// \brief This implemenatation is used for X86 ELF targets that don't
+ /// have a further specialization.
+ class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
+ /// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
+ /// Native Client on x86 and x86-64.
+ class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
/// \brief This implementation is used for Windows targets on x86 and x86-64.
class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
const MCExpr *
@@ -49,8 +58,8 @@ namespace llvm {
/// \brief Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
- const MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override;
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 67488f7..bbfeba8 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -14,9 +14,9 @@
///
//===----------------------------------------------------------------------===//
-#include "X86.h"
-#include "X86TargetMachine.h"
+#include "X86TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -25,125 +25,22 @@ using namespace llvm;
#define DEBUG_TYPE "x86tti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeX86TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class X86TTI final : public ImmutablePass, public TargetTransformInfo {
- const X86Subtarget *ST;
- const X86TargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- X86TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- X86TTI(const X86TargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeX86TTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaxInterleaveFactor() const override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override;
- unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
-
- unsigned getAddressComputationCost(Type *PtrTy,
- bool IsComplex) const override;
-
- unsigned getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const override;
-
- unsigned getIntImmCost(int64_t) const;
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override;
- bool isLegalMaskedLoad (Type *DataType, int Consecutive) const override;
- bool isLegalMaskedStore(Type *DataType, int Consecutive) const override;
-
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
- "X86 Target Transform Info", true, true, false)
-char X86TTI::ID = 0;
-
-ImmutablePass *
-llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
- return new X86TTI(TM);
-}
-
-
//===----------------------------------------------------------------------===//
//
// X86 cost model.
//
//===----------------------------------------------------------------------===//
-X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
+TargetTransformInfo::PopcntSupportKind
+X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
// TODO: Currently the __builtin_popcount() implementation using SSE3
// instructions is inefficient. Once the problem is fixed, we should
// call ST->hasSSE3() instead of ST->hasPOPCNT().
- return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
+ return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}
-unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasSSE1())
return 0;
@@ -155,7 +52,7 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
return 8;
}
-unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
if (ST->hasAVX512()) return 512;
if (ST->hasAVX()) return 256;
@@ -169,7 +66,13 @@ unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned X86TTI::getMaxInterleaveFactor() const {
+unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // If the loop will not be vectorized, don't interleave the loop.
+ // Let regular unrolling handle it instead, which saves the overflow
+ // check and memory check cost.
+ if (VF == 1)
+ return 1;
+
if (ST->isAtom())
return 1;
@@ -181,10 +84,10 @@ unsigned X86TTI::getMaxInterleaveFactor() const {
return 2;
}
-unsigned X86TTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned X86TTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -250,15 +153,15 @@ unsigned X86TTI::getArithmeticInstrCost(
{ ISD::SHL, MVT::v4i64, 1 },
{ ISD::SRL, MVT::v4i64, 1 },
- { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
- { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
+ { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
- { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
- { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
- { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
- { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
+ { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
@@ -439,17 +342,16 @@ unsigned X86TTI::getArithmeticInstrCost(
return LT.first * 6;
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
-unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) const {
+unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only estimate the cost of reverse and alternate shuffles.
- if (Kind != SK_Reverse && Kind != SK_Alternate)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- if (Kind == SK_Reverse) {
+ if (Kind == TTI::SK_Reverse) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
unsigned Cost = 1;
if (LT.second.getSizeInBits() > 128)
@@ -459,7 +361,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return Cost * LT.first;
}
- if (Kind == SK_Alternate) {
+ if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
@@ -552,13 +454,13 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSEAltShuffleTbl[Idx].Cost;
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -640,7 +542,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
AVX2ConversionTbl[] = {
@@ -759,11 +661,11 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
return AVXConversionTbl[Idx].Cost;
}
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
+unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
@@ -829,11 +731,11 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return LT.first * SSE42CostTbl[Idx].Cost;
}
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
+unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
@@ -853,26 +755,27 @@ unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 0;
}
- return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) const {
+unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) {
assert (Ty->isVectorTy() && "Can only scalarize vectors");
unsigned Cost = 0;
for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
if (Insert)
- Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
if (Extract)
- Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
}
return Cost;
}
-unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const {
+unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -890,10 +793,8 @@ unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
- unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
- VTy->getScalarType(),
- Alignment,
- AddressSpace);
+ unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(),
+ Alignment, AddressSpace);
unsigned SplitCost = getScalarizationOverhead(Src,
Opcode == Instruction::Load,
Opcode==Instruction::Store);
@@ -917,7 +818,60 @@ unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
return Cost;
}
-unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
+ if (!SrcVTy)
+ // For a scalar, take the regular memory op cost without a mask
+ return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
+
+ unsigned NumElem = SrcVTy->getVectorNumElements();
+ VectorType *MaskTy =
+ VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
+ if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
+ (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
+ !isPowerOf2_32(NumElem)) {
+ // Scalarization
+ unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
+ unsigned ScalarCompareCost =
+ getCmpSelInstrCost(Instruction::ICmp,
+ Type::getInt8Ty(getGlobalContext()), NULL);
+ unsigned BranchCost = getCFInstrCost(Instruction::Br);
+ unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
+
+ unsigned ValueSplitCost =
+ getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
+ Opcode == Instruction::Store);
+ unsigned MemopCost =
+ NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+ Alignment, AddressSpace);
+ return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
+ }
+
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+ unsigned Cost = 0;
+ if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+ LT.second.getVectorNumElements() == NumElem)
+ // Promotion requires expand/truncate for data and a shuffle for mask.
+ Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
+ getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0);
+
+ else if (LT.second.getVectorNumElements() > NumElem) {
+ VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
+ LT.second.getVectorNumElements());
+ // Expanding requires filling the mask with zeroes
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
+ }
+ if (!ST->hasAVX512())
+ return Cost + LT.first*4; // Each maskmov costs 4
+
+ // AVX-512 masked load/store is cheaper
+ return Cost+LT.first;
+}
+
+unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -927,11 +881,11 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
if (Ty->isVectorTy() && IsComplex)
return NumVectorInstToHideOverhead;
- return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
+ return BaseT::getAddressComputationCost(Ty, IsComplex);
}
-unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
- bool IsPairwise) const {
+unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
+ bool IsPairwise) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
@@ -1007,23 +961,23 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
}
}
- return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
+ return BaseT::getReductionCost(Opcode, ValTy, IsPairwise);
}
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned X86TTI::getIntImmCost(int64_t Val) const {
+unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
if (Val == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
if (isInt<32>(Val))
- return TCC_Basic;
+ return TTI::TCC_Basic;
- return 2 * TCC_Basic;
+ return 2 * TTI::TCC_Basic;
}
-unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1035,10 +989,10 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
// Fixme: Create a cost model for types larger than i128 once the codegen
// issues have been fixed.
if (BitSize > 128)
- return TCC_Free;
+ return TTI::TCC_Free;
if (Imm == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
// Sign-extend all constants to a multiple of 64-bit.
APInt ImmVal = Imm;
@@ -1057,26 +1011,27 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
return std::max(1U, Cost);
}
-unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const {
+unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
unsigned ImmIdx = ~0U;
switch (Opcode) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr. This prevents the
// creation of new constants for every base constant that gets constant
// folded with the offset.
if (Idx == 0)
- return 2 * TCC_Basic;
- return TCC_Free;
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
case Instruction::Store:
ImmIdx = 0;
break;
@@ -1098,7 +1053,7 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
case Instruction::LShr:
case Instruction::AShr:
if (Idx == 1)
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Instruction::Trunc:
case Instruction::ZExt:
@@ -1116,27 +1071,28 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
- return (Cost <= NumConstants * TCC_Basic)
- ? static_cast<unsigned>(TCC_Free)
- : Cost;
+ unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TTI::TCC_Basic)
+ ? static_cast<unsigned>(TTI::TCC_Free)
+ : Cost;
}
- return X86TTI::getIntImmCost(Imm, Ty);
+ return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) const {
+unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return TCC_Free;
+ return TTI::TCC_Free;
switch (IID) {
- default: return TCC_Free;
+ default:
+ return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -1144,22 +1100,22 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TCC_Free;
+ return TTI::TCC_Free;
break;
}
- return X86TTI::getIntImmCost(Imm, Ty);
+ return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-bool X86TTI::isLegalMaskedLoad(Type *DataTy, int Consecutive) const {
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
int DataWidth = DataTy->getPrimitiveSizeInBits();
// Todo: AVX512 allows gather/scatter, works with strided and random as well
@@ -1170,7 +1126,7 @@ bool X86TTI::isLegalMaskedLoad(Type *DataTy, int Consecutive) const {
return false;
}
-bool X86TTI::isLegalMaskedStore(Type *DataType, int Consecutive) const {
+bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
return isLegalMaskedLoad(DataType, Consecutive);
}
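The BaseT:: fallbacks above are the point of the new design: BasicTTIImplBase is a CRTP base, so delegation is resolved statically rather than through the old ImmutablePass/virtual-dispatch stack. A reduced, self-contained illustration of the pattern (BasicTTIBase and MyTTI are invented names, not LLVM classes):

#include <cstdio>

template <typename T> struct BasicTTIBase {
  // Target-independent default.
  unsigned getShuffleCost(int Kind) { return 1; }
  unsigned getCombinedCost(int Kind) {
    // Static dispatch into the most-derived class; no vtables involved.
    return static_cast<T *>(this)->getShuffleCost(Kind) + 1;
  }
};

struct MyTTI : BasicTTIBase<MyTTI> {
  typedef BasicTTIBase<MyTTI> BaseT;
  unsigned getShuffleCost(int Kind) {
    if (Kind == 0)
      return 3;                          // target-specific answer
    return BaseT::getShuffleCost(Kind);  // explicit fallback, as in X86TTIImpl
  }
};

int main() {
  MyTTI TTI;
  std::printf("%u %u\n", TTI.getCombinedCost(0), TTI.getCombinedCost(1));
  return 0; // prints "4 2": derived answer for Kind 0, base default for Kind 1
}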
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
new file mode 100644
index 0000000..e570bb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -0,0 +1,112 @@
+//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a TargetTransformInfo::Concept conforming object specific to the
+/// X86 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
+ typedef BasicTTIImplBase<X86TTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
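+  // BasicTTIImplBase uses CRTP: BaseT dispatches back into this class, and the
+  // friend declaration lets it reach the private getST()/getTLI() hooks below.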
+
+ const X86Subtarget *ST;
+ const X86TargetLowering *TLI;
+
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+
+ const X86Subtarget *getST() const { return ST; }
+ const X86TargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit X86TTIImpl(const X86TargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ X86TTIImpl(const X86TTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ X86TTIImpl(X86TTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ X86TTIImpl &operator=(const X86TTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ X86TTIImpl &operator=(X86TTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+
+ unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex);
+
+ unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
+
+ unsigned getIntImmCost(int64_t);
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ bool isLegalMaskedLoad(Type *DataType, int Consecutive);
+ bool isLegalMaskedStore(Type *DataType, int Consecutive);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 5c01f3e1..6925b27 100644
--- a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -9,7 +9,7 @@
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
-// penalty when tranfering control between AVX encoded instructions and old
+// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
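+// Illustrative sketch (not from this patch): without vzeroupper, mixing AVX
+// and legacy SSE encodings pays a state-transition stall, e.g.
+//   vaddps %ymm0, %ymm1, %ymm2   ; upper YMM state now dirty
+//   vzeroupper                   ; inserted by this pass
+//   call   sse_helper            ; SSE-encoded callee runs penalty-free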
@@ -171,7 +171,7 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
}
/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting vzero upper instructions before function calls.
+/// inserting vzeroupper instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
  // Start by assuming that the block is PASS_THROUGH, which implies no unguarded
@@ -202,7 +202,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// If the call won't clobber any YMM register, skip it as well. It usually
// happens on helper function calls (such as '_chkstk', '_ftol2') where
// standard calling convention is not used (RegMask is not used to mark
- // register clobbered and register usage (def/imp-def/use) is well-dfined
+  // registers clobbered, and register usage (def/imp-def/use) is well-defined
// and explicitly specified.
if (MI->isCall() && !callClobbersAnyYmmReg(MI))
continue;
@@ -245,12 +245,12 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
}
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
-/// vzero upper instructions before function calls.
+/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
- const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || ST.hasAVX512())
return false;
- TII = MF.getSubtarget().getInstrInfo();
+ TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
@@ -281,8 +281,8 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
// Process all blocks. This will compute block exit states, record the first
// unguarded call in each block, and add successors of dirty blocks to the
// DirtySuccessors list.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- processBasicBlock(*I);
+ for (MachineBasicBlock &MBB : MF)
+ processBasicBlock(MBB);
// If any YMM regs are live in to this function, add the entry block to the
// DirtySuccessors list
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
new file mode 100644
index 0000000..4efaada
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -0,0 +1,374 @@
+//===-- X86WinEHState - Insert EH state updates for win32 exceptions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// All functions using an MSVC EH personality use an explicitly updated state
+// number stored in an exception registration stack object. The registration
+// object is linked into a thread-local chain of registrations stored at fs:00.
+// This pass adds the registration object and EH state updates.
+//
+//===----------------------------------------------------------------------===//
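+//
+// Illustrative sketch (not emitted verbatim): linking and unlinking against
+// the chain head at fs:00 is conceptually
+//
+//   prologue:  RegNode.Next = [fs:00];  [fs:00] = &RegNode;
+//   epilogue:  [fs:00] = RegNode.Next;
+//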
+
+#include "X86.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "winehstate"
+
+namespace {
+class WinEHStatePass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ WinEHStatePass() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doInitialization(Module &M) override;
+
+ bool doFinalization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override {
+ return "Windows 32-bit x86 EH state insertion";
+ }
+
+private:
+ void emitExceptionRegistrationRecord(Function *F);
+
+ void linkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode,
+ Value *Handler);
+ void unlinkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode);
+
+ Value *emitEHLSDA(IRBuilder<> &Builder, Function *F);
+
+ Function *generateLSDAInEAXThunk(Function *ParentFunc);
+
+ // Module-level type getters.
+ Type *getEHRegistrationType();
+ Type *getSEH3RegistrationType();
+ Type *getSEH4RegistrationType();
+ Type *getCXXEH3RegistrationType();
+
+ // Per-module data.
+ Module *TheModule = nullptr;
+ StructType *EHRegistrationTy = nullptr;
+ StructType *CXXEH3RegistrationTy = nullptr;
+ StructType *SEH3RegistrationTy = nullptr;
+ StructType *SEH4RegistrationTy = nullptr;
+
+ // Per-function state
+ EHPersonality Personality = EHPersonality::Unknown;
+ Function *PersonalityFn = nullptr;
+};
+}
+
+FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
+
+char WinEHStatePass::ID = 0;
+
+bool WinEHStatePass::doInitialization(Module &M) {
+ TheModule = &M;
+ return false;
+}
+
+bool WinEHStatePass::doFinalization(Module &M) {
+ assert(TheModule == &M);
+ TheModule = nullptr;
+ EHRegistrationTy = nullptr;
+ CXXEH3RegistrationTy = nullptr;
+ SEH3RegistrationTy = nullptr;
+ SEH4RegistrationTy = nullptr;
+ return false;
+}
+
+void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
+ // This pass should only insert a stack allocation, memory accesses, and
+  // llvm.framerecover calls.
+ AU.setPreservesCFG();
+}
+
+bool WinEHStatePass::runOnFunction(Function &F) {
+ // If this is an outlined handler, don't do anything. We'll do state insertion
+ // for it in the parent.
+ StringRef WinEHParentName =
+ F.getFnAttribute("wineh-parent").getValueAsString();
+ if (WinEHParentName != F.getName() && !WinEHParentName.empty())
+ return false;
+
+ // Check the personality. Do nothing if this is not an MSVC personality.
+ LandingPadInst *LP = nullptr;
+ for (BasicBlock &BB : F) {
+ LP = BB.getLandingPadInst();
+ if (LP)
+ break;
+ }
+ if (!LP)
+ return false;
+ PersonalityFn =
+ dyn_cast<Function>(LP->getPersonalityFn()->stripPointerCasts());
+ if (!PersonalityFn)
+ return false;
+ Personality = classifyEHPersonality(PersonalityFn);
+ if (!isMSVCEHPersonality(Personality))
+ return false;
+
+ emitExceptionRegistrationRecord(&F);
+ // FIXME: State insertion.
+
+ // Reset per-function state.
+ PersonalityFn = nullptr;
+ Personality = EHPersonality::Unknown;
+ return true;
+}
+
+/// Get the common EH registration subobject:
+/// typedef _EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE)(
+/// _EXCEPTION_RECORD *, void *, _CONTEXT *, void *);
+/// struct EHRegistrationNode {
+/// EHRegistrationNode *Next;
+/// PEXCEPTION_ROUTINE Handler;
+/// };
+Type *WinEHStatePass::getEHRegistrationType() {
+ if (EHRegistrationTy)
+ return EHRegistrationTy;
+ LLVMContext &Context = TheModule->getContext();
+ EHRegistrationTy = StructType::create(Context, "EHRegistrationNode");
+ Type *FieldTys[] = {
+ EHRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
+ Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
+ };
+ EHRegistrationTy->setBody(FieldTys, false);
+ return EHRegistrationTy;
+}
+
+/// The __CxxFrameHandler3 registration node:
+/// struct CXXExceptionRegistration {
+/// void *SavedESP;
+/// EHRegistrationNode SubRecord;
+/// int32_t TryLevel;
+/// };
+Type *WinEHStatePass::getCXXEH3RegistrationType() {
+ if (CXXEH3RegistrationTy)
+ return CXXEH3RegistrationTy;
+ LLVMContext &Context = TheModule->getContext();
+ Type *FieldTys[] = {
+ Type::getInt8PtrTy(Context), // void *SavedESP
+ getEHRegistrationType(), // EHRegistrationNode SubRecord
+ Type::getInt32Ty(Context) // int32_t TryLevel
+ };
+ CXXEH3RegistrationTy =
+ StructType::create(FieldTys, "CXXExceptionRegistration");
+ return CXXEH3RegistrationTy;
+}
+
+/// The _except_handler3 registration node:
+/// struct EH3ExceptionRegistration {
+/// EHRegistrationNode SubRecord;
+/// void *ScopeTable;
+/// int32_t TryLevel;
+/// };
+Type *WinEHStatePass::getSEH3RegistrationType() {
+ if (SEH3RegistrationTy)
+ return SEH3RegistrationTy;
+ LLVMContext &Context = TheModule->getContext();
+ Type *FieldTys[] = {
+ getEHRegistrationType(), // EHRegistrationNode SubRecord
+ Type::getInt8PtrTy(Context), // void *ScopeTable
+ Type::getInt32Ty(Context) // int32_t TryLevel
+ };
+ SEH3RegistrationTy = StructType::create(FieldTys, "EH3ExceptionRegistration");
+ return SEH3RegistrationTy;
+}
+
+/// The _except_handler4 registration node:
+/// struct EH4ExceptionRegistration {
+/// void *SavedESP;
+/// _EXCEPTION_POINTERS *ExceptionPointers;
+/// EHRegistrationNode SubRecord;
+/// int32_t EncodedScopeTable;
+/// int32_t TryLevel;
+/// };
+Type *WinEHStatePass::getSEH4RegistrationType() {
+ if (SEH4RegistrationTy)
+ return SEH4RegistrationTy;
+ LLVMContext &Context = TheModule->getContext();
+ Type *FieldTys[] = {
+ Type::getInt8PtrTy(Context), // void *SavedESP
+ Type::getInt8PtrTy(Context), // void *ExceptionPointers
+ getEHRegistrationType(), // EHRegistrationNode SubRecord
+ Type::getInt32Ty(Context), // int32_t EncodedScopeTable
+ Type::getInt32Ty(Context) // int32_t TryLevel
+ };
+ SEH4RegistrationTy = StructType::create(FieldTys, "EH4ExceptionRegistration");
+ return SEH4RegistrationTy;
+}
+
+// Emit an exception registration record. These are stack allocations with the
+// common subobject of two pointers: the previous registration record (the old
+// fs:00) and the personality function for the current frame. The data before
+// and after that is personality function specific.
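+//
+// For example, with __CxxFrameHandler3 the entry block ends up roughly like
+// this (sketch only; the field names are for illustration):
+//   %reg = alloca %CXXExceptionRegistration
+//   %sp  = call i8* @llvm.stacksave()
+//   store i8* %sp, i8** %reg.SavedESP
+//   store i32 -1,  i32* %reg.TryLevel
+//   ; SubRecord is then linked into the fs:00 chain with __ehhandler$F as
+//   ; its Handler.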
+void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
+ assert(Personality == EHPersonality::MSVC_CXX ||
+ Personality == EHPersonality::MSVC_X86SEH);
+
+ StringRef PersonalityName = PersonalityFn->getName();
+ IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
+ Type *Int8PtrType = Builder.getInt8PtrTy();
+ Value *SubRecord = nullptr;
+ if (PersonalityName == "__CxxFrameHandler3") {
+ Type *RegNodeTy = getCXXEH3RegistrationType();
+ Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ // FIXME: We can skip this in -GS- mode, when we figure that out.
+ // SavedESP = llvm.stacksave()
+ Value *SP = Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
+ Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ // TryLevel = -1
+ Builder.CreateStore(Builder.getInt32(-1),
+ Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
+ // Handler = __ehhandler$F
+ Function *Trampoline = generateLSDAInEAXThunk(F);
+ SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
+ linkExceptionRegistration(Builder, SubRecord, Trampoline);
+ } else if (PersonalityName == "_except_handler3") {
+ Type *RegNodeTy = getSEH3RegistrationType();
+ Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ // TryLevel = -1
+ Builder.CreateStore(Builder.getInt32(-1),
+ Builder.CreateStructGEP(RegNodeTy, RegNode, 2));
+ // ScopeTable = llvm.x86.seh.lsda(F)
+ Value *LSDA = emitEHLSDA(Builder, F);
+ Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
+ SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 0);
+ linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
+ } else if (PersonalityName == "_except_handler4") {
+ Type *RegNodeTy = getSEH4RegistrationType();
+ Value *RegNode = Builder.CreateAlloca(RegNodeTy);
+ // SavedESP = llvm.stacksave()
+ Value *SP = Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {});
+ Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ // TryLevel = -2
+ Builder.CreateStore(Builder.getInt32(-2),
+ Builder.CreateStructGEP(RegNodeTy, RegNode, 4));
+ // FIXME: XOR the LSDA with __security_cookie.
+ // ScopeTable = llvm.x86.seh.lsda(F)
+ Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
+ Value *LSDA = Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8);
+ Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
+ SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 2);
+ linkExceptionRegistration(Builder, SubRecord, PersonalityFn);
+ } else {
+ llvm_unreachable("unexpected personality function");
+ }
+
+  // Insert an unlink before all returns.
+ for (BasicBlock &BB : *F) {
+ TerminatorInst *T = BB.getTerminator();
+ if (!isa<ReturnInst>(T))
+ continue;
+ Builder.SetInsertPoint(T);
+ unlinkExceptionRegistration(Builder, SubRecord);
+ }
+}
+
+Value *WinEHStatePass::emitEHLSDA(IRBuilder<> &Builder, Function *F) {
+ Value *FI8 = Builder.CreateBitCast(F, Type::getInt8PtrTy(F->getContext()));
+ return Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8);
+}
+
+/// Generate a thunk that puts the LSDA of ParentFunc in EAX and then calls
+/// PersonalityFn, forwarding the parameters passed to PEXCEPTION_ROUTINE:
+/// typedef _EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE)(
+/// _EXCEPTION_RECORD *, void *, _CONTEXT *, void *);
+/// We essentially want this code:
+/// movl $lsda, %eax
+/// jmpl ___CxxFrameHandler3
+Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
+ LLVMContext &Context = ParentFunc->getContext();
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *ArgTys[5] = {Int8PtrType, Int8PtrType, Int8PtrType, Int8PtrType,
+ Int8PtrType};
+ FunctionType *TrampolineTy =
+ FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 4),
+ /*isVarArg=*/false);
+ FunctionType *TargetFuncTy =
+ FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 5),
+ /*isVarArg=*/false);
+ Function *Trampoline = Function::Create(
+ TrampolineTy, GlobalValue::InternalLinkage,
+ Twine("__ehhandler$") + ParentFunc->getName(), TheModule);
+ BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline);
+ IRBuilder<> Builder(EntryBB);
+ Value *LSDA = emitEHLSDA(Builder, ParentFunc);
+ Value *CastPersonality =
+ Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
+ auto AI = Trampoline->arg_begin();
+ Value *Args[5] = {LSDA, AI++, AI++, AI++, AI++};
+ CallInst *Call = Builder.CreateCall(CastPersonality, Args);
+ // Can't use musttail due to prototype mismatch, but we can use tail.
+ Call->setTailCall(true);
+ // Set inreg so we pass it in EAX.
+ Call->addAttribute(1, Attribute::InReg);
+ Builder.CreateRet(Call);
+ return Trampoline;
+}
+
+void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
+ Value *RegNode, Value *Handler) {
+ Type *RegNodeTy = getEHRegistrationType();
+  // RegNode->Handler = Handler
+ Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy());
+ Builder.CreateStore(Handler, Builder.CreateStructGEP(RegNodeTy, RegNode, 1));
+ // Next = [fs:00]
+ Constant *FSZero =
+ Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Value *Next = Builder.CreateLoad(FSZero);
+ Builder.CreateStore(Next, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ // [fs:00] = RegNode
+ Builder.CreateStore(RegNode, FSZero);
+}
+
+void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder,
+ Value *RegNode) {
+ // Clone RegNode into the current BB for better address mode folding.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(RegNode)) {
+ GEP = cast<GetElementPtrInst>(GEP->clone());
+ Builder.Insert(GEP);
+ RegNode = GEP;
+ }
+ Type *RegNodeTy = getEHRegistrationType();
+ // [fs:00] = RegNode->Next
+ Value *Next =
+ Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
+ Constant *FSZero =
+ Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257));
+ Builder.CreateStore(Next, FSZero);
+}
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 640e6b0..2e44ac9 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -204,7 +204,7 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
if (RegNo > 11)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -216,7 +216,7 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
if (RegNo > 15)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
}
@@ -227,13 +227,13 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
static unsigned Values[] = {
32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
};
- Inst.addOperand(MCOperand::CreateImm(Values[Val]));
+ Inst.addOperand(MCOperand::createImm(Values[Val]));
return MCDisassembler::Success;
}
static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(-(int64_t)Val));
+ Inst.addOperand(MCOperand::createImm(-(int64_t)Val));
return MCDisassembler::Success;
}
@@ -362,7 +362,7 @@ Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
if (S != MCDisassembler::Success)
return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(Op1));
+ Inst.addOperand(MCOperand::createImm(Op1));
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
return S;
}
@@ -403,7 +403,7 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(Op2));
+ Inst.addOperand(MCOperand::createImm(Op2));
return S;
}
@@ -552,7 +552,7 @@ Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
unsigned Op1, Op2, Op3;
DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
- Inst.addOperand(MCOperand::CreateImm(Op1));
+ Inst.addOperand(MCOperand::createImm(Op1));
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
}
@@ -567,7 +567,7 @@ Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
if (S == MCDisassembler::Success) {
DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(Op3));
+ Inst.addOperand(MCOperand::createImm(Op3));
}
return S;
}
@@ -623,7 +623,7 @@ DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
if (S == MCDisassembler::Success) {
DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(Op3));
+ Inst.addOperand(MCOperand::createImm(Op3));
}
return S;
}
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 215fe89..36b3b02 100644
--- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 78521fd..6fd2dec 100644
--- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -32,7 +32,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 4073549..ce0d39f 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -77,16 +77,15 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Target only supports CodeModel Small or Large");
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new XCoreInstPrinter(MAI, MII, MRI);
}
@@ -126,15 +125,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
}
}
-static MCStreamer *
-createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new XCoreTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new XCoreTargetAsmStreamer(S, OS);
}
// Force static initialization.
@@ -160,5 +155,6 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
createXCoreMCInstPrinter);
- TargetRegistry::RegisterAsmStreamer(TheXCoreTarget, createXCoreMCAsmStreamer);
+ TargetRegistry::RegisterAsmTargetStreamer(TheXCoreTarget,
+ createTargetAsmStreamer);
}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 0ff5961..28e0275 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 00e34e0..c78cde9 100644
--- a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -14,6 +14,6 @@ using namespace llvm;
Target llvm::TheXCoreTarget;
-extern "C" void LLVMInitializeXCoreTargetInfo() {
+extern "C" void LLVMInitializeXCoreTargetInfo() {
RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
index 140ba2a..ba6ca84 100644
--- a/contrib/llvm/lib/Target/XCore/XCore.h
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -32,8 +32,6 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
- ImmutablePass *createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM);
-
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 82e4e36..23e24f2 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -50,14 +50,13 @@ using namespace llvm;
namespace {
class XCoreAsmPrinter : public AsmPrinter {
- const XCoreSubtarget &Subtarget;
XCoreMCInstLower MCInstLowering;
XCoreTargetStreamer &getTargetStreamer();
public:
- explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
- MCInstLowering(*this) {}
+ explicit XCoreAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {}
const char *getPassName() const override {
return "XCore Assembly Printer";
@@ -87,7 +86,7 @@ namespace {
} // end of anonymous namespace
XCoreTargetStreamer &XCoreAsmPrinter::getTargetStreamer() {
- return static_cast<XCoreTargetStreamer&>(*OutStreamer.getTargetStreamer());
+ return static_cast<XCoreTargetStreamer&>(*OutStreamer->getTargetStreamer());
}
void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
@@ -97,16 +96,15 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
if (ArrayType *ATy = dyn_cast<ArrayType>(
cast<PointerType>(GV->getType())->getElementType())) {
- MCSymbol *SymGlob = OutContext.GetOrCreateSymbol(
+ MCSymbol *SymGlob = OutContext.getOrCreateSymbol(
Twine(Sym->getName() + StringRef(".globound")));
- OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Global);
- OutStreamer.EmitAssignment(SymGlob,
- MCConstantExpr::Create(ATy->getNumElements(),
- OutContext));
+ OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Global);
+ OutStreamer->EmitAssignment(SymGlob,
+ MCConstantExpr::Create(ATy->getNumElements(),
+ OutContext));
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage()) {
- // TODO Use COMDAT groups for LinkOnceLinkage
- OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Weak);
+ OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Weak);
}
}
}
@@ -117,8 +115,8 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitSpecialLLVMGlobal(GV))
return;
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
- OutStreamer.SwitchSection(
+ const DataLayout *TD = TM.getDataLayout();
+ OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
MCSymbol *GVSym = getSymbol(GV);
@@ -138,12 +136,11 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
case GlobalValue::ExternalLinkage:
case GlobalValue::CommonLinkage:
emitArrayBound(GVSym, GV);
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
- // TODO Use COMDAT groups for LinkOnceLinkage
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage())
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
// FALL THROUGH
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
@@ -159,16 +156,16 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
unsigned Size = TD->getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
- OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
}
- OutStreamer.EmitLabel(GVSym);
+ OutStreamer->EmitLabel(GVSym);
EmitGlobalConstant(C);
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer.EmitZeros(4 - Size);
+ OutStreamer->EmitZeros(4 - Size);
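+  // Illustrative note: an i16 global has alloc size 2, so two bytes of zero
+  // padding follow the initializer to reach the required 4-byte boundary.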
// Mark the end of the global
getTargetStreamer().emitCCBottomData(GVSym->getName());
@@ -188,7 +185,7 @@ void XCoreAsmPrinter::EmitFunctionBodyEnd() {
void XCoreAsmPrinter::EmitFunctionEntryLabel() {
// Mark the start of the function
getTargetStreamer().emitCCTopFunction(CurrentFnSym->getName());
- OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer->EmitLabel(CurrentFnSym);
}
void XCoreAsmPrinter::
@@ -210,7 +207,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -278,7 +275,7 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
O << "\tmov "
<< XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
<< XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
return;
}
break;
@@ -291,14 +288,14 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else
printInlineJT32(MI, 0, O);
O << '\n';
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
return;
}
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 7c74340..bd834cc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -220,14 +220,14 @@ bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
MF.getFrameInfo()->hasVarSizedObjects();
}
-void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
- const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -341,8 +341,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
DebugLoc dl = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
@@ -480,8 +479,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
void XCoreFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
if (!hasReservedCallFrame(MF)) {
// Turn the adjcallstackdown instruction into 'extsp <amt>' and the
// adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
index 7b169c2..607c772 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -27,7 +27,8 @@ namespace llvm {
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 86bc6f2..f5b180b 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -37,20 +37,18 @@ using namespace llvm;
///
namespace {
class XCoreDAGToDAGISel : public SelectionDAGISel {
- const XCoreSubtarget &Subtarget;
public:
XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Subtarget(*TM.getSubtargetImpl()) { }
+ : SelectionDAGISel(TM, OptLevel) {}
SDNode *Select(SDNode *N) override;
SDNode *SelectBRIND(SDNode *N);
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
inline bool immMskBitp(SDNode *inN) const {
@@ -67,7 +65,7 @@ namespace {
// Complex Pattern Selectors.
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -92,7 +90,7 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
FrameIndexSDNode *FIN = nullptr;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
return true;
}
if (Addr.getOpcode() == ISD::ADD) {
@@ -102,7 +100,8 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
&& (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
// Constant positive word offset from frame index
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(Addr),
+ MVT::i32);
return true;
}
}
@@ -110,12 +109,12 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
}
bool XCoreDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Reg;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // Memory.
+ case InlineAsm::Constraint_m: // Memory.
switch (Op.getOpcode()) {
default: return true;
case XCoreISD::CPRelativeWrapper:
@@ -140,7 +139,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
if (immMskBitp(N)) {
// Transformation function: get the size of a mask
// Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - countLeadingZeros((uint32_t)Val));
+ SDValue MskSize = getI32Imm(32 - countLeadingZeros((uint32_t)Val), dl);
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
@@ -258,7 +257,7 @@ SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
// after with clrsr 1. If any resources owned by the thread are ready an event
// will be taken. If no resource is ready we branch to the address which was
// the operand to the checkevent intrinsic.
- SDValue constOne = getI32Imm(1);
+ SDValue constOne = getI32Imm(1, dl);
SDValue Glue =
SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
constOne, Chain), 0);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 51e4d03..f56caec 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -44,8 +44,9 @@ using namespace llvm;
const char *XCoreTargetLowering::
getTargetNodeName(unsigned Opcode) const
{
- switch (Opcode)
+ switch ((XCoreISD::NodeType)Opcode)
{
+ case XCoreISD::FIRST_NUMBER : break;
case XCoreISD::BL : return "XCoreISD::BL";
case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
@@ -64,19 +65,19 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET";
case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN";
case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER";
- default : return nullptr;
}
+ return nullptr;
}
-XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM), TM(TM),
- Subtarget(TM.getSubtarget<XCoreSubtarget>()) {
+XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
+ const XCoreSubtarget &Subtarget)
+ : TargetLowering(TM), TM(TM), Subtarget(Subtarget) {
// Set up the register classes.
addRegisterClass(MVT::i32, &XCore::GRRegsRegClass);
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget.getRegisterInfo());
// Division is expensive
setIntDivIsCheap(false);
@@ -298,7 +299,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
GA = getGlobalAddressWrapper(GA, GV, DAG);
// Handle the rest of the offset.
if (Offset != FoldedOffset) {
- SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32);
+ SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, DL, MVT::i32);
GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining);
}
return GA;
@@ -308,7 +309,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty);
Ty = Type::getInt32Ty(*DAG.getContext());
Constant *Idx = ConstantInt::get(Ty, Offset);
- Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx);
+ Constant *GAI = ConstantExpr::getGetElementPtr(
+ Type::getInt8Ty(*DAG.getContext()), GA, Idx);
SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo(), false, false, false, 0);
@@ -367,7 +369,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
}
assert((NumEntries >> 31) == 0);
SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
ScaledIndex);
}
@@ -392,12 +394,12 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base,
HighOffset);
} else {
LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
- DAG.getConstant(LowOffset, MVT::i32));
+ DAG.getConstant(LowOffset, DL, MVT::i32));
HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
- DAG.getConstant(HighOffset, MVT::i32));
+ DAG.getConstant(HighOffset, DL, MVT::i32));
}
- SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32);
- SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32);
+ SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, DL, MVT::i32);
+ SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, DL, MVT::i32);
SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
LowAddr, MachinePointerInfo(),
@@ -468,14 +470,14 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, DL, MVT::i32));
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
HighAddr,
LD->getPointerInfo().getWithOffset(2),
MVT::i16, LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(), 2);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(16, DL, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
@@ -528,13 +530,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
if (ST->getAlignment() == 2) {
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(16, dl, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
ST->getPointerInfo(), MVT::i16,
ST->isVolatile(), ST->isNonTemporal(),
2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
- DAG.getConstant(2, MVT::i32));
+ DAG.getConstant(2, dl, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
ST->getPointerInfo().getWithOffset(2),
MVT::i16, ST->isVolatile(),
@@ -572,7 +574,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Hi = DAG.getNode(XCoreISD::MACCS, dl,
DAG.getVTList(MVT::i32, MVT::i32), Zero, Zero,
LHS, RHS);
@@ -589,7 +591,7 @@ LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
DAG.getVTList(MVT::i32, MVT::i32), LHS, RHS,
Zero, Zero);
@@ -674,13 +676,13 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
SDLoc dl(N);
SDValue LL, RL, AddendL, AddendH;
LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul.getOperand(0), DAG.getConstant(0, MVT::i32));
+ Mul.getOperand(0), DAG.getConstant(0, dl, MVT::i32));
RL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul.getOperand(1), DAG.getConstant(0, MVT::i32));
+ Mul.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
AddendL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Other, DAG.getConstant(0, MVT::i32));
+ Other, DAG.getConstant(0, dl, MVT::i32));
AddendH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Other, DAG.getConstant(1, MVT::i32));
+ Other, DAG.getConstant(1, dl, MVT::i32));
APInt HighMask = APInt::getHighBitsSet(64, 32);
unsigned LHSSB = DAG.ComputeNumSignBits(Mul.getOperand(0));
unsigned RHSSB = DAG.ComputeNumSignBits(Mul.getOperand(1));
@@ -703,9 +705,9 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const
}
SDValue LH, RH;
LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul.getOperand(0), DAG.getConstant(1, MVT::i32));
+ Mul.getOperand(0), DAG.getConstant(1, dl, MVT::i32));
RH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul.getOperand(1), DAG.getConstant(1, MVT::i32));
+ Mul.getOperand(1), DAG.getConstant(1, dl, MVT::i32));
SDValue Hi = DAG.getNode(XCoreISD::MACCU, dl,
DAG.getVTList(MVT::i32, MVT::i32), AddendH,
AddendL, LL, RL);
@@ -734,18 +736,22 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
// Extract components
SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ N->getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ N->getOperand(0),
+ DAG.getConstant(1, dl, MVT::i32));
SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ N->getOperand(1),
+ DAG.getConstant(0, dl, MVT::i32));
SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(1), DAG.getConstant(1, MVT::i32));
+ N->getOperand(1),
+ DAG.getConstant(1, dl, MVT::i32));
// Expand
unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
XCoreISD::LSUB;
- SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
LHSL, RHSL, Zero);
SDValue Carry(Lo.getNode(), 1);
@@ -774,7 +780,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
false, false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAList,
- DAG.getIntPtrConstant(VT.getSizeInBits() / 8));
+ DAG.getIntPtrConstant(VT.getSizeInBits() / 8,
+ dl));
// Store the incremented VAList to the legalized pointer
InChain = DAG.getStore(VAList.getValue(1), dl, nextPtr, VAListPtr,
MachinePointerInfo(SV), false, false, 0);
@@ -807,8 +814,7 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
return SDValue();
MachineFunction &MF = DAG.getMachineFunction();
- const TargetRegisterInfo *RegInfo =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op),
RegInfo->getFrameRegister(MF), MVT::i32);
}
@@ -854,8 +860,7 @@ LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
// Absolute SP = (FP + FrameToArgs) + Offset
- const TargetRegisterInfo *RegInfo =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
SDValue Stack = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
RegInfo->getFrameRegister(MF), MVT::i32);
SDValue FrameToArgs = DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, dl,
@@ -911,30 +916,30 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue Addr = Trmp;
SDLoc dl(Op);
- OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
- Addr, MachinePointerInfo(TrmpAddr), false, false,
- 0);
+ OutChains[0] = DAG.getStore(Chain, dl,
+ DAG.getConstant(0x0a3cd805, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr), false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(4, MVT::i32));
- OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
- Addr, MachinePointerInfo(TrmpAddr, 4), false,
- false, 0);
+ DAG.getConstant(4, dl, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl,
+ DAG.getConstant(0xd80456c0, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr, 4), false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(8, MVT::i32));
- OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
- Addr, MachinePointerInfo(TrmpAddr, 8), false,
- false, 0);
+ DAG.getConstant(8, dl, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl,
+ DAG.getConstant(0x27fb0a3c, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr, 8), false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(12, MVT::i32));
+ DAG.getConstant(12, dl, MVT::i32));
OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 12), false, false,
0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(16, dl, MVT::i32));
OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
MachinePointerInfo(TrmpAddr, 16), false, false,
0);
@@ -1097,7 +1102,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag,
int offset = ResultMemLocs[i].first;
unsigned index = ResultMemLocs[i].second;
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) };
+ SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, dl, MVT::i32) };
SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops);
InVals[index] = load;
MemOpChains.push_back(load.getValue(1));
@@ -1146,7 +1151,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = RetCCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, dl,
getPointerTy(), true), dl);
SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
@@ -1183,7 +1188,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
Chain, Arg,
- DAG.getConstant(Offset/4, MVT::i32)));
+ DAG.getConstant(Offset/4, dl,
+ MVT::i32)));
}
}
@@ -1234,8 +1240,9 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
+ DAG.getConstant(NumBytes, dl, getPointerTy(),
+ true),
+ DAG.getConstant(0, dl, getPointerTy(), true),
InFlag, dl);
InFlag = Chain.getValue(1);
@@ -1373,8 +1380,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
- array_lengthof(ArgRegs));
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs);
if (FirstVAReg < array_lengthof(ArgRegs)) {
int offset = 0;
// Save remaining registers, storing higher register numbers at a higher
@@ -1424,8 +1430,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(FIN);
MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
- DAG.getConstant(Size, MVT::i32),
- Align, false, false,
+ DAG.getConstant(Size, dl, MVT::i32),
+ Align, false, false, false,
MachinePointerInfo(),
MachinePointerInfo()));
} else {
@@ -1489,7 +1495,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Return on XCore is always a "retsp 0"
- RetOps.push_back(DAG.getConstant(0, MVT::i32));
+ RetOps.push_back(DAG.getConstant(0, dl, MVT::i32));
SmallVector<SDValue, 4> MemOpChains;
// Handle return values that must be copied to memory.
@@ -1550,8 +1556,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
MachineBasicBlock *
XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo &TII =
- *getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
"Unexpected instr type to insert");
@@ -1674,9 +1679,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd 0, 0, x) -> 0, x & 1
if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
- SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, dl, VT));
SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, dl);
}
@@ -1689,7 +1694,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
VT.getSizeInBits() - 1);
DAG.computeKnownBits(N2, KnownZero, KnownOne);
if ((KnownZero & Mask) == Mask) {
- SDValue Carry = DAG.getConstant(0, VT);
+ SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, dl);
@@ -1714,7 +1719,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
- DAG.getConstant(0, VT), N2);
+ DAG.getConstant(0, dl, VT), N2);
SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, dl);
}
@@ -1728,7 +1733,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
VT.getSizeInBits() - 1);
DAG.computeKnownBits(N2, KnownZero, KnownOne);
if ((KnownZero & Mask) == Mask) {
- SDValue Borrow = DAG.getConstant(0, VT);
+ SDValue Borrow = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, dl);
@@ -1794,13 +1799,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
DAG.MaskedValueIsZero(Addend0, HighMask) &&
DAG.MaskedValueIsZero(Addend1, HighMask)) {
SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul0, DAG.getConstant(0, MVT::i32));
+ Mul0, DAG.getConstant(0, dl, MVT::i32));
SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Mul1, DAG.getConstant(0, MVT::i32));
+ Mul1, DAG.getConstant(0, dl, MVT::i32));
SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Addend0, DAG.getConstant(0, MVT::i32));
+ Addend0, DAG.getConstant(0, dl, MVT::i32));
SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Addend1, DAG.getConstant(0, MVT::i32));
+ Addend1, DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
Addend0L, Addend1L);
@@ -1837,10 +1842,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
LD->getAlignment() == Alignment &&
!LD->isVolatile() && !LD->isIndexed() &&
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ bool isTail = isInTailCallPosition(DAG, ST, Chain);
return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
LD->getBasePtr(),
- DAG.getConstant(StoreBits/8, MVT::i32),
- Alignment, false, ST->getPointerInfo(),
+ DAG.getConstant(StoreBits/8, dl, MVT::i32),
+ Alignment, false, isTail, ST->getPointerInfo(),
LD->getPointerInfo());
}
}
@@ -1924,7 +1930,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
unsigned Size = TD->getTypeAllocSize(Ty);
if (AM.BaseGV) {
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
@@ -1961,10 +1967,10 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// XCore Inline Assembly Support
//===----------------------------------------------------------------------===//
-std::pair<unsigned, const TargetRegisterClass*>
-XCoreTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+XCoreTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default : break;
@@ -1974,5 +1980,5 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
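
All of the XCoreISelLowering.cpp hunks above are the same mechanical API migration: SelectionDAG::getConstant now takes an SDLoc so that constant nodes carry a debug location. A minimal sketch of the new call shape, assuming LLVM 3.7-era CodeGen headers (the helper name makeZero is hypothetical):

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Post-merge form: the SDLoc argument attaches a debug location to the
// constant node instead of leaving it location-less.
static SDValue makeZero(SelectionDAG &DAG, SDLoc dl) {
  return DAG.getConstant(0, dl, MVT::i32); // was: DAG.getConstant(0, MVT::i32)
}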
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index 13154c6..22014ed 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -26,7 +26,7 @@ namespace llvm {
class XCoreTargetMachine;
namespace XCoreISD {
- enum NodeType {
+ enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -93,8 +93,8 @@ namespace llvm {
class XCoreTargetLowering : public TargetLowering
{
public:
-
- explicit XCoreTargetLowering(const TargetMachine &TM);
+ explicit XCoreTargetLowering(const TargetMachine &TM,
+ const XCoreSubtarget &Subtarget);
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -172,8 +172,9 @@ namespace llvm {
SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
// Expand specifics
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 8e9bb45..8110b91 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -94,33 +94,34 @@ def XCoreMemBarrier : SDNode<"XCoreISD::MEMBARRIER", SDT_XCoreMEMBARRIER,
def div4_xform : SDNodeXForm<imm, [{
// Transformation function: imm/4
assert(N->getZExtValue() % 4 == 0);
- return getI32Imm(N->getZExtValue()/4);
+ return getI32Imm(N->getZExtValue()/4, SDLoc(N));
}]>;
def msksize_xform : SDNodeXForm<imm, [{
// Transformation function: get the size of a mask
assert(isMask_32(N->getZExtValue()));
// look for the first non-zero bit
- return getI32Imm(32 - countLeadingZeros((uint32_t)N->getZExtValue()));
+ return getI32Imm(32 - countLeadingZeros((uint32_t)N->getZExtValue()),
+ SDLoc(N));
}]>;
def neg_xform : SDNodeXForm<imm, [{
// Transformation function: -imm
uint32_t value = N->getZExtValue();
- return getI32Imm(-value);
+ return getI32Imm(-value, SDLoc(N));
}]>;
def bpwsub_xform : SDNodeXForm<imm, [{
// Transformation function: 32-imm
uint32_t value = N->getZExtValue();
- return getI32Imm(32-value);
+ return getI32Imm(32 - value, SDLoc(N));
}]>;
def div4neg_xform : SDNodeXForm<imm, [{
// Transformation function: -imm/4
uint32_t value = N->getZExtValue();
assert(-value % 4 == 0);
- return getI32Imm(-value/4);
+ return getI32Imm(-value/4, SDLoc(N));
}]>;
def immUs4Neg : PatLeaf<(imm), [{
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index ac3bae5..996c6f5 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
case Instruction::GetElementPtr: {
SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
ArrayRef<Value *> CEOps(CEOpVec);
- return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
- CEOps.slice(1)));
+ return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0],
+ CEOps.slice(1)));
}
case Instruction::Add:
case Instruction::Sub:
@@ -137,7 +138,7 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
if (PN->getIncomingValue(I) == CE) {
BasicBlock *PredBB = PN->getIncomingBlock(I);
if (PredBB->getTerminator()->getNumSuccessors() > 1)
- PredBB = SplitEdge(PredBB, PN->getParent(), P);
+ PredBB = SplitEdge(PredBB, PN->getParent());
Instruction *InsertPos = PredBB->getTerminator();
Instruction *NewInst = createReplacementInstr(CE, InsertPos);
PN->setOperand(I, NewInst);
@@ -208,11 +209,12 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
IRBuilder<> Builder(Inst);
Function *GetID = Intrinsic::getDeclaration(GV->getParent(),
Intrinsic::xcore_getid);
- Value *ThreadID = Builder.CreateCall(GetID);
+ Value *ThreadID = Builder.CreateCall(GetID, {});
SmallVector<Value *, 2> Indices;
Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx)));
Indices.push_back(ThreadID);
- Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
+ Value *Addr =
+ Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices);
U->replaceUsesOfWith(GV, Addr);
}
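
Both GEP-building hunks in XCoreLowerThreadLocal.cpp switch to the explicitly typed builder overloads: the source element type is passed in rather than rederived from the pointer operand. A minimal sketch under the same LLVM 3.7-era IR headers (the helper name indexGlobal is hypothetical):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Index element Idx of an array-typed global. getValueType() supplies the
// pointee type that the new CreateInBoundsGEP overload requires up front.
static Value *indexGlobal(IRBuilder<> &B, GlobalVariable *G, Value *Idx) {
  Value *Indices[] = {
      Constant::getNullValue(B.getInt64Ty()), // step through the pointer
      Idx                                     // select the element
  };
  return B.CreateInBoundsGEP(G->getValueType(), G, Indices);
}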
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
index dfdadcf..cffba5f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -68,14 +68,14 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
if (!Offset)
- return MCOperand::CreateExpr(MCSym);
+ return MCOperand::createExpr(MCSym);
// Assume offset is never negative.
assert(Offset > 0);
const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
- return MCOperand::CreateExpr(Add);
+ return MCOperand::createExpr(Add);
}
MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
@@ -87,9 +87,9 @@ MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) break;
- return MCOperand::CreateReg(MO.getReg());
+ return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm() + offset);
+ return MCOperand::createImm(MO.getImm() + offset);
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 5c666ae..1d569e8 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -208,8 +208,8 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
MF.getFunction()->needsUnwindTableEntry();
}
-const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
+const MCPhysReg *
+XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// The callee saved registers LR & FP are explicitly handled during
// emitPrologue & emitEpilogue and related functions.
static const MCPhysReg CalleeSavedRegs[] = {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 5d7721c..010fccd 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -29,8 +29,7 @@ public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index 7227411..7996020 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -27,6 +27,5 @@ void XCoreSubtarget::anchor() { }
XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : XCoreGenSubtargetInfo(TT, CPU, FS),
- DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
- InstrInfo(), FrameLowering(*this), TLInfo(TM), TSInfo(DL) {}
+ : XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this),
+ TLInfo(TM, *this), TSInfo(*TM.getDataLayout()) {}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
index 695578d..da51ef1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h
@@ -31,7 +31,6 @@ class StringRef;
class XCoreSubtarget : public XCoreGenSubtargetInfo {
virtual void anchor();
- const DataLayout DL; // Calculates type size & alignment
XCoreInstrInfo InstrInfo;
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
@@ -61,7 +60,6 @@ public:
const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const DataLayout *getDataLayout() const override { return &DL; }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 21ebf45..228dc1c 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -12,10 +12,11 @@
#include "XCoreTargetMachine.h"
#include "XCoreTargetObjectFile.h"
+#include "XCoreTargetTransformInfo.h"
#include "XCore.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -26,7 +27,9 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(
+ T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -81,10 +84,7 @@ extern "C" void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
}
-void XCoreTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our XCore pass. This
- // allows the XCore pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createXCoreTargetTransformInfoPass(this));
+TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &) { return TargetTransformInfo(XCoreTTIImpl(this)); });
}
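
With addAnalysisPasses gone, the target hands the pass manager a TargetIRAnalysis whose callback builds a value-semantics TTI per function; legacy passes reach it through TargetTransformInfoWrapperPass. A minimal consumer sketch, assuming LLVM 3.7-era analysis headers (the pass name QueryRegs is hypothetical):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct QueryRegs : public FunctionPass {
  static char ID;
  QueryRegs() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    // The wrapper runs the TargetIRAnalysis callback registered above,
    // which wraps an XCoreTTIImpl for this function.
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    (void)TTI.getNumberOfRegisters(/*Vector=*/false); // 12 on XCore
    return false;
  }
};
}

char QueryRegs::ID = 0;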
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
index 8ff9269..0d324ab 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -29,12 +29,15 @@ public:
CodeGenOpt::Level OL);
~XCoreTargetMachine() override;
- const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 86d0de6..b5a9905 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -21,66 +21,43 @@ using namespace llvm;
void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- BSSSection =
- Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getBSS());
- BSSSectionLarge =
- Ctx.getELFSection(".dp.bss.large", ELF::SHT_NOBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getBSS());
- DataSection =
- Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getDataRel());
- DataSectionLarge =
- Ctx.getELFSection(".dp.data.large", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getDataRel());
- DataRelROSection =
- Ctx.getELFSection(".dp.rodata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getReadOnlyWithRel());
- DataRelROSectionLarge =
- Ctx.getELFSection(".dp.rodata.large", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE |
- ELF::XCORE_SHF_DP_SECTION,
- SectionKind::getReadOnlyWithRel());
+ BSSSection = Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION);
+ BSSSectionLarge = Ctx.getELFSection(".dp.bss.large", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION);
+ DataSection = Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION);
+ DataSectionLarge = Ctx.getELFSection(".dp.data.large", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION);
+ DataRelROSection = Ctx.getELFSection(".dp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION);
+ DataRelROSectionLarge = Ctx.getELFSection(
+ ".dp.rodata.large", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::XCORE_SHF_DP_SECTION);
ReadOnlySection =
- Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getReadOnlyWithRel());
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::XCORE_SHF_CP_SECTION);
ReadOnlySectionLarge =
- Ctx.getELFSection(".cp.rodata.large", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getReadOnlyWithRel());
- MergeableConst4Section =
- Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_MERGE |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst4());
- MergeableConst8Section =
- Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_MERGE |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst8());
- MergeableConst16Section =
- Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_MERGE |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst16());
+ Ctx.getELFSection(".cp.rodata.large", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::XCORE_SHF_CP_SECTION);
+ MergeableConst4Section = Ctx.getELFSection(
+ ".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::XCORE_SHF_CP_SECTION, 4, "");
+ MergeableConst8Section = Ctx.getELFSection(
+ ".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::XCORE_SHF_CP_SECTION, 8, "");
+ MergeableConst16Section = Ctx.getELFSection(
+ ".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::XCORE_SHF_CP_SECTION, 16, "");
CStringSection =
- Ctx.getELFSection(".cp.rodata.string", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS |
- ELF::XCORE_SHF_CP_SECTION,
- SectionKind::getReadOnlyWithRel());
+ Ctx.getELFSection(".cp.rodata.string", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS |
+ ELF::XCORE_SHF_CP_SECTION);
// TextSection - see MObjectFileInfo.cpp
// StaticCtorSection - see MObjectFileInfo.cpp
// StaticDtorSection - see MObjectFileInfo.cpp
@@ -118,7 +95,7 @@ static unsigned getXCoreSectionFlags(SectionKind K, bool IsCPRel) {
return Flags;
}
-const MCSection *
+MCSection *
XCoreTargetObjectFile::getExplicitSectionGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -128,12 +105,13 @@ XCoreTargetObjectFile::getExplicitSectionGlobal(const GlobalValue *GV,
if (IsCPRel && !Kind.isReadOnly())
report_fatal_error("Using .cp. section for writeable object.");
return getContext().getELFSection(SectionName, getXCoreSectionType(Kind),
- getXCoreSectionFlags(Kind, IsCPRel), Kind);
+ getXCoreSectionFlags(Kind, IsCPRel));
}
-const MCSection *XCoreTargetObjectFile::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const{
+MCSection *
+XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
bool UseCPRel = GV->isLocalLinkage(GV->getLinkage());
@@ -146,8 +124,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
}
Type *ObjType = GV->getType()->getPointerElementType();
if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ObjType) <
- CodeModelLargeSize) {
+ TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
: DataRelROSection;
if (Kind.isBSS() || Kind.isCommon())return BSSSection;
@@ -165,7 +142,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
report_fatal_error("Target does not support TLS or Common sections");
}
-const MCSection *
+MCSection *
XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind,
const Constant *C) const {
if (Kind.isMergeableConst4()) return MergeableConst4Section;
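
Every section created above drops its trailing SectionKind argument: getELFSection now infers the kind internally, and mergeable sections instead pass an entry size plus a (here empty) signature group. A minimal sketch of the two overloads in use, assuming LLVM 3.7-era MC headers (the helper names are hypothetical):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"

using namespace llvm;

// Mergeable 8-byte constants: entry size 8, no signature group.
static MCSectionELF *makeCst8(MCContext &Ctx) {
  return Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_MERGE |
                               ELF::XCORE_SHF_CP_SECTION,
                           8, "");
}

// Non-mergeable data: the three-argument overload suffices.
static MCSectionELF *makeDpData(MCContext &Ctx) {
  return Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                               ELF::XCORE_SHF_DP_SECTION);
}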
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index 7d3f49d..2a5ac23 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -17,25 +17,24 @@ namespace llvm {
static const unsigned CodeModelLargeSize = 256;
class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
- const MCSection *BSSSectionLarge;
- const MCSection *DataSectionLarge;
- const MCSection *ReadOnlySectionLarge;
- const MCSection *DataRelROSectionLarge;
+ MCSection *BSSSectionLarge;
+ MCSection *DataSectionLarge;
+ MCSection *ReadOnlySectionLarge;
+ MCSection *DataRelROSectionLarge;
+
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCSection *
- getExplicitSectionGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
- const MCSection *
- SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
- const MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override;
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h b/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
index 48bf0fa..3563dbc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class XCoreTargetStreamer : public MCTargetStreamer {
public:
XCoreTargetStreamer(MCStreamer &S);
- virtual ~XCoreTargetStreamer();
+ ~XCoreTargetStreamer() override;
virtual void emitCCTopData(StringRef Name) = 0;
virtual void emitCCTopFunction(StringRef Name) = 0;
virtual void emitCCBottomData(StringRef Name) = 0;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.cpp
deleted file mode 100644
index da232da..0000000
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-//===-- XCoreTargetTransformInfo.cpp - XCore specific TTI pass ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// XCore target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-
-#include "XCore.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "xcoretti"
-
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeXCoreTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class XCoreTTI final : public ImmutablePass, public TargetTransformInfo {
-public:
- XCoreTTI() : ImmutablePass(ID) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- XCoreTTI(const XCoreTargetMachine *TM)
- : ImmutablePass(ID) {
- initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- pushTTIStack(this);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- static char ID;
-
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector) {
- return 0;
- }
- return 12;
- }
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(XCoreTTI, TargetTransformInfo, "xcoretti",
- "XCore Target Transform Info", true, true, false)
-char XCoreTTI::ID = 0;
-
-
-ImmutablePass *
-llvm::createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM) {
- return new XCoreTTI(TM);
-}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
new file mode 100644
index 0000000..70b47df
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -0,0 +1,72 @@
+//===-- XCoreTargetTransformInfo.h - XCore specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific to the
+/// XCore target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETTRANSFORMINFO_H
+
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> {
+ typedef BasicTTIImplBase<XCoreTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const XCoreSubtarget *ST;
+ const XCoreTargetLowering *TLI;
+
+ const XCoreSubtarget *getST() const { return ST; }
+ const XCoreTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit XCoreTTIImpl(const XCoreTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ XCoreTTIImpl(const XCoreTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ XCoreTTIImpl(XCoreTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ XCoreTTIImpl &operator=(const XCoreTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ XCoreTTIImpl &operator=(XCoreTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector) {
+ if (Vector) {
+ return 0;
+ }
+ return 12;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 3282022..7b7672d 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,6 +36,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -69,16 +70,15 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
- const DataLayout *DL;
private:
- bool isDenselyPacked(Type *type);
+ bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
@@ -90,7 +90,7 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
@@ -109,9 +109,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -128,7 +125,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
}
/// \brief Checks if a type could have padding bytes.
-bool ArgPromotion::isDenselyPacked(Type *type) {
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
@@ -136,7 +133,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// If the alloc size is not equal to the storage size, then there are padding
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
return false;
if (!isa<CompositeType>(type))
@@ -144,19 +141,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// For homogenous sequential types, check for padding within members.
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+ return isa<PointerType>(seqTy) ||
+ isDenselyPacked(seqTy->getElementType(), DL);
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL->getStructLayout(StructTy);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
uint64_t StartPos = 0;
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy))
+ if (!isDenselyPacked(ElTy, DL))
return false;
if (StartPos != Layout->getElementOffsetInBits(i))
return false;
- StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
}
return true;
@@ -210,6 +208,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// Make sure that it is local to this module.
if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -230,12 +235,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
@@ -250,8 +250,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// packed or if we can prove the padding bytes are never accessed. This does
// not apply to inalloca.
bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
@@ -310,9 +310,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
- const DataLayout *DL) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
+ const DataLayout &DL = Callee->getParent()->getDataLayout();
unsigned ArgNo = Arg->getArgNo();
@@ -322,7 +322,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
return false;
}
return true;
@@ -430,7 +430,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -561,8 +561,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
// loading block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
@@ -587,7 +586,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
- typedef std::set<IndicesVector> ScalarizeTable;
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
// ScalarizedElements - If we are promoting a pointer that has elements
// accessed out of it, keep track of which elements are accessed so that we
@@ -624,8 +623,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Params.push_back(STy->getElementType(i));
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
@@ -648,7 +646,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ScalarizeTable &ArgIndices = ScalarizedElements[I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
- assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI));
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -660,7 +662,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
- ArgIndices.insert(Indices);
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
LoadInst *OrigLoad;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
OrigLoad = L;
@@ -674,11 +676,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ SI->second));
assert(Params.back());
}
- if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
++NumArgumentsPromoted;
else
++NumAggregatesPromoted;
@@ -702,8 +706,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on subsequent argument
// promotions of the same function.
FunctionDIs.erase(DI);
@@ -769,9 +773,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
- (*AI)->getName()+"."+utostr(i),
- Call);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call);
// TODO: Tell AA about the new values?
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
}
@@ -784,12 +787,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
- if (!SI->empty()) {
- Ops.reserve(SI->size());
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ if (!SI->second.empty()) {
+ Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
- for (IndicesVector::const_iterator II = SI->begin(),
- IE = SI->end(); II != IE; ++II) {
+ for (IndicesVector::const_iterator II = SI->second.begin(),
+ IE = SI->second.end();
+ II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
Type *IdxTy = (ElTy->isStructTy() ?
@@ -800,7 +804,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V->getName() + ".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -858,7 +863,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+ CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
if (!Call->use_empty()) {
Call->replaceAllUsesWith(New);
@@ -904,10 +909,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs,
- TheAlloca->getName()+"."+Twine(i),
- InsertPt);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
I2->setName(I->getName()+"."+Twine(i));
new StoreInst(I2++, Idx, InsertPt);
}
@@ -940,7 +944,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->empty() &&
+ assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
LI->replaceAllUsesWith(I2);
@@ -962,7 +966,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Function::arg_iterator TheArg = I2;
for (ScalarizeTable::iterator It = ArgIndices.begin();
- *It != Operands; ++It, ++TheArg) {
+ It->second != Operands; ++It, ++TheArg) {
assert(It != ArgIndices.end() && "GEP not handled??");
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 0b6ade9..8ce7646 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -52,7 +52,6 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const DataLayout *DL;
};
}
@@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
-bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
- return DL || GV->getAlignment() != 0;
-}
-
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
- if (DL)
- return DL->getPreferredAlignment(GV);
- return 0;
+ return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
bool ConstantMerge::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
-
- // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
- // want to merge globals of unknown alignment with those of explicit
- // alignment. If we have DataLayout, we always know the alignment.
- DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
+
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *&Slot = CMap[Pair];
+ GlobalVariable *&Slot = CMap[Init];
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
@@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *Slot = CMap[Pair];
+ GlobalVariable *Slot = CMap[Init];
if (!Slot || Slot == GV)
continue;
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4045c09..76898f2 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -73,8 +73,8 @@ namespace {
}
std::string getDescription() const {
- return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getName().str();
+ return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
+ " of function " + F->getName()).str();
}
};
@@ -127,7 +127,7 @@ namespace {
// As the code generation for module is finished (and DIBuilder is
// finalized) we assume that subprogram descriptors won't be changed, and
// they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
protected:
// DAH uses this to specify a different ID.
@@ -146,7 +146,7 @@ namespace {
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
- unsigned RetValNum = 0);
+ unsigned RetValNum = -1U);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
void SurveyFunction(const Function &F);
@@ -303,8 +303,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&Fn);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on non-varargs argument
// eliminations of the same function.
FunctionDIs.erase(DI);
@@ -387,14 +387,32 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType()->isVoidTy())
+ Type *RetTy = F->getReturnType();
+ if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ else if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getNumElements();
else
return 1;
}
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+ Type *RetTy = F->getReturnType();
+ assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getElementType(Idx);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getElementType();
+ else
+ return RetTy;
+}
+
/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
/// liveness of Use.
@@ -425,9 +443,24 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
// original Use.
- RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = CreateRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ } else {
+ DAE::Liveness Result = MaybeLive;
+ for (unsigned i = 0; i < NumRetVals(F); ++i) {
+ RetOrArg Use = CreateRet(F, i);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
}
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
@@ -449,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
return Result;
}
- if (ImmutableCallSite CS = V) {
+ if (auto CS = ImmutableCallSite(V)) {
const Function *F = CS.getCalledFunction();
if (F) {
// Used in a direct call.
@@ -541,7 +574,6 @@ void DAE::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
- Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
@@ -563,34 +595,35 @@ void DAE::SurveyFunction(const Function &F) {
// Now, check how our return value(s) is/are used in this caller. Don't
// bother checking return values if all of them are live already.
- if (NumLiveRetVals != RetCount) {
- if (STy) {
- // Check all uses of the return value.
- for (const User *U : TheCall->users()) {
- const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
- if (Ext && Ext->hasIndices()) {
- // This use uses a part of our return value, survey the uses of
- // that part and store the results for this index only.
- unsigned Idx = *Ext->idx_begin();
- if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
- if (RetValLiveness[Idx] == Live)
- NumLiveRetVals++;
- }
- } else {
- // Used by something else than extractvalue. Mark all return
- // values as live.
- for (unsigned i = 0; i != RetCount; ++i )
- RetValLiveness[i] = Live;
- NumLiveRetVals = RetCount;
- break;
- }
+ if (NumLiveRetVals == RetCount)
+ continue;
+
+ // Check all uses of the return value.
+ for (const Use &U : TheCall->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
}
} else {
- // Single return value
- RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
- if (RetValLiveness[0] == Live)
+ // Used by something other than extractvalue. Survey, but assume that the
+ // result applies to all sub-values.
+ UseVector MaybeLiveAggregateUses;
+ if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
+ RetValLiveness.assign(RetCount, Live);
+ break;
+ } else {
+ for (unsigned i = 0; i != RetCount; ++i) {
+ if (RetValLiveness[i] != Live)
+ MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
+ }
+ }
}
}
}
@@ -775,39 +808,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RetTy->isVoidTy() || HasLiveReturnedArg) {
NRetTy = RetTy;
} else {
- StructType *STy = dyn_cast<StructType>(RetTy);
- if (STy)
- // Look at each of the original return values individually.
- for (unsigned i = 0; i != RetCount; ++i) {
- RetOrArg Ret = CreateRet(F, i);
- if (LiveValues.erase(Ret)) {
- RetTypes.push_back(STy->getElementType(i));
- NewRetIdxs[i] = RetTypes.size() - 1;
- } else {
- ++NumRetValsEliminated;
- DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
- << F->getName() << "\n");
- }
- }
- else
- // We used to return a single value.
- if (LiveValues.erase(CreateRet(F, 0))) {
- RetTypes.push_back(RetTy);
- NewRetIdxs[0] = 0;
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(getRetComponentType(F, i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
} else {
- DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
- << "\n");
++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ if (RetTypes.size() > 1) {
+ // More than one return type? Reduce it down to size.
+ if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ } else {
+ assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+ NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
}
- if (RetTypes.size() > 1)
- // More than one return type? Return a struct with them. Also, if we used
- // to return a struct and didn't change the number of return values,
- // return a struct again. This prevents changing {something} into
- // something and {} into void.
- // Make the new struct packed if we used to return a packed struct
- // already.
- NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
- else if (RetTypes.size() == 1)
+ } else if (RetTypes.size() == 1)
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
@@ -826,17 +849,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs =
- AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NRetTy));
else
assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- hasAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex) &&
+ overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
@@ -880,13 +898,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RAttrs = CallPAL.getRetAttributes();
// Adjust in case the function was changed to return void.
- RAttrs =
- AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NF->getReturnType(),
- AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NF->getReturnType()));
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
@@ -959,9 +973,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (!Call->getType()->isX86_MMXTy())
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(RetTy->isStructTy() &&
+ assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
- " must have been a struct!");
+ " must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock::iterator IP = II->getNormalDest()->begin();
@@ -969,9 +983,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
InsertPt = IP;
}
- // We used to return a struct. Instead of doing smart stuff with all the
- // uses of this struct, we will just rebuild it using
- // extract/insertvalue chaining and let instcombine clean that up.
+ // We used to return a struct or array. Instead of doing smart stuff
+ // with all the uses, we will just rebuild it using extract/insertvalue
+ // chaining and let instcombine clean that up.
//
// Start out building up our return value from undef
Value *RetVal = UndefValue::get(RetTy);
@@ -1034,8 +1048,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType()->isVoidTy()) {
RetVal = nullptr;
} else {
- assert (RetTy->isStructTy());
- // The original return value was a struct, insert
+ assert(RetTy->isStructTy() || RetTy->isArrayTy());
+ // The original return value was a struct or array, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
// This does generate messy code, but we'll let it to instcombine to
@@ -1069,7 +1083,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ DI->second->replaceFunction(NF);
// Now that the old function is dead, delete it.
F->eraseFromParent();
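
The DeadArgumentElimination changes above generalize multi-value returns from struct-only to struct-or-array, with NumRetVals and getRetComponentType hiding the difference. A small illustration of the array case, assuming LLVM 3.7-era IR headers (demo is a hypothetical helper):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

// For a function returning [2 x i32], NumRetVals reports two components
// and getRetComponentType yields i32 at every index.
static void demo(LLVMContext &C) {
  Type *I32 = Type::getInt32Ty(C);
  ArrayType *RetTy = ArrayType::get(I32, 2);
  assert(RetTy->getNumElements() == 2);   // NumRetVals path for arrays
  assert(RetTy->getElementType() == I32); // getRetComponentType result
}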
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 823ae53..92e384a 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -31,7 +31,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -124,7 +124,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -139,7 +139,7 @@ INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -703,10 +703,14 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B;
+ AttrBuilder B, R;
B.addAttribute(ReadAttr);
+ R.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
+ // Clear out existing readonly/readnone attributes
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
@@ -755,8 +759,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
}
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
- for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- FlowsToReturn.insert(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
continue;
}
@@ -1702,7 +1706,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = annotateLibraryCalls(SCC);
Changed |= AddReadAttrs(SCC);
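This TargetLibraryInfo-to-wrapper-pass migration recurs in nearly every file of this merge. As a reference, a minimal sketch of the new access pattern for a legacy pass, where MyPass is a hypothetical example rather than anything in the tree:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
struct MyPass : public FunctionPass {
  static char ID;
  MyPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>(); // was TargetLibraryInfo
  }

  bool runOnFunction(Function &F) override {
    // The analysis result now lives behind a wrapper pass.
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    (void)TLI;
    return false;
  }
};
char MyPass::ID = 0;
}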
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 0c844fe..ba04c80 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
+#include <unordered_map>
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -47,6 +48,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
SmallPtrSet<Constant *, 8> SeenConstants;
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ // Collect the set of members for each comdat.
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
+ ComdatMembers.clear();
return Changed;
}
@@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (!AliveGlobals.insert(G).second)
return;
- Module *M = G->getParent();
if (Comdat *C = G->getComdat()) {
- for (Function &F : *M)
- if (F.getComdat() == C)
- GlobalIsNeeded(&F);
- for (GlobalVariable &GV : M->globals())
- if (GV.getComdat() == C)
- GlobalIsNeeded(&GV);
- for (GlobalAlias &GA : M->aliases())
- if (GA.getComdat() == C)
- GlobalIsNeeded(&GA);
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ GlobalIsNeeded(CM.second);
}
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
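A standalone sketch of the cache this hunk introduces, with std::string standing in for the Comdat* and GlobalValue* keys (illustrative only): the multimap is filled once per module, after which each liveness query walks just its own group via equal_range instead of rescanning every function, global, and alias:

#include <iostream>
#include <string>
#include <unordered_map>

int main() {
  std::unordered_multimap<std::string, std::string> ComdatMembers;
  // One pass over the "module" fills the cache up front.
  ComdatMembers.insert({"C1", "func"});
  ComdatMembers.insert({"C1", "gvar"});
  ComdatMembers.insert({"C2", "alias"});

  // Marking a member of C1 live now visits only C1's own group.
  auto Range = ComdatMembers.equal_range("C1");
  for (auto It = Range.first; It != Range.second; ++It)
    std::cout << It->second << " is needed\n";
}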
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6e0ae83..cc4a79f 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -68,7 +68,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -86,7 +86,6 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,7 +94,7 @@ namespace {
char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
@@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
@@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// and will invalidate our notion of what Init is.
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
+ ConstantFoldInstruction(GEP, DL, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -565,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
Value *NewPtr = NewGlobals[Val];
+ Type *NewTy = NewGlobals[Val]->getValueType();
// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
@@ -573,15 +573,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
+ NewPtr =
+ ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
- GEPI->getName()+"."+Twine(Val),GEPI);
+ NewPtr = GetElementPtrInst::Create(
+ NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -721,8 +722,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
- Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, Idxs));
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -739,7 +740,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null; optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -802,7 +803,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
@@ -822,12 +823,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
/// the specified malloc. Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
-static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- CallInst *CI,
- Type *AllocTy,
- ConstantInt *NElements,
- const DataLayout *DL,
- TargetLibraryInfo *TLI) {
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+ ConstantInt *NElements, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -1167,7 +1166,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
InsertedScalarizedValues,
PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
- } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ } else {
+ PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
@@ -1181,8 +1181,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
- } else {
- llvm_unreachable("Unknown usable value");
}
return FieldVals[FieldNo] = Result;
@@ -1224,7 +1222,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
+ Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1271,7 +1269,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, const DataLayout *DL,
+ Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
@@ -1301,10 +1299,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
- unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
+ unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
- TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
+ TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, nullptr,
@@ -1459,16 +1457,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored it that is a malloc or
/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
- CallInst *CI,
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
- if (!DL)
- return false;
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1504,7 +1498,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
+ if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1534,8 +1528,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
- unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
@@ -1563,7 +1557,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1733,6 +1727,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
const GlobalStatus &GS) {
+ auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
// the global with a local alloca in this function.
@@ -1804,12 +1799,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
- const DataLayout &DL = DLP->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
}
} else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
@@ -1954,6 +1947,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ auto &DL = M.getDataLayout();
Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
if (New && New != CE)
GV->setInitializer(New);
@@ -1971,9 +1965,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL);
-
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
/// handled by the code generator. We don't want to generate something like:
@@ -1983,9 +1976,10 @@ isSimpleEnoughValueToCommit(Constant *C,
/// This function should be called if C was not found (but just got inserted)
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
-static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
if (auto *GV = dyn_cast<GlobalValue>(C))
@@ -2019,8 +2013,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
case Instruction::PtrToInt:
// int <=> ptr is fine if the int type is the same size as the
// pointer type.
- if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
- DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
@@ -2042,8 +2036,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C).second)
return true;
@@ -2174,8 +2168,8 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
- : DL(DL), TLI(TLI) {
+ Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
ValueStack.emplace_back();
}
@@ -2249,7 +2243,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
};
@@ -2345,7 +2339,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
@@ -2409,8 +2403,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
i != e; ++i)
GEPOps.push_back(getVal(*i));
InstResult =
- ConstantExpr::getGetElementPtr(P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
<< "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
@@ -2498,9 +2492,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (DL && !Size->isAllOnesValue() &&
+ if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- DL->getTypeStoreSize(ElemTy)) {
+ DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
<< "\n");
@@ -2689,7 +2683,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(DL, TLI);
@@ -3040,9 +3034,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ auto &DL = M.getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool LocalChange = true;
while (LocalChange) {
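Throughout this file the optional DataLayoutPass pointer gives way to the mandatory layout carried by the Module, which is why the null checks above disappear. A minimal sketch of the new pattern, with an invented helper name:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// The layout is a reference, never null, so the old `if (!DL)` guards
// can simply be dropped.
static uint64_t allocSizeOf(Module &M, Type *Ty) {
  const DataLayout &DL = M.getDataLayout();
  return DL.getTypeAllocSize(Ty);
}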
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index b4d31d8..fcacec328 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -16,7 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
+ initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 6686743..8f65a98 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -29,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
InlineLimit : Threshold),
InsertLifetime(InsertLifetime) {}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -97,40 +97,31 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
AttributeSet::FunctionIndex,
B);
- AttributeSet CallerAttr = Caller->getAttributes(),
- CalleeAttr = Callee->getAttributes();
- if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (Callee->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectStrong))
Caller->addFnAttr(Attribute::StackProtect);
}
-/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
+/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime,
- const DataLayout *DL) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -206,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
- // If we don't have data layout information, and only one alloca is using
- // the target default, then we can't safely merge them because we can't
- // pick the greater alignment.
- if (!DL && (!Align1 || !Align2) && Align1 != Align2)
- continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -231,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (Align1 != Align2) {
if (!Align1 || !Align2) {
- assert(DL && "DataLayout required to compare default alignments");
- unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+ unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
Align1 = Align1 ? Align1 : TypeAlign;
Align2 = Align2 ? Align2 : TypeAlign;
@@ -273,8 +259,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -283,17 +268,14 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres
- && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize))
+ Callee->hasFnAttribute(Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
thres = HintThreshold;
// Listen to the cold attribute when it would decrease the threshold.
bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Cold);
+ Callee->hasFnAttribute(Attribute::Cold);
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
@@ -312,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
-/// shouldInline - Return true if the inliner should attempt to inline
-/// at the given CallSite.
+/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -427,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) {
return true;
}
-/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
@@ -444,9 +425,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -506,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, AA, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -564,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL)) {
+ InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
" will not be inlined into " +
@@ -636,16 +616,30 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-// doFinalization - Remove now-dead linkonce functions at the end of
-// processing to avoid breaking the SCC traversal.
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG);
}
-/// removeDeadFunctions - Remove dead functions that are not included in
-/// DNR (Do Not Remove) list.
+/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+ SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+ SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+ auto RemoveCGN = [&](CallGraphNode *CGN) {
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+ // node. These edges might have been made irrelegant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+ // Remove the node for the callee from the call graph and delete it.
+ FunctionsToRemove.push_back(CGN);
+ };
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
@@ -658,9 +652,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
@@ -674,20 +666,45 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// without also dropping the other members of the COMDAT.
// The inliner doesn't visit non-function entities which are in COMDAT
// groups so it is unsafe to do so *unless* the linkage is local.
- if (!F->hasLocalLinkage() && F->hasComdat())
- continue;
-
- // Remove any call graph edges from the function to its callees.
- CGN->removeAllCalledFunctions();
+ if (!F->hasLocalLinkage()) {
+ if (const Comdat *C = F->getComdat()) {
+ --ComdatEntriesAlive[C];
+ DeadFunctionsInComdats.push_back(CGN);
+ continue;
+ }
+ }
- // Remove any edges from the external node to the function's call graph
- // node. These edges might have been made irrelegant due to
- // optimization of the program.
- CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+ RemoveCGN(CGN);
+ }
+ if (!DeadFunctionsInComdats.empty()) {
+ // Count up all the entities in COMDAT groups
+ auto ComdatGroupReferenced = [&](const Comdat *C) {
+ auto I = ComdatEntriesAlive.find(C);
+ if (I != ComdatEntriesAlive.end())
+ ++(I->getSecond());
+ };
+ for (const Function &F : CG.getModule())
+ if (const Comdat *C = F.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalVariable &GV : CG.getModule().globals())
+ if (const Comdat *C = GV.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalAlias &GA : CG.getModule().aliases())
+ if (const Comdat *C = GA.getComdat())
+ ComdatGroupReferenced(C);
+ for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+ Function *F = CGN->getFunction();
+ const Comdat *C = F->getComdat();
+ int NumAlive = ComdatEntriesAlive[C];
+ // We can remove functions in a COMDAT group if the entire group is dead.
+ assert(NumAlive >= 0);
+ if (NumAlive > 0)
+ continue;
- // Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.push_back(CGN);
+ RemoveCGN(CGN);
+ }
}
+
if (FunctionsToRemove.empty())
return false;
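The net effect of the counting above can be shown with a toy stand-in (std::map and strings instead of a DenseMap over Comdat*; not the pass itself): dead functions subtract themselves from their group, every entity still present in the module adds itself back, and only groups that net to zero are removable:

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, int> ComdatEntriesAlive;
  // Dead functions subtract themselves from their group...
  ComdatEntriesAlive["G"] -= 2; // both members of G are dead
  ComdatEntriesAlive["H"] -= 1; // one member of H is dead
  // ...then everything still in the module (including those dead
  // functions, which have not been erased yet) adds itself back.
  std::vector<std::string> ModuleMembers = {"G", "G", "H", "H"};
  for (const std::string &C : ModuleMembers)
    if (ComdatEntriesAlive.count(C))
      ++ComdatEntriesAlive[C];
  assert(ComdatEntriesAlive["G"] == 0); // group fully dead: removable
  assert(ComdatEntriesAlive["H"] == 1); // a live member remains: keep
}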
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 20414aa..41334ca 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -242,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..bffeebb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,732 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(ByteArraySizeBits, "Byte array size in bits");
+STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
+STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+static cl::opt<bool> AvoidReuse(
+ "lowerbitsets-avoid-reuse",
+ cl::desc("Try to avoid reuse of byte array addresses using aliases"),
+ cl::Hidden, cl::init(true));
+
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+ if (Offset < ByteOffset)
+ return false;
+
+ if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+ return false;
+
+ uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+ if (BitOffset >= BitSize)
+ return false;
+
+ return Bits.count(BitOffset);
+}
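A worked instance of the membership arithmetic above, under assumed values ByteOffset = 8, AlignLog2 = 2 (a 4-byte stride), BitSize = 3, and stored bits {0, 2}:

#include <cassert>
#include <cstdint>
#include <set>

int main() {
  const uint64_t ByteOffset = 8, AlignLog2 = 2, BitSize = 3;
  const std::set<uint64_t> Bits = {0, 2};
  auto Contains = [&](uint64_t Offset) {
    if (Offset < ByteOffset)
      return false;
    if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
      return false; // not on the 4-byte stride
    uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
    return BitOffset < BitSize && Bits.count(BitOffset) != 0;
  };
  assert(Contains(8));   // bit 0
  assert(!Contains(10)); // misaligned
  assert(!Contains(12)); // bit 1 is absent from the set
  assert(Contains(16));  // bit 2
  assert(!Contains(20)); // bit 3 is past BitSize
}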
+
+bool BitSetInfo::containsValue(
+ const DataLayout &DL,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ uint64_t COffset) const {
+ if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ auto I = GlobalLayout.find(GV);
+ if (I == GlobalLayout.end())
+ return false;
+ return containsGlobalOffset(I->second + COffset);
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+ COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+ containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
+BitSetInfo BitSetBuilder::build() {
+ if (Min > Max)
+ Min = 0;
+
+ // Normalize each offset against the minimum observed offset, and compute
+ // the bitwise OR of each of the offsets. The number of trailing zeros
+ // in the mask gives us the log2 of the alignment of all offsets, which
+ // allows us to compress the bitset by only storing one bit per aligned
+ // address.
+ uint64_t Mask = 0;
+ for (uint64_t &Offset : Offsets) {
+ Offset -= Min;
+ Mask |= Offset;
+ }
+
+ BitSetInfo BSI;
+ BSI.ByteOffset = Min;
+
+ BSI.AlignLog2 = 0;
+ if (Mask != 0)
+ BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+ // Build the compressed bitset while normalizing the offsets against the
+ // computed alignment.
+ BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+ for (uint64_t Offset : Offsets) {
+ Offset >>= BSI.AlignLog2;
+ BSI.Bits.insert(Offset);
+ }
+
+ return BSI;
+}
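A worked instance of that compression, for assumed offsets {8, 16, 24}; __builtin_ctzll (a GCC/Clang builtin) stands in here for llvm::countTrailingZeros:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Offsets[] = {8, 16, 24};
  uint64_t Min = 8, Mask = 0;
  for (uint64_t &O : Offsets) {
    O -= Min;  // normalize: {0, 8, 16}
    Mask |= O; // Mask ends up 24 == 0b11000
  }
  unsigned AlignLog2 = Mask ? __builtin_ctzll(Mask) : 0;
  assert(AlignLog2 == 3); // every offset is a multiple of 8
  // BitSize = ((Max - Min) >> AlignLog2) + 1 = (16 >> 3) + 1 = 3,
  // so the set stores just bits {0, 1, 2}.
  for (uint64_t O : Offsets)
    assert((O >> AlignLog2) < 3);
}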
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+ // Create a new fragment to hold the layout for F.
+ Fragments.emplace_back();
+ std::vector<uint64_t> &Fragment = Fragments.back();
+ uint64_t FragmentIndex = Fragments.size() - 1;
+
+ for (auto ObjIndex : F) {
+ uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+ if (OldFragmentIndex == 0) {
+ // We haven't seen this object index before, so just add it to the current
+ // fragment.
+ Fragment.push_back(ObjIndex);
+ } else {
+ // This index belongs to an existing fragment. Copy the elements of the
+ // old fragment into this one and clear the old fragment. We don't update
+ // the fragment map just yet; this ensures that any further references to
+ // indices from the old fragment in this fragment do not insert any more
+ // indices.
+ std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+ Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ OldFragment.clear();
+ }
+ }
+
+ // Update the fragment map to point our object indices to this fragment.
+ for (uint64_t ObjIndex : Fragment)
+ FragmentMap[ObjIndex] = FragmentIndex;
+}
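A toy trace of the merge logic above, using plain vectors in place of the builder class; fragment index 0 acts as the "unseen" sentinel, as the comparison against 0 implies:

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

int main() {
  std::vector<std::vector<uint64_t>> Fragments(1); // index 0 = sentinel
  std::vector<uint64_t> FragmentMap(3, 0);
  auto AddFragment = [&](const std::set<uint64_t> &F) {
    Fragments.emplace_back();
    uint64_t Idx = Fragments.size() - 1;
    for (uint64_t Obj : F) {
      uint64_t Old = FragmentMap[Obj];
      if (Old == 0) { // unseen: append to the new fragment
        Fragments[Idx].push_back(Obj);
      } else {        // seen: absorb the old fragment wholesale
        Fragments[Idx].insert(Fragments[Idx].end(), Fragments[Old].begin(),
                              Fragments[Old].end());
        Fragments[Old].clear();
      }
    }
    for (uint64_t Obj : Fragments[Idx])
      FragmentMap[Obj] = Idx;
  };
  AddFragment({0, 1}); // fragment 1: [0, 1]
  AddFragment({1, 2}); // fragment 2 absorbs it: [0, 1, 2]
  assert(Fragments[1].empty());
  assert(FragmentMap[0] == 2 && FragmentMap[1] == 2 && FragmentMap[2] == 2);
}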
+
+void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
+ uint64_t BitSize, uint64_t &AllocByteOffset,
+ uint8_t &AllocMask) {
+ // Find the smallest current allocation.
+ unsigned Bit = 0;
+ for (unsigned I = 1; I != BitsPerByte; ++I)
+ if (BitAllocs[I] < BitAllocs[Bit])
+ Bit = I;
+
+ AllocByteOffset = BitAllocs[Bit];
+
+ // Add our size to it.
+ unsigned ReqSize = AllocByteOffset + BitSize;
+ BitAllocs[Bit] = ReqSize;
+ if (Bytes.size() < ReqSize)
+ Bytes.resize(ReqSize);
+
+ // Set our bits.
+ AllocMask = 1 << Bit;
+ for (uint64_t B : Bits)
+ Bytes[AllocByteOffset + B] |= AllocMask;
+}
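A self-contained sketch of this allocation policy: each of the eight bit positions in a byte is an independent plane, so up to eight bit sets can overlay the same bytes, and each new set lands in the least-filled plane:

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

int main() {
  uint64_t BitAllocs[8] = {}; // bytes allocated so far per bit plane
  std::vector<uint8_t> Bytes;
  auto Allocate = [&](const std::set<uint64_t> &Bits, uint64_t BitSize,
                      uint64_t &Off, uint8_t &Mask) {
    unsigned Bit = 0; // pick the least-filled plane
    for (unsigned I = 1; I != 8; ++I)
      if (BitAllocs[I] < BitAllocs[Bit])
        Bit = I;
    Off = BitAllocs[Bit];
    BitAllocs[Bit] = Off + BitSize;
    if (Bytes.size() < Off + BitSize)
      Bytes.resize(Off + BitSize);
    Mask = uint8_t(1u << Bit);
    for (uint64_t B : Bits)
      Bytes[Off + B] |= Mask; // set this plane's bit in the shared bytes
  };
  uint64_t Off;
  uint8_t Mask;
  Allocate({0, 2}, 3, Off, Mask);
  assert(Off == 0 && Mask == 1);
  Allocate({1}, 2, Off, Mask);
  assert(Off == 0 && Mask == 2); // reuses bytes 0-1 via a different plane
}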
+
+namespace {
+
+struct ByteArrayInfo {
+ std::set<uint64_t> Bits;
+ uint64_t BitSize;
+ GlobalVariable *ByteArray;
+ Constant *Mask;
+};
+
+struct LowerBitSets : public ModulePass {
+ static char ID;
+ LowerBitSets() : ModulePass(ID) {
+ initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+
+ bool LinkerSubsectionsViaSymbols;
+ IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ Type *Int32PtrTy;
+ IntegerType *Int64Ty;
+ Type *IntPtrTy;
+
+ // The llvm.bitsets named metadata.
+ NamedMDNode *BitSetNM;
+
+ // Mapping from bitset mdstrings to the call sites that test them.
+ DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+ std::vector<ByteArrayInfo> ByteArrayInfos;
+
+ BitSetInfo
+ buildBitSet(MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+ void allocateByteArrays();
+ Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *BitOffset);
+ Value *
+ lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals);
+ bool buildBitSets();
+ bool eraseBitSetMetadata();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+ M = &Mod;
+ const DataLayout &DL = Mod.getDataLayout();
+
+ Triple TargetTriple(M->getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+
+ Int1Ty = Type::getInt1Ty(M->getContext());
+ Int8Ty = Type::getInt8Ty(M->getContext());
+ Int32Ty = Type::getInt32Ty(M->getContext());
+ Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ Int64Ty = Type::getInt64Ty(M->getContext());
+ IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+ BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ BitSetTestCallSites.clear();
+
+ return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+ MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ BitSetBuilder BSB;
+
+ // Compute the byte offset of each element of this bitset.
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ uint64_t Offset =
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ ->getValue())->getZExtValue();
+
+ Offset += GlobalLayout.find(OpGlobal)->second;
+
+ BSB.addOffset(Offset);
+ }
+ }
+
+ return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern lowers to the bt instruction on x86.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+ Value *BitOffset) {
+ auto BitsType = cast<IntegerType>(Bits->getType());
+ unsigned BitWidth = BitsType->getBitWidth();
+
+ BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+ Value *BitIndex =
+ B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+ Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+ Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+ return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
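A worked instance of the test above for an assumed 32-bit Bits constant; the AND with BitWidth - 1 reduces the offset modulo 32, mirroring bt's behavior:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Bits = 1u << 5; // only bit 5 is set
  auto Test = [&](uint32_t BitOffset) {
    uint32_t BitIndex = BitOffset & 31u; // BitOffset mod 32
    uint32_t BitMask = 1u << BitIndex;
    return (Bits & BitMask) != 0;
  };
  assert(Test(5));
  assert(Test(37)); // 37 mod 32 == 5
  assert(!Test(6));
}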
+
+ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ // Create globals to stand in for byte arrays and masks. These never actually
+ // get initialized; we RAUW and erase them later in allocateByteArrays() once
+ // we know the offset and mask to use.
+ auto ByteArrayGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+
+ ByteArrayInfos.emplace_back();
+ ByteArrayInfo *BAI = &ByteArrayInfos.back();
+
+ BAI->Bits = BSI.Bits;
+ BAI->BitSize = BSI.BitSize;
+ BAI->ByteArray = ByteArrayGlobal;
+ BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ return BAI;
+}
+
+void LowerBitSets::allocateByteArrays() {
+ std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
+
+ std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
+
+ ByteArrayBuilder BAB;
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ uint8_t Mask;
+ BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
+
+ BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
+ cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ }
+
+ Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ auto ByteArray =
+ new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, ByteArrayConst);
+
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
+ ByteArrayConst->getType(), ByteArray, Idxs);
+
+ // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
+ // that the pc-relative displacement is folded into the lea instead of the
+ // test instruction getting another displacement.
+ if (LinkerSubsectionsViaSymbols) {
+ BAI->ByteArray->replaceAllUsesWith(GEP);
+ } else {
+ GlobalAlias *Alias =
+ GlobalAlias::create(PointerType::getUnqual(Int8Ty),
+ GlobalValue::PrivateLinkage, "bits", GEP, M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
+ }
+ BAI->ByteArray->eraseFromParent();
+ }
+
+ ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
+ BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
+ BAB.BitAllocs[6] + BAB.BitAllocs[7];
+ ByteArraySizeBytes = BAB.Bytes.size();
+}
+
+/// Build a test that bit BitOffset is set in BSI, where
+/// BitSetGlobal is a global containing the bits in BSI.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
+ if (BSI.BitSize <= 64) {
+ // If the bit set is sufficiently small, we can avoid a load by bit testing
+ // a constant.
+ IntegerType *BitsTy;
+ if (BSI.BitSize <= 32)
+ BitsTy = Int32Ty;
+ else
+ BitsTy = Int64Ty;
+
+ uint64_t Bits = 0;
+ for (auto Bit : BSI.Bits)
+ Bits |= uint64_t(1) << Bit;
+ Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+ return createMaskedBitTest(B, BitsConst, BitOffset);
+ } else {
+ if (!BAI) {
+ ++NumByteArraysCreated;
+ BAI = createByteArray(BSI);
+ }
+
+ Constant *ByteArray = BAI->ByteArray;
+ Type *Ty = BAI->ByteArray->getValueType();
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ // Each use of the byte array uses a different alias. This makes the
+ // backend less likely to reuse previously computed byte array addresses,
+ // improving the security of the CFI mechanism based on this pass.
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ GlobalValue::PrivateLinkage, "bits_use",
+ ByteArray, M);
+ }
+
+ Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *Byte = B.CreateLoad(ByteAddr);
+
+ Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
+ }
+}
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+ CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Value *Ptr = CI->getArgOperand(0);
+ const DataLayout &DL = M->getDataLayout();
+
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+
+ Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+ Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+ GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+ BasicBlock *InitialBB = CI->getParent();
+
+ IRBuilder<> B(CI);
+
+ Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *BitOffset;
+ if (BSI.AlignLog2 == 0) {
+ BitOffset = PtrOffset;
+ } else {
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset,
+ ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
+ BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+ }
+
+ Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
+ TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+ IRBuilder<> ThenB(Term);
+
+ // Now that we know that the offset is in range and aligned, load the
+ // appropriate bit from the bitset.
+ Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+ // The value we want is 0 if we came directly from the initial block
+ // (having failed the range or alignment checks), or the loaded bit if
+ // we came from the block in which we loaded it.
+ B.SetInsertPoint(CI);
+ PHINode *P = B.CreatePHI(Int1Ty, 2);
+ P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+ P->addIncoming(Bit, ThenB.GetInsertBlock());
+ return P;
+}
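A worked instance of the rotate trick, under assumed values AlignLog2 = 3, 64-bit pointers, and BitSize = 4; any nonzero alignment bits rotate into the high end of the word, so a single unsigned compare rejects both misaligned and out-of-range offsets:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned AlignLog2 = 3, PtrBits = 64;
  const uint64_t BitSize = 4;
  auto InRange = [&](uint64_t PtrOffset) {
    uint64_t Rot = (PtrOffset >> AlignLog2) |
                   (PtrOffset << (PtrBits - AlignLog2)); // right rotate
    return Rot < BitSize; // one compare checks range and alignment
  };
  assert(InRange(16));  // aligned, bit offset 2 < 4
  assert(!InRange(17)); // misaligned: low bits rotate into the top
  assert(!InRange(64)); // aligned, but bit offset 8 >= 4
}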
+
+/// Given a disjoint set of bitsets and globals, layout the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobals(
+ const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals) {
+ // Build a new global with the combined contents of the referenced globals.
+ std::vector<Constant *> GlobalInits;
+ const DataLayout &DL = M->getDataLayout();
+ for (GlobalVariable *G : Globals) {
+ GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+
+ // Compute the amount of padding required to align the next element to the
+ // next power of 2.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+ // A cap of 128 was found experimentally to give a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
+ Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+ auto CombinedGlobal =
+ new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, NewInit);
+
+ const StructLayout *CombinedGlobalLayout =
+ DL.getStructLayout(cast<StructType>(NewInit->getType()));
+
+ // Compute the offsets of the original globals within the new global.
+ DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+ // For each bitset in this disjoint set...
+ for (MDString *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+ ByteArrayInfo *BAI = nullptr;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Build aliases pointing to offsets into the combined global for each
+ // global from which we built the combined global, and replace references
+ // to the original globals with references to the aliases.
+ for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, I * 2)};
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
+ NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
+ if (LinkerSubsectionsViaSymbols) {
+ Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ GlobalAlias *GAlias =
+ GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
+ "", CombinedGlobalElemPtr, M);
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ }
+ Globals[I]->eraseFromParent();
+ }
+}
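A worked instance of the padding rule above; NextPow2 is a local stand-in with the semantics of llvm::NextPowerOf2 (smallest power of two strictly greater than its argument), and RoundUpToAlignment is inlined as plain integer arithmetic:

#include <cassert>
#include <cstdint>

// Stand-in for llvm::NextPowerOf2.
static uint64_t NextPow2(uint64_t A) {
  uint64_t P = 1;
  while (P <= A)
    P <<= 1;
  return P;
}

int main() {
  auto Padding = [](uint64_t InitSize) {
    uint64_t Pad = NextPow2(InitSize - 1) - InitSize;
    if (Pad > 128) // the cap: fall back to 128-byte alignment
      Pad = ((InitSize + 127) / 128) * 128 - InitSize;
    return Pad;
  };
  assert(Padding(24) == 8);    // padded out to 32
  assert(Padding(32) == 0);    // already a power of two
  assert(Padding(520) == 120); // capped: rounded up to 640, not 1024
}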
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+ Function *BitSetTestFunc =
+ M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ if (!BitSetTestFunc)
+ return false;
+
+ // Equivalence class set containing bitsets and the globals they reference.
+ // This is used to partition the set of bitsets in the module into disjoint
+ // sets.
+ typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ GlobalClassesTy;
+ GlobalClassesTy GlobalClasses;
+
+ for (const Use &U : BitSetTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+
+ auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ report_fatal_error(
+ "Second argument of llvm.bitset.test must be metadata string");
+ auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+ // Add the call site to the list of call sites for this bit set. We also use
+ // BitSetTestCallSites to keep track of whether we have seen this bit set
+ // before. If we have, we don't need to re-add the referenced globals to the
+ // equivalence class.
+ std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ bool> Ins =
+ BitSetTestCallSites.insert(
+ std::make_pair(BitSet, std::vector<CallInst *>()));
+ Ins.first->second.push_back(CI);
+ if (!Ins.second)
+ continue;
+
+ // Add the bitset to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ if (!BitSetNM)
+ continue;
+
+ // Verify the bitset metadata and add the referenced globals to the bitset's
+ // equivalence class.
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ if (!OpGlobal)
+ report_fatal_error("Bit set element must refer to global");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error(
+ "Bit set element offset must be an integer constant");
+
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ }
+ }
+
+ if (GlobalClasses.empty())
+ return false;
+
+ // For each disjoint set we found...
+ for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+ E = GlobalClasses.end();
+ I != E; ++I) {
+ if (!I->isLeader()) continue;
+
+ ++NumBitSetDisjointSets;
+
+ // Build the list of bitsets and referenced globals in this disjoint set.
+ std::vector<MDString *> BitSets;
+ std::vector<GlobalVariable *> Globals;
+ llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<MDString *>()) {
+ BitSetIndices[MI->get<MDString *>()] = BitSets.size();
+ BitSets.push_back(MI->get<MDString *>());
+ } else {
+ GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
+ Globals.push_back(MI->get<GlobalVariable *>());
+ }
+ }
+
+ // For each bitset, build a set of indices that refer to globals referenced
+ // by the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+ // as close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build a vector of globals with the computed layout.
+ std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+ auto OGI = OrderedGlobals.begin();
+ for (auto &&F : GLB.Fragments)
+ for (auto &&Offset : F)
+ *OGI++ = Globals[Offset];
+
+ // Order bitsets by name for determinism.
+ std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+ return S1->getString() < S2->getString();
+ });
+
+ // Build the bitsets from this disjoint set.
+ buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ }
+
+ allocateByteArrays();
+
+ return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata() {
+ if (!BitSetNM)
+ return false;
+
+ M->eraseNamedMetadata(BitSetNM);
+ return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+ bool Changed = buildBitSets();
+ Changed |= eraseBitSetMetadata();
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index b91ebf2..91a5eef 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -127,9 +127,8 @@ namespace {
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const DataLayout *DL, const Function *F1,
- const Function *F2)
- : FnL(F1), FnR(F2), DL(DL) {}
+ FunctionComparator(const Function *F1, const Function *F2)
+ : FnL(F1), FnR(F2) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
@@ -292,8 +291,7 @@ private:
/// Parts to be compared for each comparison stage,
/// most significant stage first:
/// 1. Address space. As numbers.
- /// 2. Constant offset, (if "DataLayout *DL" field is not NULL,
- /// using GEPOperator::accumulateConstantOffset method).
+ /// 2. Constant offset (using the GEPOperator::accumulateConstantOffset method).
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
@@ -354,8 +352,6 @@ private:
// The two functions undergoing comparison.
const Function *FnL, *FnR;
- const DataLayout *DL;
-
/// Assign serial numbers to values from left function, and values from
/// right function.
/// Explanation:
@@ -394,14 +390,13 @@ private:
class FunctionNode {
AssertingVH<Function> F;
- const DataLayout *DL;
public:
- FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F) : F(F) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+ return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
}
};
}
@@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (DL) {
- if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
if (TyL == TyR)
return 0;
@@ -723,6 +719,15 @@ int FunctionComparator::cmpOperations(const Instruction *L,
R->getRawSubclassOptionalData()))
return Res;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ if (int Res =
+ cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+ return Res;
+ }
+
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
@@ -855,13 +860,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
// Since the DataLayout is always available now, we can reduce the GEP down to
// the value in bytes added to the address.
- if (DL) {
- unsigned BitWidth = DL->getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
- GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
(uint64_t)GEPR->getPointerOperand()->getType()))
@@ -1122,9 +1126,6 @@ private:
/// to modify it.
FnTreeType FnTree;
- /// DataLayout for more accurate GEP comparisons. May be NULL.
- const DataLayout *DL;
-
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
@@ -1152,8 +1153,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(DL, F1, F2).compare();
- int Res2 = FunctionComparator(DL, F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2).compare();
+ int Res2 = FunctionComparator(F2, F1).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1174,8 +1175,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(DL, F1, F3).compare();
- int Res4 = FunctionComparator(DL, F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3).compare();
+ int Res4 = FunctionComparator(F2, F3).compare();
bool Transitive = true;
@@ -1212,8 +1213,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -1368,8 +1367,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1420,7 +1418,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionNode(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
@@ -1457,7 +1455,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
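
// Note (not part of this patch): FnTree is an ordered container keyed by
// FunctionComparator, so the comparison must behave as a consistent total
// order whether or not DataLayout is threaded through explicitly. The
// properties doSanityCheck verifies are the usual strict-order laws; a toy
// standalone illustration over ints, assuming nothing about LLVM:
#include <cassert>
#include <vector>

static int cmp(int L, int R) { return L < R ? -1 : (L > R ? 1 : 0); }

int main() {
  std::vector<int> Vals = {3, 1, 4, 1, 5};
  for (int A : Vals)
    for (int B : Vals) {
      assert(cmp(A, B) == -cmp(B, A)); // antisymmetry: Res1 == -Res2
      for (int C : Vals)
        if (cmp(A, B) == -1 && cmp(B, C) == -1)
          assert(cmp(A, C) == -1);     // transitivity
    }
}
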
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 76d6dfa..4a7cb7b 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -58,13 +58,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* returnBlock = nullptr;
BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
- for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
- SI != SE; ++SI)
- if (isa<ReturnInst>((*SI)->getTerminator())) {
- returnBlock = *SI;
+ for (BasicBlock *BB : successors(entryBlock)) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ returnBlock = BB;
returnCount++;
} else
- nonReturnBlock = *SI;
+ nonReturnBlock = BB;
+ }
if (returnCount != 1)
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index fb673dc..7eb0682 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,12 +19,11 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -60,6 +59,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool>
+RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
+ cl::desc("Run the float2int (float demotion) pass"));
+
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
@@ -78,6 +81,14 @@ static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
cl::desc("Enable motion of merged load and store"));
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
+static cl::opt<bool> EnableLoopDistribute(
+ "enable-loop-distribute", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopDistribution Pass"));
+
static cl::opt<bool> EnableGVN("enable-gvn",
cl::init(true), cl::Hidden,
cl::desc("Run the global value numbering pass"));
@@ -98,7 +109,6 @@ PassManagerBuilder::PassManagerBuilder() {
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
- StripDebug = false;
MergeFunctions = false;
}
@@ -122,7 +132,7 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
}
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
- PassManagerBase &PM) const {
+ legacy::PassManagerBase &PM) const {
for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
if ((*GlobalExtensions)[i].first == ETy)
(*GlobalExtensions)[i].second(*this, PM);
@@ -131,8 +141,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
Extensions[i].second(*this, PM);
}
-void
-PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+ legacy::PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
@@ -143,11 +153,13 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
PM.add(createBasicAliasAnalysisPass());
}
-void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+void PassManagerBuilder::populateFunctionPassManager(
+ legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
// Add LibraryInfo if we have some.
- if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (OptLevel == 0) return;
@@ -162,7 +174,8 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
FPM.add(createLowerExpectIntrinsicPass());
}
-void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -186,7 +199,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
// Add LibraryInfo if we have some.
- if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
addInitialAliasAnalysisPasses(MPM);
@@ -240,7 +254,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
-
+ if (EnableLoopInterchange) {
+ MPM.add(createLoopInterchangePass()); // Interchange loops
+ MPM.add(createCFGSimplificationPass());
+ }
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -254,6 +271,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that this creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
@@ -261,6 +283,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass());
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -298,11 +321,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunFloat2Int)
+ MPM.add(createFloat2IntPass());
+
+ // Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
- if (ExtraVectorizerPasses)
- MPM.add(createLoopRotatePass());
+ MPM.add(createLoopRotatePass());
+
+ // Distribute loops to allow partial vectorization, i.e. isolate dependences
+ // that would otherwise inhibit vectorization into a separate loop.
+ if (EnableLoopDistribute)
+ MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
@@ -354,9 +384,19 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
- if (!DisableUnrollLoops)
+ if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
+ // LoopUnroll may generate some redundancy to clean up.
+ MPM.add(createInstructionCombiningPass());
+
+ // Runtime unrolling will introduce a runtime check in the loop prologue. If
+ // the unrolled loop is an inner loop, then the prologue will be inside the
+ // outer loop. The LICM pass can help promote the runtime check out if the
+ // checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
@@ -379,7 +419,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -450,6 +490,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
@@ -469,7 +512,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
+}
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -482,32 +528,26 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- TargetMachine *TM) {
- if (TM) {
- PM.add(new DataLayoutPass());
- TM->addAnalysisPasses(PM);
- }
-
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (LibraryInfo)
- PM.add(new TargetLibraryInfo(*LibraryInfo));
+ PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (VerifyInput)
PM.add(createVerifierPass());
- if (StripDebug)
- PM.add(createStripSymbolsPass(true));
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
- if (VerifyInput)
- PM.add(createDebugInfoVerifierPass());
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
+ addLateLTOOptimizationPasses(PM);
- if (VerifyOutput) {
+ if (VerifyOutput)
PM.add(createVerifierPass());
- PM.add(createDebugInfoVerifierPass());
- }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -573,7 +613,7 @@ void
LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
Builder->populateFunctionPassManager(*FPM);
}
@@ -581,7 +621,7 @@ void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *MPM = unwrap(PM);
+ legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
@@ -590,7 +630,7 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool Internalize,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *LPM = unwrap(PM);
+ legacy::PassManagerBase *LPM = unwrap(PM);
// A small backwards compatibility hack. populateLTOPassManager used to take
// a RunInliner option.
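
// Context (not part of this patch): after this change every populate* entry
// point takes the legacy:: pass managers explicitly. A minimal sketch of how
// an embedder of an LLVM of this vintage would drive the builder - the
// function and its setup here are illustrative assumptions, not code from
// this patch:
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

void runO2Pipeline(Module &M) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  Builder.SizeLevel = 0;

  legacy::FunctionPassManager FPM(&M); // per-function pipeline
  legacy::PassManager MPM;             // whole-module pipeline
  Builder.populateFunctionPassManager(FPM);
  Builder.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
  MPM.run(M);
}
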
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 7bd4ce1..1943b93 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -18,8 +18,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -175,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 816978e..60c9573 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -305,41 +305,31 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
- for (DICompileUnit DIC : F.compile_units()) {
- assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
-
+ for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
- DIArray SPs = DIC.getSubprograms();
bool SubprogramChange = false;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram DISP(SPs.getElement(i));
- assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
-
+ for (DISubprogram *DISP : DIC->getSubprograms()) {
// Make sure we visit each subprogram only once.
if (!VisitedSet.insert(DISP).second)
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP.getFunction())
+ if (DISP->getFunction())
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
}
// Create our live global variable list.
- DIArray GVs = DIC.getGlobalVariables();
bool GlobalVariableChange = false;
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
-
+ for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+ // Make sure we visit each global variable only once.
if (!VisitedSet.insert(DIG).second)
continue;
// If the global variable referenced by DIG is not null, the global
// variable is live.
- if (DIG.getGlobal())
+ if (DIG->getVariable())
LiveGlobalVariables.push_back(DIG);
else
GlobalVariableChange = true;
@@ -349,12 +339,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
+ DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
Changed = true;
}
if (GlobalVariableChange) {
- DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
+ DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6d20384..a8d0172 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
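
// The sign-bit argument above can be checked exhaustively at a small width:
// an i8 value with at least two sign bits lies in [-64, 63], and the sum of
// two such values always fits in [-128, 127]. A standalone sketch:
#include <cassert>
#include <cstdint>

int main() {
  // Values with >= 2 sign bits in 8 bits are exactly [-64, 63].
  for (int A = -64; A <= 63; ++A)
    for (int B = -64; B <= 63; ++B)
      assert(A + B >= INT8_MIN && A + B <= INT8_MAX); // never overflows i8
}
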
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1158,20 +1160,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
// A+B --> A|B iff A and B have no bits set in common.
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &I);
- if (LHSKnownZero != 0) {
- APInt RHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &I);
-
- // No bits in common -> bitwise or.
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
- return BinaryOperator::CreateOr(LHS, RHS);
- }
- }
+ if (haveNoCommonBitsSet(LHS, RHS, DL, AC, &I, DT))
+ return BinaryOperator::CreateOr(LHS, RHS);
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Value *X;
@@ -1243,7 +1233,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1246,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1298,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1375,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1428,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1584,6 +1574,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
return BinaryOperator::CreateLShr(X, CI);
}
+
+ // Turn this into an xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ APInt IntVal = C->getValue();
+ if ((IntVal + 1).isPowerOf2()) {
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
+ if ((IntVal | KnownZero).isAllOnesValue()) {
+ return BinaryOperator::CreateXor(Op1, C);
+ }
+ }
}
@@ -1662,26 +1665,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (DL) {
- Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
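
// Two of the folds added above rest on small bit identities: A+B can be
// rewritten as A|B when the operands share no set bits (haveNoCommonBitsSet),
// and C-X becomes C^X when C is 2^n-1 and X has no bits outside C. A
// standalone exhaustive check at 8 bits:
#include <cassert>

int main() {
  for (unsigned A = 0; A != 256; ++A)
    for (unsigned B = 0; B != 256; ++B)
      if ((A & B) == 0)
        assert(A + B == (A | B));   // disjoint bits: add is or

  const unsigned C = 0x3f;          // 2^6 - 1
  for (unsigned X = 0; X <= C; ++X) // "remaining bits are known zero"
    assert(C - X == (C ^ X));       // borrow-free subtract is xor
}
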
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 74b6970..ee21c81 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
@@ -22,30 +22,12 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// isFreeToInvert - Return true if the specified value is free to invert (apply
-/// ~ to). This happens in cases where the ~ can be eliminated.
-static inline bool isFreeToInvert(Value *V) {
- // ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
- return true;
-
- // Constants can be considered to be not'ed values.
- if (isa<ConstantInt>(V))
- return true;
-
- // Compares can be inverted if they have a single use.
- if (CmpInst *CI = dyn_cast<CmpInst>(V))
- return CI->hasOneUse();
-
- return false;
-}
-
static inline Value *dyn_castNotVal(Value *V) {
// If this is not(not(x)), don't return that this is a not: we want the two
// not's to be folded first.
if (BinaryOperator::isNot(V)) {
Value *Operand = BinaryOperator::getNotArgument(V);
- if (!isFreeToInvert(Operand))
+ if (!IsFreeToInvert(Operand, Operand->hasOneUse()))
return Operand;
}
@@ -997,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
ConstantRange LHSRange =
- ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
ConstantRange RHSRange =
- ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
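
// The fold above works because each icmp against a constant constrains X to
// a range, and an empty intersection proves the 'and' is false. A simplified
// standalone model using non-wrapping unsigned intervals (LLVM's
// ConstantRange also handles wrapped ranges, which this sketch does not):
#include <algorithm>
#include <cassert>
#include <cstdint>

struct Range { uint32_t Lo, Hi; }; // half-open [Lo, Hi)

static bool emptyIntersection(Range A, Range B) {
  return std::max(A.Lo, B.Lo) >= std::min(A.Hi, B.Hi);
}

int main() {
  Range LT5 = {0, 5};                     // X <u 5
  Range GT10 = {11, UINT32_MAX};          // X >u 10 (top clamped for the sketch)
  assert(emptyIntersection(LT5, GT10));   // (X<5) && (X>10) folds to false

  Range LT20 = {0, 20};                   // X <u 20 overlaps X >u 10
  assert(!emptyIntersection(LT20, GT10));
}
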
@@ -1727,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -2585,8 +2569,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// ~(X & Y) --> (~X | ~Y) - De Morgan's Law
// ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (isFreeToInvert(Op0I->getOperand(0)) &&
- isFreeToInvert(Op0I->getOperand(1))) {
+ if (IsFreeToInvert(Op0I->getOperand(0),
+ Op0I->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(Op0I->getOperand(1),
+ Op0I->getOperand(1)->hasOneUse())) {
Value *NotX =
Builder->CreateNot(Op0I->getOperand(0), "notlhs");
Value *NotY =
@@ -2604,15 +2590,16 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS->isOne() && Op0->hasOneUse())
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ if (RHS->isAllOnesValue() && Op0->hasOneUse())
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
return CmpInst::Create(CI->getOpcode(),
CI->getInversePredicate(),
CI->getOperand(0), CI->getOperand(1));
+ }
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 83b4b82..e83b9dd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -11,16 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace PatternMatch;
@@ -60,8 +61,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -107,7 +108,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -155,7 +156,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -197,11 +198,137 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32 bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return ReplaceInstUsesWith(CI, V);
+
if (isFreeCall(&CI, TLI))
return visitFree(CI);
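
// The immediate decoding in SimplifyX86insertps above maps directly onto a
// shufflevector mask: indices 0-3 select lanes of the first operand and 4-7
// select lanes of the second operand, which the combine may have replaced
// with a zero vector. A standalone decoder mirroring that logic; it models
// only the configurations the combine handles (identical source operands, or
// a zero mask covering the destination lane):
#include <array>
#include <cassert>
#include <cstdint>

static std::array<int, 4> insertpsToShuffle(uint8_t Imm) {
  uint8_t ZMask = Imm & 0xf;              // [3:0] zero mask per lane
  uint8_t DestLane = (Imm >> 4) & 0x3;    // [5:4] destination lane
  uint8_t SourceLane = (Imm >> 6) & 0x3;  // [7:6] source lane
  std::array<int, 4> Mask = {0, 1, 2, 3}; // pass the first operand through
  if (ZMask) {
    Mask[DestLane] = SourceLane;          // move within the first operand
    for (unsigned I = 0; I != 4; ++I)
      if ((ZMask >> I) & 1)
        Mask[I] = I + 4;                  // read a zero into this lane
  } else {
    Mask[DestLane] = SourceLane + 4;      // plain insert from the source op
  }
  return Mask;
}

int main() {
  // Imm = 0x30: source lane 0 into destination lane 3, no zeroing.
  assert((insertpsToShuffle(0x30) == std::array<int, 4>{0, 1, 2, 4}));
  // Imm = 0x61: lane 1 moved to lane 2, lane 0 zeroed (same-operand case).
  assert((insertpsToShuffle(0x61) == std::array<int, 4>{4, 1, 1, 3}));
}
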
@@ -350,112 +477,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true);
- }
- // FALL THROUGH uadd into sadd
+
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
- // We can strength reduce reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
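
// For the consolidated overflow handling above, recall what an
// *.with.overflow intrinsic returns: the wrapped arithmetic result plus an
// i1 overflow flag. A standalone model of the unsigned-add flavor
// (illustrative only; the real lowering is OptimizeOverflowCheck):
#include <cassert>
#include <cstdint>
#include <utility>

// {result, overflowed}, as uadd.with.overflow would produce for i32.
static std::pair<uint32_t, bool> uaddWithOverflow(uint32_t A, uint32_t B) {
  uint32_t R = A + B; // wraps modulo 2^32
  return {R, R < A};  // wrapped iff the sum is smaller than an input
}

int main() {
  assert(!uaddWithOverflow(1, 2).second);
  assert(uaddWithOverflow(0xffffffffu, 1).second); // (2^32-1) + 1 wraps to 0
}
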
@@ -543,7 +594,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -560,7 +611,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -575,11 +626,54 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -696,15 +790,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
break;
}
-
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
@@ -867,6 +964,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -906,12 +1011,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -940,7 +1045,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1079,7 +1184,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1102,7 +1207,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// facts about the relocate value, while being careful to
// preserve relocation semantics.
GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.derivedPtr();
+ Value *DerivedPtr = Operands.getDerivedPtr();
+ auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
@@ -1113,24 +1219,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
// most practical collectors, but there was discussion in the review thread
// about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<UndefValue>(DerivedPtr)) {
+ // gc_relocate is not cast; replace it with an undef of gc_relocate's type.
+ return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
+ }
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some weird
// collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // gc_relocate is not cast; replace it with a null pointer of gc_relocate's type.
+ return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
+ }
// isKnownNonNull -> nonnull attribute
if (isKnownNonNull(DerivedPtr))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- // TODO: dereferenceable -> deref attribute
+ // isDereferenceablePointer -> deref attribute
+ if (isDereferenceablePointer(DerivedPtr, DL)) {
+ if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+ uint64_t Bytes = A->getDereferenceableBytes();
+ II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+ }
+ }
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
-
+
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
}
}
@@ -1147,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1172,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1181,10 +1297,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
- if (Value *With = Simplifier->optimizeCall(CI)) {
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ ReplaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
@@ -1342,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1359,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1409,10 +1529,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
- if (RAttrs.
- hasAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1438,7 +1555,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1450,8 +1570,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- hasAttributes(AttributeFuncs::
- typeIncompatible(ParamTy, i + 1), i + 1))
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
@@ -1463,12 +1582,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
@@ -1524,10 +1643,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.
- removeAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
// Add the new return attributes.
if (RAttrs.hasAttributes())
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5415726..48ab0eb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return nullptr;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : nullptr;
- Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : nullptr;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -424,8 +418,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty, IC, CxtI))
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
@@ -441,6 +435,15 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
+ // Test if the trunc is the user of a select which is part of a
+ // minimum or maximum operation. If so, don't do any more simplification.
+ // Even simplifying demanded bits can break the canonical form of a
+ // min/max.
+ Value *LHS, *RHS;
+ if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+ if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
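// The shape the early return above preserves, sketched in scalar C++ (not
// part of the patch): the trunc's operand is a select that
// matchSelectPattern would classify as a signed min; narrowing its inputs
// first would destroy that canonical (icmp, select) pair.
#include <cassert>
#include <cstdint>
int main() {
  int32_t A = 300, B = 800;
  int32_t M = A < B ? A : B;         // icmp slt + select: SPF_SMIN
  int8_t T = static_cast<int8_t>(M); // the trunc that visitTrunc leaves alone
  assert(M == 300 && T == 44);       // 300 wraps to 44 on two's complement
}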
// See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
@@ -1035,8 +1038,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
@@ -1064,6 +1067,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ // If we know that the value being extended is positive, we can use a zext
+ // instead.
+ bool KnownZero, KnownOne;
+ ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+ if (KnownZero) {
+ Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ return ReplaceInstUsesWith(CI, ZExt);
+ }
+
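// The sign-bit reasoning behind the new zext fold, restated in scalar form
// (illustrative only): once ComputeSignBit can prove the top bit of Src is
// zero (here via a logical shift right), sign and zero extension agree.
#include <cassert>
#include <cstdint>
int main() {
  uint16_t U = static_cast<uint16_t>(0xFB2E) >> 1; // top bit now known zero
  int32_t SExt = static_cast<int16_t>(U);          // sext i16 -> i32
  int32_t ZExt = static_cast<uint16_t>(U);         // zext i16 -> i32
  assert(SExt == ZExt);
}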
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
@@ -1332,22 +1344,57 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
return commonCastTransforms(CI);
}
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+ if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+ return nullptr;
+ Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+ Value *SrcI = OpI->getOperand(0);
+ Type *FITy = FI.getType();
+ Type *OpITy = OpI->getType();
+ Type *SrcTy = SrcI->getType();
+ bool IsInputSigned = isa<SIToFPInst>(OpI);
+ bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+ int ActualSize = std::min(InputSize, OutputSize);
+
+ if (ActualSize <= OpITy->getFPMantissaWidth()) {
+ if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+ if (IsInputSigned && IsOutputSigned)
+ return new SExtInst(SrcI, FITy);
+ return new ZExtInst(SrcI, FITy);
+ }
+ if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+ return new TruncInst(SrcI, FITy);
+ if (SrcTy == FITy)
+ return ReplaceInstUsesWith(FI, SrcI);
+ return new BitCastInst(SrcI, FITy);
+ }
+ return nullptr;
+}
+
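// The width arithmetic used by FoldItoFPtoI, restated as a standalone
// sketch (the helper name is illustrative, not LLVM's): the round trip
// through the FP type is exact when the smaller of the two effective
// integer widths fits in the mantissa (24 bits for IEEE single precision).
#include <algorithm>
#include <cassert>
static bool roundTripIsExact(int SrcBits, bool SrcSigned, int DstBits,
                             bool DstSigned, int MantissaBits) {
  int InputSize = SrcBits - (SrcSigned ? 1 : 0); // sign bit adds no magnitude
  int OutputSize = DstBits - (DstSigned ? 1 : 0);
  return std::min(InputSize, OutputSize) <= MantissaBits;
}
int main() {
  assert(roundTripIsExact(16, true, 32, true, 24));  // i16->float->i32: fold
  assert(!roundTripIsExact(64, true, 64, true, 24)); // i64->float->i64: keep
}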
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (!OpI)
return commonCastTransforms(FI);
- // fptoui(uitofp(X)) --> X
- // fptoui(sitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1357,17 +1404,8 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
if (!OpI)
return commonCastTransforms(FI);
- // fptosi(sitofp(X)) --> X
- // fptosi(uitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() <=
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1384,18 +1422,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
-
- if (DL) {
- unsigned AS = CI.getAddressSpace();
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- DL->getPointerSizeInBits(AS)) {
- Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
- }
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1424,41 +1459,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
- if (!DL)
- return commonCastTransforms(CI);
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL->getPointerSizeInBits(AS);
- APInt Offset(OffsetBits, 0);
- BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() &&
- BCI &&
- GEP->accumulateConstantOffset(*DL, Offset)) {
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = BCI->getOperand(0);
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(),
- Offset.getSExtValue(),
- NewIndices)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
}
return commonCastTransforms(CI);
@@ -1469,16 +1469,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!DL)
- return commonPointerCastTransforms(CI);
-
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
@@ -1562,8 +1559,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned Shift,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy, InstCombiner &IC) {
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
"Shift should be a multiple of the element type size");
@@ -1579,7 +1576,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
return true;
unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
- if (IC.getDataLayout()->isBigEndian())
+ if (isBigEndian)
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
@@ -1599,7 +1596,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- Shift, Elements, VecEltTy, IC);
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1614,7 +1611,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
@@ -1627,28 +1625,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC) &&
- CollectInsertionElements(I->getOperand(1), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
@@ -1671,15 +1669,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType(), IC))
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1699,10 +1695,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+ const DataLayout &DL) {
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1725,7 +1719,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1749,7 +1743,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1785,26 +1779,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
- Constant *ZeroUInt =
- Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
- SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(0U);
++NumZeros;
}
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
- SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
return I;
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c07c96d..2dafa58 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -24,7 +24,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
- // We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && !DL)
- return nullptr;
-
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (DL)
- Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
- else if (ArrayElementCount <= 32)
- Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (DL &&
- GEPsInBounds &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
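// The inbounds fold above in C terms (a sketch, not from the patch):
// comparing a GEP against its own base reduces to comparing the offset
// against zero, because inbounds pointer arithmetic cannot wrap.
#include <cassert>
#include <cstddef>
int main() {
  int A[8] = {0};
  for (size_t I = 0; I != 8; ++I)
    assert((A + I > A) == (I > 0)); // ((gep A, I) cmp A) ---> (I cmp 0)
}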
@@ -1928,17 +1919,20 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
- DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = nullptr;
- if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
+ Value *RHSCIOp = RHSC->getOperand(0);
+ if (RHSCIOp->getType()->getPointerAddressSpace() ==
+ LHSCIOp->getType()->getPointerAddressSpace()) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+ } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1)))
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
- } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
- RHSOp = RHSC->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
- }
if (RHSOp)
return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
@@ -2103,7 +2097,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
- CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
@@ -2115,33 +2109,94 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
- InstCombiner &IC) {
- // Don't bother doing this transformation for pointers, don't do it for
- // vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
+bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result, Constant *&Overflow) {
+ assert((!OrigI.isCommutative() ||
+ !(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
+ "call with a constant RHS if possible!");
+
+ auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
+ Result = OpResult;
+ Overflow = OverflowVal;
+ if (ReuseName)
+ Result->takeName(&OrigI);
+ return true;
+ };
- // If the add is a constant expr, then we don't bother transforming it.
- Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (!OrigAdd) return nullptr;
+ switch (OCF) {
+ case OCF_INVALID:
+ llvm_unreachable("bad overflow check kind!");
- Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+ case OCF_UNSIGNED_ADD: {
+ OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ true);
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(OrigAdd);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ }
+ // FALL THROUGH uadd into sadd
+ case OCF_SIGNED_ADD: {
+ // X + 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ // We can strength reduce this signed add into a regular add if we can prove
+ // that it will never overflow.
+ if (OCF == OCF_SIGNED_ADD)
+ if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+ }
- Module *M = I.getParent()->getParent()->getParent();
- Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
- CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
- Value *Add = Builder->CreateExtractValue(Call, 0);
+ case OCF_UNSIGNED_SUB:
+ case OCF_SIGNED_SUB: {
+ // X - 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_SUB) {
+ if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ } else {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+ break;
+ }
- IC.ReplaceInstUsesWith(*OrigAdd, Add);
+ case OCF_UNSIGNED_MUL: {
+ OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ } // FALL THROUGH
+ case OCF_SIGNED_MUL:
+ // X * undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
+
+ // X * 0 -> {0, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(RHS, Builder->getFalse(), false);
+
+ // X * 1 -> {X, false}
+ if (match(RHS, m_One()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_MUL)
+ if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ }
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+ return false;
}
/// \brief Recognize and process idiom involving test for multiplication
@@ -2311,7 +2366,7 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulB = Builder->CreateZExt(B, MulType);
Value *F =
Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
- CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul");
+ CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
// If there are uses of mul result other than the comparison, we know that
@@ -2657,8 +2712,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (DL) // Pointers require DL info to get their size.
- BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2771,8 +2826,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -3091,9 +3146,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && DL &&
- DL->getIntPtrType(RHSC->getType()) ==
- LHSI->getOperand(0)->getType())
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
@@ -3425,7 +3479,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3439,21 +3493,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
- // (a+b) <u a --> llvm.uadd.with.overflow.
- // (a+b) <u b --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
- (Op1 == A || Op1 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
- return R;
-
- // a >u (a+b) --> llvm.uadd.with.overflow.
- // b >u (a+b) --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op1, m_Add(m_Value(A), m_Value(B))) &&
- (Op0 == A || Op0 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
- return R;
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ ReplaceInstUsesWith(*AddI, Result);
+ return ReplaceInstUsesWith(I, Overflow);
+ }
+ }
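// The idiom the new m_UAddWithOverflow matcher captures (replacing the
// ProcessUAddIdiom paths removed above): for unsigned a and b, the compare
// (a + b) <u a is exactly the carry-out of the addition, which is what the
// overflow flag of llvm.uadd.with.overflow reports. Checked here with the
// GCC/Clang __builtin_add_overflow builtin.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0xFFFFFFF0u, B = 0x20u, Sum;
  bool Wrapped = (A + B) < A;                     // (a+b) <u a
  bool Overflow = __builtin_add_overflow(A, B, &Sum);
  assert(Wrapped && Overflow && Sum == 0x10u);
}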
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
@@ -3560,6 +3611,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
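// A spot check (outside the patch) of the identity behind the new
// transform: for 0 < C < 32, (A << C) == (B << C) exactly when A and B
// agree on the low 32-C bits, i.e. ((A ^ B) & (~0u >> C)) == 0.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0x12345678u, B = 0xF2345678u; // differ only in the top 3 bits
  for (unsigned C = 1; C != 32; ++C)
    assert(((A << C) == (B << C)) == (((A ^ B) & (~0u >> C)) == 0));
}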
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -3886,6 +3952,19 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
+ // Test if the FCmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+
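// The protected shape in source form (a minimal sketch): a compare whose
// single use is a select over the compare's own operands, i.e. the
// floating-point min/max idiom that ScalarEvolution and CodeGen want to
// see intact.
#include <cassert>
int main() {
  float X = 1.5f, Y = 2.5f;
  float Min = X < Y ? X : Y; // fcmp olt + select(%cmp, %x, %y)
  float Max = X < Y ? Y : X; // same compare, operands swapped
  assert(Min == 1.5f && Max == 2.5f);
}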
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 3c3c135..97ea8df 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,4 +1,4 @@
-//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
+//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,12 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides internal interfaces used to implement the InstCombine
+/// pass.
+///
+//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
-#include "InstCombineWorklist.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
@@ -21,7 +26,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#define DEBUG_TYPE "instcombine"
@@ -34,20 +39,15 @@ class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-/// SelectPatternFlavor - We can match a variety of different patterns for
-/// select operations.
-enum SelectPatternFlavor {
- SPF_UNKNOWN = 0,
- SPF_SMIN,
- SPF_UMIN,
- SPF_SMAX,
- SPF_UMAX,
- SPF_ABS,
- SPF_NABS
-};
-
-/// getComplexity: Assign a complexity or rank value to LLVM Values...
-/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+/// \brief Assign a complexity or rank value to LLVM Values.
+///
+/// This routine maps IR values to various complexity ranks:
+/// 0 -> undef
+/// 1 -> Constants
+/// 2 -> Other non-instructions
+/// 3 -> Arguments
+/// 3 -> Unary operations
+/// 4 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
@@ -60,18 +60,82 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
-/// AddOne - Add one to a Constant
+/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
-/// SubOne - Subtract one from a Constant
+/// \brief Subtract one from a Constant
static inline Constant *SubOne(Constant *C) {
return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
-/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
-/// just like the normal insertion helper, but also adds any new instructions
-/// to the instcombine worklist.
+/// \brief Return true if the specified value is free to invert (apply ~ to).
+/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
+/// is true, work under the assumption that the caller intends to remove all
+/// uses of V and only keep uses of ~V.
+///
+static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if all of their uses are being modified to
+ // use ~V.
+ if (isa<CmpInst>(V))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `A + Constant` then `-1 - V` can be folded into `(-1
+ // - Constant) - A` if we are willing to invert all of the uses.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub)
+ if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
+ return WillInvertAllUses;
+
+ return false;
+}
+
+
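// The add/sub case of IsFreeToInvert, verified standalone in two's
// complement (not part of the patch): since ~X == -1 - X, inverting
// A + Constant folds to a constant-minus-A form with no extra instruction.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0xDEADBEEFu, C = 42u;
  assert(~(A + C) == ~C - A); // ~(A + C) --> (-1 - C) - A
}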
+/// \brief Specific patterns of overflow check idioms that we match.
+enum OverflowCheckFlavor {
+ OCF_UNSIGNED_ADD,
+ OCF_SIGNED_ADD,
+ OCF_UNSIGNED_SUB,
+ OCF_SIGNED_SUB,
+ OCF_UNSIGNED_MUL,
+ OCF_SIGNED_MUL,
+
+ OCF_INVALID
+};
+
+/// \brief Returns the OverflowCheckFlavor corresponding to an overflow_with_op
+/// intrinsic.
+static inline OverflowCheckFlavor
+IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
+ switch (ID) {
+ default:
+ return OCF_INVALID;
+ case Intrinsic::uadd_with_overflow:
+ return OCF_UNSIGNED_ADD;
+ case Intrinsic::sadd_with_overflow:
+ return OCF_SIGNED_ADD;
+ case Intrinsic::usub_with_overflow:
+ return OCF_UNSIGNED_SUB;
+ case Intrinsic::ssub_with_overflow:
+ return OCF_SIGNED_SUB;
+ case Intrinsic::umul_with_overflow:
+ return OCF_UNSIGNED_MUL;
+ case Intrinsic::smul_with_overflow:
+ return OCF_SIGNED_MUL;
+ }
+}
+
+/// \brief An IRBuilder inserter that adds new instructions to the instcombine
+/// worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
@@ -92,47 +156,60 @@ public:
}
};
-/// InstCombiner - The -instcombine pass.
+/// \brief The core instruction combiner logic.
+///
+/// This class provides both the logic to recursively visit instructions and
+/// combine them, as well as the pass infrastructure for running this as part
+/// of the LLVM pass pipeline.
class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction *> {
- AssumptionCache *AC;
- const DataLayout *DL;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
- bool MadeIRChange;
- LibCallSimplifier *Simplifier;
- bool MinimizeSize;
-
+ : public InstVisitor<InstCombiner, Instruction *> {
+ // FIXME: These members shouldn't be public.
public:
- /// Worklist - All of the instructions that need to be simplified.
- InstCombineWorklist Worklist;
+ /// \brief A worklist of the instructions that need to be simplified.
+ InstCombineWorklist &Worklist;
- /// Builder - This is an IRBuilder that automatically inserts new
- /// instructions into the worklist when they are created.
+ /// \brief An IRBuilder that automatically inserts new instructions into the
+ /// worklist.
typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
- static char ID; // Pass identification, replacement for typeid
- InstCombiner()
- : FunctionPass(ID), DL(nullptr), DT(nullptr), Builder(nullptr) {
- MinimizeSize = false;
- initializeInstCombinerPass(*PassRegistry::getPassRegistry());
- }
+private:
+ // Mode in which we are running the combiner.
+ const bool MinimizeSize;
-public:
- bool runOnFunction(Function &F) override;
+ // Required analyses.
+ // FIXME: These can never be null and should be references.
+ AssumptionCache *AC;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ const DataLayout &DL;
+
+ // Optional analyses. When non-null, these can both be used to do better
+ // combining and will be updated to reflect any changes.
+ LoopInfo *LI;
+
+ bool MadeIRChange;
- bool DoOneIteration(Function &F, unsigned ItNum);
+public:
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+ bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
+ : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
+ AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// \brief Run the combiner over the entire worklist until it is empty.
+ ///
+ /// \returns true if the IR is changed.
+ bool run();
AssumptionCache *getAssumptionCache() const { return AC; }
- const DataLayout *getDataLayout() const { return DL; }
-
+ const DataLayout &getDataLayout() const { return DL; }
+
DominatorTree *getDominatorTree() const { return DT; }
+ LoopInfo *getLoopInfo() const { return LI; }
+
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
// Visitation implementation - Implement instruction combining for different
@@ -222,6 +299,7 @@ public:
Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
+ Instruction *FoldItoFPtoI(Instruction &FI);
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
@@ -262,37 +340,51 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
- Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
- /// results in any code being generated and is interesting to optimize out. If
- /// the cast can be eliminated by some other simple transformation, we prefer
- /// to do the simplification first.
+ /// \brief Classify whether a cast is worth optimizing.
+ ///
+ /// Returns true if the cast from "V to Ty" actually results in any code
+ /// being generated and is interesting to optimize out. If the cast can be
+ /// eliminated by some other simple transformation, we prefer to do the
+ /// simplification first.
bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
+ /// \brief Try to optimize a sequence of instructions checking if an operation
+ /// on LHS and RHS overflows.
+ ///
+ /// If a simplification is possible, stores the simplified result of the
+ /// operation in OperationResult and result of the overflow check in
+ /// OverflowResult, and return true. If no simplification is possible,
+ /// returns false.
+ bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ Instruction &CtxI, Value *&OperationResult,
+ Constant *&OverflowResult);
+
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+ Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
public:
- // InsertNewInstBefore - insert an instruction New before instruction Old
- // in the program. Add the new instruction to the worklist.
- //
+  /// \brief Inserts an instruction \p New before instruction \p Old.
+ ///
+ /// Also adds the new instruction to the worklist and returns \p New so that
+ /// it is suitable for use as the return from the visitation patterns.
Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
@@ -302,21 +394,23 @@ public:
return New;
}
- // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
- // debug loc.
- //
+ /// \brief Same as InsertNewInstBefore, but also sets the debug loc.
Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
New->setDebugLoc(Old.getDebugLoc());
return InsertNewInstBefore(New, Old);
}
- // ReplaceInstUsesWith - This method is to be used when an instruction is
- // found to be dead, replacable with another preexisting expression. Here
- // we add all uses of I to the worklist, replace all uses of I with the new
- // value, then return I, so that the inst combiner will know that I was
- // modified.
- //
+ /// \brief A combiner-aware RAUW-like routine.
+ ///
+ /// This method is to be used when an instruction is found to be dead,
+  /// replaceable with another preexisting expression. Here we add all uses of
+ /// I to the worklist, replace all uses of I with the new value, then return
+ /// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
@@ -325,30 +419,27 @@ public:
V = UndefValue::get(I.getType());
DEBUG(dbgs() << "IC: Replacing " << I << "\n"
- " with " << *V << '\n');
+ << " with " << *V << '\n');
I.replaceAllUsesWith(V);
return &I;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
- /// \p Result and a constant \p Overflow value. If \p ReUseName is true the
- /// \p Result's name is taken from \p II.
+ /// \p Result and a constant \p Overflow value.
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
- bool Overflow, bool ReUseName = true) {
- if (ReUseName)
- Result->takeName(II);
- Constant *V[] = { UndefValue::get(Result->getType()),
- Overflow ? Builder->getTrue() : Builder->getFalse() };
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
}
-
- // EraseInstFromFunction - When dealing with an instruction that has side
- // effects or produces a void value, we can't rely on DCE to delete the
- // instruction. Instead, visit methods should return the value returned by
- // this function.
+
+ /// \brief Combiner aware instruction erasure.
+ ///
+ /// When dealing with an instruction that has side effects or produces a void
+ /// value, we can't rely on DCE to delete the instruction. Instead, visit
+ /// methods should return the value returned by this function.
Instruction *EraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');
@@ -367,13 +458,12 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ unsigned Depth, Instruction *CxtI) const {
return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
DT);
}
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
- unsigned Depth = 0,
+ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AC, CxtI, DT);
}
@@ -396,22 +486,24 @@ public:
}
private:
- /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
- /// operators which are associative or commutative.
+ /// \brief Performs a few simplifications for operators which are associative
+ /// or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
- /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
- /// which some other binary operation distributes over either by factorizing
- /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
- /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
- /// a win). Returns the simplified value, or null if it didn't simplify.
+ /// \brief Tries to simplify binary operations which some other binary
+ /// operation distributes over.
+ ///
+  /// It does this either by factorizing out common terms (eg "(A*B)+(A*C)"
+ /// -> "A*(B+C)") or expanding out if this results in simplifications (eg: "A
+ /// & (B | C) -> (A&B) | (A&C)" if this is a win). Returns the simplified
+ /// value, or null if it didn't simplify.
Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
- /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
- /// based on the demanded bits.
+ /// \brief Attempts to replace V with a simpler value based on the demanded
+ /// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
@@ -420,9 +512,8 @@ private:
APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne);
- /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
- /// SimplifyDemandedBits knows about. See if the instruction has any
- /// properties that allow us to simplify its operands.
+ /// \brief Tries to simplify operands to an integer instruction based on its
+ /// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
@@ -438,9 +529,8 @@ private:
//
Instruction *FoldOpIntoPhi(Instruction &I);
- // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
- // operator and they all are only used by the PHI, PHI together their
- // inputs, and do the operation once, to the result of the PHI.
+ /// \brief Try to rotate an operation below a PHI node, using PHI nodes for
+ /// its operands.
Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
@@ -461,8 +551,9 @@ private:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
- /// Descale - Return a value X such that Val = X * Scale, or null if none. If
- /// the multiplication is known not to overflow then NoSignedWrap is set.
+ /// \brief Returns a value X such that Val = X * Scale, or null if none.
+ ///
+ /// If the multiplication is known not to overflow then NoSignedWrap is set.
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
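
As a concrete illustration of the new overflow plumbing in this class,
OptimizeOverflowCheck proves a check false and CreateOverflowTuple rebuilds
the intrinsic's result; a hypothetical IR sketch (names and constants are
invented here, not taken from the patch):

    ; before: the generic overflow intrinsic plus its check
    %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 1)
    %ov  = extractvalue { i32, i1 } %res, 1
    ; after: the add is proven not to overflow, so the tuple is rebuilt
    ; around a plain add and a constant-false overflow bit
    %sum  = add nsw i32 %a, 1
    %res2 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %sum, 0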
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6230c00..5aa59c6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -83,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
if (CS.isCallee(&U))
@@ -163,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- if (DL) {
- Type *IntPtrTy = DL->getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = Builder->CreateIntCast(AI.getArraySize(),
- IntPtrTy, false);
- AI.setOperand(0, V);
- return &AI;
- }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
}
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = DL
- ? DL->getIntPtrType(AI.getType())
- : Type::getInt64Ty(AI.getContext());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = { NullIdx, NullIdx };
- Instruction *GEP =
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+  // Now that It is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- InsertNewInstBefore(GEP, *It);
+ IC.InsertNewInstBefore(GEP, *It);
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, GEP);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.ReplaceInstUsesWith(AI, GEP);
+ }
+
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
}
- if (DL && AI.getAllocatedType()->isSized()) {
+ return nullptr;
+}
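
For reference, the constant-array-size conversion above looks roughly like
this in IR (a hypothetical sketch; value names are illustrative only):

    ; before
    %buf = alloca i32, i32 4, align 4
    ; after: a one-shot array alloca plus a GEP to its first element
    %buf1 = alloca [4 x i32], align 4
    %buf1.sub = getelementptr inbounds [4 x i32]* %buf1, i64 0, i64 0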
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
@@ -236,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -245,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -269,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -300,7 +314,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
+ const Twine &Suffix = "") {
Value *Ptr = LI.getPointerOperand();
unsigned AS = LI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -308,7 +323,8 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
- LI.getAlignment(), LI.getName());
+ LI.getAlignment(), LI.getName() + Suffix);
+ MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
@@ -335,21 +351,81 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_nonnull:
- // FIXME: We should translate this into range metadata for integer types
- // and vice versa.
- if (NewTy->isPointerTy())
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
NewLoad->setMetadata(ID, N);
+ break;
+ }
+ // If it's integral now, translate it to !range metadata.
+ if (NewTy->isIntegerTy()) {
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt =
+ ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLoad->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+ }
break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
+ // conversions make it hard. If the new type is a pointer, we could
+ // translate it to !nonnull metadata.
break;
}
}
return NewLoad;
}
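
The new !nonnull-to-!range translation above can be pictured with a
hypothetical sketch (assuming 64-bit pointers; metadata numbering invented):

    ; before: a pointer load known to be non-null
    %p = load i8** %pp, !nonnull !0
    ; after combining to an integer load, the fact survives as a wrapped
    ; range [1, 0) that excludes only zero
    %pi = load i64* %ppi, !range !1
    ; where !1 = !{i64 1, i64 0}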
+/// \brief Combine a store to a new type.
+///
+/// Returns the newly created store instruction.
+static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ Value *Ptr = SI.getPointerOperand();
+ unsigned AS = SI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ SI.getAllMetadata(MD);
+
+ StoreInst *NewStore = IC.Builder->CreateAlignedStore(
+ V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ SI.getAlignment());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a store instruction changing *only its
+ // type*. The only metadata it makes sense to drop is metadata which is
+ // invalidated when the pointer type changes. This should essentially
+ // never be the case in LLVM, but we explicitly switch over only known
+ // metadata to be conservatively correct. If you are adding metadata to
+ // LLVM which pertains to stores, you almost certainly want to add it
+ // here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewStore->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_range:
+ // These don't apply for stores.
+ break;
+ }
+ }
+
+ return NewStore;
+}
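
combineStoreToNewValue is also what lets the store-of-bitcast fold further
down store the original type directly; an illustrative sketch, not taken
from the patch:

    ; before
    %f = bitcast i32 %x to float
    store float %f, float* %p
    ; after: the pointer is cast instead, and the metadata is re-attached
    %pi = bitcast float* %p to i32*
    store i32 %x, i32* %pi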
+
+/// \brief Combine loads to match the type of value their uses expect, looking
/// through intervening bitcasts.
///
@@ -376,6 +452,35 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.use_empty())
return nullptr;
+ Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
+
+ // Try to canonicalize loads which are only ever stored to operate over
+ // integers instead of any other type. We only do this when the loaded type
+ // is sized and has a size exactly the same as its store size and the store
+ // size is a legal integer type.
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
+ if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ return SI && SI->getPointerOperand() != &LI;
+ })) {
+ LoadInst *NewLoad = combineLoadToNewType(
+ IC, LI,
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+ // Replace all the stores with stores of the newly loaded value.
+ for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
+ auto *SI = cast<StoreInst>(*UI++);
+ IC.Builder->SetInsertPoint(SI);
+ combineStoreToNewValue(IC, *SI, NewLoad);
+ IC.EraseInstFromFunction(*SI);
+ }
+ assert(LI.use_empty() && "Failed to remove all users of the load!");
+ // Return the old load so the combiner can delete it safely.
+ return &LI;
+ }
+ }
// Fold away bit casts of the loaded value by loading the desired type.
if (LI.hasOneUse())
@@ -391,6 +496,218 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
}
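
The load-only-ever-stored canonicalization above rewrites, for example
(hypothetical IR; the pointer bitcasts created by combineLoadToNewType and
combineStoreToNewValue are omitted for brevity):

    ; before: a float loaded purely to be stored elsewhere
    %v = load float* %src, align 4
    store float %v, float* %dst, align 4
    ; after: the same bytes move through the legal integer type i32
    %v1 = load i32* %src1, align 4
    store i32 %v1, i32* %dst1, align 4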
+static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+  // FIXME: With some care we could probably handle both volatile and atomic
+  // loads here, but it isn't clear that this is important.
+ if (!LI.isSimple())
+ return nullptr;
+
+ Type *T = LI.getType();
+ if (!T->isAggregateType())
+ return nullptr;
+
+  assert(LI.getAlignment() && "Alignment must be set at this point");
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+    // If the struct only has one element, we unpack.
+ if (ST->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+    // If the array only has one element, we unpack.
+ if (AT->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ return nullptr;
+}
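
A hypothetical sketch of the single-element-struct unpacking above (the
array case is analogous; names invented):

    ; before
    %sv = load { i32 }* %p
    ; after: load the lone element and rebuild the aggregate around it
    %sv.unpack = load i32* %p1
    %sv1 = insertvalue { i32 } undef, i32 %sv.unpack, 0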
+
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are not only dereferenceable, but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false. Constant global values and allocas are examples of objects whose
+// size can be determined in this way.
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+ const DataLayout &DL) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist(1, V);
+
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = P->stripPointerCasts();
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+ if (GA->mayBeOverridden())
+ return false;
+ Worklist.push_back(GA->getAliasee());
+ continue;
+ }
+
+ // If we know how big this object is, and it is less than MaxSize, continue
+ // searching. Otherwise, return false.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+ if (!AI->getAllocatedType()->isSized())
+ return false;
+
+ ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!CS)
+ return false;
+
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
+      // Make sure that, even if the multiplication below would wrap as a
+      // uint64_t, we still do the right thing.
+ if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ return false;
+ continue;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+ return false;
+
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
+ if (InitSize > MaxSize)
+ return false;
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty());
+
+ return true;
+}
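
For instance (an invented case, not from the patch), the walk above can
prove a bound through a select of a constant global and an alloca:

    @g = private constant [2 x i32] [i32 1, i32 2]
    ...
    %a = alloca [2 x i32]
    %p = select i1 %c, [2 x i32]* @g, [2 x i32]* %a
    ; isObjectSizeLessThanOrEq(%p, 8, DL) visits both select arms and
    ; proves each underlying object is exactly 8 bytes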
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. Also, we could
+// also search through non-zero constant indices if we kept track of the
+// offsets those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+ Instruction *MemI, unsigned &Idx) {
+ if (GEPI->getNumOperands() < 2)
+ return false;
+
+ // Find the first non-zero index of a GEP. If all indices are zero, return
+ // one past the last index.
+ auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+ unsigned I = 1;
+ for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+ Value *V = GEPI->getOperand(I);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ if (CI->isZero())
+ continue;
+
+ break;
+ }
+
+ return I;
+ };
+
+ // Skip through initial 'zero' indices, and find the corresponding pointer
+ // type. See if the next index is not a constant.
+ Idx = FirstNZIdx(GEPI);
+ if (Idx == GEPI->getNumOperands())
+ return false;
+ if (isa<Constant>(GEPI->getOperand(Idx)))
+ return false;
+
+ SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+ Type *AllocTy = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
+ ->getElementType(),
+ Ops);
+ if (!AllocTy || !AllocTy->isSized())
+ return false;
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
+
+ // If there are more indices after the one we might replace with a zero, make
+ // sure they're all non-negative. If any of them are negative, the overall
+ // address being computed might be before the base address determined by the
+ // first non-zero index.
+ auto IsAllNonNegative = [&]() {
+ for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+ bool KnownNonNegative, KnownNegative;
+ IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+ KnownNegative, 0, MemI);
+ if (KnownNonNegative)
+ continue;
+ return false;
+ }
+
+ return true;
+ };
+
+ // FIXME: If the GEP is not inbounds, and there are extra indices after the
+ // one we'll replace, those could cause the address computation to wrap
+ // (rendering the IsAllNonNegative() check below insufficient). We can do
+  // better, ignoring zero indices (and other indices we can prove small
+ // enough not to wrap).
+ if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+ return false;
+
+ // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+ // also known to be dereferenceable.
+ return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+ IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+ T &MemI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+ unsigned Idx;
+ if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+ Instruction *NewGEPI = GEPI->clone();
+ NewGEPI->setOperand(Idx,
+ ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+ NewGEPI->insertBefore(GEPI);
+ MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ return NewGEPI;
+ }
+ }
+
+ return nullptr;
+}
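
Continuing the @f.a example from the comment block above, the rewritten
access would look roughly like this (a sketch, assuming the load is the
memory access being visited):

    ; the cloned GEP with the variable index forced to zero
    %arrayidx1 = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 0
    ... = load i32* %arrayidx1, align 4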
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -399,23 +716,30 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return Res;
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
- unsigned LoadAlign = LI.getAlignment();
- unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- DL->getABITypeAlignment(LI.getType());
-
- if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
- else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+ Worklist.Add(NewGEPI);
+ return &LI;
}
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return nullptr;
+ if (Instruction *Res = unpackLoadToAggregate(*this, LI))
+ return Res;
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -466,8 +790,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -521,50 +845,12 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
if (!SI.isSimple())
return false;
- Value *Ptr = SI.getPointerOperand();
Value *V = SI.getValueOperand();
- unsigned AS = SI.getPointerAddressSpace();
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- SI.getAllMetadata(MD);
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);
- StoreInst *NewStore = IC.Builder->CreateAlignedStore(
- V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
- SI.getAlignment());
- for (const auto &MDPair : MD) {
- unsigned ID = MDPair.first;
- MDNode *N = MDPair.second;
- // Note, essentially every kind of metadata should be preserved here! This
- // routine is supposed to clone a store instruction changing *only its
- // type*. The only metadata it makes sense to drop is metadata which is
- // invalidated when the pointer type changes. This should essentially
- // never be the case in LLVM, but we explicitly switch over only known
- // metadata to be conservatively correct. If you are adding metadata to
- // LLVM which pertains to stores, you almost certainly want to add it
- // here.
- switch (ID) {
- case LLVMContext::MD_dbg:
- case LLVMContext::MD_tbaa:
- case LLVMContext::MD_prof:
- case LLVMContext::MD_fpmath:
- case LLVMContext::MD_tbaa_struct:
- case LLVMContext::MD_alias_scope:
- case LLVMContext::MD_noalias:
- case LLVMContext::MD_nontemporal:
- case LLVMContext::MD_mem_parallel_loop_access:
- // All of these directly apply.
- NewStore->setMetadata(ID, N);
- break;
-
- case LLVMContext::MD_invariant_load:
- case LLVMContext::MD_nonnull:
- case LLVMContext::MD_range:
- // These don't apply for stores.
- break;
- }
- }
+ combineStoreToNewValue(IC, SI, V);
return true;
}
@@ -573,6 +859,39 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+  // FIXME: With some care we could probably handle both volatile and atomic
+  // stores here, but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+    // If the struct only has one element, we unpack.
+ if (ST->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+    // If the array only has one element, we unpack.
+ if (AT->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ return false;
+}
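
The store-side unpacking mirrors the load side; a hypothetical sketch:

    ; before
    store { i32 } %agg, { i32 }* %p
    ; after: extract the lone element and store it directly
    %elt = extractvalue { i32 } %agg, 0
    store i32 %elt, i32* %p1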
+
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -611,17 +930,25 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- DL->getABITypeAlignment(Val->getType());
-
- if (KnownAlign > EffectiveStoreAlign)
- SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
- SI.setAlignment(EffectiveStoreAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return EraseInstFromFunction(SI);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+ Worklist.Add(NewGEPI);
+ return &SI;
}
// Don't hack volatile/atomic stores.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b2ff96f..a554e9f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -26,7 +26,7 @@ using namespace PatternMatch;
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
- IC.getAssumptionCache(), CxtI,
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
- ComputeNumSignBits(RHS, 0, CxtI);
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
- ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
if (LHSNonNegative || RHSNonNegative)
return true;
}
@@ -217,12 +217,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
NewCst = getLogBase2Vector(CV);
if (NewCst) {
+ unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
- if (I.hasNoSignedWrap() && NewCst->isNotMinSignedValue())
- Shl->setHasNoSignedWrap();
+ if (I.hasNoSignedWrap()) {
+ uint64_t V;
+ if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ Shl->setHasNoSignedWrap();
+ }
return Shl;
}
@@ -375,7 +379,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -422,7 +426,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
return false;
}
@@ -437,7 +441,7 @@ static bool isNormalFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isNormal())
return false;
}
@@ -780,7 +784,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1155,7 +1159,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1206,7 +1210,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFDivInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1337,7 +1342,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1384,7 +1389,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
@@ -1481,7 +1486,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFRemInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 53831c8..6a6693c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -375,8 +375,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+ for (Value *IncValue : PN.incoming_values())
+ cast<LoadInst>(IncValue)->setVolatile(false);
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
@@ -539,8 +539,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Op = PN->getIncomingValue(i);
+ for (Value *Op : PN->incoming_values()) {
if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
return false;
@@ -891,8 +890,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && DL &&
- !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bf3c33e..d2fbcdd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -11,88 +11,55 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
-/// returning the kind and providing the out parameter results if we
-/// successfully match.
static SelectPatternFlavor
-MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
- SelectInst *SI = dyn_cast<SelectInst>(V);
- if (!SI) return SPF_UNKNOWN;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (!ICI) return SPF_UNKNOWN;
-
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
-
- LHS = CmpLHS;
- RHS = CmpRHS;
-
- // (icmp X, Y) ? X : Y
- if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
- }
- }
-
- // (icmp X, Y) ? Y : X
- if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
- }
+getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return SPF_SMAX;
+ case SPF_UMIN:
+ return SPF_UMAX;
+ case SPF_SMAX:
+ return SPF_SMIN;
+ case SPF_UMAX:
+ return SPF_UMIN;
}
+}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
- if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
- (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
-
- // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
- // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
- return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
- }
-
- // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
- // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
- return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
- }
- }
+static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return ICmpInst::ICMP_SLT;
+ case SPF_UMIN:
+ return ICmpInst::ICMP_ULT;
+ case SPF_SMAX:
+ return ICmpInst::ICMP_SGT;
+ case SPF_UMAX:
+ return ICmpInst::ICMP_UGT;
}
-
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return SPF_UNKNOWN;
}
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+ SelectPatternFlavor SPF, Value *A,
+ Value *B) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+}
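
The canonical pattern this helper emits, shown for SPF_SMAX on two i32
values (an illustrative sketch):

    %cmp = icmp sgt i32 %a, %b
    %max = select i1 %cmp, i32 %a, i32 %b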
/// GetSelectFoldableOperands - We want to turn code that looks like this:
/// %C = or %A, %B
@@ -312,9 +279,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const DataLayout *TD,
const TargetLibraryInfo *TLI,
- DominatorTree *DT, AssumptionCache *AC) {
+ const DataLayout &DL, DominatorTree *DT,
+ AssumptionCache *AC) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -326,18 +293,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
TLI, DT, AC);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
TLI, DT, AC);
}
@@ -361,14 +328,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (ConstOps.size() == I->getNumOperands()) {
if (CmpInst *C = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], TD, TLI);
+ ConstOps[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ DL, TLI);
}
}
@@ -437,6 +404,62 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
return Builder->CreateOr(V, Y);
}
+/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
+/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
+///
+/// For example, we can fold the following code sequence:
+/// \code
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+/// %1 = icmp ne i32 %x, 0
+/// %2 = select i1 %1, i32 %0, i32 32
+/// \endcode
+///
+/// into:
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+
+ // Check if the condition value compares a value for equality against zero.
+ if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ Value *Count = FalseVal;
+ Value *ValueOnZero = TrueVal;
+ if (Pred == ICmpInst::ICMP_NE)
+ std::swap(Count, ValueOnZero);
+
+ // Skip zero extend/truncate.
+ Value *V = nullptr;
+ if (match(Count, m_ZExt(m_Value(V))) ||
+ match(Count, m_Trunc(m_Value(V))))
+ Count = V;
+
+ // Check if the value propagated on zero is a constant number equal to the
+ // sizeof in bits of 'Count'.
+ unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
+ if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
+ return nullptr;
+
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
+ match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ IRBuilder<> Builder(II);
+    // Explicitly clear the 'is_zero_undef' flag.
+ IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
+ Type *Ty = NewI->getArgOperand(1)->getType();
+ NewI->setArgOperand(1, Constant::getNullValue(Ty));
+ Builder.Insert(NewI);
+ return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ }
+
+ return nullptr;
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -579,25 +602,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -665,6 +688,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
return ReplaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -770,6 +796,52 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
return ReplaceInstUsesWith(Outer, NewSI);
}
+
+ auto IsFreeOrProfitableToInvert =
+ [&](Value *V, Value *&NotV, bool &ElidesXor) {
+ if (match(V, m_Not(m_Value(NotV)))) {
+ // If V has at most 2 uses then we can get rid of the xor operation
+ // entirely.
+ ElidesXor |= !V->hasNUsesOrMore(3);
+ return true;
+ }
+
+ if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) {
+ NotV = nullptr;
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *NotA, *NotB, *NotC;
+ bool ElidesXor = false;
+
+ // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C)
+ // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C)
+ // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C)
+ // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C)
+ //
+ // This transform is performance neutral if we can elide at least one xor from
+ // the set of three operands, since we'll be tacking on an xor at the very
+ // end.
+ if (IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
+ IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
+ IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
+ if (!NotA)
+ NotA = Builder->CreateNot(A);
+ if (!NotB)
+ NotB = Builder->CreateNot(B);
+ if (!NotC)
+ NotC = Builder->CreateNot(C);
+
+ Value *NewInner = generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
+ Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
+ return ReplaceInstUsesWith(Outer, NewOuter);
+ }
+
return nullptr;
}
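
The new inversion transform above, spelled out for the SMIN/SMAX case
(hypothetical IR; assumes all three nots already exist, so at least one xor
is elided):

    ; before: smin(smin(~a, ~b), ~c), built from two icmp slt + select
    ; pairs over these nots
    %na = xor i32 %a, -1
    %nb = xor i32 %b, -1
    %nc = xor i32 %c, -1
    ; after: ~smax(smax(a, b), c), needing a single trailing not
    %c1 = icmp sgt i32 %a, %b
    %m1 = select i1 %c1, i32 %a, i32 %b
    %c2 = icmp sgt i32 %m1, %c
    %m2 = select i1 %c2, i32 %m1, i32 %c
    %r  = xor i32 %m2, -1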
@@ -868,7 +940,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (C->getZExtValue() == false) {
+ if (!C->getZExtValue()) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
@@ -1082,26 +1154,67 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isIntegerTy()) {
+ if (SI.getType()->isIntOrIntVectorTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
Value *LHS, *RHS, *LHS2, *RHS2;
- if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ Instruction::CastOps CastOp;
+ SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+
+ if (SPF) {
+ // Canonicalize so that type casts are outside select patterns.
+ if (LHS->getType()->getPrimitiveSizeInBits() !=
+ SI.getType()->getPrimitiveSizeInBits()) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ Value *NewSI = Builder->CreateCast(CastOp,
+ Builder->CreateSelect(Cmp, LHS, RHS),
+ SI.getType());
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
}
+ // MAX(~a, ~b) -> ~MIN(a, b)
+ if (SPF == SPF_SMAX || SPF == SPF_UMAX) {
+ if (IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
+ IsFreeToInvert(RHS, RHS->hasNUses(2))) {
+
+      // This transform adds an xor operation and that extra cost needs to be
+ // justified. We look for simplifications that will result from
+ // applying this rule:
+
+ bool Profitable =
+ (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) ||
+ (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) ||
+ (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
+
+ if (Profitable) {
+ Value *NewLHS = Builder->CreateNot(LHS);
+ Value *NewRHS = Builder->CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX
+ ? Builder->CreateICmpSLT(NewLHS, NewRHS)
+ : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewSI =
+ Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+ }
+ }
+
// TODO.
// ABS(-X) -> ABS(X)
}
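// A standalone sketch (not from this patch) of the cast canonicalization
// above: min/max commutes with the matched sign extension, so the cast can be
// hoisted outside the select pattern. Checked exhaustively for smax over
// i8 -> i32, written as the icmp+select idiom the rewrite produces:
#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a != 128; ++a)
    for (int b = -128; b != 128; ++b) {
      int8_t A = (int8_t)a, B = (int8_t)b;
      int32_t WideA = A, WideB = B; // sext
      int32_t MaxOfWide = WideA > WideB ? WideA : WideB;
      int8_t NarrowMax = A > B ? A : B;
      // smax(sext a, sext b) == sext(smax(a, b))
      assert(MaxOfWide == (int32_t)NarrowMax);
    }
  return 0;
}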
@@ -1115,19 +1228,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
- if (TrueSI->getCondition() == CondVal) {
- if (SI.getTrueValue() == TrueSI->getTrueValue())
- return nullptr;
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+      // We choose this as the normal form because it enables folding on the
+      // And and shortens the paths to the values (this helps
+      // GetUnderlyingObjects(), for example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
- if (FalseSI->getCondition() == CondVal) {
- if (SI.getFalseValue() == FalseSI->getFalseValue())
- return nullptr;
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
}
}
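// A standalone sketch (not from this patch) checking the two nested-select
// rewrites above as plain boolean identities; sel() is an illustrative
// stand-in for the select instruction:
#include <cassert>

static int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  const int a = 1, b = 2;
  for (int c0 = 0; c0 != 2; ++c0)
    for (int c1 = 0; c1 != 2; ++c1) {
      // select(C0, select(C1, a, b), b) == select(C0&C1, a, b)
      assert(sel(c0, sel(c1, a, b), b) == sel(c0 && c1, a, b));
      // select(C0, a, select(C1, a, b)) == select(C0|C1, a, b)
      assert(sel(c0, a, sel(c1, a, b)) == sel(c0 || c1, a, b));
    }
  return 0;
}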
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0a16e25..d04ed58 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -175,8 +175,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateShifted(IncValue, NumBits, isLeftShift,
IC, PN))
return false;
return true;
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
- IC.getTargetLibraryInfo());
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
return V;
}
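// A standalone sketch (not from this patch) of the property GetShiftedValue
// relies on: shifts distribute over the bitwise operators handled below, so
// the shift can be pushed into the operands of the expression tree:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  for (unsigned N = 0; N != 32; ++N) {
    assert(((X & Y) << N) == ((X << N) & (Y << N)));
    assert(((X | Y) << N) == ((X << N) | (Y << N)));
    assert(((X ^ Y) << N) == ((X << N) ^ (Y << N)));
  }
  return 0;
}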
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
- I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select:
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
- I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
- NumBits, isLeftShift, IC));
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
return PN;
}
}
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
- return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+ return ReplaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index ad6983a..80628b2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
-#include "llvm/IR/DataLayout.h"
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -44,19 +44,6 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
Demanded &= OpC->getValue();
I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
- // If either 'nsw' or 'nuw' is set and the constant is negative,
- // removing *any* bits from the constant could make overflow occur.
- // Remove 'nsw' and 'nuw' from the instruction in this case.
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
- assert(OBO->getOpcode() == Instruction::Add);
- if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
- if (OpC->getValue().isNegative()) {
- cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
- cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
- }
- }
- }
-
return true;
}
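// A standalone sketch (not from this patch): ShrinkDemandedConstant replaces
// a constant operand C with C & DemandedMask, because constant bits nobody
// demands cannot influence the demanded bits of the result. For example,
// with only the low byte demanded, x & 0xFF0F behaves like x & 0x000F:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Demanded = 0xFFu;
  for (uint32_t x = 0; x != 0x10000; ++x)
    assert(((x & 0xFF0Fu) & Demanded) ==
           ((x & (0xFF0Fu & Demanded)) & Demanded));
  return 0;
}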
@@ -70,8 +57,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0, &Inst);
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -84,9 +71,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth,
- dyn_cast<Instruction>(U.getUser()));
+ auto *UserI = dyn_cast<Instruction>(U.getUser());
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
+ KnownOne, Depth, UserI);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -122,15 +109,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((DL || !VTy->isPointerTy()) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVectorTy() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +158,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +182,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -225,9 +209,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -256,10 +240,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +278,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -336,10 +320,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne = RHSKnownOne | LHSKnownOne;
break;
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +407,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ // If this is a select as part of a min/max pattern, don't simplify any
+ // further in case we break the structure.
+ Value *LHS, *RHS;
+ if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +435,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +461,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -483,8 +473,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +500,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -532,113 +522,35 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
- case Instruction::Add: {
-    // Figure out what the input bits are.  If the top bits of the add result
- // are not demanded, then the add doesn't demand them from its input
- // either.
+ case Instruction::Add:
+ case Instruction::Sub: {
+    // If the high bits of an ADD/SUB are not demanded, then we do not care
+    // about the high bits of the operands.
unsigned NLZ = DemandedMask.countLeadingZeros();
-
- // If there is a constant on the RHS, there are a variety of xformations
- // we can do.
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // If null, this should be simplified elsewhere. Some of the xforms here
- // won't work if the RHS is zero.
- if (RHS->isZero())
- break;
-
- // If the top bit of the output is demanded, demand everything from the
- // input. Otherwise, we demand all the input bits except NLZ top bits.
- APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
-
- // Find information about known zero/one bits in the input.
- if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
-
- // If the RHS of the add has bits set that can't affect the input, reduce
- // the constant.
- if (ShrinkDemandedConstant(I, 1, InDemandedBits))
- return I;
-
- // Avoid excess work.
- if (LHSKnownZero == 0 && LHSKnownOne == 0)
- break;
-
- // Turn it into OR if input bits are zero.
- if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
- Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstWith(Or, *I);
- }
-
- // We can say something about the output known-zero and known-one bits,
- // depending on potential carries from the input constant and the
- // unknowns. For example if the LHS is known to have at most the 0x0F0F0
- // bits set and the RHS constant is 0x01001, then we know we have a known
- // one mask of 0x00001 and a known zero mask of 0xE0F0E.
-
- // To compute this, we first compute the potential carry bits. These are
- // the bits which may be modified. I'm not aware of a better way to do
- // this scan.
- const APInt &RHSVal = RHS->getValue();
- APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-
- // Now that we know which bits have carries, compute the known-1/0 sets.
-
- // Bits are known one if they are known zero in one operand and one in the
- // other, and there is no input carry.
- KnownOne = ((LHSKnownZero & RHSVal) |
- (LHSKnownOne & ~RHSVal)) & ~CarryBits;
-
- // Bits are known zero if they are known zero in both operands and there
- // is no input carry.
- KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
- } else {
- // If the high-bits of this ADD are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this ADD to demand the most
- // significant bit and all those below it.
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- }
- }
- break;
- }
- case Instruction::Sub:
- // If the high-bits of this SUB are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this SUB to demand the most
+ if (NLZ > 0) {
+ // Right fill the mask of bits for this ADD/SUB to demand the most
// significant bit and all those below it.
- uint32_t NLZ = DemandedMask.countLeadingZeros();
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
+ ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1)) {
+ // Disable the nsw and nuw flags here: We can no longer guarantee that
+ // we won't wrap after simplification. Removing the nsw/nuw flags is
+ // legal here because the top bit is not demanded.
+ BinaryOperator &BinOP = *cast<BinaryOperator>(I);
+ BinOP.setHasNoSignedWrap(false);
+ BinOP.setHasNoUnsignedWrap(false);
return I;
+ }
}
- // Otherwise just hand the sub off to computeKnownBits to fill in
+ // Otherwise just hand the add/sub off to computeKnownBits to fill in
// the known zeros and ones.
computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
-
- // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
- APInt I0 = C0->getValue();
- if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
- Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
- return InsertNewInstWith(Xor, *I);
- }
- }
break;
+ }
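// A standalone sketch (not from this patch): carries in ADD/SUB only travel
// upward, so when the top NLZ bits are not demanded the top bits of the
// operands cannot matter either. Note that the simplified form may wrap
// where the original did not, which is why the code above clears nsw/nuw:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Low = 0xFFu; // demand only the low 8 bits
  for (uint32_t a = 0; a != 0x400; ++a)
    for (uint32_t b = 0; b != 0x400; ++b) {
      assert(((a + b) & Low) == (((a & Low) + (b & Low)) & Low));
      assert(((a - b) & Low) == (((a & Low) - (b & Low)) & Low));
    }
  return 0;
}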
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
{
@@ -662,8 +574,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
@@ -686,8 +598,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +643,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
@@ -772,8 +684,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
@@ -798,7 +710,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -808,10 +720,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
@@ -1051,7 +963,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
@@ -1069,7 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
@@ -1097,12 +1009,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
+ UndefElts4, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
+ UndefElts3, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
@@ -1152,12 +1064,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined.
@@ -1204,7 +1116,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// div/rem demand all inputs, because they don't want divide by zero.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) {
I->setOperand(0, TmpV);
MadeChange = true;
@@ -1238,11 +1150,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Instruction::Sub:
case Instruction::Mul:
    // Binary operators propagate the demanded elements to both operands.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined. Consider things
@@ -1251,8 +1163,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
@@ -1273,10 +1185,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cb16584..24446c8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -12,7 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -201,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
@@ -732,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
@@ -853,10 +855,32 @@ static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
}
}
+// Returns true if the shuffle is extracting a contiguous range of values from
+// LHS, for example:
+// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+// Shuffles to: |EE|FF|GG|HH|
+// +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+ SmallVector<int, 16> &Mask) {
+ unsigned LHSElems =
+ cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ unsigned EndIdx = Mask.back();
+ if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+ return false;
+ for (unsigned I = 0; I != MaskElems; ++I)
+ if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+ return false;
+ return true;
+}
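// A standalone sketch (not from this patch) of the contiguity test above;
// isContiguous() is an illustrative name that mirrors its logic on plain
// std::vector masks:
#include <cassert>
#include <vector>

static bool isContiguous(const std::vector<int> &Mask, unsigned LHSElems) {
  unsigned MaskElems = Mask.size();
  unsigned BegIdx = Mask.front(), EndIdx = Mask.back();
  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
    return false;
  for (unsigned I = 0; I != MaskElems; ++I)
    if ((unsigned)Mask[I] != BegIdx + I)
      return false;
  return true;
}

int main() {
  assert(isContiguous({4, 5, 6, 7}, 16));      // extracts EE..HH in the diagram
  assert(!isContiguous({4, 6, 8, 10}, 16));    // strided, not contiguous
  assert(!isContiguous({14, 15, 16, 17}, 16)); // runs past the LHS vector
  return 0;
}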
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
bool MadeChange = false;
@@ -892,18 +916,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@@ -929,6 +952,95 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return ReplaceInstUsesWith(SVI, V);
}
+ // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+  // a non-vector type. We can instead bitcast the original vector and then
+  // extract the desired element:
+ //
+ // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+ // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // %1 = bitcast <4 x i8> %sroa to i32
+ // Becomes:
+ // %bc = bitcast <16 x i8> %in to <4 x i32>
+ // %ext = extractelement <4 x i32> %bc, i32 0
+ //
+ // If the shuffle is extracting a contiguous range of values from the input
+ // vector then each use which is a bitcast of the extracted size can be
+ // replaced. This will work if the vector types are compatible, and the begin
+ // index is aligned to a value in the casted vector type. If the begin index
+ // isn't aligned then we can shuffle the original vector (keeping the same
+ // vector type) before extracting.
+ //
+ // This code will bail out if the target type is fundamentally incompatible
+ // with vectors of the source type.
+ //
+ // Example of <16 x i8>, target type i32:
+ // Index range [4,8): v-----------v Will work.
+ // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ // <16 x i8>: | | | | | | | | | | | | | | | | |
+ // <4 x i32>: | | | | |
+ // +-----------+-----------+-----------+-----------+
+ // Index range [6,10): ^-----------^ Needs an extra shuffle.
+ // Target type i40: ^--------------^ Won't work, bail.
+ if (isShuffleExtractingFromLHS(SVI, Mask)) {
+ Value *V = LHS;
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ VectorType *SrcTy = cast<VectorType>(V->getType());
+ unsigned VecBitWidth = SrcTy->getBitWidth();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+ assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+ unsigned SrcNumElems = SrcTy->getNumElements();
+ SmallVector<BitCastInst *, 8> BCs;
+ DenseMap<Type *, Value *> NewBCs;
+ for (User *U : SVI.users())
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+ if (!BC->use_empty())
+ // Only visit bitcasts that weren't previously handled.
+ BCs.push_back(BC);
+ for (BitCastInst *BC : BCs) {
+ Type *TgtTy = BC->getDestTy();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+ if (!TgtElemBitWidth)
+ continue;
+ unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+ bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+ bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+ if (!VecBitWidthsEqual)
+ continue;
+ if (!VectorType::isValidElementType(TgtTy))
+ continue;
+ VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+ if (!BegIsAligned) {
+        // Shuffle the input so [0,MaskElems) contains the output, and
+        // [MaskElems,SrcNumElems) is undef.
+ SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+ UndefValue::get(Int32Ty));
+ for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+ ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+ V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
+ BegIdx = 0;
+ }
+ unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+ assert(SrcElemsPerTgtElem);
+ BegIdx /= SrcElemsPerTgtElem;
+ bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ auto *NewBC =
+ BCAlreadyExists
+ ? NewBCs[CastSrcTy]
+ : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ if (!BCAlreadyExists)
+ NewBCs[CastSrcTy] = NewBC;
+ auto *Ext = Builder->CreateExtractElement(
+ NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ // The shufflevector isn't being replaced: the bitcast that used it
+ // is. InstCombine will visit the newly-created instructions.
+ ReplaceInstUsesWith(*BC, Ext);
+ MadeChange = true;
+ }
+ }
+
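// A standalone sketch (not from this patch) of the index arithmetic above in
// byte terms: extracting source elements [4,8) of a <16 x i8> and bitcasting
// to i32 reads the same four bytes as element 4/4 == 1 of the <4 x i32> view
// of the original vector:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t In[16];
  for (unsigned i = 0; i != 16; ++i)
    In[i] = (uint8_t)(0xA0 + i);

  unsigned BegIdx = 4; // shuffle mask <4, 5, 6, 7>
  unsigned SrcElemBitWidth = 8, TgtElemBitWidth = 32;
  assert((SrcElemBitWidth * BegIdx) % TgtElemBitWidth == 0); // BegIsAligned
  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
  unsigned NewIdx = BegIdx / SrcElemsPerTgtElem; // element 1 of the i32 view

  uint32_t ViaShuffleBitcast, ViaBitcastExtract;
  std::memcpy(&ViaShuffleBitcast, In + BegIdx, sizeof(uint32_t));
  std::memcpy(&ViaBitcastExtract, In + NewIdx * sizeof(uint32_t),
              sizeof(uint32_t));
  assert(ViaShuffleBitcast == ViaBitcastExtract);
  return 0;
}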
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
@@ -1099,7 +1211,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
deleted file mode 100644
index 8d857d0..0000000
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "instcombine"
-
-namespace llvm {
-
-/// InstCombineWorklist - This is the worklist management logic for
-/// InstCombine.
-class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
- SmallVector<Instruction*, 256> Worklist;
- DenseMap<Instruction*, unsigned> WorklistMap;
-
- void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
- InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
-public:
- InstCombineWorklist() {}
-
- bool isEmpty() const { return Worklist.empty(); }
-
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
- void Add(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
- Worklist.push_back(I);
- }
- }
-
- void AddValue(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- Add(I);
- }
-
-  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
-  /// which should only be done when the worklist is empty and when the group
- /// has no duplicates.
- void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
- assert(Worklist.empty() && "Worklist must be empty to add initial group");
- Worklist.reserve(NumEntries+16);
- WorklistMap.resize(NumEntries);
- DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (unsigned Idx = 0; NumEntries; --NumEntries) {
- Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Idx++));
- Worklist.push_back(I);
- }
- }
-
- // Remove - remove I from the worklist if it exists.
- void Remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
- if (It == WorklistMap.end()) return; // Not in worklist.
-
- // Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = nullptr;
-
- WorklistMap.erase(It);
- }
-
- Instruction *RemoveOne() {
- Instruction *I = Worklist.pop_back_val();
- WorklistMap.erase(I);
- return I;
- }
-
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(Instruction &I) {
- for (User *U : I.users())
- Add(cast<Instruction>(U));
- }
-
-
- /// Zap - check that the worklist is empty and nuke the backing store for
- /// the map if it is large.
- void Zap() {
- assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
- // Do an explicit clear, this shrinks the map if needed.
- WorklistMap.clear();
- }
-};
-
-} // end namespace llvm.
-
-#undef DEBUG_TYPE
-
-#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a0c239a..be49cd1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,8 +33,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "InstCombine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -43,8 +43,10 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
@@ -55,7 +57,8 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <climits>
@@ -72,35 +75,8 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
-// Initialization Routines
-void llvm::initializeInstCombine(PassRegistry &Registry) {
- initializeInstCombinerPass(Registry);
-}
-
-void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
- initializeInstCombine(*unwrap(R));
-}
-
-char InstCombiner::ID = 0;
-INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-
-void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-}
-
-
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -109,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have DL, we don't know if the source/dest are legal.
- if (!DL) return false;
-
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = DL->isLegalInteger(FromWidth);
- bool ToLegal = DL->isLegalInteger(ToWidth);
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -470,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout *DL, BinaryOperator &I,
+ const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
@@ -479,6 +452,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (!A || !C || !B || !D)
return nullptr;
+ Value *V = nullptr;
Value *SimplifiedInst = nullptr;
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
@@ -495,7 +469,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -514,7 +488,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+ V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -544,7 +518,19 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
if (isa<OverflowingBinaryOperator>(Op1))
HasNSW &= Op1->hasNoSignedWrap();
- BO->setHasNoSignedWrap(HasNSW);
+
+          // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ const APInt *CInt;
+ if (TopLevelOpcode == Instruction::Add &&
+ InnerOpcode == Instruction::Mul)
+ if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
}
}
}
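// A standalone sketch (not from this patch) of the INT_MIN corner case that
// blocks the nsw propagation above, scaled down to i8. The arithmetic is done
// in int to keep the sketch well-defined; the narrowing cast assumes the
// usual two's-complement wrap:
#include <cassert>
#include <cstdint>

int main() {
  const int X = -1;
  const int C = INT8_MAX; // in i8, C+1 wraps to INT8_MIN
  int Y = X * C;          // -127: fits in i8, no signed wrap
  int Z = Y + X;          // -128: fits in i8, no signed wrap
  assert(Z == INT8_MIN);
  // Factored form: mul i8 %X, (C+1) == -1 * -128 == +128, which does not
  // fit in i8, so 'nsw' must not be copied onto the new multiply.
  int Factored = X * (int)(int8_t)(C + 1);
  assert(Factored == 128 && Factored > INT8_MAX);
  return 0;
}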
@@ -741,6 +727,22 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
+ // Test if a CmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+ }
+ }
+
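// A standalone sketch (not from this patch) of the shape the guard above
// preserves: a compare whose only use is a select over the same operands,
// i.e. the canonical min/max idiom; sminIdiom() is an illustrative name:
#include <cassert>

static int sminIdiom(int A, int B) {
  bool Cmp = A < B;   // the CmpInst with a single use...
  return Cmp ? A : B; // ...the select, over the compare's own operands
}

int main() {
  assert(sminIdiom(3, 7) == 3 && sminIdiom(7, 3) == 3);
  return 0;
}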
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
@@ -750,7 +752,6 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
-
/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
@@ -799,8 +800,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
- if (isPotentiallyReachable(I.getParent(), NonConstBB, DT,
- getAnalysisIfAvailable<LoopInfo>()))
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, DT, LI))
return nullptr;
}
@@ -897,23 +897,18 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// whether or not there is a sequence of GEP indices into the pointed type that
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- assert(PtrTy->isPtrOrPtrVectorTy());
-
- if (!DL)
- return nullptr;
-
- Type *Ty = PtrTy->getPointerElementType();
+Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices) {
+ Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
@@ -931,11 +926,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -946,7 +941,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
@@ -1240,7 +1235,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1326,37 +1322,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (DL) {
- bool MadeChange = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- // Skip indices into struct types.
- SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
- if (!SeqTy) continue;
-
- // If the element type has zero size then any index over it is equivalent
- // to an index of zero, so replace it with zero if it is not zero already.
- if (SeqTy->getElementType()->isSized() &&
- DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
- MadeChange = true;
- }
+ bool MadeChange = false;
+ Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy)
+ continue;
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
- // If we are using a wider index than needed for this platform, shrink
- // it to what we need. If narrower, sign-extend it to what we need.
- // This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
}
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
}
- if (MadeChange) return &GEP;
}
+ if (MadeChange)
+ return &GEP;
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1364,6 +1360,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op1)
return nullptr;
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
signed DI = -1;
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
@@ -1371,6 +1376,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
return nullptr;
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
// Keep track of the type as we walk the GEP.
Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
@@ -1417,8 +1426,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
} else {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
@@ -1434,8 +1443,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
PN->getIncomingBlock(I));
NewGEP->setOperand(DI, NewPN);
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
NewGEP->setOperand(DI, NewPN);
}
@@ -1486,6 +1495,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// normalized.
if (SO1->getType() != GO1->getType())
return nullptr;
+      // Only do the combine when GO1 and SO1 are both constants; only then
+      // are we sure that the cost after the merge is never more than the
+      // cost before it.
+ if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
+ return nullptr;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
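(Sketch only: with constant indices the add folds away, so the merged GEP can
never be more expensive than the original pair.)

  %t = getelementptr i32* %p, i64 4
  %u = getelementptr i32* %t, i64 8
    ; becomes, since 4 + 8 folds at compile time:
  %u = getelementptr i32* %p, i64 12

(With variable indices the merged form would need a real add instruction,
which is why the combine now bails unless both operands are constants.)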
@@ -1507,19 +1521,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
- GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
}
- if (DL && GEP.getNumIndices() == 1) {
+ if (GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- DL->getPointerSizeInBits(AS)) {
+ DL.getPointerSizeInBits(AS)) {
Type *PtrTy = GEP.getPointerOperandType();
Type *Ty = PtrTy->getPointerElementType();
- uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
bool Matched = false;
uint64_t C;
@@ -1588,8 +1605,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
- GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1613,6 +1630,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// is a leading zero) we can fold the cast into this GEP.
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
GEP.setOperand(0, StrippedPtr);
+ GEP.setSourceElementType(XATy);
return &GEP;
}
// Cannot replace the base pointer directly because StrippedPtr's
@@ -1625,9 +1643,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP = GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(
+ nullptr, StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1638,14 +1658,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (DL && SrcElTy->isArrayTy() &&
- DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- DL->getTypeAllocSize(ResElTy)) {
- Type *IdxType = DL->getIntPtrType(GEP.getType());
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1656,11 +1678,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1668,7 +1690,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1676,9 +1698,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1691,13 +1716,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
- SrcElTy->isArrayTy()) {
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize
- = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1705,7 +1729,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1714,13 +1738,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
- NewIdx
- };
-
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
+
+ Value *NewGEP = GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(
+ SrcElTy, StrippedPtr, Off, GEP.getName())
+ : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1730,9 +1755,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- if (!DL)
- return nullptr;
-
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
@@ -1753,10 +1775,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset)) {
+ GEP.accumulateConstantOffset(DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1785,9 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP.
SmallVector<Value*, 8> NewIndices;
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
- Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(Operand, NewIndices);
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder->CreateGEP(nullptr, Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -2038,6 +2061,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
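(Sketch of the new transform, in illustrative IR:)

  br i1 %cond, label %bb, label %bb
    ; becomes
  br i1 undef, label %bb, label %bb

(Dropping the last use of %cond this way lets the instruction computing it
become trivially dead.)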
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
@@ -2077,7 +2109,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
@@ -2096,8 +2128,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  // x86 generates redundant zero-extension instructions if the operand is
// truncated to i8 or i16.
bool TruncCond = false;
- if (DL && BitWidth > NewWidth &&
- NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ if (NewWidth > 0 && BitWidth > NewWidth &&
+ NewWidth >= DL.getLargestLegalIntTypeSize()) {
TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
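(Illustrative only, assuming a target whose largest legal integer width is 32
bits: a condition whose top half is known zero can be switched at the
narrower width without risking the redundant i8/i16 zero-extensions noted
above.)

  %x = zext i32 %v to i64
  switch i64 %x, label %def [ i64 3, label %case3 ]
    ; becomes
  %t = trunc i64 %x to i32
  switch i32 %t, label %def [ i32 3, label %case3 ]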
@@ -2270,7 +2302,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
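(Schematically, in the older-style IR of the surrounding comments: these lines
turn an extractvalue of a loaded aggregate into a GEP placed at the load,
followed by a narrow load.)

  %agg = load { i32, i64 }* %p
  %v   = extractvalue { i32, i64 } %agg, 1
    ; becomes
  %gep = getelementptr inbounds { i32, i64 }* %p, i64 0, i32 1
  %v   = load i64* %gep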
@@ -2286,41 +2319,27 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return nullptr;
}
-enum Personality_Type {
- Unknown_Personality,
- GNU_Ada_Personality,
- GNU_CXX_Personality,
- GNU_ObjC_Personality
-};
-
-/// RecognizePersonality - See if the given exception handling personality
-/// function is one that we understand. If so, return a description of it;
-/// otherwise return Unknown_Personality.
-static Personality_Type RecognizePersonality(Value *Pers) {
- Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
- if (!F)
- return Unknown_Personality;
- return StringSwitch<Personality_Type>(F->getName())
- .Case("__gnat_eh_personality", GNU_Ada_Personality)
- .Case("__gxx_personality_v0", GNU_CXX_Personality)
- .Case("__objc_personality_v0", GNU_ObjC_Personality)
- .Default(Unknown_Personality);
-}
-
/// isCatchAll - Return 'true' if the given typeinfo will match anything.
-static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
switch (Personality) {
- case Unknown_Personality:
+ case EHPersonality::GNU_C:
+ // The GCC C EH personality only exists to support cleanups, so it's not
+ // clear what the semantics of catch clauses are.
return false;
- case GNU_Ada_Personality:
+ case EHPersonality::Unknown:
+ return false;
+ case EHPersonality::GNU_Ada:
// While __gnat_all_others_value will match any Ada exception, it doesn't
// match foreign exceptions (or didn't, before gcc-4.7).
return false;
- case GNU_CXX_Personality:
- case GNU_ObjC_Personality:
+ case EHPersonality::GNU_CXX:
+ case EHPersonality::GNU_ObjC:
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_CXX:
return TypeInfo->isNullValue();
}
- llvm_unreachable("Unknown personality!");
+ llvm_unreachable("invalid enum");
}
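(For reference, not in the patch: under the C++ and ObjC personalities, and
now the MSVC ones, the catch-all clause is a null typeinfo, e.g. the
landingpad emitted for catch (...) in this era's IR:)

  %lp = landingpad { i8*, i32 }
           personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           catch i8* null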
static bool shorter_filter(const Value *LHS, const Value *RHS) {
@@ -2334,7 +2353,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// The logic here should be correct for any real-world personality function.
// However if that turns out not to be true, the offending logic can always
// be conditioned on the personality function, like the catch-all logic is.
- Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+ EHPersonality Personality = classifyEHPersonality(LI.getPersonalityFn());
// Simplify the list of clauses, eg by removing repeated catch clauses
// (these are often created by inlining).
@@ -2625,9 +2644,6 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-
-
-
/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
@@ -2660,164 +2676,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return true;
}
-
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
-///
-/// This has a couple of tricks to make the code faster and more powerful. In
-/// particular, we constant fold and DCE instructions as we go, to avoid adding
-/// them to the worklist (this significantly speeds up instcombine on code where
-/// many instructions are dead or constant). Additionally, if we find a branch
-/// whose condition is a known constant, we only visit the reachable successors.
-///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSetImpl<BasicBlock*> &Visited,
- InstCombiner &IC,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- bool MadeIRChange = false;
- SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(BB);
-
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
- DenseMap<ConstantExpr*, Constant*> FoldedConstants;
-
- do {
- BB = Worklist.pop_back_val();
-
- // We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB).second)
- continue;
-
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
-
- // DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst, TLI)) {
- ++NumDeadInst;
- DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- Inst->eraseFromParent();
- continue;
- }
-
- // ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
- << *Inst << '\n');
- Inst->replaceAllUsesWith(C);
- ++NumConstProp;
- Inst->eraseFromParent();
- continue;
- }
-
- if (DL) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr) continue;
-
- Constant*& FoldRes = FoldedConstants[CE];
- if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
- if (!FoldRes)
- FoldRes = CE;
-
- if (FoldRes != CE) {
- *i = FoldRes;
- MadeIRChange = true;
- }
- }
- }
-
- InstrsForInstCombineWorklist.push_back(Inst);
- }
-
- // Recursively visit successors. If this is a branch or switch on a
- // constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
- bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
- BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- Worklist.push_back(ReachableBB);
- continue;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- // See if this is an explicit destination.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseValue() == Cond) {
- BasicBlock *ReachableBB = i.getCaseSuccessor();
- Worklist.push_back(ReachableBB);
- continue;
- }
-
- // Otherwise it is the default destination.
- Worklist.push_back(SI->getDefaultDest());
- continue;
- }
- }
-
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
- } while (!Worklist.empty());
-
- // Once we've found all of the instructions to add to instcombine's worklist,
- // add them in reverse order. This way instcombine will visit from the top
- // of the function down. This jives well with the way that it adds all uses
- // of instructions to the worklist after doing a transformation, thus avoiding
- // some N^2 behavior in pathological cases.
- IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
-
- return MadeIRChange;
-}
-
-bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
- MadeIRChange = false;
-
- DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getName() << "\n");
-
- {
- // Do a depth-first traversal of the function, populate the worklist with
- // the reachable instructions. Ignore blocks that are not reachable. Keep
- // track of which blocks we visit.
- SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL,
- TLI);
-
- // Do a quick scan over the function. If we find any blocks that are
- // unreachable, remove any instructions inside of them. This prevents
- // the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Visited.count(BB)) continue;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
- while (EndInst != BB->begin()) {
- // Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
- if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
- EndInst = Inst;
- continue;
- }
- if (!isa<DbgInfoIntrinsic>(Inst)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
- Inst->eraseFromParent();
- }
- }
- }
-
+bool InstCombiner::run() {
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
if (I == nullptr) continue; // skip null values.
@@ -2832,7 +2691,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2843,6 +2702,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = true;
continue;
}
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -2900,7 +2760,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
- if (!I->getDebugLoc().isUnknown())
+ if (I->getDebugLoc())
Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
@@ -2947,63 +2807,287 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
return MadeIRChange;
}
-namespace {
-class InstCombinerLibCallSimplifier final : public LibCallSimplifier {
- InstCombiner *IC;
-public:
- InstCombinerLibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- InstCombiner *IC)
- : LibCallSimplifier(DL, TLI) {
- this->IC = IC;
- }
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ InstCombineWorklist &ICWorklist,
+ const TargetLibraryInfo *TLI) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
- /// replaceAllUsesWith - override so that instruction replacement
- /// can be defined in terms of the instruction combiner framework.
- void replaceAllUsesWith(Instruction *I, Value *With) const override {
- IC->ReplaceInstUsesWith(*I, With);
- }
-};
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB).second)
+ continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
+
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getDefaultDest());
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+  // of the function down. This jibes well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
}
-bool InstCombiner::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
+/// \brief Populate the IC worklist from a function, and prune any dead basic
+/// blocks discovered in the process.
+///
+/// This also does basic constant propagation and other forward fixing to make
+/// the combiner itself run much faster.
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
+ TargetLibraryInfo *TLI,
+ InstCombineWorklist &ICWorklist) {
+ bool MadeIRChange = false;
+
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock *, 64> Visited;
+ MadeIRChange |=
+ AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB))
+ continue;
+
+    // Delete the instructions backwards; this reduces the number of def-use
+    // and use-def chains that have to be updated.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
+ }
+ }
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ return MadeIRChange;
+}
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, LoopInfo *LI = nullptr) {
// Minimizing size?
- MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
+ auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(
- F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, AC));
- Builder = &TheBuilder;
-
- InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
- Simplifier = &TheSimplifier;
-
- bool EverMadeChange = false;
+ IRBuilder<true, TargetFolder, InstCombineIRInserter> Builder(
+ F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, &AC));
// Lower dbg.declare intrinsics otherwise their value may be clobbered
// by instcombiner.
- EverMadeChange = LowerDbgDeclare(F);
+ bool DbgDeclaresChanged = LowerDbgDeclare(F);
// Iterate while there is work to do.
- unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
- EverMadeChange = true;
+ int Iteration = 0;
+ for (;;) {
+ ++Iteration;
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getName() << "\n");
+
+ bool Changed = false;
+ if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
+ Changed = true;
+
+ InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI);
+ if (IC.run())
+ Changed = true;
+
+ if (!Changed)
+ break;
+ }
+
+ return DbgDeclaresChanged || Iteration > 1;
+}
+
+PreservedAnalyses InstCombinePass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+ auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
+
+ auto *LI = AM->getCachedResult<LoopAnalysis>(F);
+
+ if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
+ // No changes, all analyses are preserved.
+ return PreservedAnalyses::all();
+
+ // Mark all the analyses that instcombine updates as preserved.
+ // FIXME: Need a way to preserve CFG analyses here!
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+/// \brief The legacy pass manager's instcombine pass.
+///
+/// This is a basic whole-function wrapper around the instcombine utility. It
+/// will try to combine all instructions in the function.
+class InstructionCombiningPass : public FunctionPass {
+ InstCombineWorklist Worklist;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ InstructionCombiningPass() : FunctionPass(ID) {
+ initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool InstructionCombiningPass::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ // Required analyses.
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Optional analyses.
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- Builder = nullptr;
- return EverMadeChange;
+ return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
+}
+
+char InstructionCombiningPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstructionCombiningPassPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstructionCombiningPassPass(*unwrap(R));
}
FunctionPass *llvm::createInstructionCombiningPass() {
- return new InstCombiner();
+ return new InstructionCombiningPass();
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 25f1f02..939e04b 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,12 +46,14 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <string>
#include <system_error>
@@ -64,22 +69,21 @@ static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
-static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;
+static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
+static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
-static const size_t kMinStackMallocSize = 1 << 6; // 64B
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
-static const uint64_t kAsanCtorAndDtorPriority = 1;
+static const uint64_t kAsanCtorAndDtorPriority = 1;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
-static const char *const kAsanReportLoadN = "__asan_report_load_n";
-static const char *const kAsanReportStoreN = "__asan_report_store_n";
static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
@@ -89,7 +93,7 @@ static const char *const kAsanInitName = "__asan_init_v5";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
-static const int kMaxAsanStackMallocSizeClass = 10;
+static const int kMaxAsanStackMallocSizeClass = 10;
static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
@@ -102,10 +106,6 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
"__asan_option_detect_stack_use_after_return";
-#ifndef NDEBUG
-static const int kAsanStackAfterReturnMagic = 0xf5;
-#endif
-
// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -119,84 +119,110 @@ static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU;
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
- cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
- cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
- cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
- cl::Hidden, cl::init(true));
-static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
- cl::desc("use instrumentation with slow path for all accesses"),
- cl::Hidden, cl::init(false));
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites(
+ "asan-instrument-writes", cl::desc("instrument write instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentAtomics(
+ "asan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClAlwaysSlowPath(
+ "asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
+ cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily
// set it to 10000.
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
- cl::init(10000),
- cl::desc("maximal number of instructions to instrument in any given BB"),
- cl::Hidden);
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
+ "asan-max-ins-per-bb", cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
// This flag may need to be replaced with -f[no]asan-stack.
-static cl::opt<bool> ClStack("asan-stack",
- cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
- cl::desc("Check return-after-free"), cl::Hidden, cl::init(true));
+ cl::desc("Check return-after-free"),
+ cl::Hidden, cl::init(true));
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
- cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+ cl::desc("Handle global objects"), cl::Hidden,
+ cl::init(true));
static cl::opt<bool> ClInitializers("asan-initialization-order",
- cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
- cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
- cl::Hidden, cl::init(false));
-static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
- cl::desc("Realign stack to the value of this flag (power of two)"),
- cl::Hidden, cl::init(32));
+ cl::desc("Handle C++ initializer order"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInvalidPointerPairs(
+ "asan-detect-invalid-pointer-pair",
+ cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
+ cl::init(false));
+static cl::opt<unsigned> ClRealignStack(
+ "asan-realign-stack",
+ cl::desc("Realign stack to the value of this flag (power of two)"),
+ cl::Hidden, cl::init(32));
static cl::opt<int> ClInstrumentationWithCallsThreshold(
"asan-instrumentation-with-call-threshold",
- cl::desc("If the function being instrumented contains more than "
- "this number of memory accesses, use callbacks instead of "
- "inline checks (-1 means never use callbacks)."),
- cl::Hidden, cl::init(7000));
+ cl::desc(
+ "If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
- "asan-memory-access-callback-prefix",
- cl::desc("Prefix for memory access callbacks"), cl::Hidden,
- cl::init("__asan_"));
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
- cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false));
+ cl::desc("instrument dynamic allocas"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClSkipPromotableAllocas(
+ "asan-skip-promotable-allocas",
+ cl::desc("Do not instrument promotable allocas"), cl::Hidden,
+ cl::init(true));
// These flags allow to change the shadow mapping.
// The shadow mapping looks like
// Shadow = (Mem >> scale) + (1 << offset_log)
static cl::opt<int> ClMappingScale("asan-mapping-scale",
- cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+ cl::desc("scale of asan shadow mapping"),
+ cl::Hidden, cl::init(0));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
-static cl::opt<bool> ClOpt("asan-opt",
- cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
- cl::desc("Instrument the same temp just once"), cl::Hidden,
- cl::init(true));
+static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp(
+ "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClOptGlobals("asan-opt-globals",
- cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
-
-static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
- cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
- cl::Hidden, cl::init(false));
+ cl::desc("Don't instrument scalar globals"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptStack(
+ "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClCheckLifetime(
+ "asan-check-lifetime",
+ cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClDynamicAllocaStack(
"asan-stack-dynamic-alloca",
cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
- cl::init(false));
+ cl::init(true));
+
+static cl::opt<uint32_t> ClForceExperiment(
+ "asan-force-experiment",
+ cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
+ cl::init(0));
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
cl::init(0));
static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
cl::Hidden, cl::init(0));
-static cl::opt<std::string> ClDebugFunc("asan-debug-func",
- cl::Hidden, cl::desc("Debug func"));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
+ cl::desc("Debug func"));
static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
cl::Hidden, cl::init(-1));
static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
@@ -206,10 +232,10 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumInstrumentedDynamicAllocas,
"Number of instrumented dynamic allocas");
-STATISTIC(NumOptimizedAccessesToGlobalArray,
- "Number of optimized accesses to global arrays");
STATISTIC(NumOptimizedAccessesToGlobalVar,
"Number of optimized accesses to global vars");
+STATISTIC(NumOptimizedAccessesToStackVar,
+ "Number of optimized accesses to stack vars");
namespace {
/// Frontend-provided metadata for source location.
@@ -224,8 +250,8 @@ struct LocationMetadata {
void parse(MDNode *MDN) {
assert(MDN->getNumOperands() == 3);
- MDString *MDFilename = cast<MDString>(MDN->getOperand(0));
- Filename = MDFilename->getString();
+ MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = DIFilename->getString();
LineNo =
mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
ColumnNo =
@@ -237,9 +263,7 @@ struct LocationMetadata {
class GlobalsMetadata {
public:
struct Entry {
- Entry()
- : SourceLoc(), Name(), IsDynInit(false),
- IsBlacklisted(false) {}
+ Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {}
LocationMetadata SourceLoc;
StringRef Name;
bool IsDynInit;
@@ -248,19 +272,17 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
- void init(Module& M) {
+ void init(Module &M) {
assert(!inited_);
inited_ = true;
NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals)
- return;
+ if (!Globals) return;
for (auto MDN : Globals->operands()) {
// Metadata node contains the global and the fields of "Entry".
assert(MDN->getNumOperands() == 5);
auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0));
// The optimizer may optimize away a global entirely.
- if (!GV)
- continue;
+ if (!GV) continue;
// We can already have an entry for GV if it was merged with another
// global.
Entry &E = Entries[GV];
@@ -285,7 +307,7 @@ class GlobalsMetadata {
private:
bool inited_;
- DenseMap<GlobalVariable*, Entry> Entries;
+ DenseMap<GlobalVariable *, Entry> Entries;
};
/// This struct defines the shadow mapping using the rule:
@@ -308,6 +330,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
TargetTriple.getArch() == llvm::Triple::mipsel;
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
+ bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64;
bool IsWindows = TargetTriple.isOSWindows();
ShadowMapping Mapping;
@@ -334,6 +357,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
Mapping.Offset = kSmallX86_64ShadowOffset;
else if (IsMIPS64)
Mapping.Offset = kMIPS64_ShadowOffset64;
+ else if (IsAArch64)
+ Mapping.Offset = kAArch64_ShadowOffset64;
else
Mapping.Offset = kDefaultShadowOffset64;
}
@@ -367,17 +392,36 @@ struct AddressSanitizer : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes =
+ AI->getModule()->getDataLayout().getTypeAllocSize(Ty);
+ return SizeInBytes;
}
- void instrumentMop(Instruction *I, bool UseCalls);
+ /// Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(AllocaInst &AI);
+ /// If it is an interesting memory access, return the PointerOperand
+ /// and set IsWrite/Alignment. Otherwise return nullptr.
+ Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment);
+ void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
+ bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls);
+ Value *SizeArgument, bool UseCalls, uint32_t Exp);
+ void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
- Value *SizeArgument);
+ Value *SizeArgument, uint32_t Exp);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
@@ -392,9 +436,10 @@ struct AddressSanitizer : public FunctionPass {
bool LooksLikeCodeInBug11395(Instruction *I);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
+ uint64_t TypeSize) const;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
int LongSize;
Type *IntptrTy;
@@ -404,15 +449,16 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
- // This array is indexed by AccessIsWrite and log2(AccessSize).
- Function *AsanErrorCallback[2][kNumberOfAccessSizes];
- Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
- // This array is indexed by AccessIsWrite.
- Function *AsanErrorCallbackSized[2],
- *AsanMemoryAccessCallbackSized[2];
+ // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize).
+ Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite and Experiment.
+ Function *AsanErrorCallbackSized[2][2];
+ Function *AsanMemoryAccessCallbackSized[2][2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
GlobalsMetadata GlobalsMD;
+ DenseMap<AllocaInst *, bool> ProcessedAllocas;
friend struct FunctionStackPoisoner;
};
@@ -422,9 +468,7 @@ class AddressSanitizerModule : public ModulePass {
AddressSanitizerModule() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
- const char *getPassName() const override {
- return "AddressSanitizerModule";
- }
+ const char *getPassName() const override { return "AddressSanitizerModule"; }
private:
void initializeCallbacks(Module &M);
@@ -440,7 +484,6 @@ class AddressSanitizerModule : public ModulePass {
GlobalsMetadata GlobalsMD;
Type *IntptrTy;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
ShadowMapping Mapping;
Function *AsanPoisonGlobals;
@@ -467,12 +510,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Type *IntptrPtrTy;
ShadowMapping Mapping;
- SmallVector<AllocaInst*, 16> AllocaVec;
- SmallVector<Instruction*, 8> RetVec;
+ SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
- *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
@@ -493,33 +536,38 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Value *LeftRzAddr;
Value *RightRzAddr;
bool Poison;
- explicit DynamicAllocaCall(AllocaInst *AI,
- Value *LeftRzAddr = nullptr,
- Value *RightRzAddr = nullptr)
- : AI(AI), LeftRzAddr(LeftRzAddr), RightRzAddr(RightRzAddr), Poison(true)
- {}
+ explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr,
+ Value *RightRzAddr = nullptr)
+ : AI(AI),
+ LeftRzAddr(LeftRzAddr),
+ RightRzAddr(RightRzAddr),
+ Poison(true) {}
};
SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec;
// Maps Value to an AllocaInst from which the Value is originated.
- typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy;
+ typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
AllocaForValueMapTy AllocaForValue;
bool HasNonEmptyInlineAsm;
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
- : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
- C(ASan.C), IntptrTy(ASan.IntptrTy),
- IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
- StackAlignment(1 << Mapping.Scale), HasNonEmptyInlineAsm(false),
+ : F(F),
+ ASan(ASan),
+ DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C),
+ IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)),
+ Mapping(ASan.Mapping),
+ StackAlignment(1 << Mapping.Scale),
+ HasNonEmptyInlineAsm(false),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
bool runOnFunction() {
if (!ClStack) return false;
// Collect alloca, ret, lifetime instructions etc.
- for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
- visit(*BB);
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false;
@@ -540,33 +588,31 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ----------------------- Visitors.
/// \brief Collect all Ret instructions.
- void visitReturnInst(ReturnInst &RI) {
- RetVec.push_back(&RI);
- }
+ void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
// Unpoison dynamic allocas redzones.
void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) {
- if (!AllocaCall.Poison)
- return;
+ if (!AllocaCall.Poison) return;
for (auto Ret : RetVec) {
IRBuilder<> IRBRet(Ret);
PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty());
Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty());
Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr,
ConstantInt::get(IntptrTy, 4));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(PartialRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr,
- Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(Zero,
+ IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy));
}
}
// Right shift for BigEndian and left shift for LittleEndian.
Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) {
- return ASan.DL->isLittleEndian() ? IRB.CreateShl(Val, Shift)
- : IRB.CreateLShr(Val, Shift);
+ auto &DL = F.getParent()->getDataLayout();
+ return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift)
+ : IRB.CreateLShr(Val, Shift);
}
// Compute PartialRzMagic for dynamic alloca call. Since we don't know the
@@ -595,7 +641,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!isInterestingAlloca(AI)) return;
+ if (!ASan.isInterestingAlloca(AI)) return;
StackAlignment = std::max(StackAlignment, AI.getAlignment());
if (isDynamicAlloca(AI))
@@ -609,8 +655,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void visitIntrinsicInst(IntrinsicInst &II) {
if (!ClCheckLifetime) return;
Intrinsic::ID ID = II.getIntrinsicID();
- if (ID != Intrinsic::lifetime_start &&
- ID != Intrinsic::lifetime_end)
+ if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
@@ -640,8 +685,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool doesDominateAllExits(const Instruction *I) const {
for (auto Ret : RetVec) {
- if (!ASan.getDominatorTree().dominates(I, Ret))
- return false;
+ if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
}
return true;
}
@@ -649,19 +693,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool isDynamicAlloca(AllocaInst &AI) const {
return AI.isArrayAllocation() || !AI.isStaticAlloca();
}
-
- // Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) const {
- return (AI.getAllocatedType()->isSized() &&
- // alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(&AI) > 0);
- }
-
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
- Type *Ty = AI->getAllocatedType();
- uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty);
- return SizeInBytes;
- }
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
@@ -679,21 +710,25 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // namespace
char AddressSanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_BEGIN(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_END(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
FunctionPass *llvm::createAddressSanitizerFunctionPass() {
return new AddressSanitizer();
}
char AddressSanitizerModule::ID = 0;
-INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+INITIALIZE_PASS(
+ AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
- "ModulePass", false, false)
+ "ModulePass",
+ false, false)
ModulePass *llvm::createAddressSanitizerModulePass() {
return new AddressSanitizerModule();
}
@@ -705,16 +740,15 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
}
// \brief Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(
- Module &M, StringRef Str, bool AllowMerging) {
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
// We use private linkage for module-local strings. If they can be merged
// with another one, we set the unnamed_addr attribute.
GlobalVariable *GV =
new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
- if (AllowMerging)
- GV->setUnnamedAddr(true);
+ if (AllowMerging) GV->setUnnamedAddr(true);
GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
return GV;
}
@@ -743,8 +777,7 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
- if (Mapping.Offset == 0)
- return Shadow;
+ if (Mapping.Offset == 0) return Shadow;
// (Shadow >> scale) | offset
if (Mapping.OrShadowOffset)
return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
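For reference, the mapping this emits is plain shift-and-add arithmetic. A minimal standalone sketch, assuming the common x86-64 parameters (Scale = 3, additive Offset = 0x7fff8000) — constants typical of ASan configurations, not taken from this diff:

#include <cstdint>
#include <cstdio>

// Shadow = (Addr >> Scale) + Offset; one shadow byte covers 2^Scale bytes.
int main() {
  const unsigned kScale = 3;
  const uint64_t kOffset = 0x7fff8000ULL;  // assumed 64-bit shadow offset
  const uint64_t Addr = 0x602000000010ULL; // arbitrary application address
  const uint64_t Shadow = (Addr >> kScale) + kOffset;
  std::printf("shadow(0x%llx) = 0x%llx\n", (unsigned long long)Addr,
              (unsigned long long)Shadow);
  return 0;
}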
@@ -756,53 +789,84 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall3(
+ IRB.CreateCall(
isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
- IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
} else if (isa<MemSetInst>(MI)) {
- IRB.CreateCall3(
+ IRB.CreateCall(
AsanMemset,
- IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
}
MI->eraseFromParent();
}
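The net effect of this rewrite is to route the whole intrinsic through the sanitizer runtime, which can then validate both memory ranges. A sketch, assuming the usual prototype behind the AsanMemcpy callback (the exact signature is an assumption here):

#include <cstddef>

extern "C" void *__asan_memcpy(void *Dst, const void *Src, size_t N);

// After instrumentation, memcpy(dst, src, n) in sanitized code behaves as if
// it had been written as a call into the runtime:
void *copyChecked(void *Dst, const void *Src, size_t N) {
  return __asan_memcpy(Dst, Src, N); // runtime checks both ranges, then copies
}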
-// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite/Alignment. Otherwise return nullptr.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
- unsigned *Alignment) {
+/// Check if we want (and can) handle this alloca.
+bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
+ auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
+
+ if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
+ return PreviouslySeenAllocaInfo->getSecond();
+
+ bool IsInteresting = (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+
+ ProcessedAllocas[&AI] = IsInteresting;
+ return IsInteresting;
+}
+
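The new ProcessedAllocas cache means the predicate (isAllocaPromotable in particular) runs once per alloca rather than once per query. The shape of the pattern, with stand-in types since this is only a sketch:

#include <unordered_map>

struct AllocaInst;                         // stand-in for llvm::AllocaInst
bool computeIsInteresting(AllocaInst &AI); // stand-in for the checks above

std::unordered_map<AllocaInst *, bool> ProcessedAllocas;

bool isInterestingAllocaCached(AllocaInst &AI) {
  auto It = ProcessedAllocas.find(&AI);
  if (It != ProcessedAllocas.end()) return It->second; // cache hit
  const bool IsInteresting = computeIsInteresting(AI); // compute once
  ProcessedAllocas[&AI] = IsInteresting;
  return IsInteresting;
}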
+/// If I is an interesting memory access, return the PointerOperand
+/// and set IsWrite/Alignment. Otherwise return nullptr.
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+ bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment) {
  // Skip memory accesses inserted by other instrumentation.
- if (I->getMetadata("nosanitize"))
- return nullptr;
+ if (I->getMetadata("nosanitize")) return nullptr;
+
+ Value *PtrOperand = nullptr;
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
+ *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
*Alignment = LI->getAlignment();
- return LI->getPointerOperand();
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ PtrOperand = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (!ClInstrumentWrites) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
*Alignment = SI->getAlignment();
- return SI->getPointerOperand();
- }
- if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ PtrOperand = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
*Alignment = 0;
- return RMW->getPointerOperand();
- }
- if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ PtrOperand = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
*Alignment = 0;
- return XCHG->getPointerOperand();
+ PtrOperand = XCHG->getPointerOperand();
}
- return nullptr;
+
+ // Treat memory accesses to promotable allocas as non-interesting since they
+ // will not cause memory violations. This greatly speeds up the instrumented
+ // executable at -O0.
+ if (ClSkipPromotableAllocas)
+ if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand))
+ return isInterestingAlloca(*AI) ? AI : nullptr;
+
+ return PtrOperand;
}
static bool isPointerOperand(Value *V) {
@@ -814,17 +878,15 @@ static bool isPointerOperand(Value *V) {
// the frontend.
static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
- if (!Cmp->isRelational())
- return false;
+ if (!Cmp->isRelational()) return false;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() != Instruction::Sub)
- return false;
+ if (BO->getOpcode() != Instruction::Sub) return false;
} else {
return false;
}
if (!isPointerOperand(I->getOperand(0)) ||
!isPointerOperand(I->getOperand(1)))
- return false;
+ return false;
return true;
}
@@ -835,8 +897,8 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
-void
-AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
+void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
+ Instruction *I) {
IRBuilder<> IRB(I);
Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
@@ -844,41 +906,50 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
if (Param[i]->getType()->isPointerTy())
Param[i] = IRB.CreatePointerCast(Param[i], IntptrTy);
}
- IRB.CreateCall2(F, Param[0], Param[1]);
+ IRB.CreateCall(F, Param);
}
-void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
+void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Instruction *I, bool UseCalls,
+ const DataLayout &DL) {
bool IsWrite = false;
unsigned Alignment = 0;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
+ uint64_t TypeSize = 0;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment);
assert(Addr);
+
+ // Optimization experiments.
+ // The experiments can be used to evaluate potential optimizations that remove
+ // instrumentation (assess false negatives). Instead of completely removing
+ // some instrumentation, you set Exp to a non-zero value (mask of optimization
+ // experiments that want to remove instrumentation of this instruction).
+ // If Exp is non-zero, this pass will emit special calls into the runtime
+ // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls
+ // make the runtime terminate the program in a special way (with a different
+ // exit status). Then you run the new compiler on a buggy corpus, collect
+ // the special terminations (ideally, you don't see them at all -- no false
+ // negatives) and make the decision on the optimization.
+ uint32_t Exp = ClForceExperiment;
+
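In scalar form, Exp selects between the plain and the experimental report entry points (the prototypes mirror the callback names created in initializeCallbacks; treating them as plain C functions is an assumption for illustration):

#include <cstdint>

extern "C" void __asan_report_load4(uintptr_t Addr);
extern "C" void __asan_report_exp_load4(uintptr_t Addr, uint32_t Exp);

void reportLoad4(uintptr_t Addr, uint32_t Exp) {
  if (Exp == 0)
    __asan_report_load4(Addr);          // normal report path
  else
    __asan_report_exp_load4(Addr, Exp); // experiment mask forwarded to runtime
}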
if (ClOpt && ClOptGlobals) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
- // If initialization order checking is disabled, a simple access to a
- // dynamically initialized global is always valid.
- if (!ClInitializers || GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalVar++;
- return;
- }
- }
- ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
- if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
- if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalArray++;
- return;
- }
- }
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
+ if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToGlobalVar++;
+ return;
}
}
- Type *OrigPtrTy = Addr->getType();
- Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
-
- assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
-
- assert((TypeSize % 8) == 0);
+ if (ClOpt && ClOptStack) {
+ // A direct inbounds access to a stack variable is always valid.
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToStackVar++;
+ return;
+ }
+ }
if (IsWrite)
NumInstrumentedWrites++;
@@ -891,65 +962,57 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
- // Instrument unusual size or unusual alignment.
- // We can not do it with a single check, so we do 1-byte check for the first
- // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
- // to report the actual access size.
- IRBuilder<> IRB(I);
- Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- if (UseCalls) {
- IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
- } else {
- Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
- OrigPtrTy);
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
- }
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls,
+ Exp);
+ instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
}
-// Validate the result of Module::getOrInsertFunction called for an interface
-// function of AddressSanitizer. If the instrumented module defines a function
-// with the same name, their prototypes must match, otherwise
-// getOrInsertFunction returns a bitcast.
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->dump();
- report_fatal_error("trying to redefine an AddressSanitizer "
- "interface function");
-}
-
-Instruction *AddressSanitizer::generateCrashCode(
- Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
+Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
+ Value *Addr, bool IsWrite,
+ size_t AccessSizeIndex,
+ Value *SizeArgument,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
- CallInst *Call = SizeArgument
- ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
- : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+ Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
+ CallInst *Call = nullptr;
+ if (SizeArgument) {
+ if (Exp == 0)
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0],
+ {Addr, SizeArgument});
+ else
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1],
+ {Addr, SizeArgument, ExpVal});
+ } else {
+ if (Exp == 0)
+ Call =
+ IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr);
+ else
+ Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex],
+ {Addr, ExpVal});
+ }
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
- IRB.CreateCall(EmptyAsm);
+ IRB.CreateCall(EmptyAsm, {});
return Call;
}
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
- Value *ShadowValue,
- uint32_t TypeSize) {
+ Value *ShadowValue,
+ uint32_t TypeSize) {
size_t Granularity = 1 << Mapping.Scale;
// Addr & (Granularity - 1)
- Value *LastAccessedByte = IRB.CreateAnd(
- AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ Value *LastAccessedByte =
+ IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
- LastAccessedByte = IRB.CreateIntCast(
- LastAccessedByte, ShadowValue->getType(), false);
+ LastAccessedByte =
+ IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
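A scalar model of the comparison built here: with the usual shadow encoding (0 means the whole 2^Scale-byte granule is addressable, k in 1..7 means only its first k bytes are), the access faults exactly when the offset of its last byte within the granule reaches the shadow value. A sketch, assuming Scale = 3:

#include <cstdint>

bool slowPathFaults(uint64_t AddrLong, uint32_t TypeSize, int8_t ShadowValue) {
  const unsigned Scale = 3;                       // assumed shadow scale
  const uint64_t Granularity = 1ull << Scale;
  uint64_t LastAccessedByte = AddrLong & (Granularity - 1);
  if (TypeSize / 8 > 1)
    LastAccessedByte += TypeSize / 8 - 1;
  return (int8_t)LastAccessedByte >= ShadowValue; // signed >=, as CreateICmpSGE
}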
@@ -957,24 +1020,29 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls) {
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
if (UseCalls) {
- IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
- AddrLong);
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+ AddrLong);
+ else
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
+ {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)});
return;
}
- Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> Mapping.Scale));
+ Type *ShadowTy =
+ IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
- Value *ShadowValue = IRB.CreateLoad(
- IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ Value *ShadowValue =
+ IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1 << Mapping.Scale;
@@ -983,10 +1051,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
- TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, InsertBefore, false,
- MDBuilder(*C).createBranchWeights(1, 100000));
- assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
+ TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
@@ -999,11 +1066,37 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
}
- Instruction *Crash = generateCrashCode(
- CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
+ Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
+ AccessSizeIndex, SizeArgument, Exp);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
+// Instrument unusual size or unusual alignment.
+// We cannot do it with a single check, so we do a 1-byte check for the first
+// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+// to report the actual access size.
+void AddressSanitizer::instrumentUnusualSizeOrAlignment(
+ Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(I);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
+ {AddrLong, Size});
+ else
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
+ {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ Addr->getType());
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ }
+}
+
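For, say, a 10-byte access this path probes only bytes 0 and 9 and hands the true size to the report callback. Schematically (probeOneByte stands in for the 1-byte instrumentAddress call; it is not a function in this file):

#include <cstdint>

void probeOneByte(uint64_t AddrLong); // stand-in for a 1-byte shadow check

void checkUnusualAccess(uint64_t AddrLong, uint32_t TypeSize) {
  const uint64_t Size = TypeSize / 8; // e.g. 10 for an 80-bit access
  probeOneByte(AddrLong);             // first byte
  probeOneByte(AddrLong + Size - 1);  // last byte
  // __asan_report_*_n(Addr, Size) still reports the real access size
}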
void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
@@ -1025,12 +1118,11 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
for (Use &OP : CA->operands()) {
- if (isa<ConstantAggregateZero>(OP))
- continue;
+ if (isa<ConstantAggregateZero>(OP)) continue;
ConstantStruct *CS = cast<ConstantStruct>(OP);
// Must have a function or null ptr.
- if (Function* F = dyn_cast<Function>(CS->getOperand(1))) {
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
if (F->getName() == kAsanModuleCtorName) continue;
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
// Don't instrument CTORs that will run before asan.module_ctor.
@@ -1055,13 +1147,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
return false;
- if (G->hasComdat())
- return false;
+ if (G->hasComdat()) return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- return false;
+ if (G->isThreadLocal()) return false;
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
@@ -1072,10 +1162,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
StringRef ParsedSegment, ParsedSection;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(Section, ParsedSegment,
- ParsedSection, TAA, TAAParsed,
- StubSize);
+ std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
+ Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
report_fatal_error("Invalid section specifier '" + ParsedSection +
"': " + ErrorCode + ".");
@@ -1128,20 +1216,19 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
- AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterGlobalsName,
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, nullptr));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
}
@@ -1154,8 +1241,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (auto &G : M.globals()) {
- if (ShouldInstrumentGlobal(&G))
- GlobalsToChange.push_back(&G);
+ if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G);
}
size_t n = GlobalsToChange.size();
@@ -1180,8 +1266,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
  // We shouldn't merge the same module names, as this string serves as a
  // unique module ID at runtime.
GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier(), /*AllowMerging*/false);
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false);
+ auto &DL = M.getDataLayout();
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -1195,32 +1282,30 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
- uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
+ uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
uint64_t MinRZ = MinRedzoneSizeForGlobal();
// MinRZ <= RZ <= kMaxGlobalRedzone
    // and trying to make RZ ~ 1/4 of SizeInBytes.
- uint64_t RZ = std::max(MinRZ,
- std::min(kMaxGlobalRedzone,
- (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RZ = std::max(
+ MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ));
uint64_t RightRedzoneSize = RZ;
// Round up to MinRZ
- if (SizeInBytes % MinRZ)
- RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr);
- Constant *NewInitializer = ConstantStruct::get(
- NewTy, G->getInitializer(),
- Constant::getNullValue(RightRedZoneTy), nullptr);
+ Constant *NewInitializer =
+ ConstantStruct::get(NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), nullptr);
// Create a new global variable with enough space for a redzone.
GlobalValue::LinkageTypes Linkage = G->getLinkage();
if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
- GlobalVariable *NewGlobal = new GlobalVariable(
- M, NewTy, G->isConstant(), Linkage,
- NewInitializer, "", G, G->getThreadLocalMode());
+ GlobalVariable *NewGlobal =
+ new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
+ "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(MinRZ);
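A worked instance of the redzone sizing above, assuming MinRZ = 32 (the usual minimum at shadow scale 3):

// SizeInBytes = 1000, kMaxGlobalRedzone = 1 << 18
// RZ = max(32, min(1 << 18, (1000 / 32 / 4) * 32)) = max(32, 224) = 224
// 1000 % 32 == 8, so RightRedzoneSize = 224 + (32 - 8) = 248
// (1000 + 248) % 32 == 0, so the assert above holds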
@@ -1229,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Indices2[1] = IRB.getInt32(0);
G->replaceAllUsesWith(
- ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
NewGlobal->takeName(G);
G->eraseFromParent();
@@ -1249,8 +1334,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr);
- if (ClInitializers && MD.IsDynInit)
- HasDynamicallyInitializedGlobals = true;
+ if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
@@ -1263,20 +1347,20 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
// Create calls for poisoning before initializers run and unpoisoning after.
if (HasDynamicallyInitializedGlobals)
createInitializerPoisonCalls(M, ModuleName);
- IRB.CreateCall2(AsanRegisterGlobals,
- IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n));
+ IRB.CreateCall(AsanRegisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n)});
// We also need to unregister globals at the end, e.g. when a shared library
// gets closed.
- Function *AsanDtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ Function *AsanDtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
- IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
- IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n));
+ IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n)});
appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
DEBUG(dbgs() << M);
@@ -1284,12 +1368,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
}
bool AddressSanitizerModule::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- return false;
- DL = &DLP->getDataLayout();
C = &(M.getContext());
- int LongSize = DL->getPointerSizeInBits();
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize);
@@ -1301,8 +1381,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- if (ClGlobals)
- Changed |= InstrumentGlobals(IRB, M);
+ if (ClGlobals) Changed |= InstrumentGlobals(IRB, M);
return Changed;
}
@@ -1310,50 +1389,51 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
void AddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Create __asan_report* callbacks.
- for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- // IsWrite and TypeSize are encoded in the function name.
- std::string Suffix =
- (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
- AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
+ // IsWrite, TypeSize and Exp are encoded in the function name.
+ for (int Exp = 0; Exp < 2; Exp++) {
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string ExpStr = Exp ? "exp_" : "";
+ const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
+ AsanErrorCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
+ AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ }
}
}
- AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
-
- AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
-
- AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+
+ AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
- AsanHandleNoReturnFunc = checkInterfaceFunction(
+ AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr));
- AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -1364,28 +1444,18 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
GlobalsMD.init(M);
C = &(M.getContext());
- LongSize = DL->getPointerSizeInBits();
+ LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- AsanCtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
- BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
- // call __asan_init in the module ctor.
- IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
- AsanInitFunction = checkInterfaceFunction(
- M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr));
- AsanInitFunction->setLinkage(Function::ExternalLinkage);
- IRB.CreateCall(AsanInitFunction);
+ std::tie(AsanCtorFunction, AsanInitFunction) =
+ createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
Mapping = getShadowMapping(TargetTriple, LongSize);
@@ -1403,7 +1473,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
IRBuilder<> IRB(F.begin()->begin());
- IRB.CreateCall(AsanInitFunction);
+ IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
@@ -1420,22 +1490,21 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttribute(Attribute::SanitizeAddress))
- return false;
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false;
- if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
- return false;
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
- SmallSet<Value*, 16> TempsToInstrument;
- SmallVector<Instruction*, 16> ToInstrument;
- SmallVector<Instruction*, 8> NoReturnCalls;
- SmallVector<BasicBlock*, 16> AllBlocks;
- SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
+ SmallSet<Value *, 16> TempsToInstrument;
+ SmallVector<Instruction *, 16> ToInstrument;
+ SmallVector<Instruction *, 8> NoReturnCalls;
+ SmallVector<BasicBlock *, 16> AllBlocks;
+ SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
unsigned Alignment;
+ uint64_t TypeSize;
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -1444,8 +1513,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
int NumInsnsPerBB = 0;
for (auto &Inst : BB) {
if (LooksLikeCodeInBug11395(&Inst)) return false;
- if (Value *Addr =
- isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) {
+ if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+ &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr).second)
continue; // We've seen this temp in the current BB.
@@ -1457,21 +1526,18 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(Inst)) {
// ok, take it.
} else {
- if (isa<AllocaInst>(Inst))
- NumAllocas++;
+ if (isa<AllocaInst>(Inst)) NumAllocas++;
CallSite CS(&Inst);
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn())
- NoReturnCalls.push_back(CS.getInstruction());
+ if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
}
continue;
}
ToInstrument.push_back(&Inst);
NumInsnsPerBB++;
- if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
- break;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
}
}
@@ -1480,13 +1546,20 @@ bool AddressSanitizer::runOnFunction(Function &F) {
ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
UseCalls = true;
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
+
// Instrument.
int NumInstrumented = 0;
for (auto Inst : ToInstrument) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
- if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
- instrumentMop(Inst, UseCalls);
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment))
+ instrumentMop(ObjSizeVis, Inst, UseCalls,
+ F.getParent()->getDataLayout());
else
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
@@ -1500,7 +1573,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
for (auto CI : NoReturnCalls) {
IRBuilder<> IRB(CI);
- IRB.CreateCall(AsanHandleNoReturnFunc);
+ IRB.CreateCall(AsanHandleNoReturnFunc, {});
}
for (auto Inst : PointerComparisonsOrSubtracts) {
@@ -1531,24 +1604,24 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
std::string Suffix = itostr(i);
- AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr));
- AsanStackFreeFunc[i] = checkInterfaceFunction(
+ AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, nullptr));
+ AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
}
- AsanPoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
- AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
}
-void
-FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
- IRBuilder<> &IRB, Value *ShadowBase,
- bool DoPoison) {
+void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase,
+ bool DoPoison) {
size_t n = ShadowBytes.size();
size_t i = 0;
// We need to (un)poison n bytes of stack shadow. Poison as many as we can
@@ -1559,7 +1632,7 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
uint64_t Val = 0;
for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
- if (ASan.DL->isLittleEndian())
+ if (F.getParent()->getDataLayout().isLittleEndian())
Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
else
Val = (Val << 8) | ShadowBytes[i + j];
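This loop folds up to eight shadow bytes into one wide store, low byte first on little-endian targets. As a standalone helper (a sketch of the same packing):

#include <cstddef>
#include <cstdint>

uint64_t packShadowBytes(const uint8_t *B, size_t N, bool LittleEndian) {
  uint64_t Val = 0;
  for (size_t j = 0; j < N; j++) {
    if (LittleEndian)
      Val |= (uint64_t)B[j] << (8 * j); // byte j lands at bit offset 8*j
    else
      Val = (Val << 8) | B[j];          // big-endian: shift earlier bytes up
  }
  return Val;
}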
@@ -1578,9 +1651,8 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
static int StackMallocSizeClass(uint64_t LocalStackSize) {
assert(LocalStackSize <= kMaxStackMallocSize);
uint64_t MaxSize = kMinStackMallocSize;
- for (int i = 0; ; i++, MaxSize *= 2)
- if (LocalStackSize <= MaxSize)
- return i;
+ for (int i = 0;; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize) return i;
llvm_unreachable("impossible LocalStackSize");
}
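StackMallocSizeClass maps a frame size to the smallest power-of-two bucket that holds it. Standalone, with kMinStackMallocSize assumed to be 1 << 6 (the constant lives elsewhere in this file):

#include <cstdint>

int stackMallocSizeClass(uint64_t LocalStackSize) {
  uint64_t MaxSize = 1 << 6;                 // assumed kMinStackMallocSize
  for (int i = 0;; i++, MaxSize *= 2)
    if (LocalStackSize <= MaxSize) return i; // 64 -> 0, 100 -> 1, 4096 -> 6
}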
@@ -1592,18 +1664,21 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {
void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
IRBuilder<> &IRB, Value *ShadowBase, int Size) {
assert(!(Size % 8));
- assert(kAsanStackAfterReturnMagic == 0xf5);
+
+ // kAsanStackAfterReturnMagic is 0xf5.
+ const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL;
+
for (int i = 0; i < Size; i += 8) {
Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
- IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
- IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ IRB.CreateStore(
+ ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
}
}
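What the emitted loop writes, expressed directly over a shadow buffer (a sketch; the real code emits IR performing these stores at run time):

#include <cstdint>
#include <cstring>

void setShadowAfterReturn(uint8_t *ShadowBase, int Size) {
  const uint64_t Magic64 = 0xf5f5f5f5f5f5f5f5ULL; // 8 copies of 0xf5
  for (int i = 0; i < Size; i += 8)
    std::memcpy(ShadowBase + i, &Magic64, 8);     // one 64-bit store per step
}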
static DebugLoc getFunctionEntryDebugLocation(Function &F) {
for (const auto &Inst : F.getEntryBlock())
- if (!isa<AllocaInst>(Inst))
- return Inst.getDebugLoc();
+ if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc();
return DebugLoc();
}
@@ -1640,10 +1715,13 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
void FunctionStackPoisoner::poisonStack() {
assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
- if (ClInstrumentAllocas)
+ if (ClInstrumentAllocas) {
// Handle dynamic allocas.
- for (auto &AllocaCall : DynamicAllocaVec)
+ for (auto &AllocaCall : DynamicAllocaVec) {
handleDynamicAllocaCall(AllocaCall);
+ unpoisonDynamicAlloca(AllocaCall);
+ }
+ }
if (AllocaVec.size() == 0) return;
@@ -1657,9 +1735,9 @@ void FunctionStackPoisoner::poisonStack() {
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
- ASanStackVariableDescription D = { AI->getName().data(),
- getAllocaSizeInBytes(AI),
- AI->getAlignment(), AI, 0};
+ ASanStackVariableDescription D = {AI->getName().data(),
+ ASan.getAllocaSizeInBytes(AI),
+ AI->getAlignment(), AI, 0};
SVD.push_back(D);
}
// Minimal header size (left redzone) is 4 pointers,
@@ -1671,9 +1749,11 @@ void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc =
ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
- // Don't do dynamic alloca in presence of inline asm: too often it
- // makes assumptions on which registers are available.
+ // Don't do dynamic alloca in presence of inline asm: too often it makes
+ // assumptions on which registers are available. Don't do stack malloc in the
+ // presence of inline asm on 32-bit platforms for the same reason.
bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
+ DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1739,7 +1819,7 @@ void FunctionStackPoisoner::poisonStack() {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
- replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true);
AI->replaceAllUsesWith(NewAllocaPtr);
}
@@ -1750,19 +1830,19 @@ void FunctionStackPoisoner::poisonStack() {
BasePlus0);
// Write the frame description constant to redzone[1].
Value *BasePlus1 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), L.DescriptionString,
- /*AllowMerging*/true);
- Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
- IntptrTy);
+ /*AllowMerging*/ true);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Write the PC to redzone[2].
Value *BasePlus2 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
- 2 * ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
+ IntptrPtrTy);
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
// Poison the stack redzones at the entry.
@@ -1807,8 +1887,9 @@ void FunctionStackPoisoner::poisonStack() {
IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
} else {
// For larger frames call __asan_stack_free_*.
- IRBPoison.CreateCall2(AsanStackFreeFunc[StackMallocIdx], FakeStack,
- ConstantInt::get(IntptrTy, LocalStackSize));
+ IRBPoison.CreateCall(
+ AsanStackFreeFunc[StackMallocIdx],
+ {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)});
}
IRBuilder<> IRBElse(ElseTerm);
@@ -1822,14 +1903,8 @@ void FunctionStackPoisoner::poisonStack() {
}
}
- if (ClInstrumentAllocas)
- // Unpoison dynamic allocas.
- for (auto &AllocaCall : DynamicAllocaVec)
- unpoisonDynamicAlloca(AllocaCall);
-
// We are done. Remove the old unused alloca instructions.
- for (auto AI : AllocaVec)
- AI->eraseFromParent();
+ for (auto AI : AllocaVec) AI->eraseFromParent();
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
@@ -1837,9 +1912,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
- IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
- : AsanUnpoisonStackMemoryFunc,
- AddrArg, SizeArg);
+ IRB.CreateCall(DoPoison ? AsanPoisonStackMemoryFunc
+ : AsanUnpoisonStackMemoryFunc,
+ {AddrArg, SizeArg});
}
// Handling llvm.lifetime intrinsics for a given %alloca:
@@ -1854,12 +1929,11 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
    // We're interested only in allocas we can handle.
- return isInterestingAlloca(*AI) ? AI : nullptr;
+ return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
+ if (I != AllocaForValue.end()) return I->second;
// Store 0 while we're calculating alloca for value V to avoid
// infinite recursion if the value references itself.
AllocaForValue[V] = nullptr;
@@ -1867,8 +1941,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (CastInst *CI = dyn_cast<CastInst>(V))
Res = findAllocaForValue(CI->getOperand(0));
else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *IncValue = PN->getIncomingValue(i);
+ for (Value *IncValue : PN->incoming_values()) {
// Allow self-referencing phi-nodes.
if (IncValue == PN) continue;
AllocaInst *IncValueAI = findAllocaForValue(IncValue);
@@ -1878,8 +1951,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
Res = IncValueAI;
}
}
- if (Res)
- AllocaForValue[V] = Res;
+ if (Res) AllocaForValue[V] = Res;
return Res;
}
@@ -1910,14 +1982,14 @@ Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize,
Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7));
unsigned Val1Int = kAsanAllocaPartialVal1;
unsigned Val2Int = kAsanAllocaPartialVal2;
- if (!ASan.DL->isLittleEndian()) {
+ if (!F.getParent()->getDataLayout().isLittleEndian()) {
Val1Int = sys::getSwappedBytes(Val1Int);
Val2Int = sys::getSwappedBytes(Val2Int);
}
Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift);
Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7));
// For BigEndian get 0x000000YZ -> 0xYZ000000.
- if (ASan.DL->isBigEndian())
+ if (F.getParent()->getDataLayout().isBigEndian())
PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24));
Value *Val2 = IRB.getInt32(Val2Int);
Value *Cond =
@@ -1951,7 +2023,8 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
  // redzones, and OldSize is the number of allocated blocks with
// ElementSize size, get allocated memory size in bytes by
// OldSize * ElementSize.
- unsigned ElementSize = ASan.DL->getTypeAllocSize(AI->getAllocatedType());
+ unsigned ElementSize =
+ F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType());
Value *OldSize = IRB.CreateMul(AI->getArraySize(),
ConstantInt::get(IntptrTy, ElementSize));
@@ -2019,3 +2092,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
AI->eraseFromParent();
NumInstrumentedDynamicAllocas++;
}
+
+// isSafeAccess returns true if Addr is always inbounds with respect to its
+// base object. For example, it is a field access or an array access with
+// constant inbounds index.
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Value *Addr, uint64_t TypeSize) const {
+ SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
+ if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
+ uint64_t Size = SizeOffset.first.getZExtValue();
+ int64_t Offset = SizeOffset.second.getSExtValue();
+ // Three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ return Offset >= 0 && Size >= uint64_t(Offset) &&
+ Size - uint64_t(Offset) >= TypeSize / 8;
+}
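The three conditions reduce to a standalone predicate over the values the ObjectSizeOffsetVisitor produced (NeededSize is TypeSize / 8):

#include <cstdint>

bool isSafe(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
  return Offset >= 0 &&                         // offset is from the base ptr
         Size >= uint64_t(Offset) &&            // unsigned: offset in object
         Size - uint64_t(Offset) >= NeededSize; // room for the whole access
}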
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 9a5cea8..f685803 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -24,7 +25,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "bounds-checking"
@@ -49,12 +49,10 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
private:
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
@@ -63,7 +61,7 @@ namespace {
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = nullptr);
- bool instrument(Value *Ptr, Value *Val);
+ bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
};
}
@@ -84,7 +82,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
Builder->SetInsertPoint(TrapBB);
llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
- CallInst *TrapCall = Builder->CreateCall(F);
+ CallInst *TrapCall = Builder->CreateCall(F, {});
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
TrapCall->setDebugLoc(Inst->getDebugLoc());
@@ -125,8 +123,9 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
/// result from the load or the value being stored. It is used to determine the
/// size of memory block that is touched.
/// Returns true if any change was made to the IR, false otherwise.
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
- uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType());
+bool BoundsChecking::instrument(Value *Ptr, Value *InstVal,
+ const DataLayout &DL) {
+ uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -141,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- Type *IntTy = DL->getIntPtrType(Ptr->getType());
+ Type *IntTy = DL.getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
@@ -165,8 +164,8 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
bool BoundsChecking::runOnFunction(Function &F) {
- DL = &getAnalysis<DataLayoutPass>().getDataLayout();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TrapBB = nullptr;
BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
@@ -192,13 +191,16 @@ bool BoundsChecking::runOnFunction(Function &F) {
Builder->SetInsertPoint(Inst);
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- MadeChange |= instrument(LI->getPointerOperand(), LI);
+ MadeChange |= instrument(LI->getPointerOperand(), LI, DL);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
+ MadeChange |=
+ instrument(SI->getPointerOperand(), SI->getValueOperand(), DL);
} else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL);
} else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getValOperand(), DL);
} else {
llvm_unreachable("unknown Instruction type");
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 8f24476..2de6e1a 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -83,14 +83,14 @@ static cl::opt<bool> ClPreserveAlignment(
cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
cl::init(false));
-// The ABI list file controls how shadow parameters are passed. The pass treats
+// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations are "functional" and "discard",
// which are described below under DataFlowSanitizer::WrapperKind.
-static cl::opt<std::string> ClABIListFile(
+static cl::list<std::string> ClABIListFiles(
"dfsan-abilist",
cl::desc("File listing native ABI functions and how the pass treats them"),
cl::Hidden);
@@ -141,7 +141,9 @@ class DFSanABIList {
std::unique_ptr<SpecialCaseList> SCL;
public:
- DFSanABIList(std::unique_ptr<SpecialCaseList> SCL) : SCL(std::move(SCL)) {}
+ DFSanABIList() {}
+
+ void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
/// Returns whether either this function or its source file are listed in the
/// given category.
@@ -215,7 +217,6 @@ class DataFlowSanitizer : public ModulePass {
WK_Custom
};
- const DataLayout *DL;
Module *Mod;
LLVMContext *Ctx;
IntegerType *ShadowTy;
@@ -247,7 +248,7 @@ class DataFlowSanitizer : public ModulePass {
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
bool isInstrumented(const Function *F);
@@ -264,9 +265,9 @@ class DataFlowSanitizer : public ModulePass {
Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
public:
- DataFlowSanitizer(StringRef ABIListFile = StringRef(),
- void *(*getArgTLS)() = nullptr,
- void *(*getRetValTLS)() = nullptr);
+ DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
+ void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
static char ID;
bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
@@ -351,25 +352,26 @@ char DataFlowSanitizer::ID;
INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
"DataFlowSanitizer: dynamic data flow analysis.", false, false)
-ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile,
- void *(*getArgTLS)(),
- void *(*getRetValTLS)()) {
- return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS);
+ModulePass *
+llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
}
-DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile,
- void *(*getArgTLS)(),
- void *(*getRetValTLS)())
- : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
- ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile
- : ABIListFile)) {
+DataFlowSanitizer::DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
+ AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
+ ClABIListFiles.end());
+ ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles));
}
FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
- llvm::SmallVector<Type *, 4> ArgTypes;
- std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
- for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
- ArgTypes.push_back(ShadowTy);
+ llvm::SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
if (T->isVarArg())
ArgTypes.push_back(ShadowPtrTy);
Type *RetType = T->getReturnType();
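
A worked instance of the rewrite above, assuming dfsan's default 16-bit shadow width; the concrete signature is illustrative, and the return-type wrapping happens in the tail of getArgsFunctionType, which this hunk does not show:

// Original:  i32 (i8*, i64)
// 1. ArgTypes seeded with the original parameters:  {i8*, i64}
// 2. One ShadowTy slot appended per parameter:      {i8*, i64, i16, i16}
//    (a vararg function would also gain ShadowPtrTy, i.e. i16*)
// 3. A non-void return type is wrapped so the return label rides along:
//    Result: { i32, i16 } (i8*, i64, i16, i16)
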
@@ -382,9 +384,8 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
assert(!T->isVarArg());
llvm::SmallVector<Type *, 4> ArgTypes;
ArgTypes.push_back(T->getPointerTo());
- std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
- for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
- ArgTypes.push_back(ShadowTy);
+ ArgTypes.append(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
ArgTypes.push_back(ShadowPtrTy);
@@ -420,16 +421,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
Mod = &M;
Ctx = &M.getContext();
ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
ShadowPtrTy = PointerType::getUnqual(ShadowTy);
- IntptrTy = DL->getIntPtrType(*Ctx);
+ IntptrTy = DL.getIntPtrType(*Ctx);
ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
if (IsX86_64)
@@ -528,9 +526,9 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
F->getParent());
NewF->copyAttributesFrom(F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
- AttributeSet::ReturnIndex));
+ AttributeSet::ReturnIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
@@ -591,9 +589,6 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
}
bool DataFlowSanitizer::runOnModule(Module &M) {
- if (!DL)
- return false;
-
if (ABIList.isIn(M, "skip"))
return false;
@@ -708,9 +703,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
NewF->copyAttributesFrom(&F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
- AttributeSet::ReturnIndex));
+ AttributeSet::ReturnIndex,
+ AttributeSet::get(NewF->getContext(), AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
for (Function::arg_iterator FArg = F.arg_begin(),
NewFArg = NewF->arg_begin(),
FArgEnd = F.arg_end();
@@ -758,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(&F);
+ DI->second->replaceFunction(&F);
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -855,7 +850,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
IRBuilder<> ThenIRB(BI);
- ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
}
}
}
@@ -870,7 +865,7 @@ Value *DFSanFunction::getArgTLSPtr() {
return ArgTLSPtr = DFS.ArgTLS;
IRBuilder<> IRB(F->getEntryBlock().begin());
- return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
}
Value *DFSanFunction::getRetvalTLS() {
@@ -880,7 +875,7 @@ Value *DFSanFunction::getRetvalTLS() {
return RetvalTLSPtr = DFS.RetvalTLS;
IRBuilder<> IRB(F->getEntryBlock().begin());
- return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
+ return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
}
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
@@ -977,7 +972,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
IRBuilder<> IRB(Pos);
if (AvoidNewBlocks) {
- CallInst *Call = IRB.CreateCall2(DFS.DFSanCheckedUnionFn, V1, V2);
+ CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
@@ -990,7 +985,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
IRBuilder<> ThenIRB(BI);
- CallInst *Call = ThenIRB.CreateCall2(DFS.DFSanUnionFn, V1, V2);
+ CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
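
For reference, a scalar sketch of the label-union semantics the two calls above rely on. The zero-label fast paths are assumptions about the dfsan runtime, not code in this patch; the label type is 16 bits wide at the default shadow width.

#include <cstdint>

extern "C" uint16_t dfsan_union(uint16_t L1, uint16_t L2); // runtime entry point

uint16_t unionLabels(uint16_t V1, uint16_t V2) {
  // V1 != V2 is already guaranteed by the Ne guard built above.
  if (V2 == 0) return V1;       // union with the empty label is a no-op
  if (V1 == 0) return V2;
  return dfsan_union(V1, V2);   // may allocate a fresh union label
}
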
@@ -1054,7 +1049,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
SmallVector<Value *, 2> Objs;
- GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
bool AllConstants = true;
for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
i != e; ++i) {
@@ -1080,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
case 2: {
IRBuilder<> IRB(Pos);
- Value *ShadowAddr1 =
- IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
}
@@ -1092,8 +1087,9 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
// shadow is non-equal.
BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
IRBuilder<> FallbackIRB(FallbackBB);
- CallInst *FallbackCall = FallbackIRB.CreateCall2(
- DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ CallInst *FallbackCall = FallbackIRB.CreateCall(
+ DFS.DFSanUnionLoadFn,
+ {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
// Compare each of the shadows stored in the loaded 64 bits to each other,
@@ -1132,7 +1128,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
DT.addNewBlock(NextBB, LastBr->getParent());
IRBuilder<> NextIRB(NextBB);
- WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
LastBr->setSuccessor(0, NextBB);
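
A scalar sketch of the all-equal test referred to by the "Compare each of the shadows stored in the loaded 64 bits" comment above, assuming the default 16-bit shadow width; this mirrors the rotate-and-compare the fast path builds in IR:

#include <cstdint>

// All four 16-bit shadows in one 64-bit word are equal exactly when the
// word equals itself rotated left by the shadow width.
bool allShadowsEqual(uint64_t WideShadow) {
  uint64_t Rot = (WideShadow << 16) | (WideShadow >> 48);
  return WideShadow == Rot;
}
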
@@ -1148,14 +1145,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
IRBuilder<> IRB(Pos);
- CallInst *FallbackCall = IRB.CreateCall2(
- DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ CallInst *FallbackCall = IRB.CreateCall(
+ DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
return FallbackCall;
}
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
- uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
if (Size == 0) {
DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
return;
@@ -1165,7 +1163,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (ClPreserveAlignment) {
Align = LI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ Align = DL.getABITypeAlignment(LI.getType());
} else {
Align = 1;
}
@@ -1217,7 +1215,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Value *ShadowVecAddr =
IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
do {
- Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ Value *CurShadowVecAddr =
+ IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
Size -= ShadowVecSize;
++Offset;
@@ -1225,7 +1224,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Offset *= ShadowVecSize;
}
while (Size > 0) {
- Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ Value *CurShadowAddr =
+ IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
--Size;
++Offset;
@@ -1233,8 +1233,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
- uint64_t Size =
- DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ auto &DL = SI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
if (Size == 0)
return;
@@ -1242,7 +1242,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
if (ClPreserveAlignment) {
Align = SI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
} else {
Align = 1;
}
@@ -1333,10 +1333,10 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
IRBuilder<> IRB(&I);
Value *ValShadow = DFSF.getShadow(I.getValue());
- IRB.CreateCall3(
- DFSF.DFS.DFSanSetLabelFn, ValShadow,
- IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
- IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy));
+ IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
+ {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
+ *DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
@@ -1358,8 +1358,8 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
- IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow,
- AlignShadow, I.getVolatileCst());
+ IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow,
+ AlignShadow, I.getVolatileCst()});
}
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
@@ -1473,17 +1473,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Args.push_back(DFSF.getShadow(*i));
if (FT->isVarArg()) {
- auto LabelVAAlloca =
- new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy,
- CS.arg_size() - FT->getNumParams()),
- "labelva", DFSF.F->getEntryBlock().begin());
+ auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
+ CS.arg_size() - FT->getNumParams());
+ auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva",
+ DFSF.F->getEntryBlock().begin());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
- auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n);
+ auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
}
- Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0));
+ Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
}
if (!FT->getReturnType()->isVoidTy()) {
@@ -1532,7 +1532,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Next = II->getNormalDest()->begin();
} else {
BasicBlock *NewBB =
- SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS);
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
Next = NewBB->begin();
}
} else {
@@ -1569,10 +1569,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
- Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
- IRB.CreateStore(DFSF.getShadow(*i),
- IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ IRB.CreateStore(
+ DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
Args.push_back(*i);
}
}
@@ -1587,8 +1588,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
NewCS.setCallingConv(CS.getCallingConv());
NewCS.setAttributes(CS.getAttributes().removeAttributes(
*DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(),
- AttributeSet::ReturnIndex)));
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));
if (Next) {
ExtractValueInst *ExVal =
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 60b541f..9a3ed5c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -57,6 +57,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.UseCfgChecksum = false;
Options.NoRedZone = false;
Options.FunctionNamesInData = true;
+ Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody;
if (DefaultGCOVVersion.size() != 4) {
llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
@@ -72,20 +73,10 @@ namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
- init();
- }
- GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
+ GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) {
assert((Options.EmitNotes || Options.EmitData) &&
"GCOVProfiler asked to do nothing?");
- init();
- }
- const char *getPassName() const override {
- return "GCOV Profiler";
- }
-
- private:
- void init() {
ReversedVersion[0] = Options.Version[3];
ReversedVersion[1] = Options.Version[2];
ReversedVersion[2] = Options.Version[1];
@@ -93,6 +84,11 @@ namespace {
ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
+ const char *getPassName() const override {
+ return "GCOV Profiler";
+ }
+
+ private:
bool runOnModule(Module &M) override;
// Create the .gcno files for the Module based on DebugInfo.
@@ -130,7 +126,7 @@ namespace {
Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- std::string mangleName(DICompileUnit CU, const char *NewStem);
+ std::string mangleName(const DICompileUnit *CU, const char *NewStem);
GCOVOptions Options;
@@ -153,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
return new GCOVProfiler(Options);
}
-static StringRef getFunctionName(DISubprogram SP) {
- if (!SP.getLinkageName().empty())
- return SP.getLinkageName();
- return SP.getName();
+static StringRef getFunctionName(const DISubprogram *SP) {
+ if (!SP->getLinkageName().empty())
+ return SP->getLinkageName();
+ return SP->getName();
}
namespace {
@@ -167,7 +163,7 @@ namespace {
static const char *const BlockTag;
static const char *const EdgeTag;
- GCOVRecord() {}
+ GCOVRecord() = default;
void writeBytes(const char *Bytes, int Size) {
os->write(Bytes, Size);
@@ -313,13 +309,13 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident,
bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
- Function *F = SP.getFunction();
+ Function *F = SP->getFunction();
DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
@@ -334,7 +330,7 @@ namespace {
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
- FNLOS << getFunctionName(SP) << SP.getLineNumber();
+ FNLOS << getFunctionName(SP) << SP->getLine();
FNLOS.flush();
FuncChecksum = hash_value(FunctionNameAndLine);
}
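
A standalone sketch of the checksum computed above; hash_value comes from llvm/ADT/Hashing.h, and the truncation to 32 bits mirrors FuncChecksum's type:

#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <cstdint>
#include <string>

uint32_t funcChecksum(llvm::StringRef Name, unsigned Line) {
  std::string NameAndLine = (Name + llvm::Twine(Line)).str(); // "<name><line>"
  return static_cast<uint32_t>(llvm::hash_value(NameAndLine));
}
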
@@ -370,7 +366,7 @@ namespace {
void writeOut() {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ 1 + lengthOfGCOVString(SP->getFilename()) + 1;
if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
@@ -379,8 +375,8 @@ namespace {
if (UseCfgChecksum)
write(CfgChecksum);
writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP.getFilename());
- write(SP.getLineNumber());
+ writeGCOVString(SP->getFilename());
+ write(SP->getLine());
// Emit count of blocks.
writeBytes(BlockTag, 4);
@@ -415,7 +411,7 @@ namespace {
}
private:
- DISubprogram SP;
+ const DISubprogram *SP;
uint32_t Ident;
uint32_t FuncChecksum;
bool UseCfgChecksum;
@@ -425,7 +421,8 @@ namespace {
};
}
-std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
+std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
+ const char *NewStem) {
if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
MDNode *N = GCov->getOperand(i);
@@ -441,12 +438,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
}
}
- SmallString<128> Filename = CU.getFilename();
+ SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, NewStem);
StringRef FName = sys::path::filename(Filename);
SmallString<128> CurPath;
if (sys::fs::current_path(CurPath)) return FName;
- sys::path::append(CurPath, FName.str());
+ sys::path::append(CurPath, FName);
return CurPath.str();
}
@@ -470,7 +467,8 @@ static bool functionHasLines(Function *F) {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
@@ -490,21 +488,14 @@ void GCOVProfiler::emitProfileNotes() {
// this pass over the original .o's as they're produced, or run it after
// LTO, we'll generate the same .gcno files.
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
std::error_code EC;
raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
std::string EdgeDestinations;
- DIArray SPs = CU.getSubprograms();
unsigned FunctionIdent = 0;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
-
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
@@ -518,7 +509,7 @@ void GCOVProfiler::emitProfileNotes() {
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
Options.UseCfgChecksum,
- DefaultExitBlockBeforeBody));
+ Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
@@ -540,16 +531,18 @@ void GCOVProfiler::emitProfileNotes() {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+ if (SP != getDISubprogram(Loc.getScope()))
+ continue;
- GCOVLines &Lines = Block.getFile(SP.getFilename());
+ GCOVLines &Lines = Block.getFile(SP->getFilename());
Lines.addLine(Loc.getLine());
}
}
@@ -578,16 +571,10 @@ bool GCOVProfiler::emitProfileArcs() {
bool Result = false;
bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
- DIArray SPs = CU.getSubprograms();
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
if (!Result) Result = true;
@@ -607,7 +594,7 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr");
- CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+ CountersBySP.push_back(std::make_pair(Counters, SP));
UniqueVector<BasicBlock *> ComplexEdgePreds;
UniqueVector<BasicBlock *> ComplexEdgeSuccs;
@@ -632,7 +619,8 @@ bool GCOVProfiler::emitProfileArcs() {
SmallVector<Value *, 2> Idx;
Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
- Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(),
+ Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
@@ -666,8 +654,8 @@ bool GCOVProfiler::emitProfileArcs() {
// Build code to increment the counter.
InsertIndCounterIncrCode = true;
- Builder.CreateCall2(getIncrementIndirectCounterFunc(),
- EdgeState, CounterPtrArray);
+ Builder.CreateCall(getIncrementIndirectCounterFunc(),
+ {EdgeState, CounterPtrArray});
}
}
}
@@ -700,7 +688,7 @@ bool GCOVProfiler::emitProfileArcs() {
// Initialize the environment and register the local writeout and flush
// functions.
Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
- Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
Builder.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
@@ -859,34 +847,34 @@ Function *GCOVProfiler::insertCounterWriteout(
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
- Builder.CreateCall3(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateCall(StartFile,
+ {Builder.CreateGlobalStringPtr(FilenameGcda),
Builder.CreateGlobalStringPtr(ReversedVersion),
- Builder.getInt32(CfgChecksum));
+ Builder.getInt32(CfgChecksum)});
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
- DISubprogram SP(CountersBySP[j].second);
+ auto *SP = cast_or_null<DISubprogram>(CountersBySP[j].second);
uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
- Builder.CreateCall5(
- EmitFunction, Builder.getInt32(j),
- Options.FunctionNamesInData ?
- Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
- Constant::getNullValue(Builder.getInt8PtrTy()),
- Builder.getInt32(FuncChecksum),
- Builder.getInt8(Options.UseCfgChecksum),
- Builder.getInt32(CfgChecksum));
+ Builder.CreateCall(
+ EmitFunction,
+ {Builder.getInt32(j),
+ Options.FunctionNamesInData
+ ? Builder.CreateGlobalStringPtr(getFunctionName(SP))
+ : Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt32(FuncChecksum),
+ Builder.getInt8(Options.UseCfgChecksum),
+ Builder.getInt32(CfgChecksum)});
GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
- Builder.CreateCall2(EmitArcs,
- Builder.getInt32(Arcs),
- Builder.CreateConstGEP2_64(GV, 0, 0));
+ Builder.CreateCall(EmitArcs, {Builder.getInt32(Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0)});
}
- Builder.CreateCall(SummaryInfo);
- Builder.CreateCall(EndFile);
+ Builder.CreateCall(SummaryInfo, {});
+ Builder.CreateCall(EndFile, {});
}
}
@@ -926,7 +914,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = std::next(Fn->arg_begin());
Arg->setName("counters");
- Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
+ Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
Constant::getNullValue(
@@ -966,7 +954,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
assert(WriteoutF && "Need to create the writeout function first!");
IRBuilder<> Builder(Entry);
- Builder.CreateCall(WriteoutF);
+ Builder.CreateCall(WriteoutF, {});
// Zero out the counters.
for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 2a3d154..610ff52 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -97,7 +97,8 @@ private:
/// Add uses of our data variables and runtime hook.
void emitUses();
- /// Create a static initializer for our data, on platforms that need it.
+ /// Create a static initializer for our data, on platforms that need it,
+ /// and for any profile output file that was specified.
void emitInitialization();
};
@@ -202,6 +203,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ Function *Fn = Inc->getParent()->getParent();
// Create the counters variable.
auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
@@ -210,6 +212,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Counters->setVisibility(Name->getVisibility());
Counters->setSection(getCountersSection());
Counters->setAlignment(8);
+  // Place the counters in the same comdat group as their parent function.
+ // Otherwise, we may get multiple counters for the same function in certain
+ // cases.
+ Counters->setComdat(Fn->getComdat());
RegionCounters[Inc->getName()] = Counters;
@@ -234,6 +240,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Data->setVisibility(Name->getVisibility());
Data->setSection(getDataSection());
Data->setAlignment(8);
+ Data->setComdat(Fn->getComdat());
// Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
@@ -288,6 +295,7 @@ void InstrProfiling::emitRuntimeHook() {
User->addFnAttr(Attribute::NoInline);
if (Options.NoRedZone)
User->addFnAttr(Attribute::NoRedZone);
+ User->setVisibility(GlobalValue::HiddenVisibility);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
auto *Load = IRB.CreateLoad(Var);
@@ -327,8 +335,10 @@ void InstrProfiling::emitUses() {
}
void InstrProfiling::emitInitialization() {
+ std::string InstrProfileOutput = Options.InstrProfileOutput;
+
Constant *RegisterF = M->getFunction("__llvm_profile_register_functions");
- if (!RegisterF)
+ if (!RegisterF && InstrProfileOutput.empty())
return;
// Create the initialization function.
@@ -343,7 +353,24 @@ void InstrProfiling::emitInitialization() {
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
- IRB.CreateCall(RegisterF);
+ if (RegisterF)
+ IRB.CreateCall(RegisterF, {});
+ if (!InstrProfileOutput.empty()) {
+ auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false);
+ auto *SetNameF =
+ Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
+ "__llvm_profile_override_default_filename", M);
+
+ // Create variable for profile name
+ Constant *ProfileNameConst =
+ ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
+ GlobalVariable *ProfileName =
+ new GlobalVariable(*M, ProfileNameConst->getType(), true,
+ GlobalValue::PrivateLinkage, ProfileNameConst);
+
+ IRB.CreateCall(SetNameF, IRB.CreatePointerCast(ProfileName, Int8PtrTy));
+ }
IRB.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
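
The constructor emitted above is morally equivalent to the sketch below. Both runtime entry points are named in this hunk; the literal file name stands in for Options.InstrProfileOutput and is an assumption.

extern "C" void __llvm_profile_register_functions();
extern "C" void __llvm_profile_override_default_filename(const char *);

static void instrprof_init() {
  __llvm_profile_register_functions();                      // only if RegisterF exists
  __llvm_profile_override_default_filename("code.profraw"); // only if an output was set
}
// instrprof_init is then appended to llvm.global_ctors, as the last line does.
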
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index d7d752f..100824e 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -120,6 +120,7 @@ using namespace llvm;
#define DEBUG_TYPE "msan"
+static const unsigned kOriginSize = 4;
static const unsigned kMinOriginAlignment = 4;
static const unsigned kShadowTLSAlignment = 8;
@@ -190,6 +191,9 @@ static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
cl::desc("Insert checks for constant shadow values"),
cl::Hidden, cl::init(false));
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
+static const char *const kMsanInitName = "__msan_init";
+
namespace {
// Memory map parameters used in application-to-shadow address calculation.
@@ -209,7 +213,7 @@ struct PlatformMemoryMapParams {
};
// i386 Linux
-static const MemoryMapParams LinuxMemoryMapParams32 = {
+static const MemoryMapParams Linux_I386_MemoryMapParams = {
0x000080000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
@@ -217,15 +221,23 @@ static const MemoryMapParams LinuxMemoryMapParams32 = {
};
// x86_64 Linux
-static const MemoryMapParams LinuxMemoryMapParams64 = {
+static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
0x400000000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
0x200000000000, // OriginBase
};
+// mips64 Linux
+static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
+ 0x004000000000, // AndMask
+ 0, // XorMask (not used)
+ 0, // ShadowBase (not used)
+ 0x002000000000, // OriginBase
+};
+
// i386 FreeBSD
-static const MemoryMapParams FreeBSDMemoryMapParams32 = {
+static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
0x000180000000, // AndMask
0x000040000000, // XorMask
0x000020000000, // ShadowBase
@@ -233,21 +245,26 @@ static const MemoryMapParams FreeBSDMemoryMapParams32 = {
};
// x86_64 FreeBSD
-static const MemoryMapParams FreeBSDMemoryMapParams64 = {
+static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
0xc00000000000, // AndMask
0x200000000000, // XorMask
0x100000000000, // ShadowBase
0x380000000000, // OriginBase
};
-static const PlatformMemoryMapParams LinuxMemoryMapParams = {
- &LinuxMemoryMapParams32,
- &LinuxMemoryMapParams64,
+static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
+ &Linux_I386_MemoryMapParams,
+ &Linux_X86_64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
+ NULL,
+ &Linux_MIPS64_MemoryMapParams,
};
-static const PlatformMemoryMapParams FreeBSDMemoryMapParams = {
- &FreeBSDMemoryMapParams32,
- &FreeBSDMemoryMapParams64,
+static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
+ &FreeBSD_I386_MemoryMapParams,
+ &FreeBSD_X86_64_MemoryMapParams,
};
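
A sketch of how these parameters are consumed when mapping an application address to its shadow; this is a reading of the pass's address computation, with field names taken from the MemoryMapParams tables above:

#include <cstdint>

struct MapParams { uint64_t AndMask, XorMask, ShadowBase, OriginBase; };

uint64_t appToShadow(uint64_t Addr, const MapParams &P) {
  uint64_t S = Addr;
  if (P.AndMask) S &= ~P.AndMask; // clear the bit that tags application memory
  if (P.XorMask) S ^= P.XorMask;  // relocate by xor (the FreeBSD mappings use this)
  return S + P.ShadowBase;        // bias; zero on Linux per the tables
}
// Origin addresses use the same offset plus P.OriginBase, at 4-byte granularity.
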
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -260,7 +277,6 @@ class MemorySanitizer : public FunctionPass {
MemorySanitizer(int TrackOrigins = 0)
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- DL(nullptr),
WarningFn(nullptr) {}
const char *getPassName() const override { return "MemorySanitizer"; }
bool runOnFunction(Function &F) override;
@@ -273,7 +289,6 @@ class MemorySanitizer : public FunctionPass {
/// \brief Track origins (allocation points) of uninitialized values.
int TrackOrigins;
- const DataLayout *DL;
LLVMContext *C;
Type *IntptrTy;
Type *OriginTy;
@@ -320,9 +335,11 @@ class MemorySanitizer : public FunctionPass {
MDNode *OriginStoreWeights;
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+ Function *MsanCtorFunction;
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
+ friend struct VarArgMIPS64Helper;
};
} // namespace
@@ -434,32 +451,43 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
///
/// inserts a call to __msan_init to the module's constructor list.
bool MemorySanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ auto &DL = M.getDataLayout();
Triple TargetTriple(M.getTargetTriple());
- const PlatformMemoryMapParams *PlatformMapParams;
- if (TargetTriple.getOS() == Triple::FreeBSD)
- PlatformMapParams = &FreeBSDMemoryMapParams;
- else
- PlatformMapParams = &LinuxMemoryMapParams;
-
- C = &(M.getContext());
- unsigned PtrSize = DL->getPointerSizeInBits(/* AddressSpace */0);
- switch (PtrSize) {
- case 64:
- MapParams = PlatformMapParams->bits64;
+ switch (TargetTriple.getOS()) {
+ case Triple::FreeBSD:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits32;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
break;
- case 32:
- MapParams = PlatformMapParams->bits32;
+ case Triple::Linux:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = Linux_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = Linux_X86_MemoryMapParams.bits32;
+ break;
+ case Triple::mips64:
+ case Triple::mips64el:
+ MapParams = Linux_MIPS_MemoryMapParams.bits64;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
break;
default:
- report_fatal_error("unsupported pointer size");
- break;
+ report_fatal_error("unsupported operating system");
}
+ C = &(M.getContext());
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
OriginTy = IRB.getInt32Ty();
@@ -467,9 +495,12 @@ bool MemorySanitizer::doInitialization(Module &M) {
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
- // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
- appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
- "__msan_init", IRB.getVoidTy(), nullptr)), 0);
+ std::tie(MsanCtorFunction, std::ignore) =
+ createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
+
+ appendToGlobalCtors(M, MsanCtorFunction, 0);
if (TrackOrigins)
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
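
The ctor produced by createSanitizerCtorAndInitFunctions above is, in effect, the following; both names are the constants defined near the top of this file:

extern "C" void __msan_init();   // kMsanInitName

static void msan_module_ctor() { // kMsanModuleCtorName
  __msan_init();                 // no init arguments, per the empty lists above
}
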
@@ -555,8 +586,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- bool SanitizeFunction = F.getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::SanitizeMemory);
+ bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
InsertChecks = SanitizeFunction;
PropagateShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
@@ -575,39 +605,86 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return IRB.CreateCall(MS.MsanChainOriginFn, V);
}
+ Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ if (IntptrSize == kOriginSize) return Origin;
+ assert(IntptrSize == kOriginSize * 2);
+ Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
+ return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
+ }
+
+ /// \brief Fill memory range with the given origin value.
+ void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
+ unsigned Size, unsigned Alignment) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ assert(IntptrAlignment >= kMinOriginAlignment);
+ assert(IntptrSize >= kOriginSize);
+
+ unsigned Ofs = 0;
+ unsigned CurrentAlignment = Alignment;
+ if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
+ Value *IntptrOrigin = originToIntptr(IRB, Origin);
+ Value *IntptrOriginPtr =
+ IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
+ for (unsigned i = 0; i < Size / IntptrSize; ++i) {
+ Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
+ : IntptrOriginPtr;
+ IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+ Ofs += IntptrSize / kOriginSize;
+ CurrentAlignment = IntptrAlignment;
+ }
+ }
+
+ for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
+ Value *GEP =
+ i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
+ IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+ CurrentAlignment = kMinOriginAlignment;
+ }
+ }
+
void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
unsigned Alignment, bool AsCall) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
if (isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(updateOrigin(Origin, IRB),
- getOriginPtr(Addr, IRB, Alignment),
- OriginAlignment);
+ paintOrigin(IRB, updateOrigin(Origin, IRB),
+ getOriginPtr(Addr, IRB, Alignment), StoreSize,
+ OriginAlignment);
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- // TODO(eugenis): handle non-zero constant shadow by inserting an
- // unconditional check (can not simply fail compilation as this could
- // be in the dead code).
- if (!ClCheckConstantShadow)
- if (isa<Constant>(ConvertedShadow)) return;
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
+ paintOrigin(IRB, updateOrigin(Origin, IRB),
+ getOriginPtr(Addr, IRB, Alignment), StoreSize,
+ OriginAlignment);
+ return;
+ }
+
unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- IRB.CreateCall3(Fn, ConvertedShadow2,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- Origin);
+ IRB.CreateCall(Fn, {ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ Origin});
} else {
Value *Cmp = IRB.CreateICmpNE(
ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
IRBuilder<> IRBNew(CheckTerm);
- IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew),
- getOriginPtr(Addr, IRBNew, Alignment),
- OriginAlignment);
+ paintOrigin(IRBNew, updateOrigin(Origin, IRBNew),
+ getOriginPtr(Addr, IRBNew, Alignment), StoreSize,
+ OriginAlignment);
}
}
}
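
A worked scalar instance of originToIntptr above for the 64-bit case; the origin id is illustrative:

#include <cstdint>

// kOriginSize == 4 and IntptrSize == 8: duplicate the 4-byte origin id into
// both halves so paintOrigin can store origins one word at a time.
uint64_t originToIntptr64(uint32_t Origin) {
  uint64_t O = Origin;    // zero-extend, as IRB.CreateIntCast does above
  return O | (O << 32);   // 0xAABBCCDD -> 0xAABBCCDDAABBCCDD
}
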
@@ -643,19 +720,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- // See the comment in storeOrigin().
- if (!ClCheckConstantShadow)
- if (isa<Constant>(ConvertedShadow)) return;
- unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
+ if (MS.TrackOrigins) {
+ IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
+ MS.OriginTLS);
+ }
+ IRB.CreateCall(MS.WarningFn, {});
+ IRB.CreateCall(MS.EmptyAsm, {});
+ // FIXME: Insert UnreachableInst if !ClKeepGoing?
+ // This may invalidate some of the following checks and needs to be done
+ // at the very end.
+ }
+ return;
+ }
+
+ const DataLayout &DL = OrigIns->getModule()->getDataLayout();
+
+ unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin
+ IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
? Origin
- : (Value *)IRB.getInt32(0));
+ : (Value *)IRB.getInt32(0)});
} else {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
@@ -668,8 +760,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
MS.OriginTLS);
}
- IRB.CreateCall(MS.WarningFn);
- IRB.CreateCall(MS.EmptyAsm);
+ IRB.CreateCall(MS.WarningFn, {});
+ IRB.CreateCall(MS.EmptyAsm, {});
DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
}
}
@@ -687,7 +779,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
- if (!MS.DL) return false;
// In the presence of unreachable blocks, we may see Phi nodes with
// incoming nodes from such blocks. Since InstVisitor skips unreachable
@@ -743,8 +834,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// This may return weird-sized types like i1.
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
return IT;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
- uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType());
+ uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
@@ -760,7 +852,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
- uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -953,14 +1045,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *F = A->getParent();
IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
unsigned ArgOffset = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (auto &FArg : F->args()) {
if (!FArg.getType()->isSized()) {
DEBUG(dbgs() << "Arg is not sized\n");
continue;
}
- unsigned Size = FArg.hasByValAttr()
- ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType())
- : MS.DL->getTypeAllocSize(FArg.getType());
+ unsigned Size =
+ FArg.hasByValAttr()
+ ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
+ : DL.getTypeAllocSize(FArg.getType());
if (A == &FArg) {
bool Overflow = ArgOffset + Size > kParamTLSSize;
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
@@ -971,7 +1065,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned ArgAlign = FArg.getParamAlignment();
if (ArgAlign == 0) {
Type *EltType = A->getType()->getPointerElementType();
- ArgAlign = MS.DL->getABITypeAlignment(EltType);
+ ArgAlign = DL.getABITypeAlignment(EltType);
}
if (Overflow) {
// ParamTLS overflow.
@@ -1708,11 +1802,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Similar situation exists for memcpy and memset.
void visitMemMoveInst(MemMoveInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemmoveFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemmoveFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -1722,22 +1816,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// alignment.
void visitMemCpyInst(MemCpyInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemcpyFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemcpyFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
// Same as memcpy.
void visitMemSetInst(MemSetInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemsetFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemsetFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -2018,8 +2112,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
Value *V1 = I.getOperand(0);
Value *V2 = I.getOperand(1);
- Value *Shift = IRB.CreateCall2(I.getCalledValue(),
- IRB.CreateBitCast(S1, V1->getType()), V2);
+ Value *Shift = IRB.CreateCall(I.getCalledValue(),
+ {IRB.CreateBitCast(S1, V1->getType()), V2});
Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
setShadow(&I, IRB.CreateOr(Shift, S2Conv));
setOriginForNaryOp(I);
@@ -2099,7 +2193,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *ShadowFn = Intrinsic::getDeclaration(
F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
- Value *S = IRB.CreateCall2(ShadowFn, S1_ext, S2_ext, "_msprop_vector_pack");
+ Value *S =
+ IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
@@ -2178,15 +2273,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
break;
- case llvm::Intrinsic::x86_avx512_psll_dq:
- case llvm::Intrinsic::x86_avx512_psrl_dq:
case llvm::Intrinsic::x86_avx2_psll_w:
case llvm::Intrinsic::x86_avx2_psll_d:
case llvm::Intrinsic::x86_avx2_psll_q:
case llvm::Intrinsic::x86_avx2_pslli_w:
case llvm::Intrinsic::x86_avx2_pslli_d:
case llvm::Intrinsic::x86_avx2_pslli_q:
- case llvm::Intrinsic::x86_avx2_psll_dq:
case llvm::Intrinsic::x86_avx2_psrl_w:
case llvm::Intrinsic::x86_avx2_psrl_d:
case llvm::Intrinsic::x86_avx2_psrl_q:
@@ -2197,14 +2289,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_avx2_psrli_q:
case llvm::Intrinsic::x86_avx2_psrai_w:
case llvm::Intrinsic::x86_avx2_psrai_d:
- case llvm::Intrinsic::x86_avx2_psrl_dq:
case llvm::Intrinsic::x86_sse2_psll_w:
case llvm::Intrinsic::x86_sse2_psll_d:
case llvm::Intrinsic::x86_sse2_psll_q:
case llvm::Intrinsic::x86_sse2_pslli_w:
case llvm::Intrinsic::x86_sse2_pslli_d:
case llvm::Intrinsic::x86_sse2_pslli_q:
- case llvm::Intrinsic::x86_sse2_psll_dq:
case llvm::Intrinsic::x86_sse2_psrl_w:
case llvm::Intrinsic::x86_sse2_psrl_d:
case llvm::Intrinsic::x86_sse2_psrl_q:
@@ -2215,7 +2305,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse2_psrli_q:
case llvm::Intrinsic::x86_sse2_psrai_w:
case llvm::Intrinsic::x86_sse2_psrai_d:
- case llvm::Intrinsic::x86_sse2_psrl_dq:
case llvm::Intrinsic::x86_mmx_psll_w:
case llvm::Intrinsic::x86_mmx_psll_d:
case llvm::Intrinsic::x86_mmx_psll_q:
@@ -2247,14 +2336,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorShiftIntrinsic(I, /* Variable */ true);
break;
- // Byte shifts are not implemented.
- // case llvm::Intrinsic::x86_avx512_psll_dq_bs:
- // case llvm::Intrinsic::x86_avx512_psrl_dq_bs:
- // case llvm::Intrinsic::x86_avx2_psll_dq_bs:
- // case llvm::Intrinsic::x86_avx2_psrl_dq_bs:
- // case llvm::Intrinsic::x86_sse2_psll_dq_bs:
- // case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
-
case llvm::Intrinsic::x86_sse2_packsswb_128:
case llvm::Intrinsic::x86_sse2_packssdw_128:
case llvm::Intrinsic::x86_sse2_packuswb_128:
@@ -2356,10 +2437,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
" Shadow: " << *ArgShadow << "\n");
bool ArgIsInitialized = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
- Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType());
+ Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
if (ArgOffset + Size > kParamTLSSize) break;
unsigned ParamAlignment = CS.getParamAlignment(i + 1);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
@@ -2367,7 +2449,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
Size, Alignment);
} else {
- Size = MS.DL->getTypeAllocSize(A->getType());
+ Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
@@ -2460,11 +2542,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
IRBuilder<> IRB(I.getNextNode());
- uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType());
if (PoisonStack && ClPoisonStackWithCall) {
- IRB.CreateCall2(MS.MsanPoisonStackFn,
- IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
- ConstantInt::get(MS.IntptrTy, Size));
+ IRB.CreateCall(MS.MsanPoisonStackFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size)});
} else {
Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
@@ -2484,11 +2567,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
createPrivateNonConstGlobalForString(*F.getParent(),
StackDescription.str());
- IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn,
- IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size),
IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(&F, MS.IntptrTy));
+ IRB.CreatePointerCast(&F, MS.IntptrTy)});
}
}
@@ -2652,6 +2735,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
unsigned GpOffset = 0;
unsigned FpOffset = AMD64GpEndOffset;
unsigned OverflowOffset = AMD64FpEndOffset;
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
ArgIt != End; ++ArgIt) {
Value *A = *ArgIt;
@@ -2661,7 +2745,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
// ByVal arguments always go to the overflow area.
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
- uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy);
+ uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
@@ -2683,7 +2767,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
FpOffset += 16;
break;
case AK_Memory:
- uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
}
@@ -2768,12 +2852,114 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+ AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
};
+/// \brief MIPS64-specific implementation of VarArgHelper.
+struct VarArgMIPS64Helper : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgSize(nullptr) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned VAArgOffset = 0;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ Value *Base;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+#if defined(__MIPSEB__) || defined(MIPSEB)
+      // Adjust the shadow for arguments with size < 8 to match the placement
+      // of bits in a big-endian system.
+ if (ArgSize < 8)
+ VAArgOffset += (8 - ArgSize);
+#endif
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
+ VAArgOffset += ArgSize;
+ VAArgOffset = RoundUpToAlignment(VAArgOffset, 8);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+
+ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
+    // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+    // new class member; it holds the total size of all VarArgs.
+ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// \brief Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */8, /* alignment */8, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */8, /* alignment */8, false);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+ VAArgSize);
+
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ Type::getInt64PtrTy(*MS.C));
+ Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr =
+ MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8);
+ }
+ }
+};
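
To make the big-endian adjustment above concrete, here is a minimal stand-alone sketch of how the shadow offset advances for a vararg sequence; the sizes array stands in for getTypeAllocSize() results, and the arithmetic mirrors the loop in visitCallSite:

    #include <cstdio>

    int main() {
      unsigned sizes[] = {4, 8, 2};   // e.g. i32, i64, i16 varargs
      unsigned Off = 0;
      for (unsigned ArgSize : sizes) {
        // Big-endian: a narrow value sits in the high-address bytes of its
        // 8-byte slot, so shift the shadow pointer past the unused low bytes.
        unsigned Shadow = Off + (ArgSize < 8 ? 8 - ArgSize : 0);
        std::printf("arg of size %u -> shadow offset %u\n", ArgSize, Shadow);
        Off = (Shadow + ArgSize + 7) & ~7u;   // advance and round up to 8
      }
      return 0;
    }

Running it yields shadow offsets 4, 8, and 22, matching what the instrumented code would compute on a big-endian MIPS64 target.
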
+
/// \brief A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -2795,6 +2981,9 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
llvm::Triple TargetTriple(Func.getParent()->getTargetTriple());
if (TargetTriple.getArch() == llvm::Triple::x86_64)
return new VarArgAMD64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
+ TargetTriple.getArch() == llvm::Triple::mips64el)
+ return new VarArgMIPS64Helper(Func, Msan, Visitor);
else
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
@@ -2802,6 +2991,8 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
} // namespace
bool MemorySanitizer::runOnFunction(Function &F) {
+ if (&F == MsanCtorFunction)
+ return false;
MemorySanitizerVisitor Visitor(F, *this);
// Clear out readonly/readnone attributes.
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index c048a99..f6ae0c2 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -11,19 +11,17 @@
// and potentially with other Sanitizers.
//
// We create a Guard variable with the same linkage
-// as the function and inject this code into the entry block (CoverageLevel=1)
-// or all blocks (CoverageLevel>=2):
+// as the function and inject this code into the entry block (SCK_Function)
+// or all blocks (SCK_BB):
// if (Guard < 0) {
// __sanitizer_cov(&Guard);
// }
// The accesses to Guard are atomic. The rest of the logic is
// in __sanitizer_cov (it's fine to call it more than once).
//
-// With CoverageLevel>=3 we also split critical edges this effectively
+// With SCK_Edge we also split critical edges, thus effectively
// instrumenting all edges.
//
-// CoverageLevel>=4 adds indirect call profiling implemented as a function call.
-//
// This coverage implementation provides very limited data:
// it only tells if a given function (block) was ever executed. No counters.
// But for many use cases this is what we need, and the added slowdown is small.
@@ -55,11 +53,13 @@ using namespace llvm;
static const char *const kSanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const kSanCovName = "__sanitizer_cov";
+static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check";
static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
+static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp";
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
-static const uint64_t kSanCtorAndDtorPriority = 1;
+static const uint64_t kSanCtorAndDtorPriority = 2;
static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
@@ -67,11 +67,11 @@ static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
"4: above plus indirect calls"),
cl::Hidden, cl::init(0));
-static cl::opt<int> ClCoverageBlockThreshold(
+static cl::opt<unsigned> ClCoverageBlockThreshold(
"sanitizer-coverage-block-threshold",
- cl::desc("Add coverage instrumentation only to the entry block if there "
- "are more than this number of blocks."),
- cl::Hidden, cl::init(1500));
+ cl::desc("Use a callback with a guard check inside it if there are"
+ " more than this number of blocks."),
+ cl::Hidden, cl::init(500));
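
In other words, below this threshold each block gets the inline guard test from the file header comment, while above it the test moves into the runtime callee. A compilable sketch of the two shapes, with stub bodies standing in for the real runtime hooks (the demo_ names are illustrative, not from the patch):

    #include <cstdio>

    static void demo_sanitizer_cov(int *guard) {
      std::printf("hit %p\n", (void *)guard);
    }
    static void demo_sanitizer_cov_with_check(int *guard) {
      if (*guard < 0)                // guard test lives inside the callee
        demo_sanitizer_cov(guard);
    }

    int main() {
      int guard = -1;
      if (guard < 0)                 // small function: test inlined per block
        demo_sanitizer_cov(&guard);
      demo_sanitizer_cov_with_check(&guard);  // big function: one plain call
      return 0;
    }
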
static cl::opt<bool>
ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
@@ -79,13 +79,63 @@ static cl::opt<bool>
"callbacks at every basic block"),
cl::Hidden, cl::init(false));
+static cl::opt<bool>
+ ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares",
+ cl::desc("Experimental tracing of CMP and similar "
+ "instructions"),
+ cl::Hidden, cl::init(false));
+
+// Experimental 8-bit counters used as an additional search heuristic during
+// coverage-guided fuzzing.
+// The counters are not thread-friendly:
+// - contention on these counters may cause significant slowdown;
+// - the counter updates are racy and the results may be inaccurate.
+// They are also inaccurate due to 8-bit integer overflow.
+static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters",
+ cl::desc("Experimental 8-bit counters"),
+ cl::Hidden, cl::init(false));
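
A minimal illustration of the per-block counter update this option injects (a plain load/add/store, so concurrent increments can be lost and the value wraps at 256, exactly the caveats listed above; names are illustrative):

    static unsigned char demo_counters[1024];  // stands in for the counter array

    void demo_block_hit(unsigned idx) {
      unsigned char v = demo_counters[idx];         // plain load, no atomics
      demo_counters[idx] = (unsigned char)(v + 1);  // racy store; wraps at 256
    }
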
+
namespace {
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+ SanitizerCoverageOptions Res;
+ switch (LegacyCoverageLevel) {
+ case 0:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+ break;
+ case 1:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+ break;
+ case 2:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+ break;
+ case 3:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ break;
+ case 4:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ Res.IndirectCalls = true;
+ break;
+ }
+ return Res;
+}
+
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+ // Sets CoverageType and IndirectCalls.
+ SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+ Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
+ Options.IndirectCalls |= CLOpts.IndirectCalls;
+ Options.TraceBB |= ClExperimentalTracing;
+ Options.TraceCmp |= ClExperimentalCMPTracing;
+ Options.Use8bitCounters |= ClUse8bitCounters;
+ return Options;
+}
+
class SanitizerCoverageModule : public ModulePass {
public:
- SanitizerCoverageModule(int CoverageLevel = 0)
- : ModulePass(ID),
- CoverageLevel(std::max(CoverageLevel, (int)ClCoverageLevel)) {}
+ SanitizerCoverageModule(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+ : ModulePass(ID), Options(OverrideFromCL(Options)) {}
bool runOnModule(Module &M) override;
bool runOnFunction(Function &F);
static char ID; // Pass identification, replacement for typeid
@@ -93,104 +143,135 @@ class SanitizerCoverageModule : public ModulePass {
return "SanitizerCoverageModule";
}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- }
-
private:
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
- bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls);
- void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
+ void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void SetNoSanitizeMetadata(Instruction *I);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
+ unsigned NumberOfInstrumentedBlocks() {
+ return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses();
+ }
Function *SanCovFunction;
+ Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction;
- Function *SanCovModuleInit;
Function *SanCovTraceEnter, *SanCovTraceBB;
+ Function *SanCovTraceCmpFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy;
+ Type *IntptrTy, *Int64Ty;
LLVMContext *C;
+ const DataLayout *DL;
GlobalVariable *GuardArray;
+ GlobalVariable *EightBitCounterArray;
- int CoverageLevel;
+ SanitizerCoverageOptions Options;
};
} // namespace
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "SanitizerCoverage interface function redefined: "
- << *FuncOrBitcast;
- report_fatal_error(Err);
-}
-
bool SanitizerCoverageModule::runOnModule(Module &M) {
- if (!CoverageLevel) return false;
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
+ return false;
C = &(M.getContext());
- DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>();
- IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits());
+ DL = &M.getDataLayout();
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
+ Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int64Ty = IRB.getInt64Ty();
- Function *CtorFunc =
- Function::Create(FunctionType::get(VoidTy, false),
- GlobalValue::InternalLinkage, kSanCovModuleCtorName, &M);
- ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc));
- appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
-
- SanCovFunction = checkInterfaceFunction(
+ SanCovFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr));
- SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
- SanCovModuleInit = checkInterfaceFunction(
- M.getOrInsertFunction(kSanCovModuleInitName, Type::getVoidTy(*C),
- Int32PtrTy, IntptrTy, nullptr));
- SanCovModuleInit->setLinkage(Function::ExternalLinkage);
+ SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr));
+ SanCovIndirCallFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
+ SanCovTraceCmpFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- if (ClExperimentalTracing) {
- SanCovTraceEnter = checkInterfaceFunction(
+ if (Options.TraceBB) {
+ SanCovTraceEnter = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
- SanCovTraceBB = checkInterfaceFunction(
+ SanCovTraceBB = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
}
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
Type *Int32Ty = IRB.getInt32Ty();
+ Type *Int8Ty = IRB.getInt8Ty();
+
GuardArray =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, "__sancov_gen_cov_tmp");
+ if (Options.Use8bitCounters)
+ EightBitCounterArray =
+ new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, "__sancov_gen_cov_tmp");
for (auto &F : M)
runOnFunction(F);
+ auto N = NumberOfInstrumentedBlocks();
+
// Now we know how many elements we need. Create an array of guards
// with one extra element at the beginning for the size.
- Type *Int32ArrayNTy =
- ArrayType::get(Int32Ty, SanCovFunction->getNumUses() + 1);
+ Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
GlobalVariable *RealGuardArray = new GlobalVariable(
M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
+
// Replace the dummy array with the real one.
GuardArray->replaceAllUsesWith(
IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
GuardArray->eraseFromParent();
- // Call __sanitizer_cov_module_init
- IRB.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator());
- IRB.CreateCall2(SanCovModuleInit,
- IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()));
+ GlobalVariable *RealEightBitCounterArray;
+ if (Options.Use8bitCounters) {
+    // Make sure the array is 16-byte aligned.
+ static const int kCounterAlignment = 16;
+ Type *Int8ArrayNTy =
+ ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment));
+ RealEightBitCounterArray = new GlobalVariable(
+ M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter");
+ RealEightBitCounterArray->setAlignment(kCounterAlignment);
+ EightBitCounterArray->replaceAllUsesWith(
+ IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy));
+ EightBitCounterArray->eraseFromParent();
+ }
+
+  // Create a variable for the module (compilation unit) name.
+ Constant *ModNameStrConst =
+ ConstantDataArray::getString(M.getContext(), M.getName(), true);
+ GlobalVariable *ModuleName =
+ new GlobalVariable(M, ModNameStrConst->getType(), true,
+ GlobalValue::PrivateLinkage, ModNameStrConst);
+
+ Function *CtorFunc;
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kSanCovModuleCtorName, kSanCovModuleInitName,
+ {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
+ {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
+ ConstantInt::get(IntptrTy, N),
+ Options.Use8bitCounters
+ ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)
+ : Constant::getNullValue(Int8PtrTy),
+ IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
+
+ appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
+
return true;
}
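
For readers unfamiliar with createSanitizerCtorAndInitFunctions: the synthesized ctor amounts to a single call into the runtime with the four arguments built above. A C++-level sketch (the real ctor is generated IR named sancov.module_ctor; the demo_ names are stand-ins):

    #include <cstddef>

    // Runtime entry point, matching the {Int32PtrTy, IntptrTy, Int8PtrTy,
    // Int8PtrTy} signature requested above.
    extern "C" void __sanitizer_cov_module_init(int *guards, std::size_t n,
                                                unsigned char *counters,
                                                const char *module_name);

    static int demo_guards[17];              // N + 1 guard slots
    static unsigned char demo_counters[16];  // only with 8-bit counters enabled

    static void demo_module_ctor() {
      __sanitizer_cov_module_init(demo_guards, 16,
                                  /*counters=*/demo_counters,  // or nullptr
                                  /*module_name=*/"demo.cpp");
    }
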
@@ -198,38 +279,44 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty()) return false;
if (F.getName().find(".module_ctor") != std::string::npos)
return false; // Should not instrument sanitizer init functions.
- if (CoverageLevel >= 3)
- SplitAllCriticalEdges(F, this);
+ if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+ SplitAllCriticalEdges(F);
SmallVector<Instruction*, 8> IndirCalls;
SmallVector<BasicBlock*, 16> AllBlocks;
+ SmallVector<Instruction*, 8> CmpTraceTargets;
for (auto &BB : F) {
AllBlocks.push_back(&BB);
- if (CoverageLevel >= 4)
- for (auto &Inst : BB) {
+ for (auto &Inst : BB) {
+ if (Options.IndirectCalls) {
CallSite CS(&Inst);
if (CS && !CS.getCalledFunction())
IndirCalls.push_back(&Inst);
}
+ if (Options.TraceCmp && isa<ICmpInst>(&Inst))
+ CmpTraceTargets.push_back(&Inst);
+ }
}
- InjectCoverage(F, AllBlocks, IndirCalls);
+ InjectCoverage(F, AllBlocks);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ InjectTraceForCmp(F, CmpTraceTargets);
return true;
}
-bool
-SanitizerCoverageModule::InjectCoverage(Function &F,
- ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls) {
- if (!CoverageLevel) return false;
-
- if (CoverageLevel == 1 ||
- (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
- InjectCoverageAtBlock(F, F.getEntryBlock());
- } else {
+bool SanitizerCoverageModule::InjectCoverage(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks) {
+ switch (Options.CoverageType) {
+ case SanitizerCoverageOptions::SCK_None:
+ return false;
+ case SanitizerCoverageOptions::SCK_Function:
+ InjectCoverageAtBlock(F, F.getEntryBlock(), false);
+ return true;
+ default: {
+ bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size();
for (auto BB : AllBlocks)
- InjectCoverageAtBlock(F, *BB);
+ InjectCoverageAtBlock(F, *BB, UseCalls);
+ return true;
+ }
}
- InjectCoverageForIndirectCalls(F, IndirCalls);
- return true;
}
// On every indirect call we call a run-time function
@@ -249,19 +336,44 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
IRBuilder<> IRB(I);
CallSite CS(I);
Value *Callee = CS.getCalledValue();
- if (dyn_cast<InlineAsm>(Callee)) continue;
+ if (isa<InlineAsm>(Callee)) continue;
GlobalVariable *CalleeCache = new GlobalVariable(
*F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
CalleeCache->setAlignment(kCacheAlignment);
- IRB.CreateCall2(SanCovIndirCallFunction,
- IRB.CreatePointerCast(Callee, IntptrTy),
- IRB.CreatePointerCast(CalleeCache, IntptrTy));
+ IRB.CreateCall(SanCovIndirCallFunction,
+ {IRB.CreatePointerCast(Callee, IntptrTy),
+ IRB.CreatePointerCast(CalleeCache, IntptrTy)});
+ }
+}
+
+void SanitizerCoverageModule::InjectTraceForCmp(
+ Function &F, ArrayRef<Instruction *> CmpTraceTargets) {
+ for (auto I : CmpTraceTargets) {
+ if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+ IRBuilder<> IRB(ICMP);
+ Value *A0 = ICMP->getOperand(0);
+ Value *A1 = ICMP->getOperand(1);
+ if (!A0->getType()->isIntegerTy()) continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+ // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+ IRB.CreateCall(
+ SanCovTraceCmpFunction,
+ {ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()),
+ IRB.CreateIntCast(A0, Int64Ty, true),
+ IRB.CreateIntCast(A1, Int64Ty, true)});
+ }
}
}
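
Since the first argument packs two values, a consumer has to unpack it. Here is a sketch of how a runtime implementation of __sanitizer_cov_trace_cmp could decode it; the decode is an assumption that simply mirrors the encoding shown in the comment above:

    #include <cstdint>
    #include <cstdio>

    extern "C" void demo_trace_cmp(uint64_t sized_pred, uint64_t a0, uint64_t a1) {
      uint64_t type_size = sized_pred >> 32;         // bit width of the compared type
      uint64_t predicate = sized_pred & 0xffffffffu; // llvm::CmpInst predicate value
      std::printf("cmp%llu pred=%llu: %llu vs %llu\n",
                  (unsigned long long)type_size, (unsigned long long)predicate,
                  (unsigned long long)a0, (unsigned long long)a1);
    }
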
-void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
- BasicBlock &BB) {
+void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(
+ I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+}
+
+void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+ bool UseCalls) {
BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
// Skip static allocas at the top of the entry block so they don't become
// dynamic when we split the block. If we used our optimized stack layout,
@@ -273,31 +385,48 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
}
bool IsEntryBB = &BB == &F.getEntryBlock();
- DebugLoc EntryLoc =
- IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc();
+ DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc()
+ ? IP->getDebugLoc().getFnDebugLoc()
+ : IP->getDebugLoc();
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
- ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4));
+ ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- LoadInst *Load = IRB.CreateLoad(GuardP);
- Load->setAtomic(Monotonic);
- Load->setAlignment(4);
- Load->setMetadata(F.getParent()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
- Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
- IRB.CreateCall(SanCovFunction, GuardP);
- IRB.CreateCall(EmptyAsm); // Avoids callback merge.
+ if (UseCalls) {
+ IRB.CreateCall(SanCovWithCheckFunction, GuardP);
+ } else {
+ LoadInst *Load = IRB.CreateLoad(GuardP);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(4);
+ SetNoSanitizeMetadata(Load);
+ Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(
+ Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
+ IRB.CreateCall(SanCovFunction, GuardP);
+ IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ }
+
+ if (Options.Use8bitCounters) {
+ IRB.SetInsertPoint(IP);
+ Value *P = IRB.CreateAdd(
+ IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
+ ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
+ P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
+ LoadInst *LI = IRB.CreateLoad(P);
+ Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
+ StoreInst *SI = IRB.CreateStore(Inc, P);
+ SetNoSanitizeMetadata(LI);
+ SetNoSanitizeMetadata(SI);
+ }
- if (ClExperimentalTracing) {
+ if (Options.TraceBB) {
// Experimental support for tracing.
// Insert a callback with the same guard variable as used for coverage.
IRB.SetInsertPoint(IP);
@@ -309,6 +438,7 @@ char SanitizerCoverageModule::ID = 0;
INITIALIZE_PASS(SanitizerCoverageModule, "sancov",
"SanitizerCoverage: TODO."
"ModulePass", false, false)
-ModulePass *llvm::createSanitizerCoverageModulePass(int CoverageLevel) {
- return new SanitizerCoverageModule(CoverageLevel);
+ModulePass *llvm::createSanitizerCoverageModulePass(
+ const SanitizerCoverageOptions &Options) {
+ return new SanitizerCoverageModule(Options);
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 1b86ae5..1a46bbb 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -25,6 +25,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -68,12 +70,16 @@ STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
STATISTIC(NumOmittedReadsFromConstantGlobals,
"Number of reads from constant globals");
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
+STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing");
+
+static const char *const kTsanModuleCtorName = "tsan.module_ctor";
+static const char *const kTsanInitName = "__tsan_init";
namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {}
+ ThreadSanitizer() : FunctionPass(ID) {}
const char *getPassName() const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
@@ -81,15 +87,15 @@ struct ThreadSanitizer : public FunctionPass {
private:
void initializeCallbacks(Module &M);
- bool instrumentLoadOrStore(Instruction *I);
- bool instrumentAtomic(Instruction *I);
+ bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+ bool instrumentAtomic(Instruction *I, const DataLayout &DL);
bool instrumentMemIntrinsic(Instruction *I);
- void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All);
+ void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
+ SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL);
bool addrPointsToConstantData(Value *Addr);
- int getMemoryAccessFuncIndex(Value *Addr);
+ int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
- const DataLayout *DL;
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
@@ -99,6 +105,8 @@ struct ThreadSanitizer : public FunctionPass {
static const size_t kNumberOfAccessSizes = 5;
Function *TsanRead[kNumberOfAccessSizes];
Function *TsanWrite[kNumberOfAccessSizes];
+ Function *TsanUnalignedRead[kNumberOfAccessSizes];
+ Function *TsanUnalignedWrite[kNumberOfAccessSizes];
Function *TsanAtomicLoad[kNumberOfAccessSizes];
Function *TsanAtomicStore[kNumberOfAccessSizes];
Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
@@ -108,6 +116,7 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanVptrUpdate;
Function *TsanVptrLoad;
Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+ Function *TsanCtorFunction;
};
} // namespace
@@ -124,44 +133,48 @@ FunctionPass *llvm::createThreadSanitizerPass() {
return new ThreadSanitizer();
}
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- FuncOrBitcast->dump();
- report_fatal_error("ThreadSanitizer interface function redefined");
-}
-
void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
// Initialize the callbacks.
- TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_exit", IRB.getVoidTy(), nullptr));
+ TsanFuncExit = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const size_t ByteSize = 1 << i;
const size_t BitSize = ByteSize * 8;
SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
- TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
- TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ SmallString<64> UnalignedReadName("__tsan_unaligned_read" +
+ itostr(ByteSize));
+ TsanUnalignedRead[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+
+ SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +
+ itostr(ByteSize));
+ TsanUnalignedWrite[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
"_load");
- TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
+ TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
"_store");
- TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
- nullptr));
+ TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -184,48 +197,44 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
- TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
- RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
+ TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
- TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
}
- TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), nullptr));
- TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanVptrUpdate = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr));
+ TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr));
- TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr));
- MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
- MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, nullptr));
- MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, nullptr));
+ MemmoveFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemcpyFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemsetFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy, nullptr));
}
bool ThreadSanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
+ IntptrTy = DL.getIntPtrType(M.getContext());
+ std::tie(TsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
- // Always insert a call to __tsan_init into the module's CTORs.
- IRBuilder<> IRB(M.getContext());
- IntptrTy = IRB.getIntPtrTy(DL);
- Value *TsanInit = M.getOrInsertFunction("__tsan_init",
- IRB.getVoidTy(), nullptr);
- appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+ appendToGlobalCtors(M, TsanCtorFunction, 0);
return true;
}
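
The TSan ctor is simpler than the sancov one, since InitArgTypes and InitArgs are both empty: the generated tsan.module_ctor reduces to a parameterless runtime call, conceptually:

    extern "C" void __tsan_init(void);
    static void demo_tsan_module_ctor() { __tsan_init(); }  // sketch of the generated IR
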
@@ -260,6 +269,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// Instrumenting some of the accesses may be proven redundant.
// Currently handled:
// - read-before-write (within same BB, no calls between)
+// - non-captured variables
//
// We do not handle some of the patterns that should not survive
// after the classic compiler optimizations.
@@ -269,8 +279,8 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
- SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All) {
+ SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL) {
SmallSet<Value*, 8> WriteTargets;
// Iterate from the end.
for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
@@ -291,6 +301,17 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
continue;
}
}
+ Value *Addr = isa<StoreInst>(*I)
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ !PointerMayBeCaptured(Addr, true, true)) {
+ // The variable is addressable but not captured, so it cannot be
+ // referenced from a different thread and participate in a data race
+ // (see llvm/Analysis/CaptureTracking.h for details).
+ NumOmittedNonCaptured++;
+ continue;
+ }
All.push_back(I);
}
Local.clear();
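
A concrete case this check lets TSan skip (illustrative source, not from the patch): an address-taken local whose pointer never escapes the function cannot be touched by another thread, so accesses through it need no race instrumentation.

    int demo_uninstrumented() {
      int local = 0;    // an alloca; addressable...
      int *p = &local;  // ...but the pointer never escapes this function
      *p = 42;          // GetUnderlyingObject -> alloca, not captured: omitted
      return *p;
    }
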
@@ -311,7 +332,10 @@ static bool isAtomic(Instruction *I) {
}
bool ThreadSanitizer::runOnFunction(Function &F) {
- if (!DL) return false;
+  // This is required to prevent instrumenting the call to __tsan_init from
+  // within the module constructor.
+ if (&F == TsanCtorFunction)
+ return false;
initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
@@ -321,6 +345,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool Res = false;
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@@ -335,10 +360,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
+ DL);
}
}
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL);
}
// We have collected all loads and stores.
@@ -348,14 +374,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// Instrument memory accesses only if we want to report bugs in the function.
if (ClInstrumentMemoryAccesses && SanitizeFunction)
for (auto Inst : AllLoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst);
+ Res |= instrumentLoadOrStore(Inst, DL);
}
// Instrument atomic memory accesses in any case (they can be used to
// implement synchronization).
if (ClInstrumentAtomics)
for (auto Inst : AtomicAccesses) {
- Res |= instrumentAtomic(Inst);
+ Res |= instrumentAtomic(Inst, DL);
}
if (ClInstrumentMemIntrinsics && SanitizeFunction)
@@ -372,20 +398,21 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
IRB.CreateCall(TsanFuncEntry, ReturnAddress);
for (auto RetInst : RetVec) {
IRBuilder<> IRBRet(RetInst);
- IRBRet.CreateCall(TsanFuncExit);
+ IRBRet.CreateCall(TsanFuncExit, {});
}
Res = true;
}
return Res;
}
-bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
+ const DataLayout &DL) {
IRBuilder<> IRB(I);
bool IsWrite = isa<StoreInst>(*I);
Value *Addr = IsWrite
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
if (IsWrite && isVtableAccess(I)) {
@@ -400,9 +427,9 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
if (StoredValue->getType()->isIntegerTy())
StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
// Call TsanVptrUpdate.
- IRB.CreateCall2(TsanVptrUpdate,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
+ IRB.CreateCall(TsanVptrUpdate,
+ {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())});
NumInstrumentedVtableWrites++;
return true;
}
@@ -412,7 +439,16 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
NumInstrumentedVtableReads++;
return true;
}
- Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ const unsigned Alignment = IsWrite
+ ? cast<StoreInst>(I)->getAlignment()
+ : cast<LoadInst>(I)->getAlignment();
+ Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+ const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+ Value *OnAccessFunc = nullptr;
+ if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
+ OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ else
+ OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
if (IsWrite) NumInstrumentedWrites++;
else NumInstrumentedReads++;
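
To summarize the dispatch rule just added: an access is routed to the unaligned callback only when its alignment is nonzero, below 8, and not a multiple of the access size. A stand-alone sketch of that predicate (the function and names are illustrative):

    const char *demo_tsan_access_fn(bool is_write, unsigned type_bits,
                                    unsigned align) {
      unsigned bytes = type_bits / 8;
      bool aligned = align == 0 || align >= 8 || (align % bytes) == 0;
      if (aligned)
        return is_write ? "__tsan_writeN" : "__tsan_readN";
      return is_write ? "__tsan_unaligned_writeN" : "__tsan_unaligned_readN";
    }
    // e.g. demo_tsan_access_fn(false, 32, 2) -> "__tsan_unaligned_readN" (N == 4)
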
@@ -445,16 +481,18 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
IRBuilder<> IRB(I);
if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
- IRB.CreateCall3(MemsetFn,
- IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ IRB.CreateCall(
+ MemsetFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
I->eraseFromParent();
} else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
- IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
- IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ IRB.CreateCall(
+ isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
I->eraseFromParent();
}
return false;
@@ -468,11 +506,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
// The following page contains more background information:
// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
-bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
+bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
IRBuilder<> IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Value *Addr = LI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -486,7 +524,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Addr = SI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -500,7 +538,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
Value *Addr = RMWI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
@@ -517,7 +555,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
Value *Addr = CASI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -547,11 +585,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
return true;
}
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+ const DataLayout &DL) {
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
if (TypeSize != 8 && TypeSize != 16 &&
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
NumAccessesWithBadSize++;
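
The index computed here selects into the TsanRead/TsanWrite tables declared earlier (ByteSize == 1 << i, so kNumberOfAccessSizes == 5 covers 1 through 16 bytes). The mapping, written out as a stand-alone sketch:

    int demo_access_index(unsigned type_bits) {
      switch (type_bits) {
      case 8:   return 0;   // __tsan_*1
      case 16:  return 1;   // __tsan_*2
      case 32:  return 2;   // __tsan_*4
      case 64:  return 3;   // __tsan_*8
      case 128: return 4;   // __tsan_*16
      default:  return -1;  // counted via NumAccessesWithBadSize above
      }
    }
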
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
new file mode 100644
index 0000000..afb873a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
@@ -0,0 +1,673 @@
+//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions that are used by various ARC
+/// optimizations and are too large to belong in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ return OS << "ARCInstKind::Retain";
+ case ARCInstKind::RetainRV:
+ return OS << "ARCInstKind::RetainRV";
+ case ARCInstKind::RetainBlock:
+ return OS << "ARCInstKind::RetainBlock";
+ case ARCInstKind::Release:
+ return OS << "ARCInstKind::Release";
+ case ARCInstKind::Autorelease:
+ return OS << "ARCInstKind::Autorelease";
+ case ARCInstKind::AutoreleaseRV:
+ return OS << "ARCInstKind::AutoreleaseRV";
+ case ARCInstKind::AutoreleasepoolPush:
+ return OS << "ARCInstKind::AutoreleasepoolPush";
+ case ARCInstKind::AutoreleasepoolPop:
+ return OS << "ARCInstKind::AutoreleasepoolPop";
+ case ARCInstKind::NoopCast:
+ return OS << "ARCInstKind::NoopCast";
+ case ARCInstKind::FusedRetainAutorelease:
+ return OS << "ARCInstKind::FusedRetainAutorelease";
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return OS << "ARCInstKind::FusedRetainAutoreleaseRV";
+ case ARCInstKind::LoadWeakRetained:
+ return OS << "ARCInstKind::LoadWeakRetained";
+ case ARCInstKind::StoreWeak:
+ return OS << "ARCInstKind::StoreWeak";
+ case ARCInstKind::InitWeak:
+ return OS << "ARCInstKind::InitWeak";
+ case ARCInstKind::LoadWeak:
+ return OS << "ARCInstKind::LoadWeak";
+ case ARCInstKind::MoveWeak:
+ return OS << "ARCInstKind::MoveWeak";
+ case ARCInstKind::CopyWeak:
+ return OS << "ARCInstKind::CopyWeak";
+ case ARCInstKind::DestroyWeak:
+ return OS << "ARCInstKind::DestroyWeak";
+ case ARCInstKind::StoreStrong:
+ return OS << "ARCInstKind::StoreStrong";
+ case ARCInstKind::CallOrUser:
+ return OS << "ARCInstKind::CallOrUser";
+ case ARCInstKind::Call:
+ return OS << "ARCInstKind::Call";
+ case ARCInstKind::User:
+ return OS << "ARCInstKind::User";
+ case ARCInstKind::IntrinsicUser:
+ return OS << "ARCInstKind::IntrinsicUser";
+ case ARCInstKind::None:
+ return OS << "ARCInstKind::None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
+ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
+ .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
+ .Default(ARCInstKind::CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease",
+ ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_storeWeak", ARCInstKind::StoreWeak)
+ .Case("objc_initWeak", ARCInstKind::InitWeak)
+ .Case("objc_storeStrong", ARCInstKind::StoreStrong)
+ .Default(ARCInstKind::CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_moveWeak", ARCInstKind::MoveWeak)
+ .Case("objc_copyWeak", ARCInstKind::CopyWeak)
+                   // Ignore annotation calls. This is important to stop the
+                   // optimizer from treating annotations as uses, which would
+                   // corrupt the state of the pointers they are attempting
+                   // to elucidate.
+ .Case("llvm.arc.annotation.topdown.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.topdown.bbend",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbend",
+ ARCInstKind::None)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Anything else.
+ return ARCInstKind::CallOrUser;
+}
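
The classification above is purely name- and signature-driven. A self-contained analogue of the StringSwitch pattern it relies on (the enum and function names are illustrative; StringSwitch itself is the real llvm/ADT utility):

    #include "llvm/ADT/StringSwitch.h"

    enum class DemoKind { Retain, Release, CallOrUser };

    static DemoKind demo_classify(llvm::StringRef Name) {
      return llvm::StringSwitch<DemoKind>(Name)
          .Case("objc_retain", DemoKind::Retain)
          .Case("objc_release", DemoKind::Release)
          .Default(DemoKind::CallOrUser);  // anything unrecognized
    }
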
+
+// A whitelist of intrinsics that we know do not use objc pointers or decrement
+// ref counts.
+static bool isInertIntrinsic(unsigned ID) {
+ // TODO: Make this into a covered switch.
+ switch (ID) {
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::objectsize:
+ case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for:
+ case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda:
+ case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+  // Shortcut: some intrinsics obviously don't use ObjC pointers.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// A whitelist of intrinsics that we know merely use their pointer arguments
+// and do not decrement ref counts.
+static bool isUseOnlyIntrinsic(unsigned ID) {
+  // We are conservative: even though intrinsics are unlikely to touch
+  // reference counts, we whitelist them for safety.
+ //
+ // TODO: Expand this into a covered switch. There is a lot more here.
+ switch (ID) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine what kind of construct V is.
+ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Any instruction other than a bitcast or GEP with a pointer operand has a
+    // use of an objc pointer. Bitcasts, GEPs, Selects, and PHIs transfer a
+    // pointer to a subsequent use, rather than using it themselves, in this
+    // sense. As a shortcut, several other opcodes are known to have no pointer
+    // operands of interest, and ret is never followed by a release, so it's
+    // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // See if we have a function that we know something about.
+ if (const Function *F = CI->getCalledFunction()) {
+ ARCInstKind Class = GetFunctionClass(F);
+ if (Class != ARCInstKind::CallOrUser)
+ return Class;
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (isInertIntrinsic(ID))
+ return ARCInstKind::None;
+ if (isUseOnlyIntrinsic(ID))
+ return ARCInstKind::User;
+ }
+
+ // Otherwise, be conservative.
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ // Otherwise, be conservative.
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::PHI:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::Switch:
+ case Instruction::IndirectBr:
+ case Instruction::Alloca:
+ case Instruction::VAArg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::IntToPtr:
+ case Instruction::FCmp:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return ARCInstKind::User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return ARCInstKind::User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return ARCInstKind::None;
+}
+
+/// \brief Test if the given class is a kind of user.
+bool llvm::objcarc::IsUser(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::User:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::IntrinsicUser:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::Call:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool llvm::objcarc::IsRetain(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ return true;
+  // Note that we do not treat retainBlock as a retain since it can copy its
+  // block.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
+ // ARCInstKind::RetainBlock may be given a stack argument.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
+ /// It is never safe to tail call objc_autorelease since by tail calling
+ /// objc_autorelease, we also tail call -[NSObject autorelease] which supports
+ /// fast autoreleasing causing our object to be potentially reclaimed from the
+ /// autorelease pool, which violates the semantics of __autoreleasing types in
+ /// ARC.
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+///
+/// This means that it *could* interrupt the RV optimization.
+bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
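+
+// A sketch of the hazard this guards against (illustrative IR; @callee and
+// %pool are hypothetical):
+//
+//   %v = call i8* @callee()       ; returns via objc_autoreleaseReturnValue
+//   call void @objc_autoreleasePoolPop(i8* %pool)
+//   %r = call i8* @objc_retainAutoreleasedReturnValue(i8* %v)
+//
+// The intervening pop may reclaim %v before the retainRV claims it, so kinds
+// returning true here block the RV optimization across them.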
+
+bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
+ switch (Kind) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+
+ // The cases below are conservative.
+
+ // RetainBlock can result in user defined copy constructors being called
+ // implying releases may occur.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ return true;
+ }
+
+ llvm_unreachable("covered switch isn't covered?");
+}
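+
+// For example, CanDecrementRefCount(ARCInstKind::Retain) is false, while both
+// CanDecrementRefCount(ARCInstKind::Release) and
+// CanDecrementRefCount(ARCInstKind::CallOrUser) are conservatively true.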
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
new file mode 100644
index 0000000..636c65c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
@@ -0,0 +1,123 @@
+//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+namespace objcarc {
+
+/// \enum ARCInstKind
+///
+/// \brief Equivalence classes of instructions in the ARC Model.
+///
+/// Since we do not have "instructions" to represent ARC concepts in LLVM IR,
+/// we instead operate on equivalence classes of instructions.
+///
+/// TODO: This should be split into two enums: a runtime entry point enum
+/// (possibly united with the ARCRuntimeEntrypoint class) and an enum that deals
+/// with effects of instructions in the ARC model (which would handle the notion
+/// of a User or CallOrUser).
+enum class ARCInstKind {
+ Retain, ///< objc_retain
+ RetainRV, ///< objc_retainAutoreleasedReturnValue
+ RetainBlock, ///< objc_retainBlock
+ Release, ///< objc_release
+ Autorelease, ///< objc_autorelease
+ AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ NoopCast, ///< objc_retainedObject, etc.
+ FusedRetainAutorelease, ///< objc_retainAutorelease
+ FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ StoreWeak, ///< objc_storeWeak (primitive)
+ InitWeak, ///< objc_initWeak (derived)
+ LoadWeak, ///< objc_loadWeak (derived)
+ MoveWeak, ///< objc_moveWeak (derived)
+ CopyWeak, ///< objc_copyWeak (derived)
+ DestroyWeak, ///< objc_destroyWeak (derived)
+ StoreStrong, ///< objc_storeStrong (derived)
+ IntrinsicUser, ///< clang.arc.use
+ CallOrUser, ///< could call objc_release and/or "use" pointers
+ Call, ///< could call objc_release
+ User, ///< could "use" a pointer
+ None ///< anything that is inert from an ARC perspective.
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ARCInstKind Class);
+
+/// \brief Test if the given class is a kind of user.
+bool IsUser(ARCInstKind Class);
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool IsRetain(ARCInstKind Class);
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool IsAutorelease(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool IsForwarding(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool IsNoopOnNull(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool IsAlwaysTail(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool IsNeverTail(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool IsNoThrow(ARCInstKind Class);
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+bool CanInterruptRV(ARCInstKind Class);
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return ARCInstKind::CallOrUser.
+ARCInstKind GetFunctionClass(const Function *F);
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetARCInstKind except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline ARCInstKind GetBasicARCInstKind(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return ARCInstKind::CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return isa<InvokeInst>(V) ? ARCInstKind::CallOrUser : ARCInstKind::User;
+}
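+
+// A minimal usage sketch (illustrative; assumes V was taken from IR such as
+// "%0 = tail call i8* @objc_retain(i8* %x)"):
+//
+//   if (GetBasicARCInstKind(V) == ARCInstKind::Retain)
+//     ...; // V is a direct call to the objc_retain entry point.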
+
+/// Map V to its ARCInstKind equivalence class.
+ARCInstKind GetARCInstKind(const Value *V);
+
+/// Returns false if we can conservatively prove that no instruction mapped to
+/// this kind can decrement ref counts. Returns true otherwise.
+bool CanDecrementRefCount(ARCInstKind Kind);
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index e286dbc..d4fef10 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -27,22 +27,22 @@
namespace llvm {
namespace objcarc {
+enum class ARCRuntimeEntryPointKind {
+ AutoreleaseRV,
+ Release,
+ Retain,
+ RetainBlock,
+ Autorelease,
+ StoreStrong,
+ RetainRV,
+ RetainAutorelease,
+ RetainAutoreleaseRV,
+};
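+
+// Typical usage (a sketch; Arg and InsertPt are hypothetical):
+//
+//   Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+//   CallInst *Call = CallInst::Create(Decl, Arg, "", InsertPt);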
+
/// Declarations for ObjC runtime functions and constants. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
class ARCRuntimeEntryPoints {
public:
- enum EntryPointType {
- EPT_AutoreleaseRV,
- EPT_Release,
- EPT_Retain,
- EPT_RetainBlock,
- EPT_Autorelease,
- EPT_StoreStrong,
- EPT_RetainRV,
- EPT_RetainAutorelease,
- EPT_RetainAutoreleaseRV
- };
-
ARCRuntimeEntryPoints() : TheModule(nullptr),
AutoreleaseRV(nullptr),
Release(nullptr),
@@ -54,9 +54,7 @@ public:
RetainAutorelease(nullptr),
RetainAutoreleaseRV(nullptr) { }
- ~ARCRuntimeEntryPoints() { }
-
- void Initialize(Module *M) {
+ void init(Module *M) {
TheModule = M;
AutoreleaseRV = nullptr;
Release = nullptr;
@@ -69,30 +67,30 @@ public:
RetainAutoreleaseRV = nullptr;
}
- Constant *get(const EntryPointType entry) {
+ Constant *get(ARCRuntimeEntryPointKind kind) {
assert(TheModule != nullptr && "Not initialized.");
- switch (entry) {
- case EPT_AutoreleaseRV:
+ switch (kind) {
+ case ARCRuntimeEntryPointKind::AutoreleaseRV:
return getI8XRetI8XEntryPoint(AutoreleaseRV,
"objc_autoreleaseReturnValue", true);
- case EPT_Release:
+ case ARCRuntimeEntryPointKind::Release:
return getVoidRetI8XEntryPoint(Release, "objc_release");
- case EPT_Retain:
+ case ARCRuntimeEntryPointKind::Retain:
return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
- case EPT_RetainBlock:
+ case ARCRuntimeEntryPointKind::RetainBlock:
return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
- case EPT_Autorelease:
+ case ARCRuntimeEntryPointKind::Autorelease:
return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
- case EPT_StoreStrong:
+ case ARCRuntimeEntryPointKind::StoreStrong:
return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
- case EPT_RetainRV:
+ case ARCRuntimeEntryPointKind::RetainRV:
return getI8XRetI8XEntryPoint(RetainRV,
"objc_retainAutoreleasedReturnValue", true);
- case EPT_RetainAutorelease:
+ case ARCRuntimeEntryPointKind::RetainAutorelease:
return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
true);
- case EPT_RetainAutoreleaseRV:
+ case ARCRuntimeEntryPointKind::RetainAutoreleaseRV:
return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
"objc_retainAutoreleaseReturnValue", true);
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
new file mode 100644
index 0000000..d6439b6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -0,0 +1,108 @@
+//===- BlotMapVector.h - A MapVector with the blot operation -*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_BLOTMAPVECTOR_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_BLOTMAPVECTOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+/// \brief An associative container with fast insertion-order (deterministic)
+/// iteration over its elements, plus the special blot operation.
+template <class KeyT, class ValueT> class BlotMapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT>> VectorTy;
+ /// Keys and values.
+ VectorTy Vector;
+
+public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~BlotMapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); I != E;
+ ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(), E = Vector.end();
+ I != E; ++I)
+ assert(!I->first || (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+
+ bool empty() const {
+ assert(Map.empty() == Vector.empty());
+ return Map.empty();
+ }
+};
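+
+// A minimal sketch of the blot operation (assuming Value pointers A and B):
+//
+//   BlotMapVector<Value *, unsigned> M;
+//   M[A] = 0;
+//   M[B] = 1;
+//   M.blot(A); // A's vector slot remains, but its key is zeroed out.
+//
+// Iteration now visits (nullptr, 0) and then (B, 1), while M.find(A) returns
+// M.end(); clients iterating must skip the zeroed-out keys.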
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index f6c236c..4edd029 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -32,21 +32,20 @@ using namespace llvm::objcarc;
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
-bool
-llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA,
- InstructionClass Class) {
+bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
switch (Class) {
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_IntrinsicUser:
- case IC_User:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
// These operations never directly modify a reference count.
return false;
default: break;
}
- ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ ImmutableCallSite CS(Inst);
assert(CS && "Only calls can alter reference counts!");
// See if AliasAnalysis can help us with the call.
@@ -54,10 +53,12 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
@@ -67,15 +68,29 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
return true;
}
+bool llvm::objcarc::CanDecrementRefCount(const Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // First perform a quick check to see whether Class cannot touch ref counts.
+ if (!CanDecrementRefCount(Class))
+ return false;
+
+ // Otherwise, just use CanAlterRefCount for now.
+ return CanAlterRefCount(Inst, Ptr, PA, Class);
+}
+
/// Test whether the given instruction can "use" the given pointer's object in a
/// way that requires the reference count to be positive.
-bool
-llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA, InstructionClass Class) {
- // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
- if (Class == IC_Call)
+bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class) {
+ // ARCInstKind::Call operations (as opposed to
+ // ARCInstKind::CallOrUser) never "use" objc pointers.
+ if (Class == ARCInstKind::Call)
return false;
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
+
// Consider various instructions which may have pointer arguments which are
// not "uses".
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
@@ -84,29 +99,31 @@ llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
// of any other dynamic reference-counted pointers.
if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
return false;
- } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ } else if (auto CS = ImmutableCallSite(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
// the store address.
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL);
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Op, Ptr, DL);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL))
return true;
}
return false;
@@ -123,11 +140,11 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
switch (Flavor) {
case NeedsPositiveRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
return false;
default:
return CanUse(Inst, Arg, PA, Class);
@@ -135,10 +152,10 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case AutoreleasePoolBoundary: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
// These mark the end and begin of an autorelease pool scope.
return true;
default:
@@ -148,13 +165,13 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case CanChangeRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively assume this can decrement any count.
return true;
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
return false;
default:
return CanAlterRefCount(Inst, Arg, PA, Class);
@@ -162,28 +179,28 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case RetainAutoreleaseDep:
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
// Don't merge an objc_autorelease with an objc_retain inside a different
// autoreleasepool scope.
return true;
- case IC_Retain:
- case IC_RetainRV:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
// Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
+ return GetArgRCIdentityRoot(Inst) == Arg;
default:
// Nothing else matters for objc_retainAutorelease formation.
return false;
}
case RetainAutoreleaseRVDep: {
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
switch (Class) {
- case IC_Retain:
- case IC_RetainRV:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
// Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
+ return GetArgRCIdentityRoot(Inst) == Arg;
default:
// Anything that can autorelease interrupts
// retainAutoreleaseReturnValue formation.
@@ -192,7 +209,7 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case RetainRVDep:
- return CanInterruptRV(GetBasicInstructionClass(Inst));
+ return CanInterruptRV(GetBasicARCInstKind(Inst));
}
llvm_unreachable("Invalid dependence flavor");
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 7b5601a..8e042d4 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -63,15 +63,24 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
/// Test whether the given instruction can "use" the given pointer's object in a
/// way that requires the reference count to be positive.
-bool
-CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
- InstructionClass Class);
+bool CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ ARCInstKind Class);
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
-bool
-CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA, InstructionClass Class);
+bool CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+/// Returns false if we can conservatively prove that Inst cannot decrement the
+/// reference count of Ptr. Returns true otherwise.
+bool CanDecrementRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+static inline bool CanDecrementRefCount(const Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA) {
+ return CanDecrementRefCount(Inst, Ptr, PA, GetARCInstKind(Inst));
+}
} // namespace objcarc
} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 7a7eae8..7595e2d 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -24,6 +24,7 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -33,6 +34,7 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "ARCInstKind.h"
namespace llvm {
class raw_ostream;
@@ -68,160 +70,14 @@ static inline bool ModuleHasARC(const Module &M) {
M.getNamedValue("clang.arc.use");
}
-/// \enum InstructionClass
-/// \brief A simple classification for instructions.
-enum InstructionClass {
- IC_Retain, ///< objc_retain
- IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
- IC_RetainBlock, ///< objc_retainBlock
- IC_Release, ///< objc_release
- IC_Autorelease, ///< objc_autorelease
- IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
- IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
- IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
- IC_NoopCast, ///< objc_retainedObject, etc.
- IC_FusedRetainAutorelease, ///< objc_retainAutorelease
- IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- IC_StoreWeak, ///< objc_storeWeak (primitive)
- IC_InitWeak, ///< objc_initWeak (derived)
- IC_LoadWeak, ///< objc_loadWeak (derived)
- IC_MoveWeak, ///< objc_moveWeak (derived)
- IC_CopyWeak, ///< objc_copyWeak (derived)
- IC_DestroyWeak, ///< objc_destroyWeak (derived)
- IC_StoreStrong, ///< objc_storeStrong (derived)
- IC_IntrinsicUser, ///< clang.arc.use
- IC_CallOrUser, ///< could call objc_release and/or "use" pointers
- IC_Call, ///< could call objc_release
- IC_User, ///< could "use" a pointer
- IC_None ///< anything else
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
-
-/// \brief Test if the given class is a kind of user.
-inline static bool IsUser(InstructionClass Class) {
- return Class == IC_User ||
- Class == IC_CallOrUser ||
- Class == IC_IntrinsicUser;
-}
-
-/// \brief Test if the given class is objc_retain or equivalent.
-static inline bool IsRetain(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV;
-}
-
-/// \brief Test if the given class is objc_autorelease or equivalent.
-static inline bool IsAutorelease(InstructionClass Class) {
- return Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
-/// \brief Test if the given class represents instructions which return their
-/// argument verbatim.
-static inline bool IsForwarding(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_NoopCast;
-}
-
-/// \brief Test if the given class represents instructions which do nothing if
-/// passed a null pointer.
-static inline bool IsNoopOnNull(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock;
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the "tail" keyword.
-static inline bool IsAlwaysTail(InstructionClass Class) {
- // IC_RetainBlock may be given a stack argument.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_AutoreleaseRV;
-}
-
-/// \brief Test if the given class represents instructions which are never safe
-/// to mark with the "tail" keyword.
-static inline bool IsNeverTail(InstructionClass Class) {
- /// It is never safe to tail call objc_autorelease since by tail calling
- /// objc_autorelease, we also tail call -[NSObject autorelease] which supports
- /// fast autoreleasing causing our object to be potentially reclaimed from the
- /// autorelease pool which violates the semantics of __autoreleasing types in
- /// ARC.
- return Class == IC_Autorelease;
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the nounwind attribute.
-static inline bool IsNoThrow(InstructionClass Class) {
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_AutoreleasepoolPush ||
- Class == IC_AutoreleasepoolPop;
-}
-
-/// Test whether the given instruction can autorelease any pointer or cause an
-/// autoreleasepool pop.
-static inline bool
-CanInterruptRV(InstructionClass Class) {
- switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_CallOrUser:
- case IC_Call:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- return true;
- default:
- return false;
- }
-}
-
-/// \brief Determine if F is one of the special known Functions. If it isn't,
-/// return IC_CallOrUser.
-InstructionClass GetFunctionClass(const Function *F);
-
-/// \brief Determine which objc runtime call instruction class V belongs to.
-///
-/// This is similar to GetInstructionClass except that it only detects objc
-/// runtime calls. This allows it to be faster.
-///
-static inline InstructionClass GetBasicInstructionClass(const Value *V) {
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (const Function *F = CI->getCalledFunction())
- return GetFunctionClass(F);
- // Otherwise, be conservative.
- return IC_CallOrUser;
- }
-
- // Otherwise, be conservative.
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
-}
-
-/// \brief Determine what kind of construct V is.
-InstructionClass GetInstructionClass(const Value *V);
-
/// \brief This is a wrapper around getUnderlyingObject which also knows how to
/// look through objc_retain and objc_autorelease calls, which we know to return
/// their argument verbatim.
-static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+static inline const Value *GetUnderlyingObjCPtr(const Value *V,
+ const DataLayout &DL) {
for (;;) {
- V = GetUnderlyingObject(V);
- if (!IsForwarding(GetBasicInstructionClass(V)))
+ V = GetUnderlyingObject(V, DL);
+ if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
}
@@ -229,37 +85,44 @@ static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
return V;
}
-/// \brief This is a wrapper around Value::stripPointerCasts which also knows
-/// how to look through objc_retain and objc_autorelease calls, which we know to
-/// return their argument verbatim.
-static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+/// The RCIdentity root of a value \p V is a dominating value U for which
+/// retaining or releasing U is equivalent to retaining or releasing V. In other
+/// words, ARC operations on \p V are equivalent to ARC operations on \p U.
+///
+/// We use this in the ARC optimizer to make it easier to match up ARC
+/// operations by always mapping ARC operations to RCIdentityRoots instead of
+/// pointers themselves.
+///
+/// The two ways that we see RCIdentical values in ObjC are via:
+///
+/// 1. PointerCasts
+/// 2. Forwarding Calls that return their argument verbatim.
+///
+/// Thus this function strips off pointer casts and forwarding calls. *NOTE*
+/// This implies that two RCIdentical values must alias.
+static inline const Value *GetRCIdentityRoot(const Value *V) {
for (;;) {
V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
+ if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
}
return V;
}
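+
+// For instance (a sketch over hypothetical IR):
+//
+//   %1 = tail call i8* @objc_retain(i8* %x)
+//   %2 = bitcast i8* %1 to i8*
+//
+// GetRCIdentityRoot(%2) strips the pointer cast and looks through the
+// forwarding objc_retain call, returning %x.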
-/// \brief This is a wrapper around Value::stripPointerCasts which also knows
-/// how to look through objc_retain and objc_autorelease calls, which we know to
-/// return their argument verbatim.
-static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
+/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just
+/// casts away the const of the result. For documentation about what an
+/// RCIdentityRoot is (and, by extension, what GetRCIdentityRoot does), see
+/// that function.
+static inline Value *GetRCIdentityRoot(Value *V) {
+ return const_cast<Value *>(GetRCIdentityRoot((const Value *)V));
}
/// \brief Assuming the given instruction is one of the special calls such as
-/// objc_retain or objc_release, return the argument value, stripped of no-op
-/// casts and forwarding calls.
-static inline Value *GetObjCArg(Value *Inst) {
- return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+/// objc_retain or objc_release, return the RCIdentity root of the argument of
+/// the call.
+static inline Value *GetArgRCIdentityRoot(Value *Inst) {
+ return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0));
}
static inline bool IsNullOrUndef(const Value *V) {
@@ -286,8 +149,8 @@ static inline void EraseInstruction(Instruction *CI) {
if (!Unused) {
// Replace the return value with the argument.
- assert((IsForwarding(GetBasicInstructionClass(CI)) ||
- (IsNoopOnNull(GetBasicInstructionClass(CI)) &&
+ assert((IsForwarding(GetBasicARCInstKind(CI)) ||
+ (IsNoopOnNull(GetBasicARCInstKind(CI)) &&
isa<ConstantPointerNull>(OldArg))) &&
"Can't delete non-forwarding instruction with users!");
CI->replaceAllUsesWith(OldArg);
@@ -344,15 +207,15 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op,
return true;
}
-/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// \brief Helper for GetARCInstKind. Determines what kind of construct CS
/// is.
-static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) {
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I)
if (IsPotentialRetainableObjPtr(*I))
- return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+ return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
- return CS.onlyReadsMemory() ? IC_None : IC_Call;
+ return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
}
/// \brief Return true if this value refers to a distinct and identifiable
@@ -371,7 +234,7 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
const Value *Pointer =
- StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ GetRCIdentityRoot(LI->getPointerOperand());
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
// A constant pointer can't be pointing to an object on the heap. It may
// be reference-counted, but it won't be deleted.
@@ -396,6 +259,55 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
return false;
}
+enum class ARCMDKindID {
+ ImpreciseRelease,
+ CopyOnEscape,
+ NoObjCARCExceptions,
+};
+
+/// A cache of MDKinds used by various ARC optimizations.
+class ARCMDKindCache {
+ Module *M;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ llvm::Optional<unsigned> ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ llvm::Optional<unsigned> CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
+
+public:
+ void init(Module *Mod) {
+ M = Mod;
+ ImpreciseReleaseMDKind = NoneType::None;
+ CopyOnEscapeMDKind = NoneType::None;
+ NoObjCARCExceptionsMDKind = NoneType::None;
+ }
+
+ unsigned get(ARCMDKindID ID) {
+ switch (ID) {
+ case ARCMDKindID::ImpreciseRelease:
+ if (!ImpreciseReleaseMDKind)
+ ImpreciseReleaseMDKind =
+ M->getContext().getMDKindID("clang.imprecise_release");
+ return *ImpreciseReleaseMDKind;
+ case ARCMDKindID::CopyOnEscape:
+ if (!CopyOnEscapeMDKind)
+ CopyOnEscapeMDKind =
+ M->getContext().getMDKindID("clang.arc.copy_on_escape");
+ return *CopyOnEscapeMDKind;
+ case ARCMDKindID::NoObjCARCExceptions:
+ if (!NoObjCARCExceptionsMDKind)
+ NoObjCARCExceptionsMDKind =
+ M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+ return *NoObjCARCExceptionsMDKind;
+ }
+ llvm_unreachable("Covered switch isn't covered?!");
+ }
+};
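+
+// A usage sketch (assuming a cache initialized with init(&M) and a release
+// instruction Inst):
+//
+//   unsigned KindID = Cache.get(ARCMDKindID::ImpreciseRelease);
+//   if (MDNode *N = Inst->getMetadata(KindID))
+//     ...; // Inst carries clang.imprecise_release metadata.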
+
} // end namespace objcarc
} // end namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 1a25391..d318643 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -97,11 +97,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPush:
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::AutoreleasepoolPush:
Push = Inst;
break;
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// If this pop matches a push and nothing in between can autorelease,
// zap the pair.
if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
@@ -115,7 +115,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
}
Push = nullptr;
break;
- case IC_CallOrUser:
+ case ARCInstKind::CallOrUser:
if (MayAutorelease(ImmutableCallSite(Inst)))
Push = nullptr;
break;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index c61b6b0..b1515e3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -46,6 +46,11 @@ ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
return new ObjCARCAliasAnalysis();
}
+bool ObjCARCAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -59,8 +64,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
// First, strip off no-ops, including ObjC-specific no-ops, and try making a
// precise alias query.
- const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
- const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ const Value *SA = GetRCIdentityRoot(LocA.Ptr);
+ const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
AliasAnalysis::alias(Location(SA, LocA.Size, LocA.AATags),
Location(SB, LocB.Size, LocB.AATags));
@@ -69,8 +74,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA);
- const Value *UB = GetUnderlyingObjCPtr(SB);
+ const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
if (UA != SA || UB != SB) {
Result = AliasAnalysis::alias(Location(UA), Location(UB));
// We can't use MustAlias or PartialAlias results here because
@@ -92,14 +97,14 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ const Value *S = GetRCIdentityRoot(Loc.Ptr);
if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.AATags),
OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S);
+ const Value *U = GetUnderlyingObjCPtr(S, *DL);
if (U != S)
return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
@@ -120,7 +125,7 @@ ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
return AliasAnalysis::getModRefBehavior(F);
switch (GetFunctionClass(F)) {
- case IC_NoopCast:
+ case ARCInstKind::NoopCast:
return DoesNotAccessMemory;
default:
break;
@@ -134,15 +139,15 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
if (!EnableARCOpts)
return AliasAnalysis::getModRefInfo(CS, Loc);
- switch (GetBasicInstructionClass(CS.getInstruction())) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_NoopCast:
- case IC_AutoreleasepoolPush:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
+ switch (GetBasicARCInstKind(CS.getInstruction())) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
// Note that this doesn't include objc_retainBlock, because it updates
// pointers when it copies block data.
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 3fcea4e..3c5a021 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -44,9 +44,7 @@ namespace objcarc {
}
private:
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// This method is used when a pass implements an analysis interface through
/// multiple inheritance. If needed, it should override this to adjust the
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index eb325eb..2a3139f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::objcarc;
@@ -44,6 +45,10 @@ using namespace llvm::objcarc;
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+//===----------------------------------------------------------------------===//
+// Declarations
+//===----------------------------------------------------------------------===//
+
namespace {
/// \brief Late ARC optimizations
///
@@ -68,17 +73,23 @@ namespace {
/// "tail".
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
- bool OptimizeRetainCall(Function &F, Instruction *Retain);
+ /// Returns true if we eliminated Inst.
+ bool tryToPeepholeInstruction(Function &F, Instruction *Inst,
+ inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DepInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
+ bool &TailOkForStoreStrong);
- bool ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSetImpl<Instruction *>
- &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *>
- &Visited);
+ bool optimizeRetainCall(Function &F, Instruction *Retain);
- void ContractRelease(Instruction *Release,
- inst_iterator &Iter);
+ bool
+ contractAutorelease(Function &F, Instruction *Autorelease,
+ ARCInstKind Class,
+ SmallPtrSetImpl<Instruction *> &DependingInstructions,
+ SmallPtrSetImpl<const BasicBlock *> &Visited);
+
+ void tryToContractReleaseIntoStoreStrong(Instruction *Release,
+ inst_iterator &Iter);
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
@@ -92,30 +103,15 @@ namespace {
};
}
-char ObjCARCContract::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-
-Pass *llvm::createObjCARCContractPass() {
- return new ObjCARCContract();
-}
-
-void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
-}
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
/// return value. We do this late so we do not disrupt the dataflow analysis in
/// ObjCARCOpt.
-bool
-ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
+bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetArgRCIdentityRoot(Retain));
const Instruction *Call = CS.getInstruction();
if (!Call)
return false;
@@ -139,7 +135,7 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -147,19 +143,16 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
}
/// Merge an autorelease with a retain into a fused call.
-bool
-ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSetImpl<Instruction *>
- &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *>
- &Visited) {
- const Value *Arg = GetObjCArg(Autorelease);
+bool ObjCARCContract::contractAutorelease(
+ Function &F, Instruction *Autorelease, ARCInstKind Class,
+ SmallPtrSetImpl<Instruction *> &DependingInstructions,
+ SmallPtrSetImpl<const BasicBlock *> &Visited) {
+ const Value *Arg = GetArgRCIdentityRoot(Autorelease);
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
CallInst *Retain = nullptr;
- if (Class == IC_AutoreleaseRV)
+ if (Class == ARCInstKind::AutoreleaseRV)
FindDependencies(RetainAutoreleaseRVDep, Arg,
Autorelease->getParent(), Autorelease,
DependingInstructions, Visited, PA);
@@ -177,94 +170,208 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
DependingInstructions.clear();
- if (!Retain ||
- GetBasicInstructionClass(Retain) != IC_Retain ||
- GetObjCArg(Retain) != Arg)
+ if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
+ GetArgRCIdentityRoot(Retain) != Arg)
return false;
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
- "retain/autorelease. Erasing: " << *Autorelease << "\n"
- " Old Retain: "
- << *Retain << "\n");
+ DEBUG(dbgs() << " Fusing retain/autorelease!\n"
+ " Autorelease:" << *Autorelease << "\n"
+ " Retain: " << *Retain << "\n");
- Constant *Decl = EP.get(Class == IC_AutoreleaseRV ?
- ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV :
- ARCRuntimeEntryPoints::EPT_RetainAutorelease);
+ Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
+ ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
+ : ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
- DEBUG(dbgs() << " New Retain: "
- << *Retain << "\n");
+ DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n");
EraseInstruction(Autorelease);
return true;
}
-/// Attempt to merge an objc_release with a store, load, and objc_retain to form
-/// an objc_storeStrong. This can be a little tricky because the instructions
-/// don't always appear in order, and there may be unrelated intervening
-/// instructions.
-void ObjCARCContract::ContractRelease(Instruction *Release,
- inst_iterator &Iter) {
- LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || !Load->isSimple()) return;
+static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
+ Instruction *Release,
+ ProvenanceAnalysis &PA,
+ AliasAnalysis *AA) {
+ StoreInst *Store = nullptr;
+ bool SawRelease = false;
- // For now, require everything to be in one basic block.
- BasicBlock *BB = Release->getParent();
- if (Load->getParent() != BB) return;
+ // Get the location associated with Load.
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
// Walk down to find the store and the release, which may be in either order.
- BasicBlock::iterator I = Load, End = BB->end();
- ++I;
- AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = nullptr;
- bool SawRelease = false;
- for (; !Store || !SawRelease; ++I) {
- if (I == End)
- return;
+ for (auto I = std::next(BasicBlock::iterator(Load)),
+ E = Load->getParent()->end();
+ I != E; ++I) {
+ // If we found the store we were looking for and saw the release,
+ // break. There is no more work to be done.
+ if (Store && SawRelease)
+ break;
- Instruction *Inst = I;
+ // Now we know that we have not seen either the store or the release. If I
+ // is the release, mark that we saw the release and continue.
+ Instruction *Inst = &*I;
if (Inst == Release) {
SawRelease = true;
continue;
}
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ // Otherwise, we check if Inst is a "good" store. Grab the instruction class
+ // of Inst.
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
- // Unrelated retains are harmless.
+ // If Inst is an unrelated retain, we don't care about it.
+ //
+ // TODO: This is one area where the optimization could be made more
+ // aggressive.
if (IsRetain(Class))
continue;
+ // If we have seen the store, but not the release...
if (Store) {
- // The store is the point where we're going to put the objc_storeStrong,
- // so make sure there are no uses after it.
- if (CanUse(Inst, Load, PA, Class))
- return;
- } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
- // We are moving the load down to the store, so check for anything
- // else which writes to the memory between the load and the store.
- Store = dyn_cast<StoreInst>(Inst);
- if (!Store || !Store->isSimple()) return;
- if (Store->getPointerOperand() != Loc.Ptr) return;
+ // We need to make sure that it is safe to move the release from its
+ // current position to the store. This implies proving that any
+ // instruction in between Store and the Release conservatively cannot use
+ // the RCIdentityRoot of Release. If we can prove that Inst cannot use it,
+ // we can ignore Inst and continue...
+ if (!CanUse(Inst, Load, PA, Class)) {
+ continue;
+ }
+
+ // Otherwise, be conservative and return nullptr.
+ return nullptr;
}
+
+ // Ok, now we know we have not seen a store yet. See if Inst can write to
+ // our load location; if it cannot, just ignore the instruction.
+ if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+ continue;
+
+ Store = dyn_cast<StoreInst>(Inst);
+
+ // If Inst can write to our load location, check whether Inst is a simple
+ // store. If Inst is not a store, or is a store that is not simple, then
+ // something we do not understand is writing to this memory, implying we
+ // cannot move the load over the write to any subsequent store that we may
+ // find.
+ if (!Store || !Store->isSimple())
+ return nullptr;
+
+ // Then make sure that the pointer we are storing to is Loc.Ptr. If so, we
+ // found our Store!
+ if (Store->getPointerOperand() == Loc.Ptr)
+ continue;
+
+ // Otherwise, we have an unknown store to some other ptr that clobbers
+ // Loc.Ptr. Bail!
+ return nullptr;
}
- Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+ // If we did not find the store or did not see the release, fail.
+ if (!Store || !SawRelease)
+ return nullptr;
+
+ // We succeeded!
+ return Store;
+}
- // Walk up to find the retain.
- I = Store;
- BasicBlock::iterator Begin = BB->begin();
- while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+static Instruction *
+findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
+ Instruction *Release,
+ ProvenanceAnalysis &PA) {
+ // Walk up from the Store to find the retain.
+ BasicBlock::iterator I = Store;
+ BasicBlock::iterator Begin = Store->getParent()->begin();
+ while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) {
+ Instruction *Inst = &*I;
+
+ // It is only safe to move the retain to the store if we can prove
+ // conservatively that nothing besides the release can decrement reference
+ // counts in between the retain and the store.
+ if (CanDecrementRefCount(Inst, New, PA) && Inst != Release)
+ return nullptr;
--I;
+ }
Instruction *Retain = I;
- if (GetBasicInstructionClass(Retain) != IC_Retain) return;
- if (GetObjCArg(Retain) != New) return;
+ if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain)
+ return nullptr;
+ if (GetArgRCIdentityRoot(Retain) != New)
+ return nullptr;
+ return Retain;
+}
+
+/// Attempt to merge an objc_release with a store, load, and objc_retain to form
+/// an objc_storeStrong. An objc_storeStrong:
+///
+/// objc_storeStrong(i8** %old_ptr, i8* %new_value)
+///
+/// is equivalent to the following IR sequence:
+///
+/// ; Load old value.
+/// %old_value = load i8** %old_ptr (1)
+///
+/// ; Increment the new value and then release the old value. This must occur
+/// ; in order in case old_value releases new_value in its destructor causing
+/// ; us to potentially have a dangling ptr.
+/// tail call i8* @objc_retain(i8* %new_value) (2)
+/// tail call void @objc_release(i8* %old_value) (3)
+///
+/// ; Store the new_value into old_ptr
+/// store i8* %new_value, i8** %old_ptr (4)
+///
+/// The safety of this optimization is based around the following
+/// considerations:
+///
+/// 1. We are forming the store strong at the store. Thus to perform this
+/// optimization it must be safe to move the retain, load, and release to
+/// (4).
+/// 2. We need to make sure that any re-orderings of (1), (2), (3), (4) are
+/// safe.
+void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
+ inst_iterator &Iter) {
+ // See if we are releasing something that we just loaded.
+ auto *Load = dyn_cast<LoadInst>(GetArgRCIdentityRoot(Release));
+ if (!Load || !Load->isSimple())
+ return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB)
+ return;
+
+ // First scan down the BB from Load, looking for a simple store to the
+ // location that Load loads from.
+ StoreInst *Store =
+ findSafeStoreForStoreStrongContraction(Load, Release, PA, AA);
+ // If we fail, bail.
+ if (!Store)
+ return;
+
+ // Then find new_value's RCIdentityRoot.
+ Value *New = GetRCIdentityRoot(Store->getValueOperand());
+
+ // Then walk up the BB and look for a retain on New without any intervening
+ // instructions which conservatively might decrement ref counts.
+ Instruction *Retain =
+ findRetainForStoreStrongContraction(New, Store, Release, PA);
+
+ // If we fail, bail.
+ if (!Retain)
+ return;
Changed = true;
++NumStoreStrongs;
+ DEBUG(
+ llvm::dbgs() << " Contracting retain, release into objc_storeStrong.\n"
+ << " Old:\n"
+ << " Store: " << *Store << "\n"
+ << " Release: " << *Release << "\n"
+ << " Retain: " << *Retain << "\n"
+ << " Load: " << *Load << "\n");
+
LLVMContext &C = Release->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
@@ -274,7 +381,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -284,6 +391,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
// we can set the tail flag once we know it's safe.
StoreStrongCalls.insert(StoreStrong);
+ DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n");
+
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
@@ -292,85 +401,34 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
Load->eraseFromParent();
}
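
Note the iterator fix-up two lines before the erasures ("if (&*Iter == Store)
++Iter;"): the caller's inst_iterator must be moved off Store before Store is
deleted. That is the usual erase-while-iterating pattern; a self-contained
sketch, with shouldErase() as a stand-in predicate of our own invention:

  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static bool shouldErase(Instruction &I) {
    // Hypothetical predicate: dead bitcasts, for the sake of the example.
    return isa<BitCastInst>(&I) && I.use_empty();
  }

  static void eraseMatching(Function &F) {
    for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
      Instruction *Inst = &*I++;   // Advance before a possible erase.
      if (shouldErase(*Inst))
        Inst->eraseFromParent();
    }
  }
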
-bool ObjCARCContract::doInitialization(Module &M) {
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- EP.Initialize(&M);
-
- // Initialize RetainRVMarker.
- RetainRVMarker = nullptr;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RetainRVMarker = S;
- }
-
- return false;
-}
-
-bool ObjCARCContract::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // Track whether it's ok to mark objc_storeStrong calls with the "tail"
- // keyword. Be conservative if the function has variadic arguments.
- // It seems that functions which "return twice" are also unsafe for the
- // "tail" argument, because they are setjmp, which could need to
- // return to an earlier stack state.
- bool TailOkForStoreStrongs = !F.isVarArg() &&
- !F.callsFunctionThatReturnsTwice();
-
- // For ObjC library calls which return their argument, replace uses of the
- // argument with uses of the call return value, if it dominates the use. This
- // reduces register pressure.
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
-
- DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
-
+bool ObjCARCContract::tryToPeepholeInstruction(
+ Function &F, Instruction *Inst, inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
+ bool &TailOkForStoreStrongs) {
// Only these library routines return their argument. In particular,
// objc_retainBlock does not necessarily return its argument.
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
switch (Class) {
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- break;
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
- continue;
- break;
- case IC_Retain:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return false;
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+ case ARCInstKind::Retain:
// Attempt to convert retains to retainrvs if they are next to function
// calls.
- if (!OptimizeRetainCall(F, Inst))
- break;
+ if (!optimizeRetainCall(F, Inst))
+ return false;
// If we succeed in our optimization, fall through.
// FALLTHROUGH
- case IC_RetainRV: {
+ case ARCInstKind::RetainRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the retainAutoreleasedReturnValue optimization,
// insert it now.
if (!RetainRVMarker)
- break;
+ return false;
BasicBlock::iterator BBI = Inst;
BasicBlock *InstParent = Inst->getParent();
@@ -388,8 +446,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
--BBI;
} while (IsNoopInstruction(BBI));
- if (&*BBI == GetObjCArg(Inst)) {
- DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
+ if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+ DEBUG(dbgs() << "Adding inline asm marker for "
"retainAutoreleasedReturnValue optimization.\n");
Changed = true;
InlineAsm *IA =
@@ -400,9 +458,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
CallInst::Create(IA, "", Inst);
}
decline_rv_optimization:
- break;
+ return false;
}
- case IC_InitWeak: {
+ case ARCInstKind::InitWeak: {
// objc_initWeak(p, null) => *p = null
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(1))) {
@@ -417,31 +475,80 @@ bool ObjCARCContract::runOnFunction(Function &F) {
CI->replaceAllUsesWith(Null);
CI->eraseFromParent();
}
- continue;
+ return true;
}
- case IC_Release:
- ContractRelease(Inst, I);
- continue;
- case IC_User:
+ case ARCInstKind::Release:
+ // Try to form an objc_storeStrong from our release. If we fail, there is
+ // nothing further to do below, so continue.
+ tryToContractReleaseIntoStoreStrong(Inst, Iter);
+ return true;
+ case ARCInstKind::User:
// Be conservative if the function has any alloca instructions.
// Technically we only care about escaping alloca instructions,
// but this is sufficient to handle some interesting cases.
if (isa<AllocaInst>(Inst))
TailOkForStoreStrongs = false;
- continue;
- case IC_IntrinsicUser:
+ return true;
+ case ARCInstKind::IntrinsicUser:
// Remove calls to @clang.arc.use(...).
Inst->eraseFromParent();
- continue;
+ return true;
default:
- continue;
+ return true;
}
+}
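
For the ARCInstKind::InitWeak case above, the rewrite that the partially
elided hunk performs is small enough to sketch in isolation. This is a
standalone illustration of "objc_initWeak(p, null) => *p = null", not the
exact code of the pass:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // CI is assumed to be a call to objc_initWeak(i8** p, i8* null).
  static void rewriteInitWeakOfNull(CallInst *CI) {
    Value *Null =
        ConstantPointerNull::get(cast<PointerType>(CI->getType()));
    new StoreInst(Null, CI->getArgOperand(0), CI); // *p = null
    CI->replaceAllUsesWith(Null);                  // the call returned null
    CI->eraseFromParent();
  }
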
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
+
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they call setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs =
+ !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
+
+ // First try to peephole Inst. If there is nothing further we can do in
+ // terms of undoing objc-arc-expand, process the next inst.
+ if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
+ TailOkForStoreStrongs))
+ continue;
- DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
+ // Otherwise, try to undo objc-arc-expand.
- // Don't use GetObjCArg because we don't want to look through bitcasts
+ // Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
// and such; to do the replacement, the argument must have type i8*.
Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+
+ // TODO: Change this to a do-while.
for (;;) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
@@ -458,7 +565,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// reachability here because an unreachable call is considered to
// trivially dominate itself, which would lead us to rewriting its
// argument in terms of its return value, which would lead to
- // infinite loops in GetObjCArg.
+ // infinite loops in GetArgRCIdentityRoot.
if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
Changed = true;
Instruction *Replacement = Inst;
@@ -514,3 +621,45 @@ bool ObjCARCContract::runOnFunction(Function &F) {
return Changed;
}
+
+//===----------------------------------------------------------------------===//
+// Misc Pass Manager
+//===----------------------------------------------------------------------===//
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
+ "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
+ "ObjC ARC contraction", false, false)
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+}
+
+Pass *llvm::createObjCARCContractPass() { return new ObjCARCContract(); }
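
A small usage sketch for the factory function above, assuming the legacy pass
manager (as in this release) and a module that has already been verified:

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/ObjCARC.h"

  static bool runContractOnly(llvm::Module &M) {
    llvm::legacy::PassManager PM;
    PM.add(llvm::createObjCARCContractPass());
    return PM.run(M);   // True if any function was modified.
  }
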
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ EP.init(&M);
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = nullptr;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
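
For reference, the named metadata that doInitialization() looks up is a
module-level node with a single string operand. A hedged sketch of emitting it
from C++ — the marker text would come from the frontend or target, so the
value below is purely illustrative:

  #include "llvm/IR/Metadata.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  static void addRetainRVMarker(Module &M, StringRef Marker) {
    LLVMContext &C = M.getContext();
    NamedMDNode *NMD = M.getOrInsertNamedMetadata(
        "clang.arc.retainAutoreleasedReturnValueMarker");
    if (NMD->getNumOperands() == 0) // Match the single-operand shape read above.
      NMD->addOperand(MDNode::get(C, MDString::get(C, Marker)));
  }
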
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index bf9fcbb..53c19c3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -99,13 +99,13 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
- switch (GetBasicInstructionClass(Inst)) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV: {
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV: {
// These calls return their argument verbatim, as a low-level
// optimization. However, this makes high-level optimizations
// harder. Undo any uses of this optimization that the front-end
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 76932e6..dca3f1b 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -26,9 +26,11 @@
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
+#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
+#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -45,106 +47,10 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-opts"
-/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
-/// @{
-
-namespace {
- /// \brief An associative container with fast insertion-order (deterministic)
- /// iteration over its elements. Plus the special blot operation.
- template<class KeyT, class ValueT>
- class MapVector {
- /// Map keys to indices in Vector.
- typedef DenseMap<KeyT, size_t> MapTy;
- MapTy Map;
-
- typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
- /// Keys and values.
- VectorTy Vector;
-
- public:
- typedef typename VectorTy::iterator iterator;
- typedef typename VectorTy::const_iterator const_iterator;
- iterator begin() { return Vector.begin(); }
- iterator end() { return Vector.end(); }
- const_iterator begin() const { return Vector.begin(); }
- const_iterator end() const { return Vector.end(); }
-
-#ifdef XDEBUG
- ~MapVector() {
- assert(Vector.size() >= Map.size()); // May differ due to blotting.
- for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
- I != E; ++I) {
- assert(I->second < Vector.size());
- assert(Vector[I->second].first == I->first);
- }
- for (typename VectorTy::const_iterator I = Vector.begin(),
- E = Vector.end(); I != E; ++I)
- assert(!I->first ||
- (Map.count(I->first) &&
- Map[I->first] == size_t(I - Vector.begin())));
- }
-#endif
-
- ValueT &operator[](const KeyT &Arg) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Arg, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector[Num].second;
- }
- return Vector[Pair.first->second].second;
- }
-
- std::pair<iterator, bool>
- insert(const std::pair<KeyT, ValueT> &InsertPair) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(InsertPair.first, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(InsertPair);
- return std::make_pair(Vector.begin() + Num, true);
- }
- return std::make_pair(Vector.begin() + Pair.first->second, false);
- }
-
- iterator find(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- const_iterator find(const KeyT &Key) const {
- typename MapTy::const_iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- /// This is similar to erase, but instead of removing the element from the
- /// vector, it just zeros out the key in the vector. This leaves iterators
- /// intact, but clients must be prepared for zeroed-out keys when iterating.
- void blot(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return;
- Vector[It->second].first = KeyT();
- Map.erase(It);
- }
-
- void clear() {
- Map.clear();
- Vector.clear();
- }
- };
-}
-
-/// @}
-///
/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{
-/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon
+/// \brief This is similar to GetRCIdentityRoot but it stops as soon
/// as it finds a value with multiple uses.
static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (Arg->hasOneUse()) {
@@ -153,7 +59,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
if (GEP->hasAllZeroIndices())
return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
- if (IsForwarding(GetBasicInstructionClass(Arg)))
+ if (IsForwarding(GetBasicARCInstKind(Arg)))
return FindSingleUseIdentifiedObject(
cast<CallInst>(Arg)->getArgOperand(0));
if (!IsObjCIdentifiedObject(Arg))
@@ -165,7 +71,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
// trivial uses, we can still consider this to be a single-use value.
if (IsObjCIdentifiedObject(Arg)) {
for (const User *U : Arg->users())
- if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
return nullptr;
return Arg;
@@ -177,13 +83,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
/// GetUnderlyingObjects except that it returns early when it sees the first
/// alloca.
-static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
const Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObjCPtr(P);
+ P = GetUnderlyingObjCPtr(P, DL);
if (isa<AllocaInst>(P))
return true;
@@ -198,8 +105,8 @@ static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
}
if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
} while (!Worklist.empty());
@@ -270,293 +177,6 @@ STATISTIC(NumReleasesAfterOpt,
#endif
namespace {
- /// \enum Sequence
- ///
- /// \brief A sequence of states that a pointer may go through in which an
- /// objc_retain and objc_release are actually needed.
- enum Sequence {
- S_None,
- S_Retain, ///< objc_retain(x).
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
- S_Use, ///< any use of x.
- S_Stop, ///< like S_Release, but code motion is stopped.
- S_Release, ///< objc_release(x).
- S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
- switch (S) {
- case S_None:
- return OS << "S_None";
- case S_Retain:
- return OS << "S_Retain";
- case S_CanRelease:
- return OS << "S_CanRelease";
- case S_Use:
- return OS << "S_Use";
- case S_Release:
- return OS << "S_Release";
- case S_MovableRelease:
- return OS << "S_MovableRelease";
- case S_Stop:
- return OS << "S_Stop";
- }
- llvm_unreachable("Unknown sequence type.");
- }
-}
-
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
- // The easy cases.
- if (A == B)
- return A;
- if (A == S_None || B == S_None)
- return S_None;
-
- if (A > B) std::swap(A, B);
- if (TopDown) {
- // Choose the side which is further along in the sequence.
- if ((A == S_Retain || A == S_CanRelease) &&
- (B == S_CanRelease || B == S_Use))
- return B;
- } else {
- // Choose the side which is further along in the sequence.
- if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
- return A;
- // If both sides are releases, choose the more conservative one.
- if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
- return A;
- if (A == S_Release && B == S_MovableRelease)
- return A;
- }
-
- return S_None;
-}
-
-namespace {
- /// \brief Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the known same pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
- /// True of the objc_release calls are all marked with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// If the Calls are objc_release calls and they all have a
- /// clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// The set of optimal insert positions for moving calls in the opposite
- /// sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- /// If this is true, we cannot perform code motion but can still remove
- /// retain/release pairs.
- bool CFGHazardAfflicted;
-
- RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
- CFGHazardAfflicted(false) {}
-
- void clear();
-
- /// Conservatively merge the two RRInfo. Returns true if a partial merge has
- /// occurred, false otherwise.
- bool Merge(const RRInfo &Other);
-
- };
-}
-
-void RRInfo::clear() {
- KnownSafe = false;
- IsTailCallRelease = false;
- ReleaseMetadata = nullptr;
- Calls.clear();
- ReverseInsertPts.clear();
- CFGHazardAfflicted = false;
-}
-
-bool RRInfo::Merge(const RRInfo &Other) {
- // Conservatively merge the ReleaseMetadata information.
- if (ReleaseMetadata != Other.ReleaseMetadata)
- ReleaseMetadata = nullptr;
-
- // Conservatively merge the boolean state.
- KnownSafe &= Other.KnownSafe;
- IsTailCallRelease &= Other.IsTailCallRelease;
- CFGHazardAfflicted |= Other.CFGHazardAfflicted;
-
- // Merge the call sets.
- Calls.insert(Other.Calls.begin(), Other.Calls.end());
-
- // Merge the insert point sets. If there are any differences,
- // that makes this a partial merge.
- bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
- for (Instruction *Inst : Other.ReverseInsertPts)
- Partial |= ReverseInsertPts.insert(Inst).second;
- return Partial;
-}
-
-namespace {
- /// \brief This class summarizes several per-pointer runtime properties which
- /// are propogated through the flow graph.
- class PtrState {
- /// True if the reference count is known to be incremented.
- bool KnownPositiveRefCount;
-
- /// True if we've seen an opportunity for partial RR elimination, such as
- /// pushing calls into a CFG triangle or into one side of a CFG diamond.
- bool Partial;
-
- /// The current position in the sequence.
- unsigned char Seq : 8;
-
- /// Unidirectional information about the current sequence.
- RRInfo RRI;
-
- public:
- PtrState() : KnownPositiveRefCount(false), Partial(false),
- Seq(S_None) {}
-
-
- bool IsKnownSafe() const {
- return RRI.KnownSafe;
- }
-
- void SetKnownSafe(const bool NewValue) {
- RRI.KnownSafe = NewValue;
- }
-
- bool IsTailCallRelease() const {
- return RRI.IsTailCallRelease;
- }
-
- void SetTailCallRelease(const bool NewValue) {
- RRI.IsTailCallRelease = NewValue;
- }
-
- bool IsTrackingImpreciseReleases() const {
- return RRI.ReleaseMetadata != nullptr;
- }
-
- const MDNode *GetReleaseMetadata() const {
- return RRI.ReleaseMetadata;
- }
-
- void SetReleaseMetadata(MDNode *NewValue) {
- RRI.ReleaseMetadata = NewValue;
- }
-
- bool IsCFGHazardAfflicted() const {
- return RRI.CFGHazardAfflicted;
- }
-
- void SetCFGHazardAfflicted(const bool NewValue) {
- RRI.CFGHazardAfflicted = NewValue;
- }
-
- void SetKnownPositiveRefCount() {
- DEBUG(dbgs() << "Setting Known Positive.\n");
- KnownPositiveRefCount = true;
- }
-
- void ClearKnownPositiveRefCount() {
- DEBUG(dbgs() << "Clearing Known Positive.\n");
- KnownPositiveRefCount = false;
- }
-
- bool HasKnownPositiveRefCount() const {
- return KnownPositiveRefCount;
- }
-
- void SetSeq(Sequence NewSeq) {
- DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
- Seq = NewSeq;
- }
-
- Sequence GetSeq() const {
- return static_cast<Sequence>(Seq);
- }
-
- void ClearSequenceProgress() {
- ResetSequenceProgress(S_None);
- }
-
- void ResetSequenceProgress(Sequence NewSeq) {
- DEBUG(dbgs() << "Resetting sequence progress.\n");
- SetSeq(NewSeq);
- Partial = false;
- RRI.clear();
- }
-
- void Merge(const PtrState &Other, bool TopDown);
-
- void InsertCall(Instruction *I) {
- RRI.Calls.insert(I);
- }
-
- void InsertReverseInsertPt(Instruction *I) {
- RRI.ReverseInsertPts.insert(I);
- }
-
- void ClearReverseInsertPts() {
- RRI.ReverseInsertPts.clear();
- }
-
- bool HasReverseInsertPts() const {
- return !RRI.ReverseInsertPts.empty();
- }
-
- const RRInfo &GetRRInfo() const {
- return RRI;
- }
- };
-}
-
-void
-PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
- KnownPositiveRefCount &= Other.KnownPositiveRefCount;
-
- // If we're not in a sequence (anymore), drop all associated state.
- if (Seq == S_None) {
- Partial = false;
- RRI.clear();
- } else if (Partial || Other.Partial) {
- // If we're doing a merge on a path that's previously seen a partial
- // merge, conservatively drop the sequence, to avoid doing partial
- // RR elimination. If the branch predicates for the two merge differ,
- // mixing them is unsafe.
- ClearSequenceProgress();
- } else {
- // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
- // point, we know that currently we are not partial. Stash whether or not
- // the merge operation caused us to undergo a partial merging of reverse
- // insertion points.
- Partial = RRI.Merge(Other.RRI);
- }
-}
-
-namespace {
/// \brief Per-BasicBlock state.
class BBState {
/// The number of unique control paths from the entry which can reach this
@@ -566,20 +186,18 @@ namespace {
/// The number of unique control paths to exits from this block.
unsigned BottomUpPathCount;
- /// A type for PerPtrTopDown and PerPtrBottomUp.
- typedef MapVector<const Value *, PtrState> MapTy;
-
/// The top-down traversal uses this to record information known about a
/// pointer at the bottom of each block.
- MapTy PerPtrTopDown;
+ BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
/// The bottom-up traversal uses this to record information known about a
/// pointer at the top of each block.
- MapTy PerPtrBottomUp;
+ BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
/// Effective predecessors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Preds;
+
/// Effective successors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Succs;
@@ -589,26 +207,38 @@ namespace {
BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
- typedef MapTy::iterator ptr_iterator;
- typedef MapTy::const_iterator ptr_const_iterator;
+ typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator;
+ typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator;
- ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
- ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
- ptr_const_iterator top_down_ptr_begin() const {
+ top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ const_top_down_ptr_iterator top_down_ptr_begin() const {
return PerPtrTopDown.begin();
}
- ptr_const_iterator top_down_ptr_end() const {
+ const_top_down_ptr_iterator top_down_ptr_end() const {
return PerPtrTopDown.end();
}
+ bool hasTopDownPtrs() const {
+ return !PerPtrTopDown.empty();
+ }
- ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
- ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
- ptr_const_iterator bottom_up_ptr_begin() const {
+ typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator;
+ typedef decltype(
+ PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator;
+
+ bottom_up_ptr_iterator bottom_up_ptr_begin() {
+ return PerPtrBottomUp.begin();
+ }
+ bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
return PerPtrBottomUp.begin();
}
- ptr_const_iterator bottom_up_ptr_end() const {
+ const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
return PerPtrBottomUp.end();
}
+ bool hasBottomUpPtrs() const {
+ return !PerPtrBottomUp.empty();
+ }
/// Mark this block as being an entry block, which has one path from the
/// entry by definition.
@@ -621,20 +251,20 @@ namespace {
/// Attempt to find the PtrState object describing the top down state for
/// pointer Arg. Return a new initialized PtrState describing the top down
/// state for Arg if we do not find one.
- PtrState &getPtrTopDownState(const Value *Arg) {
+ TopDownPtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg. Return a new initialized PtrState describing the bottom up
/// state for Arg if we do not find one.
- PtrState &getPtrBottomUpState(const Value *Arg) {
+ BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg.
- ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp.find(Arg);
}
@@ -685,6 +315,11 @@ namespace {
const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS,
+ BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+}
+
void BBState::InitFromPred(const BBState &Other) {
PerPtrTopDown = Other.PerPtrTopDown;
TopDownPathCount = Other.TopDownPathCount;
@@ -724,19 +359,18 @@ void BBState::MergePred(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the same key,
// merge the entries. Otherwise, copy the entry and merge it with an empty
// entry.
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
- ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
/*TopDown=*/true);
}
// For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
- for (ptr_iterator MI = top_down_ptr_begin(),
- ME = top_down_ptr_end(); MI != ME; ++MI)
+ for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
- MI->second.Merge(PtrState(), /*TopDown=*/true);
+ MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}
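
The merge discipline in MergePred (and in MergeSucc below) is: every key
present on either side ends up merged against either the other side's state or
an empty state. A minimal generic sketch with plain standard containers, where
State::merge is a hypothetical stand-in for PtrState::Merge:

  #include <algorithm>
  #include <map>
  #include <string>

  struct State {
    int Count = 0;
    void merge(const State &Other) { Count = std::min(Count, Other.Count); }
  };

  static void mergeMaps(std::map<std::string, State> &Ours,
                        const std::map<std::string, State> &Theirs) {
    // Keys they have: merge into ours, default-constructing if absent.
    for (const auto &KV : Theirs)
      Ours[KV.first].merge(KV.second);
    // Keys only we have: merge against an empty state.
    for (auto &KV : Ours)
      if (!Theirs.count(KV.first))
        KV.second.merge(State());
  }
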
/// The bottom-up traversal uses this to merge information about successors to
@@ -768,304 +402,80 @@ void BBState::MergeSucc(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the
// same key, merge the entries. Otherwise, copy the entry and merge
// it with an empty entry.
- for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
- ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
/*TopDown=*/false);
}
// For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
- for (ptr_iterator MI = bottom_up_ptr_begin(),
- ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
+ ++MI)
if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
- MI->second.Merge(PtrState(), /*TopDown=*/false);
+ MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}
-// Only enable ARC Annotations if we are building a debug version of
-// libObjCARCOpts.
-#ifndef NDEBUG
-#define ARC_ANNOTATIONS
-#endif
-
-// Define some macros along the lines of DEBUG and some helper functions to make
-// it cleaner to create annotations in the source code and to no-op when not
-// building in debug mode.
-#ifdef ARC_ANNOTATIONS
-
-#include "llvm/Support/CommandLine.h"
-
-/// Enable/disable ARC sequence annotations.
-static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
- cl::desc("Enable emission of arc data flow analysis "
- "annotations"));
-static cl::opt<bool>
-DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
- cl::desc("Disable check for cfg hazards when "
- "annotating"));
-static cl::opt<std::string>
-ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
- cl::init(""),
- cl::desc("filter out all data flow annotations "
- "but those that apply to the given "
- "target llvm identifier."));
-
-/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
-/// instruction so that we can track backwards when post processing via the llvm
-/// arc annotation processor tool. If the function is an
-static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
- Value *Ptr) {
- MDString *Hash = nullptr;
-
- // If pointer is a result of an instruction and it does not have a source
- // MDNode it, attach a new MDNode onto it. If pointer is a result of
- // an instruction and does have a source MDNode attached to it, return a
- // reference to said Node. Otherwise just return 0.
- if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
- MDNode *Node;
- if (!(Node = Inst->getMetadata(NodeId))) {
- // We do not have any node. Generate and attatch the hash MDString to the
- // instruction.
-
- // We just use an MDString to ensure that this metadata gets written out
- // of line at the module level and to provide a very simple format
- // encoding the information herein. Both of these makes it simpler to
- // parse the annotations by a simple external program.
- std::string Str;
- raw_string_ostream os(Str);
- os << "(" << Inst->getParent()->getParent()->getName() << ",%"
- << Inst->getName() << ")";
-
- Hash = MDString::get(Inst->getContext(), os.str());
- Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
- } else {
- // We have a node. Grab its hash and return it.
- assert(Node->getNumOperands() == 1 &&
- "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
- Hash = cast<MDString>(Node->getOperand(0));
+raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
+ // Dump the pointers we are tracking.
+ OS << " TopDown State:\n";
+ if (!BBInfo.hasTopDownPtrs()) {
+ OS << " NONE!\n";
+ } else {
+ for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
}
- } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
- std::string str;
- raw_string_ostream os(str);
- os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
- << ")";
- Hash = MDString::get(Arg->getContext(), os.str());
}
- return Hash;
-}
-
-static std::string SequenceToString(Sequence A) {
- std::string str;
- raw_string_ostream os(str);
- os << A;
- return os.str();
-}
-
-/// Helper function to change a Sequence into a String object using our overload
-/// for raw_ostream so we only have printing code in one location.
-static MDString *SequenceToMDString(LLVMContext &Context,
- Sequence A) {
- return MDString::get(Context, SequenceToString(A));
-}
-
-/// A simple function to generate a MDNode which describes the change in state
-/// for Value *Ptr caused by Instruction *Inst.
-static void AppendMDNodeToInstForPtr(unsigned NodeId,
- Instruction *Inst,
- Value *Ptr,
- MDString *PtrSourceMDNodeID,
- Sequence OldSeq,
- Sequence NewSeq) {
- MDNode *Node = nullptr;
- Metadata *tmp[3] = {PtrSourceMDNodeID,
- SequenceToMDString(Inst->getContext(), OldSeq),
- SequenceToMDString(Inst->getContext(), NewSeq)};
- Node = MDNode::get(Inst->getContext(), tmp);
-
- Inst->setMetadata(NodeId, Node);
-}
-
-/// Add to the beginning of the basic block llvm.ptr.annotations which show the
-/// state of a pointer at the entrance to a basic block.
-static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before
- // continuing.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
- }
-
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
- }
-
- Builder.CreateCall2(Callee, PtrName, S);
-}
-
-/// Add to the end of the basic block llvm.ptr.annotations which show the state
-/// of the pointer at the bottom of the basic block.
-static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before emitting
- // an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, std::prev(BB->end()));
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
- }
-
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
+ OS << " BottomUp State:\n";
+ if (!BBInfo.hasBottomUpPtrs()) {
+ OS << " NONE!\n";
+ } else {
+ for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
+ }
}
- Builder.CreateCall2(Callee, PtrName, S);
-}
-/// Adds a source annotation to pointer and a state change annotation to Inst
-/// referencing the source annotation and the old/new state of pointer.
-static void GenerateARCAnnotation(unsigned InstMDId,
- unsigned PtrMDId,
- Instruction *Inst,
- Value *Ptr,
- Sequence OldSeq,
- Sequence NewSeq) {
- if (EnableARCAnnotations) {
- // If we have a target identifier, make sure that we match it before
- // emitting an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- // First generate the source annotation on our pointer. This will return an
- // MDString* if Ptr actually comes from an instruction implying we can put
- // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
- // then we know that our pointer is from an Argument so we put a reference
- // to the argument number.
- //
- // The point of this is to make it easy for the
- // llvm-arc-annotation-processor tool to cross reference where the source
- // pointer is in the LLVM IR since the LLVM IR parser does not submit such
- // information via debug info for backends to use (since why would anyone
- // need such a thing from LLVM IR besides in non-standard cases
- // [i.e. this]).
- MDString *SourcePtrMDNode =
- AppendMDNodeToSourcePtr(PtrMDId, Ptr);
- AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
- NewSeq);
- }
+ return OS;
}
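
If one wanted to use this printer outside of the DEBUG statements in this
file, a plausible (hypothetical) hook inside the same translation unit could
look like the following, assuming the DenseMap<const BasicBlock *, BBState>
that Visit() builds:

  static void dumpBlockStates(Function &F,
                              DenseMap<const BasicBlock *, BBState> &BBStates) {
    for (BasicBlock &BB : F)
      llvm::dbgs() << "Block " << BB.getName() << ":\n" << BBStates[&BB];
  }
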
-// The actual interface for accessing the above functionality is defined via
-// some simple macros which are defined below. We do this so that the user does
-// not need to pass in what metadata id is needed resulting in cleaner code and
-// additionally since it provides an easy way to conditionally no-op all
-// annotation support in a non-debug build.
-
-/// Use this macro to annotate a sequence state change when processing
-/// instructions bottom up,
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-/// Use this macro to annotate a sequence state change when processing
-/// instructions top down.
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-
-#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
- do { \
- if (EnableARCAnnotations) { \
- for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
- E = (_states)._direction##_ptr_end(); I != E; ++I) { \
- Value *Ptr = const_cast<Value*>(I->first); \
- Sequence Seq = I->second.GetSeq(); \
- GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
- } \
- } \
- } while (0)
-
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
- Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
- Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
- Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
- Terminator, top_down)
-
-#else // !ARC_ANNOTATION
-// If annotations are off, noop.
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
-#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
-#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
-#endif // !ARC_ANNOTATION
-
namespace {
+
/// \brief The main ARC optimization pass.
class ObjCARCOpt : public FunctionPass {
bool Changed;
ProvenanceAnalysis PA;
+
+ /// A cache of references to runtime entry point constants.
ARCRuntimeEntryPoints EP;
+ /// A cache of MDKinds that can be passed into other functions to propagate
+ /// MDKind identifiers.
+ ARCMDKindCache MDKindCache;
+
// This is used to track if a pointer is stored into an alloca.
DenseSet<const Value *> MultiOwnersSet;
@@ -1076,73 +486,49 @@ namespace {
/// is in fact used in the current function.
unsigned UsedInThisFunction;
- /// The Metadata Kind for clang.imprecise_release metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// The Metadata Kind for clang.arc.copy_on_escape metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind;
-
-#ifdef ARC_ANNOTATIONS
- /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
- unsigned ARCAnnotationBottomUpMDKind;
- /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
- unsigned ARCAnnotationTopDownMDKind;
- /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
- unsigned ARCAnnotationProvenanceSourceMDKind;
-#endif // ARC_ANNOATIONS
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
- InstructionClass &Class);
+ ARCInstKind &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
+ bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
+ BlotMapVector<Value *, RRInfo> &Retains,
BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains);
+ BlotMapVector<Value *, RRInfo> &Retains);
bool VisitInstructionTopDown(Instruction *Inst,
DenseMap<Value *, RRInfo> &Releases,
BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
+ bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases);
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M);
-
- bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated);
+ SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
+
+ bool
+ PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M);
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1191,7 +577,7 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for the argument being from an immediately preceding call or invoke.
- const Value *Arg = GetObjCArg(RetainRV);
+ const Value *Arg = GetArgRCIdentityRoot(RetainRV);
ImmutableCallSite CS(Arg);
if (const Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
@@ -1216,8 +602,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
if (I != Begin) {
do --I; while (I != Begin && IsNoopInstruction(I));
- if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
- GetObjCArg(I) == Arg) {
+ if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV &&
+ GetArgRCIdentityRoot(I) == Arg) {
Changed = true;
++NumPeeps;
@@ -1238,7 +624,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"objc_retain since the operand is not a return value.\n"
"Old = " << *RetainRV << "\n");
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -1248,17 +634,17 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
/// used as a return value.
-void
-ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
- InstructionClass &Class) {
+void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
+ Instruction *AutoreleaseRV,
+ ARCInstKind &Class) {
// Check for a return of the pointer value.
- const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
SmallVector<const Value *, 2> Users;
Users.push_back(Ptr);
do {
Ptr = Users.pop_back_val();
for (const User *U : Ptr->users()) {
- if (isa<ReturnInst>(U) || GetBasicInstructionClass(U) == IC_RetainRV)
+ if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
return;
if (isa<BitCastInst>(U))
Users.push_back(U);
@@ -1274,10 +660,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
"Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
- Class = IC_Autorelease;
+ Class = ARCInstKind::Autorelease;
DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
@@ -1294,7 +680,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
@@ -1309,7 +695,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// There are gray areas here, as the ability to cast reference-counted
// pointers to raw void* and back allows code to break ARC assumptions,
// however these are currently considered to be unimportant.
- case IC_NoopCast:
+ case ARCInstKind::NoopCast:
Changed = true;
++NumNoops;
DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
@@ -1317,11 +703,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
continue;
// If the pointer-to-weak-pointer is null, it's undefined behavior.
- case IC_StoreWeak:
- case IC_LoadWeak:
- case IC_LoadWeakRetained:
- case IC_InitWeak:
- case IC_DestroyWeak: {
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
@@ -1338,8 +724,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_CopyWeak:
- case IC_MoveWeak: {
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::MoveWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(0)) ||
IsNullOrUndef(CI->getArgOperand(1))) {
@@ -1359,11 +745,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_RetainRV:
+ case ARCInstKind::RetainRV:
if (OptimizeRetainRVCall(F, Inst))
continue;
break;
- case IC_AutoreleaseRV:
+ case ARCInstKind::AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
}
@@ -1380,10 +766,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
+ NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
+ MDNode::get(C, None));
DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
"since x is otherwise unused.\nOld: " << *Call << "\nNew: "
@@ -1391,7 +778,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
EraseInstruction(Call);
Inst = NewCall;
- Class = IC_Release;
+ Class = ARCInstKind::Release;
}
}
@@ -1422,11 +809,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
if (!IsNoopOnNull(Class)) {
- UsedInThisFunction |= 1 << Class;
+ UsedInThisFunction |= 1 << unsigned(Class);
continue;
}
- const Value *Arg = GetObjCArg(Inst);
+ const Value *Arg = GetArgRCIdentityRoot(Inst);
// ARC calls with null are no-ops. Delete them.
if (IsNullOrUndef(Arg)) {
@@ -1440,7 +827,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Keep track of which of retain, release, autorelease, and retain_block
// are actually present in this function.
- UsedInThisFunction |= 1 << Class;
+ UsedInThisFunction |= 1 << unsigned(Class);
// If Arg is a PHI, and one or more incoming values to the
// PHI are null, and the call is control-equivalent to the PHI, and there
@@ -1463,7 +850,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
bool HasCriticalEdges = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ GetRCIdentityRoot(PN->getIncomingValue(i));
if (IsNullOrUndef(Incoming))
HasNull = true;
else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
@@ -1480,25 +867,25 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Check that there is nothing that cares about the reference
// count between the call and the phi.
switch (Class) {
- case IC_Retain:
- case IC_RetainBlock:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainBlock:
// These can always be moved up.
break;
- case IC_Release:
+ case ARCInstKind::Release:
// These can't be moved across things that care about the retain
// count.
FindDependencies(NeedsPositiveRetainCount, Arg,
Inst->getParent(), Inst,
DependingInstructions, Visited, PA);
break;
- case IC_Autorelease:
+ case ARCInstKind::Autorelease:
// These can't be moved across autorelease pool scope boundaries.
FindDependencies(AutoreleasePoolBoundary, Arg,
Inst->getParent(), Inst,
DependingInstructions, Visited, PA);
break;
- case IC_RetainRV:
- case IC_AutoreleaseRV:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
// Don't move these; the RV optimization depends on the autoreleaseRV
// being tail called, and the retainRV being immediately after a call
// (which might still happen if we get lucky with codegen layout, but
@@ -1517,7 +904,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Type *ParamTy = CInst->getArgOperand(0)->getType();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ GetRCIdentityRoot(PN->getIncomingValue(i));
if (!IsNullOrUndef(Incoming)) {
CallInst *Clone = cast<CallInst>(CInst->clone());
Value *Op = PN->getIncomingValue(i);
@@ -1547,7 +934,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
/// no CFG hazards by checking the states of various bottom up pointers.
static void CheckForUseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe,
@@ -1585,7 +972,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
/// pointers.
static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe) {
@@ -1618,9 +1005,9 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
BBState &MyStates) const {
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I) {
- PtrState &S = I->second;
+ for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
+ I != E; ++I) {
+ TopDownPtrState &S = I->second;
const Sequence Seq = I->second.GetSeq();
// We only care about S_Retain, S_CanRelease, and S_Use.
@@ -1646,7 +1033,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const DenseMap<const BasicBlock *, BBState>::iterator BBI =
BBStates.find(*SI);
assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
const Sequence SuccSSeq = SuccS.GetSeq();
// If bottom up, the pointer is in an S_None state, clear the sequence
@@ -1705,94 +1092,53 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
}
}
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
+bool ObjCARCOpt::VisitInstructionBottomUp(
+ Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
- DEBUG(dbgs() << "Class: " << Class << "\n");
+ DEBUG(dbgs() << " Class: " << Class << "\n");
switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
- // If we see two releases in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
- NestingDetected = true;
- }
+ case ARCInstKind::Release: {
+ Arg = GetArgRCIdentityRoot(Inst);
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
- ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
- S.ResetSequenceProgress(NewSeq);
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
- S.InsertCall(Inst);
- S.SetKnownPositiveRefCount();
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
break;
}
- case IC_RetainBlock:
+ case ARCInstKind::RetainBlock:
// In OptimizeIndividualCalls, we have strength reduced all optimizable
// objc_retainBlocks to objc_retains. Thus at this point any
// objc_retainBlocks that we see are not optimizable.
break;
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.SetKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
- switch (OldSeq) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
- // imprecise release, clear our reverse insertion points.
- if (OldSeq != S_Use || S.IsTrackingImpreciseReleases())
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV)
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ if (S.MatchWithRetain()) {
+ // Don't do retain+release tracking for ARCInstKind::RetainRV, because
+ // it's better to let it remain as the first instruction after a call.
+ if (Class != ARCInstKind::RetainRV) {
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Retains[Inst] = S.GetRRInfo();
+ }
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
}
- ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
// A retain moving bottom up can be a use.
break;
}
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
MyStates.clearBottomUpPointers();
return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
// These are irrelevant.
return NestingDetected;
- case IC_User:
+ case ARCInstKind::User:
// If we have a store into an alloca of a pointer we are tracking, the
// pointer has multiple owners implying that we must be more conservative.
//
@@ -1806,9 +1152,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// in the presence of allocas we only unconditionally remove pointers if
// both our retain and our release are KnownSafe.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
- BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
- StripPointerCastsAndObjCCalls(SI->getValueOperand()));
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) {
+ auto I = MyStates.findPtrBottomUpState(
+ GetRCIdentityRoot(SI->getValueOperand()));
if (I != MyStates.bottom_up_ptr_end())
MultiOwnersSet.insert(I->first);
}
@@ -1820,90 +1167,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
+ BottomUpPtrState &S = MI->second;
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_Release:
- case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- assert(!S.HasReverseInsertPts());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- } else if (Seq == S_Release && IsUser(Class)) {
- DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
- assert(!S.HasReverseInsertPts());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
}
return NestingDetected;
}
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
+bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains) {
DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
@@ -1928,9 +1211,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
@@ -1940,7 +1222,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -1955,9 +1237,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
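VisitBottomUp's instruction loop is worth pausing on: it starts at BB->end() and decrements before each visit, so the terminator is processed first (invokes are skipped here and handled separately while visiting predecessors). A minimal sketch of that reverse-walk idiom over a plain container, with std::vector standing in for a BasicBlock:

#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Insts = {"retain", "call", "release"};

  // Walk bottom-up: begin at end(), stop once the cursor reaches begin().
  // Dereferencing std::prev(I) visits the element just before the cursor,
  // so the last instruction is seen first.
  for (auto I = Insts.end(), E = Insts.begin(); I != E; --I) {
    const std::string &Inst = *std::prev(I);
    std::cout << "visiting " << Inst << "\n"; // release, then call, then retain
  }
  return 0;
}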
@@ -1967,146 +1247,66 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
DenseMap<Value *, RRInfo> &Releases,
BBState &MyStates) {
bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
+ DEBUG(llvm::dbgs() << " Class: " << Class << "\n");
+
switch (Class) {
- case IC_RetainBlock:
+ case ARCInstKind::RetainBlock:
// In OptimizeIndividualCalls, we have strength reduced all optimizable
// objc_retainBlocks to objc_retains. Thus at this point any
- // objc_retainBlocks that we see are not optimizable.
+    // objc_retainBlocks that we see are not optimizable. We need to break
+    // here, since a retain can be a potential use.
break;
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
- S.ResetSequenceProgress(S_Retain);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.InsertCall(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+ NestingDetected |= S.InitTopDown(Class, Inst);
// A retain can be a potential use; proceed to the generic checking
// code below.
break;
}
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
-
- switch (OldSeq) {
- case S_Retain:
- case S_CanRelease:
- if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_Use:
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
+ case ARCInstKind::Release: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+    // Try to form a tentative pair between this release instruction and the
+    // top-down pointers that we are tracking.
+ if (S.MatchWithRelease(MDKindCache, Inst)) {
+ // If we succeed, copy S's RRInfo into the Release -> {Retain Set
+ // Map}. Then we clear S.
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Releases[Inst] = S.GetRRInfo();
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
}
break;
}
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
MyStates.clearTopDownPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
+ return false;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
+    // These cannot be uses of the pointers we are tracking.
+ return false;
default:
break;
}
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
- assert(!S.HasReverseInsertPts());
- S.InsertReverseInsertPt(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
+ TopDownPtrState &S = MI->second;
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
+ S.HandlePotentialUse(Inst, Ptr, PA, Class);
}
return NestingDetected;
@@ -2138,27 +1338,22 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
-
-#ifdef ARC_ANNOTATIONS
- if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
-#endif
+ DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n"
+ << BBStates[BB] << "\n\n");
CheckForCFGHazards(BB, BBStates, MyStates);
+ DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
@@ -2244,11 +1439,10 @@ ComputePostOrders(Function &F,
}
// Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+bool ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
// Use reverse-postorder traversals, because we magically know that loops
// will be well behaved, i.e. they won't repeatedly call retain on a single
@@ -2258,7 +1452,7 @@ ObjCARCOpt::Visit(Function &F,
SmallVector<BasicBlock *, 16> PostOrder;
SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
+ MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
BBStates);
// Use reverse-postorder on the reverse CFG for bottom-up.
@@ -2279,10 +1473,9 @@ ObjCARCOpt::Visit(Function &F,
}
/// Move the calls in RetainsToMove and ReleasesToMove.
-void ObjCARCOpt::MoveCalls(Value *Arg,
- RRInfo &RetainsToMove,
+void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
SmallVectorImpl<Instruction *> &DeadInsts,
Module *M) {
@@ -2295,7 +1488,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -2306,11 +1499,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
Call->setDoesNotThrow();
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
@@ -2333,20 +1526,15 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
}
-bool
-ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated) {
+bool ObjCARCOpt::PairUpRetainsAndReleases(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements unless
// we are dealing with a retainable object with multiple provenance sources.
@@ -2367,15 +1555,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
Instruction *NewRetain = *NI;
- MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ auto It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
MultipleOwners =
- MultipleOwners || MultiOwnersSet.count(GetObjCArg(NewRetain));
+ MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain));
for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
- DenseMap<Value *, RRInfo>::const_iterator Jt =
- Releases.find(NewRetainRelease);
+ auto Jt = Releases.find(NewRetainRelease);
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
@@ -2444,15 +1631,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
Instruction *NewRelease = *NI;
- DenseMap<Value *, RRInfo>::const_iterator It =
- Releases.find(NewRelease);
+ auto It = Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
- MapVector<Value *, RRInfo>::const_iterator Jt =
- Retains.find(NewReleaseRetain);
+ auto Jt = Retains.find(NewReleaseRetain);
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
@@ -2504,11 +1689,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented in 1 direction and we do not have
- // MultipleOwners, we can safely remove the retain/releases. Otherwise we need
- // to be known safe in both directions.
- bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
- ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ // We can only remove pointers if we are known safe in both directions.
+ bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
@@ -2538,12 +1720,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
-#ifdef ARC_ANNOTATIONS
- // Do not move calls if ARC annotations are requested.
- if (EnableARCAnnotations)
- return false;
-#endif // ARC_ANNOTATIONS
-
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2556,12 +1732,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
/// Identify pairings between the retains and releases, and delete and/or move
/// them.
-bool
-ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M) {
+bool ObjCARCOpt::PerformCodePlacement(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M) {
DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
bool AnyPairsCompletelyEliminated = false;
@@ -2572,8 +1746,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
// Visit each retain.
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ++I) {
+ for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end();
+ I != E; ++I) {
Value *V = I->first;
if (!V) continue; // blotted
@@ -2581,7 +1756,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
- Value *Arg = GetObjCArg(Retain);
+ Value *Arg = GetArgRCIdentityRoot(Retain);
// If the object being released is in static or stack storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
@@ -2593,18 +1768,17 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(
- StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ GetRCIdentityRoot(LI->getPointerOperand())))
if (GV->isConstant())
KnownSafe = true;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
NewRetains.push_back(Retain);
- bool PerformMoveCalls =
- ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
- NewReleases, DeadInsts, RetainsToMove,
- ReleasesToMove, Arg, KnownSafe,
- AnyPairsCompletelyEliminated);
+ bool PerformMoveCalls = PairUpRetainsAndReleases(
+ BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts,
+ RetainsToMove, ReleasesToMove, Arg, KnownSafe,
+ AnyPairsCompletelyEliminated);
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
@@ -2640,12 +1814,13 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
+ if (Class != ARCInstKind::LoadWeak &&
+ Class != ARCInstKind::LoadWeakRetained)
continue;
// Delete objc_loadWeak calls with no users.
- if (Class == IC_LoadWeak && Inst->use_empty()) {
+ if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
Inst->eraseFromParent();
continue;
}
@@ -2660,10 +1835,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
J = Current.getInstructionIterator();
J != B; --J) {
Instruction *EarlierInst = &*std::prev(J);
- InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
switch (EarlierClass) {
- case IC_LoadWeak:
- case IC_LoadWeakRetained: {
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::LoadWeakRetained: {
// If this is loading from the same pointer, replace this load's value
// with that one.
CallInst *Call = cast<CallInst>(Inst);
@@ -2674,8 +1849,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
case AliasAnalysis::MustAlias:
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ if (Class == ARCInstKind::LoadWeakRetained) {
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2691,8 +1866,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
break;
}
- case IC_StoreWeak:
- case IC_InitWeak: {
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak: {
// If this is storing to the same pointer and has the same size etc.
// replace this load's value with the stored value.
CallInst *Call = cast<CallInst>(Inst);
@@ -2703,8 +1878,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
case AliasAnalysis::MustAlias:
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ if (Class == ARCInstKind::LoadWeakRetained) {
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2720,14 +1895,14 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
break;
}
- case IC_MoveWeak:
- case IC_CopyWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
// TODO: Grab the copied value.
goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_IntrinsicUser:
- case IC_User:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
// Weak pointers are only modified through the weak entry points
// (and arbitrary calls, which could call the weak entry points).
break;
@@ -2743,8 +1918,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// the alloca and all its users can be zapped.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
+ if (Class != ARCInstKind::DestroyWeak)
continue;
CallInst *Call = cast<CallInst>(Inst);
@@ -2752,10 +1927,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
for (User *U : Alloca->users()) {
const Instruction *UserInst = cast<Instruction>(U);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
+ switch (GetBasicARCInstKind(UserInst)) {
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::DestroyWeak:
continue;
default:
goto done;
@@ -2764,13 +1939,13 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
+ switch (GetBasicARCInstKind(UserInst)) {
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::StoreWeak:
// These functions return their second argument.
UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
break;
- case IC_DestroyWeak:
+ case ARCInstKind::DestroyWeak:
// No return value.
break;
default:
@@ -2792,7 +1967,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
// map stays valid when we get around to rewriting code and calls get
// replaced by arguments.
DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
+ BlotMapVector<Value *, RRInfo> Retains;
// This is used during the traversal of the function to track the
// states for each identified object at each block.
@@ -2825,16 +2000,15 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
if (DepInsts.size() != 1)
return false;
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that the pointer is the return value of the call.
if (!Call || Arg != Call)
return false;
// Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
+ ARCInstKind Class = GetBasicARCInstKind(Call);
+ if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call)
return false;
return true;
@@ -2854,13 +2028,11 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg) {
+ if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
+ GetArgRCIdentityRoot(Retain) != Arg) {
return nullptr;
}
@@ -2881,14 +2053,13 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
return nullptr;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+ ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
if (!IsAutorelease(AutoreleaseClass))
return nullptr;
- if (GetObjCArg(Autorelease) != Arg)
+ if (GetArgRCIdentityRoot(Autorelease) != Arg)
return nullptr;
return Autorelease;
@@ -2919,7 +2090,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!Ret)
continue;
- const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
@@ -2974,13 +2145,13 @@ ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- switch (GetBasicInstructionClass(Inst)) {
+ switch (GetBasicARCInstKind(Inst)) {
default:
break;
- case IC_Retain:
+ case ARCInstKind::Retain:
++NumRetains;
break;
- case IC_Release:
+ case ARCInstKind::Release:
++NumReleases;
break;
}
@@ -2997,28 +2168,13 @@ bool ObjCARCOpt::doInitialization(Module &M) {
if (!Run)
return false;
- // Identify the imprecise release metadata kind.
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-#ifdef ARC_ANNOTATIONS
- ARCAnnotationBottomUpMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
- ARCAnnotationTopDownMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.topdown");
- ARCAnnotationProvenanceSourceMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
-#endif // ARC_ANNOTATIONS
-
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers which can have arbitrary side effects.
+ MDKindCache.init(&M);
// Initialize our runtime entry point cache.
- EP.Initialize(&M);
+ EP.init(&M);
return false;
}
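The doInitialization rewrite above replaces the individual *MDKind fields with a single per-module cache of metadata-kind IDs, initialized once via MDKindCache.init(&M). A toy sketch of the shape of such a cache, assuming illustrative names throughout (the real ARCMDKindCache lives in the ObjCARC headers and resolves IDs through LLVMContext):

#include <array>
#include <cstdio>
#include <map>
#include <string>

enum class MDKindID : unsigned { ImpreciseRelease, CopyOnEscape, Size };

class MDKindCache {
  std::array<unsigned, unsigned(MDKindID::Size)> IDs{};

public:
  // Stand-in for init(&M): resolve each kind string to an ID exactly once.
  void init(const std::map<std::string, unsigned> &Ctx) {
    IDs[unsigned(MDKindID::ImpreciseRelease)] =
        Ctx.at("clang.imprecise_release");
    IDs[unsigned(MDKindID::CopyOnEscape)] = Ctx.at("clang.arc.copy_on_escape");
  }
  unsigned get(MDKindID K) const { return IDs[unsigned(K)]; }
};

int main() {
  // A toy "context" mapping kind names to IDs, as LLVMContext would.
  std::map<std::string, unsigned> Ctx = {{"clang.imprecise_release", 7},
                                         {"clang.arc.copy_on_escape", 8}};
  MDKindCache Cache;
  Cache.init(Ctx);
  std::printf("%u\n", Cache.get(MDKindID::ImpreciseRelease)); // prints: 7
  return 0;
}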
@@ -3052,27 +2208,27 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
OptimizeIndividualCalls(F);
// Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
+ (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
+ (1 << unsigned(ARCInstKind::StoreWeak)) |
+ (1 << unsigned(ARCInstKind::InitWeak)) |
+ (1 << unsigned(ARCInstKind::CopyWeak)) |
+ (1 << unsigned(ARCInstKind::MoveWeak)) |
+ (1 << unsigned(ARCInstKind::DestroyWeak))))
OptimizeWeakCalls(F);
// Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
+ (1 << unsigned(ARCInstKind::RetainRV)) |
+ (1 << unsigned(ARCInstKind::RetainBlock))))
+ if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
// Run OptimizeSequences until it either stops making changes or
// no retain+release pair nesting is detected.
while (OptimizeSequences(F)) {}
// Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
+ (1 << unsigned(ARCInstKind::AutoreleaseRV))))
OptimizeReturns(F);
// Gather statistics after optimization.
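Two idioms in runOnFunction deserve a closer look: the UsedInThisFunction bitmask, which now needs an explicit unsigned(...) cast because ARCInstKind is a scoped enum with no implicit integer conversion, and the `while (OptimizeSequences(F)) {}` loop, which iterates to a fixed point because eliminating one retain/release pair can expose a nested one. A standalone sketch of both, under the assumption that the enumerators shown are a stand-in for the real ARCInstKind set:

#include <cstdio>
#include <vector>

enum class Kind : unsigned { Retain, Release, Autorelease };

// One simplification round over a toy "function"; returns true if anything
// changed, mirroring OptimizeSequences. +1/-1 model a retain/release pair.
static bool SimplifyOnce(std::vector<int> &Ops) {
  for (size_t i = 0; i + 1 < Ops.size(); ++i)
    if (Ops[i] == 1 && Ops[i + 1] == -1) {
      Ops.erase(Ops.begin() + i, Ops.begin() + i + 2);
      return true;
    }
  return false;
}

int main() {
  unsigned UsedInThisFunction = 0;
  UsedInThisFunction |= 1u << unsigned(Kind::Retain);  // scoped enum: cast
  UsedInThisFunction |= 1u << unsigned(Kind::Release); // before shifting

  std::vector<int> Ops = {1, 1, -1, -1}; // nested retain/release pairs

  // Gate the pairing optimization on the bitmask, then run to a fixed point:
  // removing the inner pair exposes the outer one on the next round.
  if (UsedInThisFunction & (1u << unsigned(Kind::Release)))
    while (SimplifyOnce(Ops)) {
    }

  std::printf("%zu ops left\n", Ops.size()); // prints: 0 ops left
  return 0;
}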
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
deleted file mode 100644
index 53c077e..0000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===- ObjCARCUtil.cpp - ObjC ARC Optimization ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file defines several utility functions used by various ARC
-/// optimizations which are IMHO too big to be in a header file.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ObjCARC.h"
-#include "llvm/IR/Intrinsics.h"
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
- const InstructionClass Class) {
- switch (Class) {
- case IC_Retain:
- return OS << "IC_Retain";
- case IC_RetainRV:
- return OS << "IC_RetainRV";
- case IC_RetainBlock:
- return OS << "IC_RetainBlock";
- case IC_Release:
- return OS << "IC_Release";
- case IC_Autorelease:
- return OS << "IC_Autorelease";
- case IC_AutoreleaseRV:
- return OS << "IC_AutoreleaseRV";
- case IC_AutoreleasepoolPush:
- return OS << "IC_AutoreleasepoolPush";
- case IC_AutoreleasepoolPop:
- return OS << "IC_AutoreleasepoolPop";
- case IC_NoopCast:
- return OS << "IC_NoopCast";
- case IC_FusedRetainAutorelease:
- return OS << "IC_FusedRetainAutorelease";
- case IC_FusedRetainAutoreleaseRV:
- return OS << "IC_FusedRetainAutoreleaseRV";
- case IC_LoadWeakRetained:
- return OS << "IC_LoadWeakRetained";
- case IC_StoreWeak:
- return OS << "IC_StoreWeak";
- case IC_InitWeak:
- return OS << "IC_InitWeak";
- case IC_LoadWeak:
- return OS << "IC_LoadWeak";
- case IC_MoveWeak:
- return OS << "IC_MoveWeak";
- case IC_CopyWeak:
- return OS << "IC_CopyWeak";
- case IC_DestroyWeak:
- return OS << "IC_DestroyWeak";
- case IC_StoreStrong:
- return OS << "IC_StoreStrong";
- case IC_CallOrUser:
- return OS << "IC_CallOrUser";
- case IC_Call:
- return OS << "IC_Call";
- case IC_User:
- return OS << "IC_User";
- case IC_IntrinsicUser:
- return OS << "IC_IntrinsicUser";
- case IC_None:
- return OS << "IC_None";
- }
- llvm_unreachable("Unknown instruction class!");
-}
-
-InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
- .Case("clang.arc.use", IC_IntrinsicUser)
- .Default(IC_CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_retain", IC_Retain)
- .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
- .Case("objc_retainBlock", IC_RetainBlock)
- .Case("objc_release", IC_Release)
- .Case("objc_autorelease", IC_Autorelease)
- .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
- .Case("objc_retainedObject", IC_NoopCast)
- .Case("objc_unretainedObject", IC_NoopCast)
- .Case("objc_unretainedPointer", IC_NoopCast)
- .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", IC_User)
- .Case("objc_sync_exit", IC_User)
- .Default(IC_CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
- .Case("objc_loadWeak", IC_LoadWeak)
- .Case("objc_destroyWeak", IC_DestroyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Two arguments, first is i8**.
- const Argument *A1 = AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_storeWeak", IC_StoreWeak)
- .Case("objc_initWeak", IC_InitWeak)
- .Case("objc_storeStrong", IC_StoreStrong)
- .Default(IC_CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_moveWeak", IC_MoveWeak)
- .Case("objc_copyWeak", IC_CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses which would
- // make the state of the pointers they are attempting to
- // elucidate to be incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
- .Case("llvm.arc.annotation.topdown.bbend", IC_None)
- .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
- .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
- .Default(IC_CallOrUser);
- }
-
- // Anything else.
- return IC_CallOrUser;
-}
-
-/// \brief Determine what kind of construct V is.
-InstructionClass
-llvm::objcarc::GetInstructionClass(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // Any instruction other than bitcast and gep with a pointer operand have a
- // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
- // to a subsequent use, rather than using it themselves, in this sense.
- // As a short cut, several other opcodes are known to have no pointer
- // operands of interest. And ret is never followed by a release, so it's
- // not interesting to examine.
- switch (I->getOpcode()) {
- case Instruction::Call: {
- const CallInst *CI = cast<CallInst>(I);
- // Check for calls to special functions.
- if (const Function *F = CI->getCalledFunction()) {
- InstructionClass Class = GetFunctionClass(F);
- if (Class != IC_CallOrUser)
- return Class;
-
- // None of the intrinsic functions do objc_release. For intrinsics, the
- // only question is whether or not they may be users.
- switch (F->getIntrinsicID()) {
- case Intrinsic::returnaddress: case Intrinsic::frameaddress:
- case Intrinsic::stacksave: case Intrinsic::stackrestore:
- case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
- case Intrinsic::objectsize: case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
- // Short cut: Some intrinsics obviously don't use ObjC pointers.
- return IC_None;
- default:
- break;
- }
- }
- return GetCallSiteClass(CI);
- }
- case Instruction::Invoke:
- return GetCallSiteClass(cast<InvokeInst>(I));
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::Select: case Instruction::PHI:
- case Instruction::Ret: case Instruction::Br:
- case Instruction::Switch: case Instruction::IndirectBr:
- case Instruction::Alloca: case Instruction::VAArg:
- case Instruction::Add: case Instruction::FAdd:
- case Instruction::Sub: case Instruction::FSub:
- case Instruction::Mul: case Instruction::FMul:
- case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
- case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
- case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
- case Instruction::And: case Instruction::Or: case Instruction::Xor:
- case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
- case Instruction::IntToPtr: case Instruction::FCmp:
- case Instruction::FPTrunc: case Instruction::FPExt:
- case Instruction::FPToUI: case Instruction::FPToSI:
- case Instruction::UIToFP: case Instruction::SIToFP:
- case Instruction::InsertElement: case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- break;
- case Instruction::ICmp:
- // Comparing a pointer with null, or any other constant, isn't an
- // interesting use, because we don't care what the pointer points to, or
- // about the values of any other dynamic reference-counted pointers.
- if (IsPotentialRetainableObjPtr(I->getOperand(1)))
- return IC_User;
- break;
- default:
- // For anything else, check all the operands.
- // Note that this includes both operands of a Store: while the first
- // operand isn't actually being dereferenced, it is being stored to
- // memory where we can no longer track who might read it and dereference
- // it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialRetainableObjPtr(*OI))
- return IC_User;
- }
- }
-
- // Otherwise, it's totally inert for ARC purposes.
- return IC_None;
-}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 410abfc..8346345 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -32,20 +32,22 @@ using namespace llvm::objcarc;
bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
if (const SelectInst *SB = dyn_cast<SelectInst>(B))
if (A->getCondition() == SB->getCondition())
- return related(A->getTrueValue(), SB->getTrueValue()) ||
- related(A->getFalseValue(), SB->getFalseValue());
+ return related(A->getTrueValue(), SB->getTrueValue(), DL) ||
+ related(A->getFalseValue(), SB->getFalseValue(), DL);
// Check both arms of the Select node individually.
- return related(A->getTrueValue(), B) ||
- related(A->getFalseValue(), B);
+ return related(A->getTrueValue(), B, DL) ||
+ related(A->getFalseValue(), B, DL);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are PHIs in the same block, we can do a more precise as well
// as efficient check: just check for relations between the values on
// corresponding edges.
@@ -53,16 +55,15 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
if (PNB->getParent() == A->getParent()) {
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
if (related(A->getIncomingValue(i),
- PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL))
return true;
return false;
}
// Check each unique source of the PHI node against B.
SmallPtrSet<const Value *, 4> UniqueSrc;
- for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
- const Value *PV1 = A->getIncomingValue(i);
- if (UniqueSrc.insert(PV1).second && related(PV1, B))
+ for (Value *PV1 : A->incoming_values()) {
+ if (UniqueSrc.insert(PV1).second && related(PV1, B, DL))
return true;
}
@@ -103,11 +104,11 @@ static bool IsStoredObjCPointer(const Value *P) {
return false;
}
-bool ProvenanceAnalysis::relatedCheck(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Skip past provenance pass-throughs.
- A = GetUnderlyingObjCPtr(A);
- B = GetUnderlyingObjCPtr(B);
+ A = GetUnderlyingObjCPtr(A, DL);
+ B = GetUnderlyingObjCPtr(B, DL);
// Quick check.
if (A == B)
@@ -159,8 +160,8 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A,
return true;
}
-bool ProvenanceAnalysis::related(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::related(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Begin by inserting a conservative value into the map. If the insertion
// fails, we have the answer already. If it succeeds, leave it there until we
// compute the real answer to guard against recursive queries.
@@ -170,7 +171,7 @@ bool ProvenanceAnalysis::related(const Value *A,
if (!Pair.second)
return Pair.first->second;
- bool Result = relatedCheck(A, B);
+ bool Result = relatedCheck(A, B, DL);
CachedResults[ValuePairTy(A, B)] = Result;
return Result;
}
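The caching pattern in related() is subtle: it seeds CachedResults with a conservative `true` before computing, so that if relatedCheck() recurses back into related() on the same pair (through Selects or PHIs), the in-flight query gets the safe answer instead of looping forever. A minimal sketch of that recursion-guarded memoization, with a toy predicate in place of relatedCheck (names are illustrative):

#include <iostream>
#include <map>
#include <utility>

static std::map<std::pair<int, int>, bool> Cache;

static bool Related(int A, int B) {
  // Seed the cache with the conservative answer ("related") first. A failed
  // insert means the pair is either already answered or is the query
  // currently in flight; both cases return the stored value.
  auto Pair = Cache.insert({{A, B}, true});
  if (!Pair.second)
    return Pair.first->second;

  bool Result = (B - A) < 2; // toy stand-in for relatedCheck(A, B, DL)
  Cache[{A, B}] = Result;    // overwrite the placeholder with the real answer
  return Result;
}

int main() {
  std::cout << Related(3, 4) << ' ' << Related(3, 9) << '\n'; // prints: 1 0
  return 0;
}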
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 7820468..0ac41d3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -30,6 +30,7 @@
namespace llvm {
class Value;
class AliasAnalysis;
+ class DataLayout;
class PHINode;
class SelectInst;
}
@@ -53,12 +54,12 @@ class ProvenanceAnalysis {
typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
CachedResultsTy CachedResults;
- bool relatedCheck(const Value *A, const Value *B);
+ bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL);
bool relatedSelect(const SelectInst *A, const Value *B);
bool relatedPHI(const PHINode *A, const Value *B);
- void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
- ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ void operator=(const ProvenanceAnalysis &) = delete;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) = delete;
public:
ProvenanceAnalysis() {}
@@ -67,7 +68,7 @@ public:
AliasAnalysis *getAA() const { return AA; }
- bool related(const Value *A, const Value *B);
+ bool related(const Value *A, const Value *B, const DataLayout &DL);
void clear() {
CachedResults.clear();
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index d836632..0be75af 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -65,6 +66,7 @@ bool PAEval::runOnFunction(Function &F) {
ProvenanceAnalysis PA;
PA.setAA(&getAnalysis<AliasAnalysis>());
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
StringRef NameV1 = getName(V1);
@@ -73,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) {
if (NameV1 >= NameV2)
continue;
errs() << NameV1 << " and " << NameV2;
- if (PA.related(V1, V2))
+ if (PA.related(V1, V2, DL))
errs() << " are related.\n";
else
errs() << " are not related.\n";
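The `NameV1 >= NameV2` guard in the loop above is a compact way to visit every unordered pair of values exactly once and to skip self-pairs, by imposing a total order on the names. A standalone sketch of the same pairing loop:

#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Values = {"v1", "v2", "v3"};

  // Visit each unordered pair once: the name ordering filters out both the
  // (X, X) self-pairs and the mirrored (Y, X) duplicates.
  for (const std::string &NameV1 : Values)
    for (const std::string &NameV2 : Values) {
      if (NameV1 >= NameV2)
        continue;
      std::cout << NameV1 << " and " << NameV2 << '\n'; // v1-v2, v1-v3, v2-v3
    }
  return 0;
}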
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
new file mode 100644
index 0000000..ae20e7e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -0,0 +1,404 @@
+//===--- PtrState.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PtrState.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+#define DEBUG_TYPE "objc-arc-ptr-state"
+
+//===----------------------------------------------------------------------===//
+// Utility
+//===----------------------------------------------------------------------===//
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, const Sequence S) {
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+}
+
+//===----------------------------------------------------------------------===//
+// Sequence
+//===----------------------------------------------------------------------===//
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B)
+ std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
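MergeSeqs is the heart of the per-pointer dataflow merge, so a standalone spot-check helps make the lattice concrete. The rig below copies the function together with an enum whose order is consistent with its comparisons; PtrState.h is the authority for the real Sequence declaration, and this mirror is only for illustration:

#include <algorithm>
#include <iostream>

enum Sequence { S_None, S_Retain, S_CanRelease, S_Use, S_Stop, S_Release,
                S_MovableRelease };

static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
  if (A == B)
    return A;
  if (A == S_None || B == S_None)
    return S_None;
  if (A > B)
    std::swap(A, B); // canonicalize so A is the earlier state
  if (TopDown) {
    if ((A == S_Retain || A == S_CanRelease) &&
        (B == S_CanRelease || B == S_Use))
      return B;
  } else {
    if ((A == S_Use || A == S_CanRelease) &&
        (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
      return A;
    if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
      return A;
    if (A == S_Release && B == S_MovableRelease)
      return A;
  }
  return S_None;
}

int main() {
  // Top-down, the state further along the sequence wins.
  std::cout << (MergeSeqs(S_Retain, S_Use, true) == S_Use) << '\n';          // 1
  // Bottom-up, a use merged with any flavor of release stays a use.
  std::cout << (MergeSeqs(S_Use, S_MovableRelease, false) == S_Use) << '\n'; // 1
  // Pairs with no merge rule collapse conservatively to S_None.
  std::cout << (MergeSeqs(S_Retain, S_Release, true) == S_None) << '\n';     // 1
  return 0;
}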
+
+//===----------------------------------------------------------------------===//
+// RRInfo
+//===----------------------------------------------------------------------===//
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = nullptr;
+ Calls.clear();
+ ReverseInsertPts.clear();
+ CFGHazardAfflicted = false;
+}
+
+bool RRInfo::Merge(const RRInfo &Other) {
+ // Conservatively merge the ReleaseMetadata information.
+ if (ReleaseMetadata != Other.ReleaseMetadata)
+ ReleaseMetadata = nullptr;
+
+ // Conservatively merge the boolean state.
+ KnownSafe &= Other.KnownSafe;
+ IsTailCallRelease &= Other.IsTailCallRelease;
+ CFGHazardAfflicted |= Other.CFGHazardAfflicted;
+
+ // Merge the call sets.
+ Calls.insert(Other.Calls.begin(), Other.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
+ for (Instruction *Inst : Other.ReverseInsertPts)
+ Partial |= ReverseInsertPts.insert(Inst).second;
+ return Partial;
+}
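The partial-merge computation above is set union with change detection: the result is partial whenever either side holds an insert point the other lacked, and a partial merge later forces PtrState::Merge to drop the whole sequence rather than move a pair it only half understands. A small sketch of the same check, assuming plain std::set in place of the real SmallPtrSet:

#include <iostream>
#include <set>

int main() {
  std::set<int> Mine = {10, 20};
  std::set<int> Theirs = {20, 30};

  // Mirrors RRInfo::Merge: a size mismatch catches points only we had;
  // a successful insert catches points only the other side had.
  bool Partial = Mine.size() != Theirs.size();
  for (int Pt : Theirs)
    Partial |= Mine.insert(Pt).second;

  std::cout << (Partial ? "partial" : "exact") << " merge, " << Mine.size()
            << " insert points\n"; // prints: partial merge, 3 insert points
  return 0;
}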
+
+//===----------------------------------------------------------------------===//
+// PtrState
+//===----------------------------------------------------------------------===//
+
+void PtrState::SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Setting Known Positive.\n");
+ KnownPositiveRefCount = true;
+}
+
+void PtrState::ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Clearing Known Positive.\n");
+ KnownPositiveRefCount = false;
+}
+
+void PtrState::SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << " Old: " << GetSeq() << "; New: " << NewSeq << "\n");
+ Seq = NewSeq;
+}
+
+void PtrState::ResetSequenceProgress(Sequence NewSeq) {
+ DEBUG(dbgs() << " Resetting sequence progress.\n");
+ SetSeq(NewSeq);
+ Partial = false;
+ RRI.clear();
+}
+
+void PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
+ KnownPositiveRefCount &= Other.KnownPositiveRefCount;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+    // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
+    // point, we know that we are not currently partial. Stash whether or not
+ // the merge operation caused us to undergo a partial merging of reverse
+ // insertion points.
+ Partial = RRI.Merge(Other.RRI);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BottomUpPtrState
+//===----------------------------------------------------------------------===//
+
+bool BottomUpPtrState::InitBottomUp(ARCMDKindCache &Cache, Instruction *I) {
+  // If we see two releases in a row on the same pointer, make a note, and
+  // we'll circle back to revisit it after we've hopefully eliminated the
+  // second release, which may allow us to eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ bool NestingDetected = false;
+ if (GetSeq() == S_Release || GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << " Found nested releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata =
+ I->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ResetSequenceProgress(NewSeq);
+ SetReleaseMetadata(ReleaseMetadata);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ SetTailCallRelease(cast<CallInst>(I)->isTailCall());
+ InsertCall(I);
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
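+
+// For illustration (informal): when the bottom-up walk reaches the first of
+//
+//   call void @objc_release(i8* %p)
+//   call void @objc_release(i8* %p)
+//
+// it is already in S_Release for %p from the second call, so InitBottomUp
+// reports NestingDetected and the pass revisits after a removal.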
+
+bool BottomUpPtrState::MatchWithRetain() {
+ SetKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+ switch (OldSeq) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+    // If OldSeq is not S_Use, or it is S_Use but we are tracking an
+    // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || IsTrackingImpreciseReleases())
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_CanRelease:
+ return true;
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ Sequence S = GetSeq();
+
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; " << *Ptr
+ << "\n");
+ switch (S) {
+ case S_Use:
+ SetSeq(S_CanRelease);
+ return true;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ assert(!HasReverseInsertPts());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ SetSeq(S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ // Non-movable releases depend on any possible objc pointer use.
+ SetSeq(S_Stop);
+ assert(!HasReverseInsertPts());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " PreciseStopUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ SetSeq(S_Use);
+ }
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// TopDownPtrState
+//===----------------------------------------------------------------------===//
+
+bool TopDownPtrState::InitTopDown(ARCInstKind Kind, Instruction *I) {
+ bool NestingDetected = false;
+  // Don't do retain+release tracking for ARCInstKind::RetainRV, because it's
+  // better to let it remain as the first instruction after a call.
+ if (Kind != ARCInstKind::RetainRV) {
+    // If we see two retains in a row on the same pointer, make a note, and
+    // we'll circle back to revisit it after we've hopefully eliminated the
+    // second retain, which may allow us to eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ ResetSequenceProgress(S_Retain);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ InsertCall(I);
+ }
+
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
+
+bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache,
+ Instruction *Release) {
+ ClearKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+
+ MDNode *ReleaseMetadata =
+ Release->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+
+ switch (OldSeq) {
+ case S_Retain:
+ case S_CanRelease:
+ if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_Use:
+ SetReleaseMetadata(ReleaseMetadata);
+ SetTailCallRelease(cast<CallInst>(Release)->isTailCall());
+ return true;
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in bottom up state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ ClearKnownPositiveRefCount();
+ switch (GetSeq()) {
+ case S_Retain:
+ SetSeq(S_CanRelease);
+ assert(!HasReverseInsertPts());
+ InsertReverseInsertPt(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ return true;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ llvm_unreachable("covered switch is not covered!?");
+}
+
+void TopDownPtrState::HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_CanRelease:
+ if (!CanUse(Inst, Ptr, PA, Class))
+ return;
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ SetSeq(S_Use);
+ return;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ return;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
new file mode 100644
index 0000000..e45e1ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -0,0 +1,210 @@
+//===--- PtrState.h - ARC State for a Ptr -------------------*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations for the ARC state associated with a ptr. It
+// is only used by the ARC Sequence Dataflow computation. By separating this
+// from the actual dataflow, it is easier to consider the mechanics of the ARC
+// optimization separately from the actual predicates being used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+
+#include "ARCInstKind.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace objcarc {
+
+class ARCMDKindCache;
+class ProvenanceAnalysis;
+
+/// \enum Sequence
+///
+/// \brief A sequence of states that a pointer may go through in which an
+/// objc_retain and objc_release are actually needed.
+enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+};
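+
+/// For illustration (informal): in a region such as
+///
+///   %x = call i8* @objc_retain(i8* %p)
+///   call void @use(i8* %x)
+///   call void @objc_release(i8* %x)
+///
+/// the top-down walk starts the pointer in S_Retain at the retain, while the
+/// bottom-up walk starts it in S_Release (or S_MovableRelease, given
+/// !clang.imprecise_release metadata) at the release; where the two walks
+/// meet compatibly, the retain/release pair is a removal candidate.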
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+
+/// \brief Unidirectional information about either a
+/// retain-decrement-use-release sequence or release-use-decrement-retain
+/// reverse sequence.
+struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
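+  ///
+  /// For illustration (informal): a retain that executes while the count is
+  /// already known positive, matched by a release before anything can drop
+  /// the count to zero, meets the first condition and may be removed along
+  /// with its release even when side effects occur in between.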
+ bool KnownSafe;
+
+  /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ /// If this is true, we cannot perform code motion but can still remove
+ /// retain/release pairs.
+ bool CFGHazardAfflicted;
+
+ RRInfo()
+ : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
+ CFGHazardAfflicted(false) {}
+
+ void clear();
+
+ /// Conservatively merge the two RRInfo. Returns true if a partial merge has
+ /// occurred, false otherwise.
+ bool Merge(const RRInfo &Other);
+};
+
+/// \brief This class summarizes several per-pointer runtime properties which
+/// are propagated through the flow graph.
+class PtrState {
+protected:
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ unsigned char Seq : 8;
+
+ /// Unidirectional information about the current sequence.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {}
+
+public:
+ bool IsKnownSafe() const { return RRI.KnownSafe; }
+
+ void SetKnownSafe(const bool NewValue) { RRI.KnownSafe = NewValue; }
+
+ bool IsTailCallRelease() const { return RRI.IsTailCallRelease; }
+
+ void SetTailCallRelease(const bool NewValue) {
+ RRI.IsTailCallRelease = NewValue;
+ }
+
+ bool IsTrackingImpreciseReleases() const {
+ return RRI.ReleaseMetadata != nullptr;
+ }
+
+ const MDNode *GetReleaseMetadata() const { return RRI.ReleaseMetadata; }
+
+ void SetReleaseMetadata(MDNode *NewValue) { RRI.ReleaseMetadata = NewValue; }
+
+ bool IsCFGHazardAfflicted() const { return RRI.CFGHazardAfflicted; }
+
+ void SetCFGHazardAfflicted(const bool NewValue) {
+ RRI.CFGHazardAfflicted = NewValue;
+ }
+
+ void SetKnownPositiveRefCount();
+ void ClearKnownPositiveRefCount();
+
+ bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; }
+
+ void SetSeq(Sequence NewSeq);
+
+ Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
+
+ void ClearSequenceProgress() { ResetSequenceProgress(S_None); }
+
+ void ResetSequenceProgress(Sequence NewSeq);
+ void Merge(const PtrState &Other, bool TopDown);
+
+ void InsertCall(Instruction *I) { RRI.Calls.insert(I); }
+
+ void InsertReverseInsertPt(Instruction *I) { RRI.ReverseInsertPts.insert(I); }
+
+ void ClearReverseInsertPts() { RRI.ReverseInsertPts.clear(); }
+
+ bool HasReverseInsertPts() const { return !RRI.ReverseInsertPts.empty(); }
+
+ const RRInfo &GetRRInfo() const { return RRI; }
+};
+
+struct BottomUpPtrState : PtrState {
+ BottomUpPtrState() : PtrState() {}
+
+ /// (Re-)Initialize this bottom up pointer returning true if we detected a
+ /// pointer with nested releases.
+ bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
+
+  /// Return true if this set of releases can be paired with a retain.
+  /// Modifies state appropriately to reflect that the matching occurred if
+  /// it is successful.
+ ///
+ /// It is assumed that one has already checked that the RCIdentity of the
+ /// retain and the RCIdentity of this ptr state are the same.
+ bool MatchWithRetain();
+
+ void HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+struct TopDownPtrState : PtrState {
+ TopDownPtrState() : PtrState() {}
+
+  /// (Re-)Initialize this top down pointer returning true if we detected a
+  /// pointer with nested retains.
+ bool InitTopDown(ARCInstKind Kind, Instruction *I);
+
+ /// Return true if this set of retains can be paired with the given
+ /// release. Modifies state appropriately to reflect that the matching
+  /// occurred.
+ bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release);
+
+ void HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index 3d91984..d6fc916 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -32,19 +32,18 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
- struct ADCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {
- initializeADCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
+struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- }
+ bool runOnFunction(Function& F) override;
- };
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ }
+};
}
char ADCE::ID = 0;
@@ -54,46 +53,45 @@ bool ADCE::runOnFunction(Function& F) {
if (skipOptnoneFunction(F))
return false;
- SmallPtrSet<Instruction*, 128> alive;
- SmallVector<Instruction*, 128> worklist;
+ SmallPtrSet<Instruction*, 128> Alive;
+ SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (isa<TerminatorInst>(I.getInstructionIterator()) ||
- isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
- isa<LandingPadInst>(I.getInstructionIterator()) ||
- I->mayHaveSideEffects()) {
- alive.insert(I.getInstructionIterator());
- worklist.push_back(I.getInstructionIterator());
+ for (Instruction &I : inst_range(F)) {
+ if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ isa<LandingPadInst>(I) || I.mayHaveSideEffects()) {
+ Alive.insert(&I);
+ Worklist.push_back(&I);
}
+ }
// Propagate liveness backwards to operands.
- while (!worklist.empty()) {
- Instruction* curr = worklist.pop_back_val();
- for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
- OI != OE; ++OI)
- if (Instruction* Inst = dyn_cast<Instruction>(OI))
- if (alive.insert(Inst).second)
- worklist.push_back(Inst);
+ while (!Worklist.empty()) {
+ Instruction *Curr = Worklist.pop_back_val();
+ for (Use &OI : Curr->operands()) {
+ if (Instruction *Inst = dyn_cast<Instruction>(OI))
+ if (Alive.insert(Inst).second)
+ Worklist.push_back(Inst);
+ }
}
// The inverse of the live set is the dead set. These are those instructions
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
- // NOTE: We reuse the worklist vector here for memory efficiency.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (!alive.count(I.getInstructionIterator())) {
- worklist.push_back(I.getInstructionIterator());
- I->dropAllReferences();
+ // NOTE: We reuse the Worklist vector here for memory efficiency.
+ for (Instruction &I : inst_range(F)) {
+ if (!Alive.count(&I)) {
+ Worklist.push_back(&I);
+ I.dropAllReferences();
}
+ }
- for (SmallVectorImpl<Instruction *>::iterator I = worklist.begin(),
- E = worklist.end(); I != E; ++I) {
+ for (Instruction *&I : Worklist) {
++NumRemoved;
- (*I)->eraseFromParent();
+ I->eraseFromParent();
}
- return !worklist.empty();
+ return !Worklist.empty();
}
FunctionPass *llvm::createAggressiveDCEPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index f48cefa..8918909 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -23,15 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -50,15 +50,15 @@ struct AlignmentFromAssumptions : public FunctionPass {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
}
@@ -71,7 +71,6 @@ struct AlignmentFromAssumptions : public FunctionPass {
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
const SCEV *&OffSCEV);
@@ -123,7 +122,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
// If the displacement is not an exact multiple, but the remainder is a
// constant, then return this remainder (but only if it is a power of 2).
- uint64_t DiffUnitsAbs = abs64(DiffUnits);
+ uint64_t DiffUnitsAbs = std::abs(DiffUnits);
if (isPowerOf2_64(DiffUnitsAbs))
return (unsigned) DiffUnitsAbs;
}
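+    // For illustration (informal): with a known 16-byte alignment and a
+    // displacement of 20, the remainder is 4, a power of two, so a 4-byte
+    // alignment can still be concluded for the displaced pointer.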
@@ -316,7 +315,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
- if (isValidAssumeForContext(ACall, K, DL, DT))
+ if (isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
@@ -400,7 +399,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
+ if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
}
@@ -413,8 +412,6 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
NewDestAlignments.clear();
NewSrcAlignments.clear();
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
new file mode 100644
index 0000000..09c605e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -0,0 +1,410 @@
+//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Bit-Tracking Dead Code Elimination pass. Some
+// instructions (shifts, some ands, ors, etc.) kill some of their input bits.
+// We track these dead bits and remove instructions that compute only these
+// dead bits.
+//
+//===----------------------------------------------------------------------===//
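+
+// For illustration (informal): given
+//
+//   %s = shl i32 %y, 8
+//   %t = trunc i32 %s to i8
+//
+// the trunc demands only the low eight bits of %s, which the shift fills
+// with zeros, so no bit of %y is demanded; if %s is %y's only user, the
+// instruction defining %y computes only dead bits and can be simplified
+// away.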
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bdce"
+
+STATISTIC(NumRemoved, "Number of instructions removed (unused)");
+STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+
+namespace {
+struct BDCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BDCE() : FunctionPass(ID) {
+ initializeBDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override;
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+
+ void determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2);
+
+ AssumptionCache *AC;
+ DominatorTree *DT;
+};
+}
+
+char BDCE::ID = 0;
+INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ isa<LandingPadInst>(I) || I->mayHaveSideEffects();
+}
+
+void BDCE::determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ }
+}
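+
+// For illustration (informal): for %a = add i32 %x, %y where only the low
+// four bits of %a are alive (AOut = 0xf), AOut.getActiveBits() == 4, so the
+// Add case above demands just the low four bits of each operand; carries
+// ripple only leftward, so higher input bits cannot affect the live output
+// bits.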
+
+bool BDCE::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ DenseMap<Instruction *, APInt> AliveBits;
+ SmallVector<Instruction*, 128> Worklist;
+
+ // The set of visited instructions (non-integer-typed only).
+ SmallPtrSet<Instruction*, 128> Visited;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : inst_range(F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
+ // For integer-valued instructions, set up an initial empty set of alive
+ // bits and add the instruction to the work list. For other instructions
+  // add their operands to the work list (for integer-valued operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (!AliveBits.count(&I)) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ }
+
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+    // Compute the set of alive bits for each operand. These are ORed into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+        // If all bits of the output are dead (and the user need not remain
+        // live), then all bits of this input are dead as well.
+        if (UserI->getType()->isIntegerTy() && !AOut &&
+            !isAlwaysLive(UserI)) {
+          AB = APInt(BitWidth, 0);
+        } else {
+          // Bits of each operand that are used to compute alive bits of the
+ // output are alive, all others are dead.
+ determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+ KnownZero, KnownOne,
+ KnownZero2, KnownOne2);
+ }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+
+ bool Changed = false;
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the Worklist vector here for memory efficiency.
+ for (Instruction &I : inst_range(F)) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+  // remain live (because they have side effects, etc.), we can remove them.
+ if (I.getType()->isIntegerTy()) {
+ auto ABI = AliveBits.find(&I);
+ if (ABI != AliveBits.end()) {
+ if (ABI->second.getBoolValue())
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
+ }
+ } else if (Visited.count(&I)) {
+ continue;
+ }
+
+ if (isAlwaysLive(&I))
+ continue;
+
+ Worklist.push_back(&I);
+ I.dropAllReferences();
+ Changed = true;
+ }
+
+ for (Instruction *&I : Worklist) {
+ ++NumRemoved;
+ I->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createBitTrackingDCEPass() {
+ return new BDCE();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 27c177a..4288742 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <tuple>
using namespace llvm;
@@ -131,14 +132,14 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
/// \brief Initialize the pass.
void setup(Function &Fn) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
Entry = &Fn.getEntryBlock();
}
@@ -176,7 +177,7 @@ char ConstantHoisting::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoisting, "consthoist", "Constant Hoisting",
false, false)
@@ -186,6 +187,9 @@ FunctionPass *llvm::createConstantHoistingPass() {
/// \brief Perform the constant hoisting optimization for the given function.
bool ConstantHoisting::runOnFunction(Function &Fn) {
+ if (skipOptnoneFunction(Fn))
+ return false;
+
DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index dd51ce1..c974ebb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -22,11 +22,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include <set>
using namespace llvm;
@@ -45,7 +44,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
}
@@ -53,7 +52,7 @@ namespace {
char ConstantPropagation::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
@@ -68,9 +67,9 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 5a3b5cf..d1302c6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -102,32 +103,52 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
- // Look if the incoming value is a select with a constant but LVI tells us
- // that the incoming value can never be that constant. In that case replace
- // the incoming value with the other value of the select. This often allows
- // us to remove the select later.
+    // Check whether the incoming value is a select with a scalar condition
+    // whose value LVI can determine on this edge. If so, replace the incoming
+    // value with the matching arm of the select. This often allows us to
+    // remove the select later.
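+    //
+    // For illustration (informal): if %v = select i1 %c, i32 1, i32 2 flows
+    // in from a block on whose edge LVI proves %c is true, the incoming
+    // value becomes 1 and the select may later become dead.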
if (!V) {
SelectInst *SI = dyn_cast<SelectInst>(Incoming);
if (!SI) continue;
- Constant *C = dyn_cast<Constant>(SI->getFalseValue());
- if (!C) continue;
+ Value *Condition = SI->getCondition();
+ if (!Condition->getType()->isVectorTy()) {
+ if (Constant *C = LVI->getConstantOnEdge(Condition, P->getIncomingBlock(i), BB, P)) {
+ if (C == ConstantInt::getTrue(Condition->getType())) {
+ V = SI->getTrueValue();
+ } else {
+ V = SI->getFalseValue();
+ }
+ // Once LVI learns to handle vector types, we could also add support
+ // for vector type constants that are not all zeroes or all ones.
+ }
+ }
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
- P->getIncomingBlock(i), BB, P) !=
- LazyValueInfo::False)
- continue;
+      // Check whether the select's false value is a constant that LVI proves
+      // the incoming value can never be. In that case replace the incoming
+ // value with the other value of the select. This often allows us to
+ // remove the select later.
+ if (!V) {
+ Constant *C = dyn_cast<Constant>(SI->getFalseValue());
+ if (!C) continue;
+
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
+ P->getIncomingBlock(i), BB, P) !=
+ LazyValueInfo::False)
+ continue;
+ V = SI->getTrueValue();
+ }
DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
- V = SI->getTrueValue();
}
P->setIncomingValue(i, V);
Changed = true;
}
- // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction.
- if (Value *V = SimplifyInstruction(P)) {
+ // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 99fac75..3b262a2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -42,7 +42,8 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB) override {
if (skipOptnoneFunction(BB))
return false;
- TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = DI++;
@@ -95,7 +96,8 @@ bool DCE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a1ddc00..01952cf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -33,7 +34,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -78,7 +79,8 @@ namespace {
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -166,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
return true;
}
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
if (TLI && TLI->has(LibFunc::strcpy) &&
F->getName() == TLI->getName(LibFunc::strcpy)) {
@@ -194,18 +196,12 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
- const DataLayout *DL = AA.getDataLayout();
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
- return AliasAnalysis::Location();
return Loc;
}
@@ -215,11 +211,6 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // init.trampoline, which writes more than an i8.
- if (!DL) return AliasAnalysis::Location();
-
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -271,7 +262,7 @@ static bool isRemovable(Instruction *I) {
}
}
- if (CallSite CS = I)
+ if (auto CS = CallSite(I))
return CS.getInstruction()->use_empty();
return false;
@@ -315,15 +306,16 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
}
- CallSite CS = I;
+ CallSite CS(I);
// All the supported functions so far happen to have dest as their first
// argument.
return CS.getArgument(0);
}
-static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
+static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo()))
+ if (getObjectSize(V, Size, DL, TLI))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -343,10 +335,9 @@ namespace {
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA,
- int64_t &EarlierOff,
- int64_t &LaterOff) {
- const DataLayout *DL = AA.getDataLayout();
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -367,7 +358,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
+ Earlier.Size == AliasAnalysis::UnknownSize)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -382,7 +373,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
return OverwriteUnknown;
// If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, AA);
+ uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
if (ObjectSize != AliasAnalysis::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
return OverwriteComplete;
@@ -560,8 +551,10 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+ OverwriteResult OR =
+ isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
+ DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -655,6 +648,7 @@ bool DSE::HandleFree(CallInst *F) {
AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
+ const DataLayout &DL = F->getModule()->getDataLayout();
while (!Blocks.empty()) {
BasicBlock *BB = Blocks.pop_back_val();
@@ -668,7 +662,7 @@ bool DSE::HandleFree(CallInst *F) {
break;
Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency));
+ GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
@@ -728,6 +722,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (AI->hasByValOrInAllocaAttr())
DeadStackObjects.insert(AI);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
@@ -736,7 +732,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
+ GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -784,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (CallSite CS = cast<Value>(BBI)) {
+ if (auto CS = CallSite(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(BBI, TLI))
@@ -799,8 +795,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+ AliasAnalysis::ModRefResult A = AA->getModRefInfo(
+ CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
});
@@ -835,7 +831,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Remove any allocas from the DeadPointer set that are loaded, as this
// makes any stores above the access live.
- RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);
// If all of the allocas were clobbered by the access then we're not going
// to find anything else to process.
@@ -850,8 +846,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects) {
- const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
@@ -867,7 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ AliasAnalysis::Location StackLoc(
+ I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 969b9a8..d536a93 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -26,7 +28,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
@@ -40,49 +43,44 @@ STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
-static unsigned getHash(const void *V) {
- return DenseMapInfo<const void*>::getHashValue(V);
-}
-
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//
namespace {
- /// SimpleValue - Instances of this struct represent available values in the
- /// scoped hash table.
- struct SimpleValue {
- Instruction *Inst;
+/// \brief Struct representing the available values in the scoped hash table.
+struct SimpleValue {
+ Instruction *Inst;
- SimpleValue(Instruction *I) : Inst(I) {
- assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
- }
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
- bool isSentinel() const {
- return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
- Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
- }
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
- static bool canHandle(Instruction *Inst) {
- // This can only handle non-void readnone functions.
- if (CallInst *CI = dyn_cast<CallInst>(Inst))
- return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
- return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
- isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
- isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
- isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
- isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
- }
- };
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+};
}
namespace llvm {
-template<> struct DenseMapInfo<SimpleValue> {
+template <> struct DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
- return DenseMapInfo<Instruction*>::getEmptyKey();
+ return DenseMapInfo<Instruction *>::getEmptyKey();
}
static inline SimpleValue getTombstoneKey() {
- return DenseMapInfo<Instruction*>::getTombstoneKey();
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
}
static unsigned getHashValue(SimpleValue Val);
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
@@ -92,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> {
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
- if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(Inst)) {
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
@@ -101,8 +99,9 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
if (isa<OverflowingBinaryOperator>(BinOp)) {
// Hash the overflow behavior
unsigned Overflow =
- BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
- BinOp->hasNoUnsignedWrap() * OverflowingBinaryOperator::NoUnsignedWrap;
+ BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
+ BinOp->hasNoUnsignedWrap() *
+ OverflowingBinaryOperator::NoUnsignedWrap;
return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
}
@@ -135,12 +134,13 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst)) && "Invalid/unknown instruction");
+ isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
// Mix in the opcode.
- return hash_combine(Inst->getOpcode(),
- hash_combine_range(Inst->value_op_begin(),
- Inst->value_op_end()));
+ return hash_combine(
+ Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
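
A standalone sketch of the hashing discipline above (assumes only llvm/ADT/Hashing.h; not part of the patch): hash_combine is order-sensitive, which is why the commutative case sorts the operand pair by pointer value before hashing, and why hash_combine_range can replace a hand-rolled shift/xor loop.

#include "llvm/ADT/Hashing.h"
#include <utility>

llvm::hash_code canonicalPairHash(void *A, void *B) {
  // hash_combine(A, B) and hash_combine(B, A) generally differ, so
  // canonicalize the pair first; commuted operands then hash identically.
  if (A > B)
    std::swap(A, B);
  return llvm::hash_combine(A, B);
}
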
bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
@@ -149,22 +149,24 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
if (LHS.isSentinel() || RHS.isSentinel())
return LHSI == RHSI;
- if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
- if (LHSI->isIdenticalTo(RHSI)) return true;
+ if (LHSI->getOpcode() != RHSI->getOpcode())
+ return false;
+ if (LHSI->isIdenticalTo(RHSI))
+ return true;
// If we're not strictly identical, we still might be a commutable instruction
if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
if (!LHSBinOp->isCommutative())
return false;
- assert(isa<BinaryOperator>(RHSI)
- && "same opcode, but different instruction type?");
+ assert(isa<BinaryOperator>(RHSI) &&
+ "same opcode, but different instruction type?");
BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
// Check overflow attributes
if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
- assert(isa<OverflowingBinaryOperator>(RHSBinOp)
- && "same opcode, but different operator type?");
+ assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
+ "same opcode, but different operator type?");
if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
return false;
@@ -172,16 +174,16 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
// Commuted equality
return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
- LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
+ LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
}
if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
- assert(isa<CmpInst>(RHSI)
- && "same opcode, but different instruction type?");
+ assert(isa<CmpInst>(RHSI) &&
+ "same opcode, but different instruction type?");
CmpInst *RHSCmp = cast<CmpInst>(RHSI);
// Commuted equality
return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
- LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
- LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
+ LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
+ LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
return false;
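
The swapped-predicate case can be pictured with a small self-contained model (a hypothetical Pred enum, not LLVM's CmpInst): "a < b" is the same comparison as "b > a", so equality holds when the operands are exchanged and the predicate is swapped to match.

enum Pred { SLT, SGT, SLE, SGE, EQ, NE };

static Pred swapped(Pred P) {
  switch (P) {
  case SLT: return SGT;
  case SGT: return SLT;
  case SLE: return SGE;
  case SGE: return SLE;
  default:  return P; // EQ and NE are symmetric under operand swap.
  }
}

struct Cmp { Pred P; const void *L, *R; };

// Mirrors the check above: identical outright, or operands exchanged with
// the predicate swapped to compensate.
static bool equalModuloCommute(const Cmp &A, const Cmp &B) {
  return (A.P == B.P && A.L == B.L && A.R == B.R) ||
         (swapped(A.P) == B.P && A.L == B.R && A.R == B.L);
}
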
@@ -192,57 +194,52 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
//===----------------------------------------------------------------------===//
namespace {
- /// CallValue - Instances of this struct represent available call values in
- /// the scoped hash table.
- struct CallValue {
- Instruction *Inst;
+/// \brief Struct representing the available call values in the scoped hash
+/// table.
+struct CallValue {
+ Instruction *Inst;
- CallValue(Instruction *I) : Inst(I) {
- assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
- }
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
- bool isSentinel() const {
- return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
- Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
- }
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
- static bool canHandle(Instruction *Inst) {
- // Don't value number anything that returns void.
- if (Inst->getType()->isVoidTy())
- return false;
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
- CallInst *CI = dyn_cast<CallInst>(Inst);
- if (!CI || !CI->onlyReadsMemory())
- return false;
- return true;
- }
- };
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (!CI || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+};
}
namespace llvm {
- template<> struct DenseMapInfo<CallValue> {
- static inline CallValue getEmptyKey() {
- return DenseMapInfo<Instruction*>::getEmptyKey();
- }
- static inline CallValue getTombstoneKey() {
- return DenseMapInfo<Instruction*>::getTombstoneKey();
- }
- static unsigned getHashValue(CallValue Val);
- static bool isEqual(CallValue LHS, CallValue RHS);
- };
+template <> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+};
}
+
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
- // Hash in all of the operands as pointers.
- unsigned Res = 0;
- for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
- assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
- "Cannot value number calls with metadata operands");
- Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
- }
-
- // Mix in the opcode.
- return (Res << 1) ^ Inst->getOpcode();
+ // Hash all of the operands as pointers and mix in the opcode.
+ return hash_combine(
+ Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
@@ -252,103 +249,104 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
return LHSI->isIdenticalTo(RHSI);
}
-
//===----------------------------------------------------------------------===//
-// EarlyCSE pass.
+// EarlyCSE implementation
//===----------------------------------------------------------------------===//
namespace {
-
-/// EarlyCSE - This pass does a simple depth-first walk over the dominator
-/// tree, eliminating trivially redundant instructions and using instsimplify
-/// to canonicalize things as it goes. It is intended to be fast and catch
-/// obvious cases so that instcombine and other passes are more effective. It
-/// is expected that a later pass of GVN will catch the interesting/hard
-/// cases.
-class EarlyCSE : public FunctionPass {
+/// \brief A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSE {
public:
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
- DominatorTree *DT;
- AssumptionCache *AC;
- typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
- typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ Function &F;
+ const TargetLibraryInfo &TLI;
+ const TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ AssumptionCache &AC;
+ typedef RecyclingAllocator<
+ BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
AllocatorTy> ScopedHTType;
- /// AvailableValues - This scoped hash table contains the current values of
- /// all of our simple scalar expressions. As we walk down the domtree, we
- /// look to see if instructions are in this: if so, we replace them with what
- /// we find, otherwise we insert them so that dominated values can succeed in
- /// their lookup.
- ScopedHTType *AvailableValues;
-
- /// AvailableLoads - This scoped hash table contains the current values
- /// of loads. This allows us to get efficient access to dominating loads when
- /// we have a fully redundant load. In addition to the most recent load, we
- /// keep track of a generation count of the read, which is compared against
- /// the current generation count. The current generation count is
- /// incremented after every possibly writing memory operation, which ensures
- /// that we only CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
- typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
- DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
- LoadHTType *AvailableLoads;
-
- /// AvailableCalls - This scoped hash table contains the current values
- /// of read-only call values. It uses the same generation count as loads.
- typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
- CallHTType *AvailableCalls;
-
- /// CurrentGeneration - This is the current generation of the memory value.
+ /// \brief A scoped hash table of the current values of all of our simple
+ /// scalar expressions.
+ ///
+ /// As we walk down the domtree, we look to see if instructions are in this:
+ /// if so, we replace them with what we find, otherwise we insert them so
+ /// that dominated values can succeed in their lookup.
+ ScopedHTType AvailableValues;
+
+ /// \brief A scoped hash table of the current values of loads.
+ ///
+ /// This allows us to get efficient access to dominating loads when we have
+ /// a fully redundant load. In addition to the most recent load, we keep
+ /// track of a generation count of the read, which is compared against the
+ /// current generation count. The current generation count is incremented
+ /// after every possibly writing memory operation, which ensures that we only
+ /// CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<
+ BumpPtrAllocator,
+ ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ LoadMapAllocator;
+ typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
+ DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ LoadHTType AvailableLoads;
+
+ /// \brief A scoped hash table of the current values of read-only call
+ /// values.
+ ///
+ /// It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
+ CallHTType AvailableCalls;
+
+ /// \brief This is the current generation of the memory value.
unsigned CurrentGeneration;
- static char ID;
- explicit EarlyCSE() : FunctionPass(ID) {
- initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
- }
+ /// \brief Set up the EarlyCSE runner for a particular function.
+ EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
- bool runOnFunction(Function &F) override;
+ bool run();
private:
-
- // NodeScope - almost a POD, but needs to call the constructors for the
- // scoped hash tables so that a new scope gets pushed on. These are RAII so
- // that the scope gets popped when the NodeScope is destroyed.
+ // Almost a POD, but needs to call the constructors for the scoped hash
+ // tables so that a new scope gets pushed on. These are RAII so that the
+ // scope gets popped when the NodeScope is destroyed.
class NodeScope {
- public:
- NodeScope(ScopedHTType *availableValues,
- LoadHTType *availableLoads,
- CallHTType *availableCalls) :
- Scope(*availableValues),
- LoadScope(*availableLoads),
- CallScope(*availableCalls) {}
-
- private:
- NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION;
- void operator=(const NodeScope&) LLVM_DELETED_FUNCTION;
+ public:
+ NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+ CallHTType &AvailableCalls)
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
+ CallScope(AvailableCalls) {}
+
+ private:
+ NodeScope(const NodeScope &) = delete;
+ void operator=(const NodeScope &) = delete;
ScopedHTType::ScopeTy Scope;
LoadHTType::ScopeTy LoadScope;
CallHTType::ScopeTy CallScope;
};
- // StackNode - contains all the needed information to create a stack for
- // doing a depth first tranversal of the tree. This includes scopes for
- // values, loads, and calls as well as the generation. There is a child
- // iterator so that the children do not need to be store spearately.
+ // Contains all the needed information to create a stack for doing a depth
+ // first traversal of the tree. This includes scopes for values, loads, and
+ // calls as well as the generation. There is a child iterator so that the
+ // children do not need to be stored separately.
class StackNode {
- public:
- StackNode(ScopedHTType *availableValues,
- LoadHTType *availableLoads,
- CallHTType *availableCalls,
- unsigned cg, DomTreeNode *n,
- DomTreeNode::iterator child, DomTreeNode::iterator end) :
- CurrentGeneration(cg), ChildGeneration(cg), Node(n),
- ChildIter(child), EndIter(end),
- Scopes(availableValues, availableLoads, availableCalls),
- Processed(false) {}
+ public:
+ StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+ CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
+ DomTreeNode::iterator child, DomTreeNode::iterator end)
+ : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
+ EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
+ Processed(false) {}
// Accessors.
unsigned currentGeneration() { return CurrentGeneration; }
@@ -365,9 +363,9 @@ private:
bool isProcessed() { return Processed; }
void process() { Processed = true; }
- private:
- StackNode(const StackNode&) LLVM_DELETED_FUNCTION;
- void operator=(const StackNode&) LLVM_DELETED_FUNCTION;
+ private:
+ StackNode(const StackNode &) = delete;
+ void operator=(const StackNode &) = delete;
// Members.
unsigned CurrentGeneration;
@@ -379,31 +377,78 @@ private:
bool Processed;
};
+ /// \brief Wrapper class to handle memory instructions, including loads,
+ /// stores and intrinsic loads and stores defined by the target.
+ class ParseMemoryInst {
+ public:
+ ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
+ : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
+ MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
+ MayReadFromMemory = Inst->mayReadFromMemory();
+ MayWriteToMemory = Inst->mayWriteToMemory();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ MemIntrinsicInfo Info;
+ if (!TTI.getTgtMemIntrinsic(II, Info))
+ return;
+ if (Info.NumMemRefs == 1) {
+ Store = Info.WriteMem;
+ Load = Info.ReadMem;
+ MatchingId = Info.MatchingId;
+ MayReadFromMemory = Info.ReadMem;
+ MayWriteToMemory = Info.WriteMem;
+ Vol = Info.Vol;
+ Ptr = Info.PtrVal;
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Load = true;
+ Vol = !LI->isSimple();
+ Ptr = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Store = true;
+ Vol = !SI->isSimple();
+ Ptr = SI->getPointerOperand();
+ }
+ }
+ bool isLoad() { return Load; }
+ bool isStore() { return Store; }
+ bool isVolatile() { return Vol; }
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
+ return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+ }
+ bool isValid() { return Ptr != nullptr; }
+ int getMatchingId() { return MatchingId; }
+ Value *getPtr() { return Ptr; }
+ bool mayReadFromMemory() { return MayReadFromMemory; }
+ bool mayWriteToMemory() { return MayWriteToMemory; }
+
+ private:
+ bool Load;
+ bool Store;
+ bool Vol;
+ bool MayReadFromMemory;
+ bool MayWriteToMemory;
+ // For regular (non-intrinsic) loads/stores, this is set to -1. For
+ // intrinsic loads/stores, the id is retrieved from the corresponding
+ // field in the MemIntrinsicInfo structure. That field contains
+ // non-negative values only.
+ int MatchingId;
+ Value *Ptr;
+ };
+
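
The wrapper's payoff is that the CSE/DSE logic below is written once against a normalized view of memory operations. A toy, self-contained analogue (hypothetical types, not the pass's code):

struct MemView {
  bool Load, Store;
  const void *Ptr;
  int MatchingId; // -1 for plain loads/stores, >= 0 for target intrinsics
};

struct PlainLoad   { const void *P; };
struct PlainStore  { const void *P; };
struct TargetMemOp { bool Read, Write; const void *P; int Id; };

MemView view(const PlainLoad &L)   { return {true, false, L.P, -1}; }
MemView view(const PlainStore &S)  { return {false, true, S.P, -1}; }
MemView view(const TargetMemOp &I) { return {I.Read, I.Write, I.P, I.Id}; }

// Two accesses are candidates for commoning only when both the pointer and
// the id agree, exactly as isMatchingMemLoc requires.
bool sameLocation(const MemView &A, const MemView &B) {
  return A.Ptr == B.Ptr && A.MatchingId == B.MatchingId;
}
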
bool processNode(DomTreeNode *Node);
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.setPreservesCFG();
+ Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI;
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand();
+ assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
+ return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
+ ExpectedType);
}
};
}
-char EarlyCSE::ID = 0;
-
-// createEarlyCSEPass - The public interface to this file.
-FunctionPass *llvm::createEarlyCSEPass() {
- return new EarlyCSE();
-}
-
-INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
-
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
@@ -416,21 +461,46 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (!BB->getSinglePredecessor())
++CurrentGeneration;
+ // If this node has a single predecessor which ends in a conditional branch,
+ // we can infer the value of the branch condition given that we took this
+ // path. We need the single predecessor to ensure there's not another path
+ // which reaches this block where the condition might hold a different
+ // value. Since we're adding this to the scoped hash table (like any other
+ // def), it will have been popped if we encounter a future merge block.
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isConditional())
+ if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
+ if (SimpleValue::canHandle(CondInst)) {
+ assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+ auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
+ ConstantInt::getTrue(BB->getContext()) :
+ ConstantInt::getFalse(BB->getContext());
+ AvailableValues.insert(CondInst, ConditionalConstant);
+ DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+ << CondInst->getName() << "' as " << *ConditionalConstant
+ << " in " << BB->getName() << "\n");
+ // Replace all dominated uses with the known value
+ replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
+ BasicBlockEdge(Pred, BB));
+ }
+
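
The effect of recording the condition is ordinary constant folding along one CFG edge. A self-contained C++ analogue (illustrative only):

int blockLogic(int X) {
  bool Cond = (X == 42);
  if (Cond) {
    return Cond ? 10 : 20; // true successor: Cond pinned to true, folds to 10
  } else {
    return Cond ? 10 : 20; // false successor: Cond pinned to false, folds to 20
  }
}
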
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there is no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = nullptr;
+ Instruction *LastStore = nullptr;
bool Changed = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = I++;
// Dead instructions should just be removed.
- if (isInstructionTriviallyDead(Inst, TLI)) {
+ if (isInstructionTriviallyDead(Inst, &TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
Inst->eraseFromParent();
Changed = true;
@@ -449,7 +519,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -461,7 +531,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(Inst)) {
// See if the instruction has an available value. If so, use it.
- if (Value *V = AvailableValues->lookup(Inst)) {
+ if (Value *V = AvailableValues.lookup(Inst)) {
DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -471,14 +541,15 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, just remember that this value is available.
- AvailableValues->insert(Inst, Inst);
+ AvailableValues.insert(Inst, Inst);
continue;
}
+ ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (MemInst.isValid() && MemInst.isLoad()) {
// Ignore volatile loads.
- if (!LI->isSimple()) {
+ if (MemInst.isVolatile()) {
LastStore = nullptr;
// Don't CSE across synchronization boundaries.
if (Inst->mayWriteToMemory())
@@ -488,38 +559,48 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value*, unsigned> InVal =
- AvailableLoads->lookup(Inst->getOperand(0));
+ std::pair<Value *, unsigned> InVal =
+ AvailableLoads.lookup(MemInst.getPtr());
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
- << *InVal.first << '\n');
- if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
- Inst->eraseFromParent();
- Changed = true;
- ++NumCSELoad;
- continue;
+ Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ if (Op != nullptr) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
+ << " to: " << *InVal.first << '\n');
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(Op);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
}
// Otherwise, remember that we have this instruction.
- AvailableLoads->insert(Inst->getOperand(0),
- std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
+ Inst, CurrentGeneration));
LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
- if (Inst->mayReadFromMemory())
+ // Load/store intrinsics will indicate both a read and a write to
+ // memory. The target may override this (e.g. so that a store intrinsic
+ // does not read from memory, and thus will be treated the same as a
+ // regular store for commoning purposes).
+ if (Inst->mayReadFromMemory() &&
+ !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
- std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
- << *InVal.first << '\n');
- if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
+ << " to: " << *InVal.first << '\n');
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(InVal.first);
Inst->eraseFromParent();
Changed = true;
++NumCSECall;
@@ -527,8 +608,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableCalls->insert(Inst,
- std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ AvailableCalls.insert(
+ Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
continue;
}
@@ -538,17 +619,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (Inst->mayWriteToMemory()) {
++CurrentGeneration;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
- if (LastStore &&
- LastStore->getPointerOperand() == SI->getPointerOperand()) {
- DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
- << *Inst << '\n');
- LastStore->eraseFromParent();
- Changed = true;
- ++NumDSE;
- LastStore = nullptr;
+ if (LastStore) {
+ ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
+ << " due to: " << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = nullptr;
+ }
// fallthrough - we can exploit information about this store
}
@@ -557,12 +640,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads->insert(SI->getPointerOperand(),
- std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+ AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
+ Inst, CurrentGeneration));
// Remember that this was the last store we saw for DSE.
- if (SI->isSimple())
- LastStore = SI;
+ if (!MemInst.isVolatile())
+ LastStore = Inst;
}
}
}
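
The generation counter that gates all of the load/call reuse above can be modeled in a few standalone lines (hypothetical names; the real tables are additionally scoped per dominator-tree node):

#include <map>
#include <utility>

struct AvailableLoadsModel {
  unsigned CurrentGeneration = 0;
  std::map<const void *, std::pair<const void *, unsigned>> Table;

  void record(const void *Ptr, const void *Val) {
    Table[Ptr] = std::make_pair(Val, CurrentGeneration);
  }
  void sawMayWriteToMemory() { ++CurrentGeneration; }
  const void *lookup(const void *Ptr) const {
    auto It = Table.find(Ptr);
    if (It != Table.end() && It->second.second == CurrentGeneration)
      return It->second.first; // no intervening write: safe to reuse
    return nullptr; // stale generation: some write may have clobbered it
  }
};
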
@@ -570,40 +653,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
return Changed;
}
-
-bool EarlyCSE::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- // Note, deque is being used here because there is significant performance gains
- // over vector when the container becomes very large due to the specific access
- // patterns. For more information see the mailing list discussion on this:
+bool EarlyCSE::run() {
+ // Note: deque is being used here because there are significant performance
+ // gains over vector when the container becomes very large due to the
+ // specific access patterns. For more information see the mailing list
+ // discussion on this:
// http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
std::deque<StackNode *> nodesToProcess;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-
- // Tables that the pass uses when walking the domtree.
- ScopedHTType AVTable;
- AvailableValues = &AVTable;
- LoadHTType LoadTable;
- AvailableLoads = &LoadTable;
- CallHTType CallTable;
- AvailableCalls = &CallTable;
-
- CurrentGeneration = 0;
bool Changed = false;
// Process the root node.
- nodesToProcess.push_back(
- new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
- CurrentGeneration, DT->getRootNode(),
- DT->getRootNode()->begin(),
- DT->getRootNode()->end()));
+ nodesToProcess.push_back(new StackNode(
+ AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
+ DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
// Save the current generation.
unsigned LiveOutGeneration = CurrentGeneration;
@@ -627,11 +690,9 @@ bool EarlyCSE::runOnFunction(Function &F) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
nodesToProcess.push_back(
- new StackNode(AvailableValues,
- AvailableLoads,
- AvailableCalls,
- NodeToProcess->childGeneration(), child,
- child->begin(), child->end()));
+ new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
+ NodeToProcess->childGeneration(), child, child->begin(),
+ child->end()));
} else {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
@@ -645,3 +706,74 @@ bool EarlyCSE::runOnFunction(Function &F) {
return Changed;
}
+
+PreservedAnalyses EarlyCSEPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM->getResult<TargetIRAnalysis>(F);
+ auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
+
+ if (!CSE.run())
+ return PreservedAnalyses::all();
+
+ // CSE preserves the dominator tree because it doesn't mutate the CFG.
+ // FIXME: Bundle this with other CFG-preservation.
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+/// \brief A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSELegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ EarlyCSELegacyPass() : FunctionPass(ID) {
+ initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
+
+ return CSE.run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSELegacyPass::ID = 0;
+
+FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
+
+INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
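
For reference, a minimal sketch of scheduling the legacy wrapper through the old pass manager (assumes the usual legacy::FunctionPassManager setup; the analysis dependencies declared in getAnalysisUsage are resolved automatically):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

void scheduleEarlyCSE(llvm::legacy::FunctionPassManager &FPM) {
  FPM.add(llvm::createEarlyCSEPass()); // the factory kept above for legacy clients
}
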
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
+
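
The lossless-demotion premise is easy to check concretely. This self-contained program (illustrative only, not part of the pass) shows the property being exploited: integer values small enough to fit in the significand round-trip through float arithmetic exactly.

#include <cassert>
#include <cstdint>

int32_t viaFloat(int16_t A, int16_t B) {
  return static_cast<int32_t>(static_cast<float>(A) + static_cast<float>(B));
}

int32_t viaInt(int16_t A, int16_t B) {
  return static_cast<int32_t>(A) + static_cast<int32_t>(B);
}

int main() {
  // Any i16 sum needs at most 17 bits, well inside float's 24-bit
  // significand, so the FP detour is exact and the rewrite is sound.
  assert(viaFloat(30000, 1234) == viaInt(30000, 1234));
  return 0;
}
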
+/// The largest integer type worth dealing with.
+static cl::opt<unsigned>
+MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
+ cl::desc("Max integer bitwidth to consider in float2int"
+ "(default=64)"));
+
+namespace {
+ struct Float2Int : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Float2Int() : FunctionPass(ID) {
+ initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
+ ConstantRange seen(Instruction *I, ConstantRange R);
+ ConstantRange badRange();
+ ConstantRange unknownRange();
+ ConstantRange validateRange(ConstantRange R);
+ void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
+ void walkForwards();
+ bool validateAndTransform();
+ Value *convert(Instruction *I, Type *ToTy);
+ void cleanup();
+
+ MapVector<Instruction*, ConstantRange > SeenInsts;
+ SmallPtrSet<Instruction*,8> Roots;
+ EquivalenceClasses<Instruction*> ECs;
+ MapVector<Instruction*, Value*> ConvertedInsts;
+ LLVMContext *Ctx;
+ };
+}
+
+char Float2Int::ID = 0;
+INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+
+// Given a FCmp predicate, return a matching ICmp predicate if one
+// exists, otherwise return BAD_ICMP_PREDICATE. Ordered and unordered
+// variants map to the same signed predicate: operands proven to lie in an
+// integer range can never be NaN, so the distinction is immaterial here.
+static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ return CmpInst::ICMP_EQ;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ return CmpInst::ICMP_SGE;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ return CmpInst::ICMP_SLE;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ return CmpInst::ICMP_NE;
+ default:
+ return CmpInst::BAD_ICMP_PREDICATE;
+ }
+}
+
+// Given a floating point binary operator, return the matching
+// integer version.
+static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case Instruction::FAdd: return Instruction::Add;
+ case Instruction::FSub: return Instruction::Sub;
+ case Instruction::FMul: return Instruction::Mul;
+ }
+}
+
+// Find the roots - instructions that convert from the FP domain to
+// integer domain.
+void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+ for (auto &I : inst_range(F)) {
+ switch (I.getOpcode()) {
+ default: break;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Roots.insert(&I);
+ break;
+ case Instruction::FCmp:
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ CmpInst::BAD_ICMP_PREDICATE)
+ Roots.insert(&I);
+ break;
+ }
+ }
+}
+
+// Helper - mark I as having been traversed, having range R.
+ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+ DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
+ if (SeenInsts.find(I) != SeenInsts.end())
+ SeenInsts.find(I)->second = R;
+ else
+ SeenInsts.insert(std::make_pair(I, R));
+ return R;
+}
+
+// Helper - get a range representing a poison value.
+ConstantRange Float2Int::badRange() {
+ return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+ return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+ if (R.getBitWidth() > MaxIntegerBW + 1)
+ return badRange();
+ return R;
+}
+
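
The two helpers above encode a three-way lattice in the range's width and contents: at width MaxIntegerBW+1, the full set means "poisoned, cannot transform" and the empty set means "seen, range not yet computed". An illustrative predicate (not in the pass) making the encoding explicit:

#include "llvm/IR/ConstantRange.h"

bool isPoisonMarker(const llvm::ConstantRange &R, unsigned MaxBW) {
  // Anything wider than MaxBW+1 bits, or the full set at that width, marks
  // an untransformable def-use graph.
  return R.getBitWidth() > MaxBW + 1 || R.isFullSet();
}
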
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+// - walkBackwards: A breadth-first walk of the use-def graph starting from
+// the roots. Populate "SeenInsts" with interesting
+// instructions and poison values if they're obvious and
+// cheap to compute. Calculate the equivalence set structure
+// while we're here too.
+// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+// defs before their uses. Calculate the real range info.
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+ std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ if (SeenInsts.find(I) != SeenInsts.end())
+ // Seen already.
+ continue;
+
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ // Path terminated uncleanly.
+ seen(I, badRange());
+ break;
+
+ case Instruction::UIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+ APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(Min, Max)));
+ continue;
+ }
+
+ case Instruction::SIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+ APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(SMin, SMax)));
+ continue;
+ }
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FCmp:
+ seen(I, unknownRange());
+ break;
+ }
+
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ // Unify def-use chains if they interfere.
+ ECs.unionSets(I, OI);
+ if (SeenInsts.find(I)->second != badRange())
+ Worklist.push_back(OI);
+ } else if (!isa<ConstantFP>(O)) {
+ // Not an instruction or ConstantFP? We can't do anything.
+ seen(I, badRange());
+ }
+ }
+ }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+ for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+ if (It->second != unknownRange())
+ continue;
+
+ Instruction *I = It->first;
+ std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkForwards!");
+
+ case Instruction::FAdd:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FAdd is a binary operator!");
+ return Ops[0].add(Ops[1]);
+ };
+ break;
+
+ case Instruction::FSub:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FSub is a binary operator!");
+ return Ops[0].sub(Ops[1]);
+ };
+ break;
+
+ case Instruction::FMul:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FMul is a binary operator!");
+ return Ops[0].multiply(Ops[1]);
+ };
+ break;
+
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
+ return Ops[0];
+ };
+ break;
+
+ case Instruction::FCmp:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FCmp is a binary operator!");
+ return Ops[0].unionWith(Ops[1]);
+ };
+ break;
+ }
+
+ bool Abort = false;
+ SmallVector<ConstantRange,4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ assert(SeenInsts.find(OI) != SeenInsts.end() &&
+ "def not seen before use!");
+ OpRanges.push_back(SeenInsts.find(OI)->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // the exactness can be too precise. For example, negative zero can
+ // never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
+ APFloat F = CF->getValueAPF();
+
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros())) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
+
+ // Reduce the operands' ranges to a single range and return.
+ if (!Abort)
+ seen(I, Op(OpRanges));
+ }
+}
+
+// If there is a valid transform to be done, do it.
+bool Float2Int::validateAndTransform() {
+ bool MadeChange = false;
+
+ // Iterate over every disjoint partition of the def-use graph.
+ for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
+ ConstantRange R(MaxIntegerBW + 1, false);
+ bool Fail = false;
+ Type *ConvertedToTy = nullptr;
+
+ // For every member of the partition, union all the ranges together.
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI) {
+ Instruction *I = *MI;
+ auto SeenI = SeenInsts.find(I);
+ if (SeenI == SeenInsts.end())
+ continue;
+
+ R = R.unionWith(SeenI->second);
+ // We need to ensure I has no users that have not been seen.
+ // If it does, transformation would be illegal.
+ //
+ // Don't count the roots, as they terminate the graphs.
+ if (Roots.count(I) == 0) {
+ // Set the type of the conversion while we're here.
+ if (!ConvertedToTy)
+ ConvertedToTy = I->getType();
+ for (User *U : I->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
+ DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
+ Fail = true;
+ break;
+ }
+ }
+ }
+ if (Fail)
+ break;
+ }
+
+ // If the set was empty, or we failed, or the range is poisonous,
+ // bail out.
+ if (ECs.member_begin(It) == ECs.member_end() || Fail ||
+ R.isFullSet() || R.isSignWrappedSet())
+ continue;
+ assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
+
+ // The number of bits required is the maximum of the upper and
+ // lower limits, plus one so it can be signed.
+ unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
+ R.getUpper().getMinSignedBits()) + 1;
+ DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
+
+ // If we've run off the realms of the exactly representable integers,
+ // the floating point result will differ from an integer approximation.
+
+ // Do we need more bits than are in the mantissa of the type we converted
+ // to? semanticsPrecision returns the number of stored mantissa bits plus
+ // one for the implicit integer bit.
+ unsigned MaxRepresentableBits
+ = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
+ if (MinBW > MaxRepresentableBits) {
+ DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
+ continue;
+ }
+ if (MinBW > 64) {
+ DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
+ continue;
+ }
+
+ // OK, R is known to be representable. Now pick a type for it.
+ // FIXME: Pick the smallest legal type that will fit.
+ Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI)
+ convert(*MI, Ty);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+Value *Float2Int::convert(Instruction *I, Type *ToTy) {
+ if (ConvertedInsts.find(I) != ConvertedInsts.end())
+ // Already converted this instruction.
+ return ConvertedInsts[I];
+
+ SmallVector<Value*,4> NewOperands;
+ for (Value *V : I->operands()) {
+ // Don't recurse if we're an instruction that terminates the path.
+ if (I->getOpcode() == Instruction::UIToFP ||
+ I->getOpcode() == Instruction::SIToFP) {
+ NewOperands.push_back(V);
+ } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ NewOperands.push_back(convert(VI, ToTy));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Val,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ NewOperands.push_back(ConstantInt::get(ToTy, Val));
+ } else {
+ llvm_unreachable("Unhandled operand type?");
+ }
+ }
+
+ // Now create a new instruction.
+ IRBuilder<> IRB(I);
+ Value *NewV = nullptr;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction!");
+
+ case Instruction::FPToUI:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FPToSI:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FCmp: {
+ CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate());
+ assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
+ NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName());
+ break;
+ }
+
+ case Instruction::UIToFP:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::SIToFP:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()),
+ NewOperands[0], NewOperands[1],
+ I->getName());
+ break;
+ }
+
+ // If we're a root instruction, RAUW.
+ if (Roots.count(I))
+ I->replaceAllUsesWith(NewV);
+
+ ConvertedInsts[I] = NewV;
+ return NewV;
+}
+
+// Perform dead code elimination on the instructions we just modified.
+void Float2Int::cleanup() {
+ for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
+ I != E; ++I)
+ I->first->eraseFromParent();
+}
+
+bool Float2Int::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
+ // Clear out all state.
+ ECs = EquivalenceClasses<Instruction*>();
+ SeenInsts.clear();
+ ConvertedInsts.clear();
+ Roots.clear();
+
+ Ctx = &F.getParent()->getContext();
+
+ findRoots(F, Roots);
+
+ walkBackwards(Roots);
+ walkForwards();
+
+ bool Modified = validateAndTransform();
+ if (Modified)
+ cleanup();
+ return Modified;
+}
+
+FunctionPass *llvm::createFloat2IntPass() {
+ return new Float2Int();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 1ed14d0..7770ddc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -45,7 +46,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -458,7 +459,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
return e;
}
-/// lookup - Returns the value number of the specified value. Fails if
+/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
@@ -466,7 +467,7 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
-/// lookup_or_add_cmp - Returns the value number of the given comparison,
+/// Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
@@ -479,14 +480,14 @@ uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
return e;
}
-/// clear - Remove all entries from the ValueTable.
+/// Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
-/// erase - Remove a value from the value numbering.
+/// Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
@@ -582,23 +583,22 @@ namespace {
return cast<MemIntrinsic>(Val.getPointer());
}
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ /// Emit code into this block to adjust the value defined here to the
+ /// specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const;
};
class GVN : public FunctionPass {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
- /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// A mapping from value numbers to lists of Value*'s that
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
@@ -623,20 +623,18 @@ namespace {
bool runOnFunction(Function &F) override;
- /// markInstructionForDeletion - This removes the specified instruction from
+ /// This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
VN.erase(I);
InstrsToErase.push_back(I);
}
- const DataLayout *getDataLayout() const { return DL; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
private:
- /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
- /// its value number.
+ /// Push a new Value to the LeaderTable onto the list for its value number.
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
@@ -652,7 +650,7 @@ namespace {
Curr.Next = Node;
}
- /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
LeaderTableEntry* Prev = nullptr;
@@ -685,7 +683,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -711,13 +709,13 @@ namespace {
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
bool performScalarPRE(Instruction *I);
+ bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
bool processFoldableCondBr(BranchInst *BI);
void addDeadBlock(BasicBlock *BB);
@@ -727,7 +725,7 @@ namespace {
char GVN::ID = 0;
}
-// createGVNPass - The public interface to this file...
+// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
@@ -736,7 +734,7 @@ INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -752,7 +750,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
#endif
-/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
@@ -798,7 +796,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
return true;
-// SpeculationFailure - If we get here, we found out that this is not, after
+// If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
@@ -833,8 +831,7 @@ SpeculationFailure:
}
-/// CanCoerceMustAliasedValueToLoad - Return true if
-/// CoerceAvailableValueToLoadType will succeed.
+/// Return true if CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
const DataLayout &DL) {
@@ -853,7 +850,7 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return true;
}
-/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
@@ -938,7 +935,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
-/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
@@ -956,8 +953,9 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
if (StoreBase != LoadBase)
return -1;
@@ -1018,23 +1016,23 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return LoadOffset-StoreOffset;
}
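
A worked instance of the offset arithmetic above, on a hypothetical pair of accesses sharing one base pointer (all values assumed for illustration; the real routine also rejects partial-overlap cases it cannot synthesize):

    // store i64 %v at %base+8    -> StoreOffset = 8,  8 stored bytes
    // load  i32    at %base+12   -> LoadOffset  = 12, 4 loaded bytes
    int64_t StoreOffset = 8, LoadOffset = 12;
    int64_t StoreBytes = 8, LoadBytes = 4;
    bool Covered = LoadOffset >= StoreOffset &&
                   LoadOffset + LoadBytes <= StoreOffset + StoreBytes;
    int Result = Covered ? int(LoadOffset - StoreOffset) : -1; // 4 here: the
    // load reads the four bytes at offset 4 inside the stored i64.
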
-/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI,
- const DataLayout &DL) {
+ StoreInst *DepSI) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
+ const DataLayout &DL = DepSI->getModule()->getDataLayout();
Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize = DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, DL);
}
-/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
@@ -1052,11 +1050,11 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0) return -1;
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
@@ -1086,7 +1084,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (!Src) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1102,15 +1100,16 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, &DL))
+ if (ConstantFoldLoadFromConstPtr(Src, DL))
return Offset;
return -1;
}
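
Isolating one theme of this hunk: ConstantExpr::getGetElementPtr now takes the pointee type explicitly (i8 here, since Src was just bitcast to i8*). A minimal sketch assuming the usual LLVM headers; the helper name is ours:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Build the constant expression &((i8*)Src)[Offset] with the new API.
    static Constant *gepByteOffset(Constant *Src, Constant *OffsetCst) {
      return ConstantExpr::getGetElementPtr(
          Type::getInt8Ty(Src->getContext()), Src, OffsetCst);
    }

The same pattern recurs in GetMemInstValueForLoad further down.
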
-/// GetStoreValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
 /// that the store provides bits used by the load but the pointers don't
/// mustalias. Check this case to see if there is anything more we can do
@@ -1149,7 +1148,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}
-/// GetLoadValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
@@ -1157,7 +1156,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const DataLayout &DL = *gvn.getDataLayout();
+ const DataLayout &DL = SrcVal->getModule()->getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
@@ -1212,7 +1211,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
}
-/// GetMemInstValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
@@ -1263,13 +1262,14 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, &DL);
+ return ConstantFoldLoadFromConstPtr(Src, DL);
}
-/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
@@ -1281,7 +1281,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent())) {
     assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominates this block");
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
}
// Otherwise, we have to construct SSA form.
@@ -1289,8 +1289,6 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- Type *LoadTy = LI->getType();
-
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
@@ -1298,7 +1296,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
}
// Perform PHI construction.
@@ -1326,16 +1324,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
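
A condensed sketch of the SSAUpdater pattern this function is built on (LLVM headers assumed; the helper name is ours): register one materialized value per block, then ask for the value at the load and let the updater create any PHIs:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    static Value *joinAvailableValues(
        LoadInst *LI, ArrayRef<std::pair<BasicBlock *, Value *>> Avail) {
      SSAUpdater Updater;
      Updater.Initialize(LI->getType(), LI->getName());
      for (const auto &BV : Avail)
        if (!Updater.HasValueForBlock(BV.first))
          Updater.AddAvailableValue(BV.first, BV.second);
      // Creates PHI nodes on demand where the available values meet.
      return Updater.GetValueInMiddleOfBlock(LI->getParent());
    }
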
-Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
+ GVN &gvn) const {
Value *Res;
+ Type *LoadTy = LI->getType();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *DL);
-
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL);
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1353,10 +1351,8 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *DL);
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+ BB->getTerminator(), DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1383,6 +1379,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
unsigned NumDeps = Deps.size();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1409,9 +1406,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (DL && Address) {
- int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
- DepSI, *DL);
+ if (Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
@@ -1428,9 +1425,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
- DepLI, *DL);
+ if (DepLI != LI && Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
@@ -1443,9 +1440,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (DL && Address) {
+ if (Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, *DL);
+ DepMI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
@@ -1484,8 +1481,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1501,7 +1498,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1613,6 +1610,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
SmallVector<Instruction*, 8> NewInsts;
for (auto &PredLoad : PredLoads) {
BasicBlock *UnavailablePred = PredLoad.first;
@@ -1704,7 +1702,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return true;
}
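
A pattern worth naming, since it recurs throughout this patch: instead of threading a possibly-null `const DataLayout *` obtained from a DataLayoutPass, code now asks the instruction's module, where a layout is always present. A minimal sketch (LLVM headers assumed; helper name ours):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static uint64_t storeSizeInBytes(const Instruction *I, Type *Ty) {
      const DataLayout &DL = I->getModule()->getDataLayout(); // never null
      return DL.getTypeStoreSize(Ty);
    }

This is why the `if (DL && ...)` guards in the hunks above collapse into unconditional code.
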
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
// Step 1: Find the non-local dependencies of the load.
@@ -1817,7 +1815,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
I->replaceAllUsesWith(Repl);
}
-/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L) {
if (!MD)
@@ -1833,10 +1831,11 @@ bool GVN::processLoad(LoadInst *L) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
+ const DataLayout &DL = L->getModule()->getDataLayout();
  // If we have a clobber, see if this is a clobber that we can fix up
  // through code synthesis.
- if (Dep.isClobber() && DL) {
+ if (Dep.isClobber()) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1849,12 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
// access code.
Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
- L->getPointerOperand(),
- DepSI, *DL);
+ int Offset = AnalyzeLoadFromClobberingStore(
+ L->getType(), L->getPointerOperand(), DepSI);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *DL);
+ L->getType(), L, DL);
}
// Check to see if we have something like this:
@@ -1867,9 +1865,8 @@ bool GVN::processLoad(LoadInst *L) {
if (DepLI == L)
return false;
- int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
- L->getPointerOperand(),
- DepLI, *DL);
+ int Offset = AnalyzeLoadFromClobberingLoad(
+ L->getType(), L->getPointerOperand(), DepLI, DL);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
@@ -1877,11 +1874,10 @@ bool GVN::processLoad(LoadInst *L) {
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
- L->getPointerOperand(),
- DepMI, *DL);
+ int Offset = AnalyzeLoadFromClobberingMemInst(
+ L->getType(), L->getPointerOperand(), DepMI, DL);
if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL);
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL);
}
if (AvailVal) {
@@ -1932,17 +1928,13 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
- if (DL) {
- StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *DL);
- if (!StoredVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
- << '\n' << *L << "\n\n\n");
- }
- else
+ StoredVal =
+ CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL);
+ if (!StoredVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
}
// Remove it!
@@ -1961,17 +1953,12 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- if (DL) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
- L, *DL);
- if (!AvailableVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
- << "\n" << *L << "\n\n\n");
- }
- else
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL);
+ if (!AvailableVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
}
// Remove it!
@@ -2016,7 +2003,7 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-// findLeader - In order to find a leader for a given value number at a
+// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
@@ -2044,25 +2031,7 @@ Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
return Val;
}
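
The scan described above, written out as a fragment over the shapes this file already declares (LeaderTableEntry's Val/BB/Next fields as introduced earlier; simplified from the exact loop):

    static Value *firstDominatingLeader(const LeaderTableEntry *Head,
                                        const BasicBlock *BB,
                                        DominatorTree &DT) {
      for (const LeaderTableEntry *E = Head; E; E = E->Next)
        if (E->Val && DT.dominates(E->BB, BB)) // two DFS-number compares
          return E->Val;
      return nullptr;
    }
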
-/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
-/// use is dominated by the given basic block. Returns the number of uses that
-/// were replaced.
-unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- const BasicBlockEdge &Root) {
- unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE; ) {
- Use &U = *UI++;
-
- if (DT->dominates(Root, U)) {
- U.set(To);
- ++Count;
- }
- }
- return Count;
-}
-
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// There is an edge from 'Src' to 'Dst'. Return
/// true if every path from the entry block to 'Dst' passes via this edge. In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
@@ -2079,7 +2048,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-/// propagateEquality - The given values are known to be equal in every block
+/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS,
@@ -2138,7 +2107,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2210,7 +2179,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
- replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root);
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2229,7 +2198,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
return Changed;
}
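
A hypothetical source-level picture of what the propagation achieves, with the shared utility that now performs the dominated-use rewrite (visible in the calls above):

    // if (x == y) {     // the taken edge is `Root`
    //   use(x);         // dominated by Root: rewritten to use(y)
    // }
    // use(x);           // not dominated by the edge: left untouched
    //
    // unsigned N = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
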
-/// processInstruction - When calculating availability, handle an instruction
+/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
@@ -2240,6 +2209,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
@@ -2358,10 +2328,8 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2374,7 +2342,8 @@ bool GVN::runOnFunction(Function& F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ bool removedBlock = MergeBlockIntoPredecessor(
+ BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD);
if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
@@ -2457,6 +2426,43 @@ bool GVN::processBlock(BasicBlock *BB) {
return ChangedFunction;
}
+// Instantiate an expression in a predecessor that lacked it.
+bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ unsigned int ValNo) {
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ bool success = true;
+ for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
+ Value *Op = Instr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ Instr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success)
+ return false;
+
+ Instr->insertBefore(Pred->getTerminator());
+ Instr->setName(Instr->getName() + ".pre");
+ Instr->setDebugLoc(Instr->getDebugLoc());
+ VN.add(Instr, ValNo);
+
+ // Update the availability map to include the new instruction.
+ addToLeaderTable(ValNo, Instr, Pred);
+ return true;
+}
+
bool GVN::performScalarPRE(Instruction *CurInst) {
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
@@ -2523,60 +2529,43 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// Don't do PRE when it might increase code size, i.e. when
// we would need to insert instructions in more than one pred.
- if (NumWithout != 1 || NumWith == 0)
+ if (NumWithout > 1 || NumWith == 0)
return false;
- // Don't do PRE across indirect branch.
- if (isa<IndirectBrInst>(PREPred->getTerminator()))
- return false;
+ // We may have a case where all predecessors have the instruction,
+ // and we just need to insert a phi node. Otherwise, perform
+ // insertion.
+ Instruction *PREInstr = nullptr;
- // We can't do PRE safely on a critical edge, so instead we schedule
- // the edge to be split and perform the PRE the next time we iterate
- // on the function.
- unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
- if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
- toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
- return false;
- }
-
- // Instantiate the expression in the predecessor that lacked it.
- // Because we are going top-down through the block, all value numbers
- // will be available in the predecessor by the time we need them. Any
- // that weren't originally present will have been instantiated earlier
- // in this loop.
- Instruction *PREInstr = CurInst->clone();
- bool success = true;
- for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
- Value *Op = PREInstr->getOperand(i);
- if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
- continue;
+ if (NumWithout != 0) {
+ // Don't do PRE across indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ return false;
- if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
- PREInstr->setOperand(i, V);
- } else {
- success = false;
- break;
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ return false;
+ }
+ // We need to insert somewhere, so let's give it a shot
+ PREInstr = CurInst->clone();
+ if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
+ // If we failed insertion, make sure we remove the instruction.
+ DEBUG(verifyRemoved(PREInstr));
+ delete PREInstr;
+ return false;
}
}
- // Fail out if we encounter an operand that is not available in
- // the PRE predecessor. This is typically because of loads which
- // are not value numbered precisely.
- if (!success) {
- DEBUG(verifyRemoved(PREInstr));
- delete PREInstr;
- return false;
- }
+ // Either we should have filled in the PRE instruction, or we should
+ // not have needed insertions.
+  assert(PREInstr != nullptr || NumWithout == 0);
- PREInstr->insertBefore(PREPred->getTerminator());
- PREInstr->setName(CurInst->getName() + ".pre");
- PREInstr->setDebugLoc(CurInst->getDebugLoc());
- VN.add(PREInstr, ValNo);
++NumGVNPRE;
- // Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, PREInstr, PREPred);
-
// Create a PHI to make the value available in this block.
PHINode *Phi =
PHINode::Create(CurInst->getType(), predMap.size(),
@@ -2612,10 +2601,12 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
MD->removeInstruction(CurInst);
DEBUG(verifyRemoved(CurInst));
CurInst->eraseFromParent();
+ ++NumGVNInstr;
+
return true;
}
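
For readers new to PRE, a hypothetical diamond at the source level, matching the NumWith/NumWithout bookkeeping above:

    // if (c) { t = a + b; }   // predecessor that has the value (NumWith)
    // else   { /* none */ }   // predecessor that lacks it (NumWithout == 1)
    // u = a + b;              // partially redundant at the join
    //
    // After performScalarPRE, a clone of the expression (named "<orig>.pre")
    // is inserted into the empty predecessor and the join uses a PHI:
    //
    // if (c) { t  = a + b; }
    // else   { t2 = a + b; }  // the inserted PREInstr
    // u = phi(t, t2);
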
-/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
@@ -2645,26 +2636,28 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
- BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ BasicBlock *BB = SplitCriticalEdge(
+ Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
if (MD)
MD->invalidateCachedPredecessors();
return BB;
}
-/// splitCriticalEdges - Split critical edges found during the previous
+/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
do {
std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
- SplitCriticalEdge(Edge.first, Edge.second, this);
+ SplitCriticalEdge(Edge.first, Edge.second,
+ CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
return true;
}
-/// iterateOnFunction - Executes one iteration of GVN
+/// Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
@@ -2695,7 +2688,7 @@ void GVN::cleanupGlobalSets() {
TableAllocator.Reset();
}
-/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
@@ -2714,11 +2707,10 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
-// BB is declared dead, which implied other blocks become dead as well. This
-// function is to add all these blocks to "DeadBlocks". For the dead blocks'
-// live successors, update their phi nodes by replacing the operands
-// corresponding to dead blocks with UndefVal.
-//
+/// BB is declared dead, which implies other blocks become dead as well. This
+/// function adds all these blocks to "DeadBlocks". For the dead blocks'
+/// live successors, update their phi nodes by replacing the operands
+/// corresponding to dead blocks with UndefVal.
void GVN::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 4> NewDead;
SmallSetVector<BasicBlock *, 4> DF;
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c01f57f..600589c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -73,7 +73,6 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
@@ -82,8 +81,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
- DL(nullptr), Changed(false) {
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -91,7 +90,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -126,7 +125,7 @@ char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -622,17 +621,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->eraseFromParent();
}
}
-
- // If we were unable to completely replace the PHI node, clone the PHI
- // and delete the original one. This lets IVUsers and any other maps
- // purge the original user from their records.
- if (!LCSSASafePhiForRAUW) {
- PHINode *NewPN = cast<PHINode>(PN->clone());
- NewPN->takeName(PN);
- NewPN->insertBefore(PN);
- PN->replaceAllUsesWith(NewPN);
- PN->eraseFromParent();
- }
}
}
@@ -663,14 +651,14 @@ namespace {
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
- const DataLayout *DL, const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (DL && !DL->isLegalInteger(Width))
+ if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
return;
// Cast is either an sext or zext up to this point.
@@ -916,8 +904,8 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
return AddRec;
}
-/// GetWideRecurrence - Is this instruction potentially interesting from
-/// IVUsers' perspective after widening it's type? In other words, can the
+/// GetWideRecurrence - Is this instruction potentially interesting for further
+/// simplification after widening its type? In other words, can the
/// extend be safely hoisted out of the loop with SCEV reducing the value to a
/// recurrence on the same loop. If so, return the sign or zero extended
/// recurrence. Otherwise return NULL.
@@ -1201,7 +1189,6 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
namespace {
class IndVarSimplifyVisitor : public IVVisitor {
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
PHINode *IVPhi;
@@ -1209,9 +1196,9 @@ namespace {
WideIVInfo WI;
IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const DataLayout *DL, const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI,
const DominatorTree *DTree)
- : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) {
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
DT = DTree;
WI.NarrowIV = IVPhi;
if (ReduceLiveIVs)
@@ -1219,9 +1206,7 @@ namespace {
}
// Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override {
- visitIVCast(Cast, WI, SE, DL, TTI);
- }
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
}
@@ -1255,7 +1240,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
@@ -1278,55 +1263,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Check for expressions that ScalarEvolution generates to compute
-/// BackedgeTakenInfo. If these expressions have not been reduced, then
-/// expanding them may incur additional cost (albeit in the loop preheader).
-static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
- SmallPtrSetImpl<const SCEV*> &Processed,
- ScalarEvolution *SE) {
- if (!Processed.insert(S).second)
- return false;
-
- // If the backedge-taken count is a UDiv, it's very likely a UDiv that
- // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
- // precise expression, rather than a UDiv from the user's code. If we can't
- // find a UDiv in the code with some simple searching, assume the former and
- // forego rewriting the loop.
- if (isa<SCEVUDivExpr>(S)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return true;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != S) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != S)
- return true;
- }
- }
-
- // Recurse past add expressions, which commonly occur in the
- // BackedgeTakenCount. They may already exist in program code, and if not,
- // they are not too expensive rematerialize.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, BI, Processed, SE))
- return true;
- }
- return false;
- }
-
- // HowManyLessThans uses a Max expression whenever the loop is not guarded by
- // the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
- return true;
-
- // If we haven't recognized an expensive SCEV pattern, assume it's an
- // expression produced by program code.
- return false;
-}
-
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
@@ -1340,7 +1276,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
+ SCEVExpander &Rewriter) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
@@ -1350,12 +1287,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
return false;
// Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
return false;
- SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
+ if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
return false;
return true;
@@ -1521,9 +1456,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
-static PHINode *
-FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) {
+static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1552,7 +1486,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth)))
+ if (PhiWidth < BCWidth ||
+ !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1641,7 +1576,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
}
else {
// In any other case, convert both IVInit and IVCount to integers before
@@ -1695,7 +1630,7 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
Value *CmpIndVar = IndVar;
@@ -1705,51 +1640,15 @@ LinearFunctionTestReplace(Loop *L,
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount = SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- llvm::Value *IncrementedIndvar =
- IndVar->getIncomingValueForBlock(L->getExitingBlock());
- const auto *IncrementedIndvarSCEV =
- cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
- // It is unsafe to use the incremented indvar if it has a wrapping flag, we
- // don't want to compare against a poison value. Check the SCEV that
- // corresponds to the incremented indvar, the SCEVExpander will only insert
- // flags in the IR if the SCEV originally had wrapping flags.
- // FIXME: In theory, SCEV could drop flags even though they exist in IR.
- // A more robust solution would involve getting a new expression for
- // CmpIndVar by applying non-NSW/NUW AddExprs.
- auto WrappingFlags =
- ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
- const SCEV *IVInit = IncrementedIndvarSCEV->getStart();
- if (SE->getTypeSizeInBits(IVInit->getType()) >
- SE->getTypeSizeInBits(IVCount->getType()))
- IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
- unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1);
- // Check if InitIV + BECount+1 requires sign/zero extension.
- // If not, clear the corresponding flag from WrappingFlags because it is not
- // necessary for those flags in the IncrementedIndvarSCEV expression.
- if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy),
- SE->getSignExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW);
- if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy),
- SE->getZeroExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW);
- if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
- WrappingFlags)) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- CmpIndVar = IncrementedIndvar;
- }
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
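
A worked instance (hypothetical i8 loop) of why the overflowing add is safe: if the backedge is taken 255 times, IVCount = 255 + 1 wraps to 0 in i8, and comparing the post-incremented IV against 0 still exits on the right iteration because the IV wraps the same way:

    // uint8_t iv = 0;
    // do { body(); ++iv; } while (iv != 0);   // executes exactly 256 times
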
@@ -1929,13 +1828,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
DeadInsts.clear();
Changed = false;
@@ -1947,7 +1847,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE, "indvars");
+ SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -1975,8 +1875,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL);
+ if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
new file mode 100644
index 0000000..cbdacad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -0,0 +1,1495 @@
+//===-- InductiveRangeCheckElimination.cpp - ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The InductiveRangeCheckElimination pass splits a loop's iteration space into
+// three disjoint ranges. It does that in a way such that the middle loop
+// provably does not need range checks. As an example, it will
+// convert
+//
+// len = < known positive >
+// for (i = 0; i < n; i++) {
+// if (0 <= i && i < len) {
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+//
+// to
+//
+// len = < known positive >
+// limit = smin(n, len)
+// // no first segment
+// for (i = 0; i < limit; i++) {
+// if (0 <= i && i < len) { // this check is fully redundant
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+// for (i = limit; i < n; i++) {
+// if (0 <= i && i < len) {
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <array>
+
+using namespace llvm;
+
+static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
+ cl::init(64));
+
+static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
+ cl::Hidden, cl::init(10));
+
+#define DEBUG_TYPE "irce"
+
+namespace {
+
+/// An inductive range check is a conditional branch in a loop with
+///
+/// 1. a very cold successor (i.e. the branch jumps to that successor very
+/// rarely)
+///
+/// and
+///
+/// 2. a condition that is provably true for some contiguous range of values
+/// taken by the containing loop's induction variable.
+///
+class InductiveRangeCheck {
+ // Classifies a range check
+ enum RangeCheckKind : unsigned {
+ // Range check of the form "0 <= I".
+ RANGE_CHECK_LOWER = 1,
+
+ // Range check of the form "I < L" where L is known positive.
+ RANGE_CHECK_UPPER = 2,
+
+ // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
+ // conditions.
+ RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
+
+ // Unrecognized range check condition.
+ RANGE_CHECK_UNKNOWN = (unsigned)-1
+ };
+
+ static const char *rangeCheckKindToStr(RangeCheckKind);
+
+ const SCEV *Offset;
+ const SCEV *Scale;
+ Value *Length;
+ BranchInst *Branch;
+ RangeCheckKind Kind;
+
+ static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length);
+
+ static InductiveRangeCheck::RangeCheckKind
+ parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition,
+ const SCEV *&Index, Value *&UpperLimit);
+
+ InductiveRangeCheck() :
+ Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
+
+public:
+ const SCEV *getOffset() const { return Offset; }
+ const SCEV *getScale() const { return Scale; }
+ Value *getLength() const { return Length; }
+
+ void print(raw_ostream &OS) const {
+ OS << "InductiveRangeCheck:\n";
+ OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
+ OS << " Offset: ";
+ Offset->print(OS);
+ OS << " Scale: ";
+ Scale->print(OS);
+ OS << " Length: ";
+ if (Length)
+ Length->print(OS);
+ else
+ OS << "(null)";
+ OS << "\n Branch: ";
+ getBranch()->print(OS);
+ OS << "\n";
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ print(dbgs());
+ }
+#endif
+
+ BranchInst *getBranch() const { return Branch; }
+
+  /// Represents a signed integer range [Range.getBegin(), Range.getEnd()). If
+ /// R.getEnd() sle R.getBegin(), then R denotes the empty range.
+ class Range {
+ const SCEV *Begin;
+ const SCEV *End;
+
+ public:
+ Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {
+ assert(Begin->getType() == End->getType() && "ill-typed range!");
+ }
+
+ Type *getType() const { return Begin->getType(); }
+ const SCEV *getBegin() const { return Begin; }
+ const SCEV *getEnd() const { return End; }
+ };
+
+ typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;
+
+ /// This is the value the condition of the branch needs to evaluate to for the
+ /// branch to take the hot successor (see (1) above).
+ bool getPassingDirection() { return true; }
+
+ /// Computes a range for the induction variable (IndVar) in which the range
+ /// check is redundant and can be constant-folded away. The induction
+ /// variable is not required to be the canonical {0,+,1} induction variable.
+ Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
+ const SCEVAddRecExpr *IndVar,
+ IRBuilder<> &B) const;
+
+ /// Create an inductive range check out of BI if possible, else return
+ /// nullptr.
+ static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
+ Loop *L, ScalarEvolution &SE,
+ BranchProbabilityInfo &BPI);
+};
+
+class InductiveRangeCheckElimination : public LoopPass {
+ InductiveRangeCheck::AllocatorTy Allocator;
+
+public:
+ static char ID;
+ InductiveRangeCheckElimination() : LoopPass(ID) {
+ initializeInductiveRangeCheckEliminationPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<BranchProbabilityInfo>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+char InductiveRangeCheckElimination::ID = 0;
+}
+
+INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
+
+const char *InductiveRangeCheck::rangeCheckKindToStr(
+ InductiveRangeCheck::RangeCheckKind RCK) {
+ switch (RCK) {
+ case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
+ return "RANGE_CHECK_UNKNOWN";
+
+ case InductiveRangeCheck::RANGE_CHECK_UPPER:
+ return "RANGE_CHECK_UPPER";
+
+ case InductiveRangeCheck::RANGE_CHECK_LOWER:
+ return "RANGE_CHECK_LOWER";
+
+ case InductiveRangeCheck::RANGE_CHECK_BOTH:
+ return "RANGE_CHECK_BOTH";
+ }
+
+ llvm_unreachable("unknown range check type!");
+}
+
+/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI`
+/// cannot be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and
+/// set `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value
+/// being range checked, and set `Length` to the upper limit `Index` is being
+/// range checked with if (and only if) the range check type is stronger or
+/// equal to RANGE_CHECK_UPPER.
+///
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length) {
+
+ auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) {
+ const SCEV *S = SE.getSCEV(V);
+ if (isa<SCEVCouldNotCompute>(S))
+ return false;
+
+ return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant &&
+ SE.isKnownNonNegative(S);
+ };
+
+ using namespace llvm::PatternMatch;
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ switch (Pred) {
+ default:
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_SGE:
+ if (match(RHS, m_ConstantInt<0>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_SLT:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_SGT:
+ if (match(RHS, m_ConstantInt<-1>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
+
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_UPPER;
+ }
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_ULT:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_UGT:
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_BOTH;
+ }
+ return RANGE_CHECK_UNKNOWN;
+ }
+
+ llvm_unreachable("default clause returns!");
+}
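+
A hypothetical source-level reading of the classification above, including the unsigned-compare idiom that lets a single ICMP_ULT/ICMP_UGT act as both checks:

    // if (i >= 0)            -> RANGE_CHECK_LOWER (ICMP_SGE against 0)
    // if (i < len)           -> RANGE_CHECK_UPPER (len loop-invariant, >= 0)
    // if (i >= 0 && i < len) -> RANGE_CHECK_BOTH  (via the `and` parse below)
    //
    // if ((unsigned)i < (unsigned)len)
    //                        -> RANGE_CHECK_BOTH from one compare: unsigned
    //                           i < len with len >= 0 also implies 0 <= i.
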
+
+/// Parses an arbitrary condition into a range check. `Length` is set only if
+/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger.
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE,
+ Value *Condition, const SCEV *&Index,
+ Value *&Length) {
+ using namespace llvm::PatternMatch;
+
+ Value *A = nullptr;
+ Value *B = nullptr;
+
+ if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
+ Value *IndexA = nullptr, *IndexB = nullptr;
+ Value *LengthA = nullptr, *LengthB = nullptr;
+ ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B);
+
+ if (!ICmpA || !ICmpB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA);
+ auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB);
+
+ if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN ||
+ RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ if (IndexA != IndexB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Index = SE.getSCEV(IndexA);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Length = LengthA == nullptr ? LengthB : LengthA;
+
+ return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ Value *IndexVal = nullptr;
+
+ auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Index = SE.getSCEV(IndexVal);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ return RCKind;
+ }
+
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+}
+
+
+InductiveRangeCheck *
+InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
+ Loop *L, ScalarEvolution &SE,
+ BranchProbabilityInfo &BPI) {
+
+ if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
+ return nullptr;
+
+ BranchProbability LikelyTaken(15, 16);
+
+ if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken)
+ return nullptr;
+
+ Value *Length = nullptr;
+ const SCEV *IndexSCEV = nullptr;
+
+ auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(),
+ IndexSCEV, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return nullptr;
+
+ assert(IndexSCEV && "contract with SplitRangeCheckCondition!");
+ assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) &&
+ "contract with SplitRangeCheckCondition!");
+
+ const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
+ bool IsAffineIndex =
+ IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
+
+ if (!IsAffineIndex)
+ return nullptr;
+
+ InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
+ IRC->Length = Length;
+ IRC->Offset = IndexAddRec->getStart();
+ IRC->Scale = IndexAddRec->getStepRecurrence(SE);
+ IRC->Branch = BI;
+ IRC->Kind = RCKind;
+ return IRC;
+}
+
+namespace {
+
+// Keeps track of the structure of a loop. This is similar to llvm::Loop,
+// except that it is more lightweight and can track the state of a loop through
+// changing and potentially invalid IR. This structure also formalizes the
+// kinds of loops we can deal with -- ones that have a single latch that is also
+// an exiting block *and* have a canonical induction variable.
+struct LoopStructure {
+ const char *Tag;
+
+ BasicBlock *Header;
+ BasicBlock *Latch;
+
+  // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th
+  // successor is `LatchExit', the exit block of the loop.
+ BranchInst *LatchBr;
+ BasicBlock *LatchExit;
+ unsigned LatchBrExitIdx;
+
+ Value *IndVarNext;
+ Value *IndVarStart;
+ Value *LoopExitAt;
+ bool IndVarIncreasing;
+
+ LoopStructure()
+ : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
+ LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
+ IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {}
+
+ template <typename M> LoopStructure map(M Map) const {
+ LoopStructure Result;
+ Result.Tag = Tag;
+ Result.Header = cast<BasicBlock>(Map(Header));
+ Result.Latch = cast<BasicBlock>(Map(Latch));
+ Result.LatchBr = cast<BranchInst>(Map(LatchBr));
+ Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarNext = Map(IndVarNext);
+ Result.IndVarStart = Map(IndVarStart);
+ Result.LoopExitAt = Map(LoopExitAt);
+ Result.IndVarIncreasing = IndVarIncreasing;
+ return Result;
+ }
+
+ static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
+ BranchProbabilityInfo &BPI,
+ Loop &,
+ const char *&);
+};
+
+/// This class is used to constrain loops to run within a given iteration space.
+/// The algorithm this class implements is given a Loop and a range [Begin,
+/// End). The algorithm then tries to break a "main loop" out of the loop it
+/// is given, in a way that the "main loop" runs with the induction variable
+/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
+/// loops to run any remaining iterations. The pre loop runs any iterations in
+/// which the induction variable is < Begin, and the post loop runs any
+/// iterations in which the induction variable is >= End.
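+///
+/// As a rough sketch (illustrative C-like pseudocode, not from this file):
+/// given
+///
+///   for (i = 0; i < n; i++) {
+///     if (0 <= i && i < len)    // range check over [Begin, End) = [0, len)
+///       a[i] = 0;
+///     else
+///       throw();
+///   }
+///
+/// the rewrite conceptually produces
+///
+///   for (i = 0; i < min(n, len); i++)   // main loop: checks always pass
+///     a[i] = 0;
+///   for (; i < n; i++)                  // post loop: checks retained
+///     if (0 <= i && i < len) a[i] = 0; else throw();
+///
+/// with a pre loop emitted analogously when Begin lies above the initial
+/// value of the induction variable.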
+///
+class LoopConstrainer {
+ // The representation of a clone of the original loop we started out with.
+ struct ClonedLoop {
+ // The cloned blocks
+ std::vector<BasicBlock *> Blocks;
+
+ // `Map` maps values in the clonee into values in the cloned version
+ ValueToValueMapTy Map;
+
+ // An instance of `LoopStructure` for the cloned loop
+ LoopStructure Structure;
+ };
+
+ // Result of rewriting the range of a loop. See changeIterationSpaceEnd for
+ // more details on what these fields mean.
+ struct RewrittenRangeInfo {
+ BasicBlock *PseudoExit;
+ BasicBlock *ExitSelector;
+ std::vector<PHINode *> PHIValuesAtPseudoExit;
+ PHINode *IndVarEnd;
+
+ RewrittenRangeInfo()
+ : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {}
+ };
+
+ // Calculated subranges we restrict the iteration space of the main loop to.
+ // See the implementation of `calculateSubRanges' for more details on how
+  // these fields are computed. `LowLimit` is None if there is no restriction
+  // on the low end of the restricted iteration space of the main loop.
+  // `HighLimit` is None if there is no restriction on the high end of the
+  // restricted iteration space of the main loop.
+
+ struct SubRanges {
+ Optional<const SCEV *> LowLimit;
+ Optional<const SCEV *> HighLimit;
+ };
+
+ // A utility function that does a `replaceUsesOfWith' on the incoming block
+ // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
+ // incoming block list with `ReplaceBy'.
+ static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
+ BasicBlock *ReplaceBy);
+
+ // Compute a safe set of limits for the main loop to run in -- effectively the
+ // intersection of `Range' and the iteration space of the original loop.
+ // Return None if unable to compute the set of subranges.
+ //
+ Optional<SubRanges> calculateSubRanges() const;
+
+ // Clone `OriginalLoop' and return the result in CLResult. The IR after
+ // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
+ // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
+ // but there is no such edge.
+ //
+ void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
+
+ // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
+ // iteration space of the rewritten loop ends at ExitLoopAt. The start of the
+  // iteration space is not changed.  `ExitLoopAt' is assumed to be slt the
+  // original latch-taken count (`LatchTakenCount').
+  //
+  // If there are iterations left to execute, control is made to jump to
+  // `ContinuationBlock'; otherwise control takes the normal loop exit.  The
+ // returned `RewrittenRangeInfo' object is populated as follows:
+ //
+ // .PseudoExit is a basic block that unconditionally branches to
+ // `ContinuationBlock'.
+ //
+ // .ExitSelector is a basic block that decides, on exit from the loop,
+ // whether to branch to the "true" exit or to `PseudoExit'.
+ //
+ // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
+ // for each PHINode in the loop header on taking the pseudo exit.
+ //
+ // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
+ // preheader because it is made to branch to the loop header only
+ // conditionally.
+ //
+ RewrittenRangeInfo
+ changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
+ Value *ExitLoopAt,
+ BasicBlock *ContinuationBlock) const;
+
+ // The loop denoted by `LS' has `OldPreheader' as its preheader. This
+ // function creates a new preheader for `LS' and returns it.
+ //
+ BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
+ const char *Tag) const;
+
+ // `ContinuationBlockAndPreheader' was the continuation block for some call to
+ // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
+ // This function rewrites the PHI nodes in `LS.Header' to start with the
+ // correct value.
+ void rewriteIncomingValuesForPHIs(
+ LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
+ const LoopConstrainer::RewrittenRangeInfo &RRI) const;
+
+ // Even though we do not preserve any passes at this time, we at least need to
+ // keep the parent loop structure consistent. The `LPPassManager' seems to
+ // verify this after running a loop pass. This function adds the list of
+  // blocks denoted by BBs to this loop's parent loop if required.
+ void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);
+
+ // Some global state.
+ Function &F;
+ LLVMContext &Ctx;
+ ScalarEvolution &SE;
+
+ // Information about the original loop we started out with.
+ Loop &OriginalLoop;
+ LoopInfo &OriginalLoopInfo;
+ const SCEV *LatchTakenCount;
+ BasicBlock *OriginalPreheader;
+
+ // The preheader of the main loop. This may or may not be different from
+ // `OriginalPreheader'.
+ BasicBlock *MainLoopPreheader;
+
+ // The range we need to run the main loop in.
+ InductiveRangeCheck::Range Range;
+
+ // The structure of the main loop (see comment at the beginning of this class
+ // for a definition)
+ LoopStructure MainLoopStructure;
+
+public:
+ LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
+ ScalarEvolution &SE, InductiveRangeCheck::Range R)
+ : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
+ SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
+ OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
+ MainLoopStructure(LS) {}
+
+ // Entry point for the algorithm. Returns true on success.
+ bool run();
+};
+
+}
+
+void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
+ BasicBlock *ReplaceBy) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == Block)
+ PN->setIncomingBlock(i, ReplaceBy);
+}
+
+static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) {
+ APInt SMax =
+ APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth());
+ return SE.getSignedRange(S).contains(SMax) &&
+ SE.getUnsignedRange(S).contains(SMax);
+}
+
+static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
+ APInt SMin =
+ APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth());
+ return SE.getSignedRange(S).contains(SMin) &&
+ SE.getUnsignedRange(S).contains(SMin);
+}
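+
+// `CanBeSMax' and `CanBeSMin' are used below when coercing an inclusive latch
+// condition into an exclusive one: adding or subtracting one from a limit is
+// only safe if the limit cannot already be SMAX (respectively SMIN).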
+
+Optional<LoopStructure>
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI,
+ Loop &L, const char *&FailureReason) {
+ assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
+
+ BasicBlock *Latch = L.getLoopLatch();
+ if (!L.isLoopExiting(Latch)) {
+    FailureReason = "loop latch is not exiting";
+ return None;
+ }
+
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader) {
+ FailureReason = "no preheader";
+ return None;
+ }
+
+ BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
+ if (!LatchBr || LatchBr->isUnconditional()) {
+ FailureReason = "latch terminator not conditional branch";
+ return None;
+ }
+
+ unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
+
+ BranchProbability ExitProbability =
+ BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx);
+
+ if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
+ FailureReason = "short running loop, not profitable";
+ return None;
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
+ if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
+ FailureReason = "latch terminator branch not conditional on integral icmp";
+ return None;
+ }
+
+ const SCEV *LatchCount = SE.getExitCount(&L, Latch);
+ if (isa<SCEVCouldNotCompute>(LatchCount)) {
+ FailureReason = "could not compute latch count";
+ return None;
+ }
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LeftValue = ICI->getOperand(0);
+ const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
+ IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
+
+ Value *RightValue = ICI->getOperand(1);
+ const SCEV *RightSCEV = SE.getSCEV(RightValue);
+
+ // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ std::swap(LeftValue, RightValue);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else {
+ FailureReason = "no add recurrences in the icmp";
+ return None;
+ }
+ }
+
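+  // The check below works by sign-extending the recurrence to a type twice as
+  // wide: if sext({S,+,X}) is still an add recurrence whose start is sext(S)
+  // and whose step is sext(X), then the narrow recurrence cannot have wrapped
+  // in the signed sense.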
+ auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return true;
+
+ IntegerType *Ty = cast<IntegerType>(AR->getType());
+ IntegerType *WideTy =
+ IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+
+ const SCEVAddRecExpr *ExtendAfterOp =
+ dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+ if (ExtendAfterOp) {
+ const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+ const SCEV *ExtendedStep =
+ SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+
+ bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+ ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+ if (NoSignedWrap)
+ return true;
+ }
+
+ // We may have proved this when computing the sign extension above.
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
+ };
+
+ auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
+ if (!AR->isAffine())
+ return false;
+
+ // Currently we only work with induction variables that have been proved to
+ // not wrap. This restriction can potentially be lifted in the future.
+
+ if (!HasNoSignedWrap(AR))
+ return false;
+
+ if (const SCEVConstant *StepExpr =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
+ ConstantInt *StepCI = StepExpr->getValue();
+ if (StepCI->isOne() || StepCI->isMinusOne()) {
+ IsIncreasing = StepCI->isOne();
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+ // `ICI` is interpreted as taking the backedge if the *next* value of the
+ // induction variable satisfies some constraint.
+
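+  // For instance (illustrative): in a rotated loop the latch typically
+  // compares the incremented value, e.g. `i + 1 s< n', so `LeftSCEV' here is
+  // the post-increment recurrence -- {1,+,1} for a loop counting up from 0 --
+  // one step ahead of the value the loop body observes.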
+ const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
+ bool IsIncreasing = false;
+ if (!IsInductionVar(IndVarNext, IsIncreasing)) {
+ FailureReason = "LHS in icmp not induction variable";
+ return None;
+ }
+
+ ConstantInt *One = ConstantInt::get(IndVarTy, 1);
+ // TODO: generalize the predicates here to also match their unsigned variants.
+ if (IsIncreasing) {
+ bool FoundExpectedPred =
+ (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
+ (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp slt semantically, found something else";
+ return None;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ if (CanBeSMax(SE, RightSCEV)) {
+ // TODO: this restriction is easily removable -- we just have to
+ // remember that the icmp was an slt and not an sle.
+ FailureReason = "limit may overflow when coercing sle to slt";
+ return None;
+ }
+
+ IRBuilder<> B(&*Preheader->rbegin());
+ RightValue = B.CreateAdd(RightValue, One);
+ }
+
+ } else {
+ bool FoundExpectedPred =
+ (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
+ (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp sgt semantically, found something else";
+ return None;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ if (CanBeSMin(SE, RightSCEV)) {
+ // TODO: this restriction is easily removable -- we just have to
+ // remember that the icmp was an sgt and not an sge.
+ FailureReason = "limit may overflow when coercing sge to sgt";
+ return None;
+ }
+
+ IRBuilder<> B(&*Preheader->rbegin());
+ RightValue = B.CreateSub(RightValue, One);
+ }
+ }
+
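+  // Recover the value the induction variable has on entry to the loop from
+  // the post-increment recurrence: e.g. (illustrative) if `IndVarNext' is
+  // {1,+,1}, then IndVarStart = StartNext - Step = 1 - 1 = 0.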
+ const SCEV *StartNext = IndVarNext->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+
+ BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
+
+ assert(SE.getLoopDisposition(LatchCount, &L) ==
+ ScalarEvolution::LoopInvariant &&
+ "loop variant exit count doesn't make sense!");
+
+ assert(!L.contains(LatchExit) && "expected an exit block!");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ Value *IndVarStartV =
+ SCEVExpander(SE, DL, "irce")
+ .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin());
+ IndVarStartV->setName("indvar.start");
+
+ LoopStructure Result;
+
+ Result.Tag = "main";
+ Result.Header = Header;
+ Result.Latch = Latch;
+ Result.LatchBr = LatchBr;
+ Result.LatchExit = LatchExit;
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarStart = IndVarStartV;
+ Result.IndVarNext = LeftValue;
+ Result.IndVarIncreasing = IsIncreasing;
+ Result.LoopExitAt = RightValue;
+
+ FailureReason = nullptr;
+
+ return Result;
+}
+
+Optional<LoopConstrainer::SubRanges>
+LoopConstrainer::calculateSubRanges() const {
+ IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
+
+ if (Range.getType() != Ty)
+ return None;
+
+ LoopConstrainer::SubRanges Result;
+
+ // I think we can be more aggressive here and make this nuw / nsw if the
+ // addition that feeds into the icmp for the latch's terminating branch is nuw
+ // / nsw. In any case, a wrapping 2's complement addition is safe.
+ ConstantInt *One = ConstantInt::get(Ty, 1);
+ const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
+ const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+
+ // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
+ // range of values the induction variable takes.
+
+ const SCEV *Smallest = nullptr, *Greatest = nullptr;
+
+ if (Increasing) {
+ Smallest = Start;
+ Greatest = End;
+ } else {
+ // These two computations may sign-overflow. Here is why that is okay:
+ //
+ // We know that the induction variable does not sign-overflow on any
+ // iteration except the last one, and it starts at `Start` and ends at
+ // `End`, decrementing by one every time.
+ //
+ // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
+    //   induction variable is decreasing we know that the smallest value
+ // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
+ //
+ // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
+ // that case, `Clamp` will always return `Smallest` and
+ // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
+ // will be an empty range. Returning an empty range is always safe.
+ //
+
+ Smallest = SE.getAddExpr(End, SE.getSCEV(One));
+ Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
+ }
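+
+  // Worked instance (illustrative): a decreasing loop with Start = 9 and
+  // End = -1 executes with the values 9, 8, ..., 0, so
+  // [Smallest, Greatest) = [End + 1, Start + 1) = [0, 10).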
+
+ auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
+ return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
+ };
+
+  // In some cases we can prove that we don't need a pre or post loop.
+
+ bool ProvablyNoPreloop =
+ SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest);
+ if (!ProvablyNoPreloop)
+ Result.LowLimit = Clamp(Range.getBegin());
+
+ bool ProvablyNoPostLoop =
+ SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
+ if (!ProvablyNoPostLoop)
+ Result.HighLimit = Clamp(Range.getEnd());
+
+ return Result;
+}
+
+void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
+ const char *Tag) const {
+ for (BasicBlock *BB : OriginalLoop.getBlocks()) {
+ BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
+ Result.Blocks.push_back(Clone);
+ Result.Map[BB] = Clone;
+ }
+
+ auto GetClonedValue = [&Result](Value *V) {
+ assert(V && "null values not in domain!");
+ auto It = Result.Map.find(V);
+ if (It == Result.Map.end())
+ return V;
+ return static_cast<Value *>(It->second);
+ };
+
+ Result.Structure = MainLoopStructure.map(GetClonedValue);
+ Result.Structure.Tag = Tag;
+
+ for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
+ BasicBlock *ClonedBB = Result.Blocks[i];
+ BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
+
+ assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
+
+ for (Instruction &I : *ClonedBB)
+ RemapInstruction(&I, Result.Map,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+
+ // Exit blocks will now have one more predecessor and their PHI nodes need
+ // to be edited to reflect that. No phi nodes need to be introduced because
+ // the loop is in LCSSA.
+
+ for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
+ SBBI != SBBE; ++SBBI) {
+
+ if (OriginalLoop.contains(*SBBI))
+ continue; // not an exit block
+
+ for (Instruction &I : **SBBI) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+ Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
+ PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+ }
+ }
+ }
+}
+
+LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
+ const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
+ BasicBlock *ContinuationBlock) const {
+
+ // We start with a loop with a single latch:
+ //
+ // +--------------------+
+ // | |
+ // | preheader |
+ // | |
+ // +--------+-----------+
+ // | ----------------\
+ // | / |
+ // +--------v----v------+ |
+ // | | |
+ // | header | |
+ // | | |
+ // +--------------------+ |
+ // |
+ // ..... |
+ // |
+ // +--------------------+ |
+ // | | |
+ // | latch >----------/
+ // | |
+ // +-------v------------+
+ // |
+ // |
+ // | +--------------------+
+ // | | |
+ // +---> original exit |
+ // | |
+ // +--------------------+
+ //
+ // We change the control flow to look like
+ //
+ //
+ // +--------------------+
+ // | |
+ // | preheader >-------------------------+
+ // | | |
+ // +--------v-----------+ |
+ // | /-------------+ |
+ // | / | |
+ // +--------v--v--------+ | |
+ // | | | |
+ // | header | | +--------+ |
+ // | | | | | |
+ // +--------------------+ | | +-----v-----v-----------+
+ // | | | |
+ // | | | .pseudo.exit |
+ // | | | |
+ // | | +-----------v-----------+
+ // | | |
+ // ..... | | |
+ // | | +--------v-------------+
+ // +--------------------+ | | | |
+ // | | | | | ContinuationBlock |
+ // | latch >------+ | | |
+ // | | | +----------------------+
+ // +---------v----------+ |
+ // | |
+ // | |
+ // | +---------------^-----+
+ // | | |
+ // +-----> .exit.selector |
+ // | |
+ // +----------v----------+
+ // |
+ // +--------------------+ |
+ // | | |
+ // | original exit <----+
+ // | |
+ // +--------------------+
+ //
+
+ RewrittenRangeInfo RRI;
+
+ auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
+ RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
+ &F, BBInsertLocation);
+ RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
+ BBInsertLocation);
+
+ BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
+ bool Increasing = LS.IndVarIncreasing;
+
+ IRBuilder<> B(PreheaderJump);
+
+ // EnterLoopCond - is it okay to start executing this `LS'?
+ Value *EnterLoopCond = Increasing
+ ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
+ : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt);
+
+ B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
+ PreheaderJump->eraseFromParent();
+
+ LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+ B.SetInsertPoint(LS.LatchBr);
+ Value *TakeBackedgeLoopCond =
+ Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt)
+ : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt);
+ Value *CondForBranch = LS.LatchBrExitIdx == 1
+ ? TakeBackedgeLoopCond
+ : B.CreateNot(TakeBackedgeLoopCond);
+
+ LS.LatchBr->setCondition(CondForBranch);
+
+ B.SetInsertPoint(RRI.ExitSelector);
+
+ // IterationsLeft - are there any more iterations left, given the original
+ // upper bound on the induction variable? If not, we branch to the "real"
+ // exit.
+ Value *IterationsLeft = Increasing
+ ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt)
+ : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt);
+ B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
+
+ BranchInst *BranchToContinuation =
+ BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
+
+ // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
+ // each of the PHI nodes in the loop header. This feeds into the initial
+ // value of the same PHI nodes if/when we continue execution.
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+
+ PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
+ BranchToContinuation);
+
+ NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
+ NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
+ RRI.ExitSelector);
+ RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
+ }
+
+ RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end",
+ BranchToContinuation);
+ RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector);
+
+ // The latch exit now has a branch from `RRI.ExitSelector' instead of
+ // `LS.Latch'. The PHI nodes need to be updated to reflect that.
+ for (Instruction &I : *LS.LatchExit) {
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
+ else
+ break;
+ }
+
+ return RRI;
+}
+
+void LoopConstrainer::rewriteIncomingValuesForPHIs(
+ LoopStructure &LS, BasicBlock *ContinuationBlock,
+ const LoopConstrainer::RewrittenRangeInfo &RRI) const {
+
+ unsigned PHIIndex = 0;
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
+ if (PN->getIncomingBlock(i) == ContinuationBlock)
+ PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+ }
+
+ LS.IndVarStart = RRI.IndVarEnd;
+}
+
+BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
+ BasicBlock *OldPreheader,
+ const char *Tag) const {
+
+ BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
+ BranchInst::Create(LS.Header, Preheader);
+
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
+ replacePHIBlock(PN, OldPreheader, Preheader);
+ }
+
+ return Preheader;
+}
+
+void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
+ Loop *ParentLoop = OriginalLoop.getParentLoop();
+ if (!ParentLoop)
+ return;
+
+ for (BasicBlock *BB : BBs)
+ ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo);
+}
+
+bool LoopConstrainer::run() {
+ BasicBlock *Preheader = nullptr;
+ LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
+ Preheader = OriginalLoop.getLoopPreheader();
+ assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
+ "preconditions!");
+
+ OriginalPreheader = Preheader;
+ MainLoopPreheader = Preheader;
+
+ Optional<SubRanges> MaybeSR = calculateSubRanges();
+ if (!MaybeSR.hasValue()) {
+ DEBUG(dbgs() << "irce: could not compute subranges\n");
+ return false;
+ }
+
+ SubRanges SR = MaybeSR.getValue();
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+ IntegerType *IVTy =
+ cast<IntegerType>(MainLoopStructure.IndVarNext->getType());
+
+ SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
+ Instruction *InsertPt = OriginalPreheader->getTerminator();
+
+ // It would have been better to make `PreLoop' and `PostLoop'
+ // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
+ // constructor.
+ ClonedLoop PreLoop, PostLoop;
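+
+  // For an increasing induction variable the pre loop covers the iterations
+  // below LowLimit and the post loop the iterations at or above HighLimit;
+  // for a decreasing induction variable the roles of the two limits swap.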
+ bool NeedsPreLoop =
+ Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
+ bool NeedsPostLoop =
+ Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
+
+ Value *ExitPreLoopAt = nullptr;
+ Value *ExitMainLoopAt = nullptr;
+ const SCEVConstant *MinusOneS =
+ cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
+
+ if (NeedsPreLoop) {
+ const SCEV *ExitPreLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitPreLoopAtSCEV = *SR.LowLimit;
+ else {
+ if (CanBeSMin(SE, *SR.HighLimit)) {
+ DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = " << *(*SR.HighLimit)
+ << "\n");
+ return false;
+ }
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
+ }
+
+ ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
+ ExitPreLoopAt->setName("exit.preloop.at");
+ }
+
+ if (NeedsPostLoop) {
+ const SCEV *ExitMainLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitMainLoopAtSCEV = *SR.HighLimit;
+ else {
+ if (CanBeSMin(SE, *SR.LowLimit)) {
+ DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit)
+ << "\n");
+ return false;
+ }
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
+ }
+
+ ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
+ ExitMainLoopAt->setName("exit.mainloop.at");
+ }
+
+ // We clone these ahead of time so that we don't have to deal with changing
+ // and temporarily invalid IR as we transform the loops.
+ if (NeedsPreLoop)
+ cloneLoop(PreLoop, "preloop");
+ if (NeedsPostLoop)
+ cloneLoop(PostLoop, "postloop");
+
+ RewrittenRangeInfo PreLoopRRI;
+
+ if (NeedsPreLoop) {
+ Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
+ PreLoop.Structure.Header);
+
+ MainLoopPreheader =
+ createPreheader(MainLoopStructure, Preheader, "mainloop");
+ PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+ ExitPreLoopAt, MainLoopPreheader);
+ rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
+ PreLoopRRI);
+ }
+
+ BasicBlock *PostLoopPreheader = nullptr;
+ RewrittenRangeInfo PostLoopRRI;
+
+ if (NeedsPostLoop) {
+ PostLoopPreheader =
+ createPreheader(PostLoop.Structure, Preheader, "postloop");
+ PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
+ ExitMainLoopAt, PostLoopPreheader);
+ rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
+ PostLoopRRI);
+ }
+
+ BasicBlock *NewMainLoopPreheader =
+ MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
+ BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
+ PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
+ PostLoopRRI.ExitSelector, NewMainLoopPreheader};
+
+ // Some of the above may be nullptr, filter them out before passing to
+ // addToParentLoopIfNeeded.
+ auto NewBlocksEnd =
+ std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
+
+ addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
+ addToParentLoopIfNeeded(PreLoop.Blocks);
+ addToParentLoopIfNeeded(PostLoop.Blocks);
+
+ return true;
+}
+
+/// Computes and returns a range of values for the induction variable (IndVar)
+/// in which the range check can be safely elided. If it cannot compute such a
+/// range, returns None.
+Optional<InductiveRangeCheck::Range>
+InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
+ const SCEVAddRecExpr *IndVar,
+ IRBuilder<> &) const {
+ // IndVar is of the form "A + B * I" (where "I" is the canonical induction
+ // variable, that may or may not exist as a real llvm::Value in the loop) and
+ // this inductive range check is a range check on the "C + D * I" ("C" is
+  // getOffset() and "D" is getScale()). We rewrite the value being range
+  // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - N * A".
+ // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code
+ // can be generalized as needed.
+ //
+ // The actual inequalities we solve are of the form
+ //
+ // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
+ //
+ // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions
+ // and subtractions are twos-complement wrapping and comparisons are signed.
+ //
+ // Proof:
+ //
+ // If there exists IndVar such that -M <= IndVar < (L - M) then it follows
+ // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows
+ // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have
+  // overflowed.
+ //
+ // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t.
+ // Hence 0 <= (IndVar + M) < L
+
+ // [^1]: Note that the solution does _not_ apply if L < 0; consider values M =
+ // 127, IndVar = 126 and L = -2 in an i8 world.
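+  //
+  // Worked instance (illustrative): in i32 with M = -10 and L = 20, the safe
+  // range is [-M, L - M) = [10, 30), and any IndVar in it indeed satisfies
+  // 0 <= IndVar + M < 20.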
+
+ if (!IndVar->isAffine())
+ return None;
+
+ const SCEV *A = IndVar->getStart();
+ const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+ if (!B)
+ return None;
+
+ const SCEV *C = getOffset();
+ const SCEVConstant *D = dyn_cast<SCEVConstant>(getScale());
+ if (D != B)
+ return None;
+
+ ConstantInt *ConstD = D->getValue();
+ if (!(ConstD->isMinusOne() || ConstD->isOne()))
+ return None;
+
+ const SCEV *M = SE.getMinusSCEV(C, A);
+
+ const SCEV *Begin = SE.getNegativeSCEV(M);
+ const SCEV *UpperLimit = nullptr;
+
+ // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
+ // We can potentially do much better here.
+ if (Value *V = getLength()) {
+ UpperLimit = SE.getSCEV(V);
+ } else {
+ assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
+ unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ }
+
+ const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
+ return InductiveRangeCheck::Range(Begin, End);
+}
+
+static Optional<InductiveRangeCheck::Range>
+IntersectRange(ScalarEvolution &SE,
+ const Optional<InductiveRangeCheck::Range> &R1,
+ const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
+ if (!R1.hasValue())
+ return R2;
+ auto &R1Value = R1.getValue();
+
+ // TODO: we could widen the smaller range and have this work; but for now we
+ // bail out to keep things simple.
+ if (R1Value.getType() != R2.getType())
+ return None;
+
+ const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin());
+ const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd());
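+
+  // E.g. (illustrative): intersecting [0, n) with [5, m) yields
+  // [smax(0, 5), smin(n, m)) = [5, smin(n, m)).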
+
+ return InductiveRangeCheck::Range(NewBegin, NewEnd);
+}
+
+bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (L->getBlocks().size() >= LoopSizeCutoff) {
+ DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";);
+ return false;
+ }
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
+ return false;
+ }
+
+ LLVMContext &Context = Preheader->getContext();
+ InductiveRangeCheck::AllocatorTy IRCAlloc;
+ SmallVector<InductiveRangeCheck *, 16> RangeChecks;
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+
+ for (auto BBI : L->getBlocks())
+ if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
+ if (InductiveRangeCheck *IRC =
+ InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI))
+ RangeChecks.push_back(IRC);
+
+ if (RangeChecks.empty())
+ return false;
+
+ auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
+ OS << "irce: looking at loop "; L->print(OS);
+ OS << "irce: loop has " << RangeChecks.size()
+ << " inductive range checks: \n";
+ for (InductiveRangeCheck *IRC : RangeChecks)
+ IRC->print(OS);
+ };
+
+ DEBUG(PrintRecognizedRangeChecks(dbgs()));
+
+ if (PrintRangeChecks)
+ PrintRecognizedRangeChecks(errs());
+
+ const char *FailureReason = nullptr;
+ Optional<LoopStructure> MaybeLoopStructure =
+ LoopStructure::parseLoopStructure(SE, BPI, *L, FailureReason);
+ if (!MaybeLoopStructure.hasValue()) {
+ DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason
+ << "\n";);
+ return false;
+ }
+ LoopStructure LS = MaybeLoopStructure.getValue();
+ bool Increasing = LS.IndVarIncreasing;
+ const SCEV *MinusOne =
+ SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
+ const SCEVAddRecExpr *IndVar =
+ cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
+
+ Optional<InductiveRangeCheck::Range> SafeIterRange;
+ Instruction *ExprInsertPt = Preheader->getTerminator();
+
+ SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;
+
+ IRBuilder<> B(ExprInsertPt);
+ for (InductiveRangeCheck *IRC : RangeChecks) {
+ auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B);
+ if (Result.hasValue()) {
+ auto MaybeSafeIterRange =
+ IntersectRange(SE, SafeIterRange, Result.getValue(), B);
+ if (MaybeSafeIterRange.hasValue()) {
+ RangeChecksToEliminate.push_back(IRC);
+ SafeIterRange = MaybeSafeIterRange.getValue();
+ }
+ }
+ }
+
+ if (!SafeIterRange.hasValue())
+ return false;
+
+ LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
+ SE, SafeIterRange.getValue());
+ bool Changed = LC.run();
+
+ if (Changed) {
+ auto PrintConstrainedLoopInfo = [L]() {
+ dbgs() << "irce: in function ";
+ dbgs() << L->getHeader()->getParent()->getName() << ": ";
+ dbgs() << "constrained ";
+ L->print(dbgs());
+ };
+
+ DEBUG(PrintConstrainedLoopInfo());
+
+ if (PrintChangedLoops)
+ PrintConstrainedLoopInfo();
+
+ // Optimize away the now-redundant range checks.
+
+ for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
+ ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
+ ? ConstantInt::getTrue(Context)
+ : ConstantInt::getFalse(Context);
+ IRC->getBranch()->setCondition(FoldedRangeCheck);
+ }
+ }
+
+ return Changed;
+}
+
+Pass *llvm::createInductiveRangeCheckEliminationPass() {
+ return new InductiveRangeCheckElimination;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 78beb3f..711df41 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -32,7 +33,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -78,7 +78,6 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- const DataLayout *DL;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -115,7 +114,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void FindLoopHeaders(Function &F);
@@ -145,7 +144,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -159,9 +158,7 @@ bool JumpThreading::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
// Remove unreachable blocks from function as they may result in infinite
@@ -505,6 +502,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
+ const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -709,7 +707,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
+ Value *SimpleVal =
+ ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -792,6 +791,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
CondBr->eraseFromParent();
+ if (CondCmp->use_empty())
+ CondCmp->eraseFromParent();
+ else if (CondCmp->getParent() == BB) {
+ // If the fact we just learned is true for all uses of the
+ // condition, replace it with a constant value
+ auto *CI = Baseline == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ CondCmp->replaceAllUsesWith(CI);
+ CondCmp->eraseFromParent();
+ }
return true;
}
}
@@ -993,7 +1003,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -1418,7 +1428,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -1521,7 +1531,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, DL, TLI);
+ SimplifyInstructionsInBlock(NewBB, TLI);
// Threaded an edge!
++NumThreads;
@@ -1561,7 +1571,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
@@ -1575,7 +1585,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
- PredBB = SplitEdge(PredBB, BB, this);
+ PredBB = SplitEdge(PredBB, BB);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
@@ -1586,7 +1596,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
@@ -1603,7 +1612,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(New, DL)) {
+ if (Value *IV =
+ SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
ValueMapping[BI] = IV;
} else {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index e145981..f0e6d64 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -52,7 +53,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -71,6 +71,33 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
+static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop);
+static bool hoist(Instruction &I, BasicBlock *Preheader);
+static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
+                 const Loop *CurLoop, AliasSetTracker *CurAST);
+static bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo);
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo,
+ const Instruction *CtxI = nullptr);
+static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSetTracker *CurAST);
+static Instruction *CloneInstructionInExitBlock(const Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN,
+ const LoopInfo *LI);
+static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo);
+
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
@@ -86,7 +113,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -94,7 +121,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
using llvm::Pass::doFinalization;
@@ -109,7 +136,6 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- const DataLayout *DL; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -117,10 +143,6 @@ namespace {
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- bool MayThrow; // The current loop contains an instruction which
- // may throw, thus preventing code motion of
- // instructions with side effects.
- bool HeaderMayThrow; // Same as previous, but specific to loop header
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -133,88 +155,17 @@ namespace {
/// Simple Analysis hook. Delete loop L from alias set map.
void deleteAnalysisLoop(Loop *L) override;
-
- /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
- /// dominated by the specified block, and that are in the current loop) in
- /// reverse depth first order w.r.t the DominatorTree. This allows us to
- /// visit uses before definitions, allowing us to sink a loop body in one
- /// pass without iteration.
- ///
- void SinkRegion(DomTreeNode *N);
-
- /// HoistRegion - Walk the specified region of the CFG (defined by all
- /// blocks dominated by the specified block, and that are in the current
- /// loop) in depth first order w.r.t the DominatorTree. This allows us to
- /// visit definitions before uses, allowing us to hoist a loop body in one
- /// pass without iteration.
- ///
- void HoistRegion(DomTreeNode *N);
-
- /// inSubLoop - Little predicate that returns true if the specified basic
- /// block is in a subloop of the current one, not the current one itself.
- ///
- bool inSubLoop(BasicBlock *BB) {
- assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
- return LI->getLoopFor(BB) != CurLoop;
- }
-
- /// sink - When an instruction is found to only be used outside of the loop,
- /// this function moves it to the exit blocks and patches up SSA form as
- /// needed.
- ///
- void sink(Instruction &I);
-
- /// hoist - When an instruction is found to only use loop invariant operands
- /// that is safe to hoist, this instruction is called to do the dirty work.
- ///
- void hoist(Instruction &I);
-
- /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
- /// is not a trapping instruction or if it is a trapping instruction and is
- /// guaranteed to execute.
- ///
- bool isSafeToExecuteUnconditionally(Instruction &I);
-
- /// isGuaranteedToExecute - Check that the instruction is guaranteed to
- /// execute.
- ///
- bool isGuaranteedToExecute(Instruction &I);
-
- /// pointerInvalidatedByLoop - Return true if the body of this loop may
- /// store into the memory location pointed to by V.
- ///
- bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo) {
- // Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
- }
-
- bool canSinkOrHoistInst(Instruction &I);
- bool isNotUsedInLoop(Instruction &I);
-
- void PromoteAliasSet(AliasSet &AS,
- SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts,
- PredIteratorCache &PIC);
-
- /// \brief Create a copy of the instruction in the exit block and patch up
- /// SSA.
- /// PN is a user of I in ExitBlock that can be used to get the number and
- /// list of predecessors fast.
- Instruction *CloneInstructionInExitBlock(Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN);
};
}
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -231,13 +182,11 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = false;
// Get our Loop and Alias Analysis information...
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -274,19 +223,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST->add(*BB); // Incorporate the specified basic block
}
- HeaderMayThrow = false;
- BasicBlock *Header = L->getHeader();
- for (BasicBlock::iterator I = Header->begin(), E = Header->end();
- (I != E) && !HeaderMayThrow; ++I)
- HeaderMayThrow |= I->mayThrow();
- MayThrow = HeaderMayThrow;
- // TODO: We've already searched for instructions which may throw in subloops.
- // We may want to reuse this information.
- for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
- (BB != BBE) && !MayThrow ; ++BB)
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
- (I != E) && !MayThrow; ++I)
- MayThrow |= I->mayThrow();
+ // Compute loop safety information.
+ LICMSafetyInfo SafetyInfo;
+ computeLICMSafetyInfo(&SafetyInfo, CurLoop);
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -299,9 +238,11 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- SinkRegion(DT->getNode(L->getHeader()));
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop,
+ CurAST, &SafetyInfo);
if (Preheader)
- HoistRegion(DT->getNode(L->getHeader()));
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
+ CurLoop, CurAST, &SafetyInfo);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -313,7 +254,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC);
+ Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ PIC, LI, DT, CurLoop,
+ CurAST, &SafetyInfo);
// Once we have promoted values across the loop body we have to recursively
// reform LCSSA as any nested loop may now have values defined within the
@@ -346,27 +289,35 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in
-/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
-/// uses before definitions, allowing us to sink a loop body in one pass without
-/// iteration.
+/// Walk the specified region of the CFG (defined by all blocks dominated by
+/// the specified block, and that are in the current loop) in reverse depth
+/// first order w.r.t the DominatorTree. This allows us to visit uses before
+/// definitions, allowing us to sink a loop body in one pass without iteration.
///
-void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != nullptr && "Null dominator tree node?");
+bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+
+ // Verify inputs.
+ assert(N != nullptr && AA != nullptr && LI != nullptr &&
+ DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr && "Unexpected input to sinkRegion");
+
+  // Nothing has changed yet.
+  bool Changed = false;
+  // Get the basic block this dominator tree node corresponds to.
BasicBlock *BB = N->getBlock();
-
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
+ if (!CurLoop->contains(BB)) return Changed;
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- SinkRegion(Children[i]);
-
+ Changed |=
+ sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (inSubLoop(BB)) return;
+  if (inSubLoop(BB, CurLoop, LI)) return Changed;
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
@@ -387,35 +338,43 @@ void LICM::SinkRegion(DomTreeNode *N) {
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ if (isNotUsedInLoop(I, CurLoop) &&
+ canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
++II;
- sink(I);
+ Changed |= sink(I, LI, DT, CurLoop, CurAST);
}
}
+ return Changed;
}
-/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in depth
-/// first order w.r.t the DominatorTree. This allows us to visit definitions
-/// before uses, allowing us to hoist a loop body in one pass without iteration.
+/// Walk the specified region of the CFG (defined by all blocks dominated by
+/// the specified block, and that are in the current loop) in depth first
+/// order w.r.t the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
///
-void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != nullptr && "Null dominator tree node?");
+bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+ // Verify inputs.
+ assert(N != nullptr && AA != nullptr && LI != nullptr &&
+ DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr && "Unexpected input to hoistRegion");
+  // Nothing has changed yet.
+  bool Changed = false;
+  // Get the basic block this dominator tree node corresponds to.
BasicBlock *BB = N->getBlock();
-
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
-
+ if (!CurLoop->contains(BB)) return Changed;
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (!inSubLoop(BB))
+ if (!inSubLoop(BB, CurLoop, LI))
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
-
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(
+ &I, I.getModule()->getDataLayout(), TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -428,20 +387,49 @@ void LICM::HoistRegion(DomTreeNode *N) {
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
- isSafeToExecuteUnconditionally(I))
- hoist(I);
+ if (CurLoop->hasLoopInvariantOperands(&I) &&
+ canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
+ isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
+ CurLoop->getLoopPreheader()->getTerminator()))
+ Changed |= hoist(I, CurLoop->getLoopPreheader());
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- HoistRegion(Children[i]);
+ Changed |=
+ hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ return Changed;
+}
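+// E.g. (illustrative): because the walk follows dominator-tree depth-first
+// order, in a body containing "%a = add i32 %x, 1" followed by
+// "%b = mul i32 %a, 2", %a is hoisted before %b is examined, so %b's operands
+// are already loop-invariant and both instructions move in a single pass.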
+
+/// Computes loop safety information, checking the loop body and header for
+/// instructions that may throw an exception.
+///
+void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ BasicBlock *Header = CurLoop->getHeader();
+ // Setting default safety values.
+ SafetyInfo->MayThrow = false;
+ SafetyInfo->HeaderMayThrow = false;
+ // Iterate over the header and compute safety info.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end();
+ (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
+ SafetyInfo->HeaderMayThrow |= I->mayThrow();
+
+ SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ // Iterate over loop instructions and compute safety info.
+ for (Loop::block_iterator BB = CurLoop->block_begin(),
+ BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !SafetyInfo->MayThrow; ++I)
+ SafetyInfo->MayThrow |= I->mayThrow();
}
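+// A minimal driver sketch (assuming AA/LI/DT/TLI pointers and an
+// AliasSetTracker *CurAST are already set up for loop L):
+//
+//   LICMSafetyInfo SafetyInfo;
+//   computeLICMSafetyInfo(&SafetyInfo, L);
+//   Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+//                         CurAST, &SafetyInfo);
+//   Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+//                          CurAST, &SafetyInfo);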
/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
/// instruction.
///
-bool LICM::canSinkOrHoistInst(Instruction &I) {
+bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
+ TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -462,7 +450,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo);
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Don't sink or hoist dbg info; it's legal, but not useful.
if (isa<DbgInfoIntrinsic>(I))
@@ -501,30 +489,34 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
!isa<InsertValueInst>(I))
return false;
- return isSafeToExecuteUnconditionally(I);
+ // TODO: Plumb the context instruction through to make hoisting and sinking
+ // more powerful. Hoisting of loads already works due to the special casing
+ // above.
+ return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
+ nullptr);
}
-/// \brief Returns true if a PHINode is a trivially replaceable with an
+/// Returns true if a PHINode is trivially replaceable with an
/// Instruction.
+/// This is true when all incoming values are that instruction.
+/// This pattern occurs most often with LCSSA PHI nodes.
///
-/// This is true when all incoming values are that instruction. This pattern
-/// occurs most often with LCSSA PHI nodes.
-static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- if (PN.getIncomingValue(i) != &I)
+static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
+ for (const Value *IncValue : PN.incoming_values())
+ if (IncValue != &I)
return false;
return true;
}
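+// For example (illustrative IR), this LCSSA PHI is trivially replaceable with
+// %inst, since every incoming value is %inst itself:
+//
+//   exit:
+//     %inst.lcssa = phi i32 [ %inst, %body ], [ %inst, %latch ]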
-/// isNotUsedInLoop - Return true if the only users of this instruction are
-/// outside of the loop. If this is true, we can sink the instruction to the
-/// exit blocks of the loop.
+/// Return true if the only users of this instruction are outside of
+/// the loop. If this is true, we can sink the instruction to the exit
+/// blocks of the loop.
///
-bool LICM::isNotUsedInLoop(Instruction &I) {
- for (User *U : I.users()) {
- Instruction *UI = cast<Instruction>(U);
- if (PHINode *PN = dyn_cast<PHINode>(UI)) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
+ for (const User *U : I.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
// A PHI node where all of the incoming values are this instruction is
// special -- it can just be RAUW'ed with the instruction and thus
// doesn't require a use in the predecessor. This is particularly important
@@ -552,9 +544,10 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
return true;
}
-Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN) {
+static Instruction *CloneInstructionInExitBlock(const Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN,
+ const LoopInfo *LI) {
Instruction *New = I.clone();
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
if (!I.getName().empty()) New->setName(I.getName() + ".le");
@@ -581,14 +574,15 @@ Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
return New;
}
-/// sink - When an instruction is found to only be used outside of the loop,
-/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// When an instruction is found to only be used outside of the loop, this
+/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
-void LICM::sink(Instruction &I) {
+static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
+ const Loop *CurLoop, AliasSetTracker *CurAST) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
-
+ bool Changed = false;
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumSunk;
@@ -597,7 +591,8 @@ void LICM::sink(Instruction &I) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
#endif
// Clones of this instruction. Don't create more than one per exit block!
@@ -625,7 +620,7 @@ void LICM::sink(Instruction &I) {
New = It->second;
else
New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(I, *ExitBlock, *PN);
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
@@ -633,37 +628,43 @@ void LICM::sink(Instruction &I) {
CurAST->deleteValue(&I);
I.eraseFromParent();
+ return Changed;
}
-/// hoist - When an instruction is found to only use loop invariant operands
-/// that is safe to hoist, this instruction is called to do the dirty work.
+/// When an instruction is found to only use loop invariant operands that
+/// are safe to hoist, this function is called to do the dirty work.
///
-void LICM::hoist(Instruction &I) {
+static bool hoist(Instruction &I, BasicBlock *Preheader) {
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
-
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumHoisted;
- Changed = true;
+ return true;
}
-/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
-/// not a trapping instruction or if it is a trapping instruction and is
-/// guaranteed to execute.
-///
-bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
- // If it is not a trapping instruction, it is always safe to hoist.
- if (isSafeToSpeculativelyExecute(&Inst, DL))
+/// Only sink or hoist an instruction if it is not a trapping instruction,
+/// if it is known not to trap when moved to the preheader, or if it is a
+/// trapping instruction that is guaranteed to execute.
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo,
+ const Instruction *CtxI) {
+ if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
- return isGuaranteedToExecute(Inst);
+ return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
}
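+// E.g. (illustrative): "%r = and i32 %a, 7" never traps and can always be
+// hoisted, whereas "%q = sdiv i32 %a, %b" may trap and is only moved when it
+// either provably cannot trap at the preheader or is guaranteed to execute.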
-bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+static bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo) {
// We have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
@@ -675,11 +676,11 @@ bool LICM::isGuaranteedToExecute(Instruction &Inst) {
if (Inst.getParent() == CurLoop->getHeader())
// If there's a throw in the header block, we can't guarantee we'll reach
// Inst.
- return !HeaderMayThrow;
+ return !SafetyInfo->HeaderMayThrow;
// Somewhere in this loop there is an instruction which may throw and make us
// exit the loop.
- if (MayThrow)
+ if (SafetyInfo->MayThrow)
return false;
// Get the exit blocks for the current loop.
@@ -719,17 +720,18 @@ namespace {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
PHINode *PN = PHINode::Create(
- I->getType(), PredCache.GetNumPreds(BB),
+ I->getType(), PredCache.size(BB),
I->getName() + ".lcssa", BB->begin());
- for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
- PN->addIncoming(I, *PI);
+ for (BasicBlock *Pred : PredCache.get(BB))
+ PN->addIncoming(I, Pred);
return PN;
}
return V;
}
public:
- LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
+ LoopPromoter(Value *SP,
+ ArrayRef<const Instruction *> Insts,
SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
@@ -777,25 +779,37 @@ namespace {
};
} // end anon namespace
-/// PromoteAliasSet - Try to promote memory values to scalars by sinking
-/// stores out of the loop and moving loads to before the loop. We do this by
-/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant.
+/// Try to promote memory values to scalars by sinking stores out of the
+/// loop and moving loads to before the loop. We do this by looping over
+/// the stores in the loop, looking for stores to Must pointers which are
+/// loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS,
- SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts,
- PredIteratorCache &PIC) {
+bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ SmallVectorImpl<Instruction *> &InsertPts,
+ PredIteratorCache &PIC, LoopInfo *LI,
+ DominatorTree *DT, Loop *CurLoop,
+ AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo) {
+ // Verify inputs.
+ assert(LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr &&
+ "Unexpected Input to promoteLoopAccessesToScalars");
+ // Initially set Changed status to false.
+ bool Changed = false;
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- return;
+ return Changed;
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
+
Value *SomePtr = AS.begin()->getValue();
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
// It isn't safe to promote a load/store from the loop if the load/store is
// conditional. For example, turning:
@@ -832,7 +846,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
- return;
+ return Changed;
for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
@@ -842,25 +856,25 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
+ if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
assert(!load->isVolatile() && "AST broken");
if (!load->isSimple())
- return;
- } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ return Changed;
+ } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
assert(!store->isVolatile() && "AST broken");
if (!store->isSimple())
- return;
+ return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!HasDedicatedExits || !Preheader)
- return;
+ return Changed;
// Note that we only check GuaranteedToExecute inside the store case
// so that we do not introduce stores where they did not exist before
@@ -872,16 +886,17 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
- if (isGuaranteedToExecute(*UI)) {
+ if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
}
if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*UI);
+ GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
+ CurLoop, SafetyInfo);
} else
- return; // Not a load or store.
+ return Changed; // Not a load or store.
// Merge the AA tags.
if (LoopUses.empty()) {
@@ -897,7 +912,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If there isn't a guaranteed-to-execute instruction, we can't promote.
if (!GuaranteedToExecute)
- return;
+ return Changed;
// Otherwise, this is safe to promote, lets do it!
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
@@ -922,7 +937,8 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
- LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA,
+ PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
@@ -942,10 +958,12 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
PreheaderLoad->eraseFromParent();
-}
+ return Changed;
+}
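+// E.g. (illustrative): for a must-alias, loop-invariant pointer %p, a loop
+// doing "load %p; add 1; store %p" each iteration becomes a single load of %p
+// in the preheader, register arithmetic in the loop, and one store to %p in
+// each exit block.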
-/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+/// Simple Analysis hook. Clone alias set info.
+///
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -954,8 +972,8 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AST->copyValue(From, To);
}
-/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
-/// set.
+/// Simple Analysis hook. Delete value V from alias set.
+///
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -965,6 +983,7 @@ void LICM::deleteAnalysisValue(Value *V, Loop *L) {
}
/// Simple Analysis hook. Delete value L from alias set map.
+///
void LICM::deleteAnalysisLoop(Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -973,3 +992,23 @@ void LICM::deleteAnalysisLoop(Loop *L) {
delete AST;
LoopToAliasSetMap.erase(L);
}
+
+
+/// Return true if the body of this loop may store into the memory
+/// location pointed to by V.
+///
+static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSetTracker *CurAST) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
+}
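+// E.g. (illustrative): for "%v = load i32, i32* %p" inside the loop, this
+// returns true when %p's alias set is marked Mod, i.e. some instruction in
+// the loop may store to memory aliasing %p.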
+
+/// Little predicate that returns true if the specified basic block is in
+/// a subloop of the current one, not the current one itself.
+///
+static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ return LI->getLoopFor(BB) != CurLoop;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index 11e4d76..c19cd19 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -12,17 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Pass.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,9 +41,9 @@ struct PointerOffsetPair {
};
struct LoadPOPPair {
+ LoadPOPPair() = default;
LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
: Load(L), POP(P), InsertOrder(O) {}
- LoadPOPPair() {}
LoadInst *Load;
PointerOffsetPair POP;
/// \brief The new load needs to be created before the first load in IR order.
@@ -52,13 +52,10 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
- const DataLayout *DL;
AliasAnalysis *AA;
public:
- LoadCombine()
- : BasicBlockPass(ID),
- C(nullptr), DL(nullptr), AA(nullptr) {
+ LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
@@ -85,12 +82,6 @@ private:
bool LoadCombine::doInitialization(Function &F) {
DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
return true;
}
@@ -100,9 +91,10 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
POP.Offset = 0;
while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(*DL, Offset))
+ if (GEP->accumulateConstantOffset(DL, Offset))
POP.Offset += Offset.getZExtValue();
else
// Can't handle GEPs with variable indices.
@@ -145,7 +137,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
if (PrevOffset == -1ull) {
BaseLoad = L.Load;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
continue;
}
@@ -164,7 +157,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
// FIXME: We may want to handle this case.
continue;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
}
if (combineLoads(AggregateLoads))
@@ -215,7 +209,8 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
for (const auto &L : Loads) {
Builder->SetInsertPoint(L.Load);
Value *V = Builder->CreateExtractInteger(
- *DL, NewLoad, cast<IntegerType>(L.Load->getType()),
+ L.Load->getModule()->getDataLayout(), NewLoad,
+ cast<IntegerType>(L.Load->getType()),
L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
L.Load->replaceAllUsesWith(V);
}
@@ -225,13 +220,13 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
}
bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipOptnoneFunction(BB) || !DL)
+ if (skipOptnoneFunction(BB))
return false;
AA = &getAnalysis<AliasAnalysis>();
- IRBuilder<true, TargetFolder>
- TheBuilder(BB.getContext(), TargetFolder(DL));
+ IRBuilder<true, TargetFolder> TheBuilder(
+ BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
Builder = &TheBuilder;
DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 1d1f33a..98b068e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -39,14 +39,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
@@ -63,7 +63,7 @@ char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -236,7 +236,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo &loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (BasicBlock *BB : blocks)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
new file mode 100644
index 0000000..a907d59
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -0,0 +1,976 @@
+//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Distribution Pass. Its main focus is to
+// distribute loops that cannot be vectorized due to dependence cycles. It
+// tries to isolate the offending dependences into a new loop allowing
+// vectorization of the remaining parts.
+//
+// For dependence analysis, the pass uses the LoopVectorizer's
+// LoopAccessAnalysis. Because this analysis presumes no change in the order of
+// memory operations, special care is taken to preserve the lexical order of
+// these operations.
+//
+// Similarly to the Vectorizer, the pass also supports loop versioning to
+// disambiguate potentially overlapping arrays at run time.
+//
+//===----------------------------------------------------------------------===//
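+//
+// For example (illustrative only), in:
+//
+//   for (i = 0; i < n; i++) {
+//     A[i + 1] = A[i] * 3;   // dependence cycle, not vectorizable
+//     C[i] = B[i] + 1;       // independent work
+//   }
+//
+// the pass may distribute the loop into:
+//
+//   for (i = 0; i < n; i++)
+//     A[i + 1] = A[i] * 3;
+//   for (i = 0; i < n; i++)
+//     C[i] = B[i] + 1;
+//
+// so that the second loop can be vectorized.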
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <list>
+
+#define LDIST_NAME "loop-distribute"
+#define DEBUG_TYPE LDIST_NAME
+
+using namespace llvm;
+
+static cl::opt<bool>
+ LDistVerify("loop-distribute-verify", cl::Hidden,
+ cl::desc("Turn on DominatorTree and LoopInfo verification "
+ "after Loop Distribution"),
+ cl::init(false));
+
+static cl::opt<bool> DistributeNonIfConvertible(
+ "loop-distribute-non-if-convertible", cl::Hidden,
+ cl::desc("Whether to distribute into a loop that may not be "
+ "if-convertible by the loop vectorizer"),
+ cl::init(false));
+
+STATISTIC(NumLoopsDistributed, "Number of loops distributed");
+
+/// \brief Remaps instructions in a loop including the preheader.
+static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks,
+ ValueToValueMapTy &VMap) {
+ // Rewrite the code to refer to itself.
+ for (auto *BB : Blocks)
+ for (auto &Inst : *BB)
+ RemapInstruction(&Inst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+}
+
+/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Blocks.
+///
+/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
+/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
+static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
+ Loop *OrigLoop, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, LoopInfo *LI,
+ DominatorTree *DT,
+ SmallVectorImpl<BasicBlock *> &Blocks) {
+ Function *F = OrigLoop->getHeader()->getParent();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ Loop *NewLoop = new Loop();
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
+ BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
+ // To rename the loop PHIs.
+ VMap[OrigPH] = NewPH;
+ Blocks.push_back(NewPH);
+
+ // Update LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, *LI);
+
+ // Update DominatorTree.
+ DT->addNewBlock(NewPH, LoopDomBB);
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
+ VMap[BB] = NewBB;
+
+ // Update LoopInfo.
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+
+ Blocks.push_back(NewBB);
+ }
+
+ // Move them physically from the end of the block list.
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
+ NewLoop->getHeader(), F->end());
+
+ return NewLoop;
+}
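+// E.g. (illustrative): cloning loop "header" with preheader "ph" and suffix
+// ".ldist1" yields blocks ph.ldist1, header.ldist1, ..., with VMap recording
+// each original block and value so callers can remap the cloned instructions.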
+
+namespace {
+/// \brief Maintains the set of instructions of the loop for a partition before
+/// cloning. After cloning, it hosts the new loop.
+class InstPartition {
+ typedef SmallPtrSet<Instruction *, 8> InstructionSet;
+
+public:
+ InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
+ : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) {
+ Set.insert(I);
+ }
+
+ /// \brief Returns whether this partition contains a dependence cycle.
+ bool hasDepCycle() const { return DepCycle; }
+
+ /// \brief Adds an instruction to this partition.
+ void add(Instruction *I) { Set.insert(I); }
+
+ /// \brief Collection accessors.
+ InstructionSet::iterator begin() { return Set.begin(); }
+ InstructionSet::iterator end() { return Set.end(); }
+ InstructionSet::const_iterator begin() const { return Set.begin(); }
+ InstructionSet::const_iterator end() const { return Set.end(); }
+ bool empty() const { return Set.empty(); }
+
+ /// \brief Moves this partition into \p Other. This partition becomes empty
+ /// after this.
+ void moveTo(InstPartition &Other) {
+ Other.Set.insert(Set.begin(), Set.end());
+ Set.clear();
+ Other.DepCycle |= DepCycle;
+ }
+
+ /// \brief Populates the partition with a transitive closure of all the
+ /// instructions that the seeded instructions depend on.
+ void populateUsedSet() {
+ // FIXME: We currently don't use control-dependence but simply include all
+ // blocks (possibly empty at the end) and let simplifycfg mostly clean this
+ // up.
+ for (auto *B : OrigLoop->getBlocks())
+ Set.insert(B->getTerminator());
+
+ // Follow the use-def chains to form a transitive closure of all the
+ // instructions that the originally seeded instructions depend on.
+ SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ // Insert instructions from the loop that we depend on.
+ for (Value *V : I->operand_values()) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+ Worklist.push_back(I);
+ }
+ }
+ }
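+ // E.g. (illustrative): a partition seeded with "store %sum, %p" pulls in
+ // %sum = add %phi, %x, then %phi and %x, and so on, stopping at values
+ // defined outside the loop.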
+
+ /// \brief Clones the original loop.
+ ///
+ /// Updates LoopInfo and DominatorTree using the information that block \p
+ /// LoopDomBB dominates the loop.
+ Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
+ unsigned Index, LoopInfo *LI,
+ DominatorTree *DT) {
+ ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
+ VMap, Twine(".ldist") + Twine(Index),
+ LI, DT, ClonedLoopBlocks);
+ return ClonedLoop;
+ }
+
+ /// \brief The cloned loop. If this partition is mapped to the original loop,
+ /// this is null.
+ const Loop *getClonedLoop() const { return ClonedLoop; }
+
+ /// \brief Returns the loop where this partition ends up after distribution.
+ /// If this partition is mapped to the original loop then use the block from
+ /// the loop.
+ const Loop *getDistributedLoop() const {
+ return ClonedLoop ? ClonedLoop : OrigLoop;
+ }
+
+ /// \brief The VMap that is populated by cloning and then used in
+ /// remapinstruction to remap the cloned instructions.
+ ValueToValueMapTy &getVMap() { return VMap; }
+
+ /// \brief Remaps the cloned instructions using VMap.
+ void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); }
+
+ /// \brief Based on the set of instructions selected for this partition,
+ /// removes the unnecessary ones.
+ void removeUnusedInsts() {
+ SmallVector<Instruction *, 8> Unused;
+
+ for (auto *Block : OrigLoop->getBlocks())
+ for (auto &Inst : *Block)
+ if (!Set.count(&Inst)) {
+ Instruction *NewInst = &Inst;
+ if (!VMap.empty())
+ NewInst = cast<Instruction>(VMap[NewInst]);
+
+ assert(!isa<BranchInst>(NewInst) &&
+ "Branches are marked used early on");
+ Unused.push_back(NewInst);
+ }
+
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
+ auto *Inst = *I;
+
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->eraseFromParent();
+ }
+ }
+
+ void print() const {
+ if (DepCycle)
+ dbgs() << " (cycle)\n";
+ for (auto *I : Set)
+ // Prefix with the block name.
+ dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
+ }
+
+ void printBlocks() const {
+ for (auto *BB : getDistributedLoop()->getBlocks())
+ dbgs() << *BB;
+ }
+
+private:
+ /// \brief Instructions from OrigLoop selected for this partition.
+ InstructionSet Set;
+
+ /// \brief Whether this partition contains a dependence cycle.
+ bool DepCycle;
+
+ /// \brief The original loop.
+ Loop *OrigLoop;
+
+ /// \brief The cloned loop. If this partition is mapped to the original loop,
+ /// this is null.
+ Loop *ClonedLoop;
+
+ /// \brief The blocks of ClonedLoop including the preheader. If this
+ /// partition is mapped to the original loop, this is empty.
+ SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
+
+ /// \brief This gets populated once the set of instructions has been
+ /// finalized. If this partition is mapped to the original loop, it is not
+ /// set.
+ ValueToValueMapTy VMap;
+};
+
+/// \brief Holds the set of Partitions. It populates them, merges them and then
+/// clones the loops.
+class InstPartitionContainer {
+ typedef DenseMap<Instruction *, int> InstToPartitionIdT;
+
+public:
+ InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
+ : L(L), LI(LI), DT(DT) {}
+
+ /// \brief Returns the number of partitions.
+ unsigned getSize() const { return PartitionContainer.size(); }
+
+ /// \brief Adds \p Inst into the current partition if that is marked to
+ /// contain cycles. Otherwise start a new partition for it.
+ void addToCyclicPartition(Instruction *Inst) {
+ // If the current partition is non-cyclic, start a new one.
+ if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
+ PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
+ else
+ PartitionContainer.back().add(Inst);
+ }
+
+ /// \brief Adds \p Inst into a partition that is not marked to contain
+ /// dependence cycles.
+ ///
+ /// Initially we isolate memory instructions into as many partitions as
+ /// possible, then later we may merge them back together.
+ void addToNewNonCyclicPartition(Instruction *Inst) {
+ PartitionContainer.emplace_back(Inst, L);
+ }
+
+ /// \brief Merges adjacent non-cyclic partitions.
+ ///
+ /// The idea is that we currently only want to isolate the non-vectorizable
+ /// partition. We could later allow more distribution among these partitions
+ /// too.
+ void mergeAdjacentNonCyclic() {
+ mergeAdjacentPartitionsIf(
+ [](const InstPartition *P) { return !P->hasDepCycle(); });
+ }
+
+ /// \brief If a partition contains only conditional stores, we won't vectorize
+ /// it. Try to merge it with a previous cyclic partition.
+ void mergeNonIfConvertible() {
+ mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
+ if (Partition->hasDepCycle())
+ return true;
+
+ // Now, check if all stores are conditional in this partition.
+ bool seenStore = false;
+
+ for (auto *Inst : *Partition)
+ if (isa<StoreInst>(Inst)) {
+ seenStore = true;
+ if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
+ return false;
+ }
+ return seenStore;
+ });
+ }
+
+ /// \brief Merges the partitions according to various heuristics.
+ void mergeBeforePopulating() {
+ mergeAdjacentNonCyclic();
+ if (!DistributeNonIfConvertible)
+ mergeNonIfConvertible();
+ }
+
+ /// \brief Merges partitions in order to ensure that no loads are duplicated.
+ ///
+ /// We can't duplicate loads because that could potentially reorder them.
+ /// LoopAccessAnalysis provides dependency information with the context that
+ /// the order of memory operations is preserved.
+ ///
+ /// Returns true if any partitions were merged.
+ bool mergeToAvoidDuplicatedLoads() {
+ typedef DenseMap<Instruction *, InstPartition *> LoadToPartitionT;
+ typedef EquivalenceClasses<InstPartition *> ToBeMergedT;
+
+ LoadToPartitionT LoadToPartition;
+ ToBeMergedT ToBeMerged;
+
+ // Step through the partitions and create equivalence between partitions
+ // that contain the same load. Also put partitions in between them in the
+ // same equivalence class to avoid reordering of memory operations.
+ for (PartitionContainerT::iterator I = PartitionContainer.begin(),
+ E = PartitionContainer.end();
+ I != E; ++I) {
+ auto *PartI = &*I;
+
+ // If a load occurs in two partitions PartI and PartJ, merge all
+ // partitions (PartI, PartJ] into PartI.
+ for (Instruction *Inst : *PartI)
+ if (isa<LoadInst>(Inst)) {
+ bool NewElt;
+ LoadToPartitionT::iterator LoadToPart;
+
+ std::tie(LoadToPart, NewElt) =
+ LoadToPartition.insert(std::make_pair(Inst, PartI));
+ if (!NewElt) {
+ DEBUG(dbgs() << "Merging partitions due to this load in multiple "
+ << "partitions: " << PartI << ", "
+ << LoadToPart->second << "\n" << *Inst << "\n");
+
+ auto PartJ = I;
+ do {
+ --PartJ;
+ ToBeMerged.unionSets(PartI, &*PartJ);
+ } while (&*PartJ != LoadToPart->second);
+ }
+ }
+ }
+ if (ToBeMerged.empty())
+ return false;
+
+ // Merge the member of an equivalence class into its class leader. This
+ // makes the members empty.
+ for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
+ I != E; ++I) {
+ if (!I->isLeader())
+ continue;
+
+ auto PartI = I->getData();
+ for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
+ ToBeMerged.member_end())) {
+ PartJ->moveTo(*PartI);
+ }
+ }
+
+ // Remove the empty partitions.
+ PartitionContainer.remove_if(
+ [](const InstPartition &P) { return P.empty(); });
+
+ return true;
+ }
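+ // E.g. (illustrative): if the same load is seeded into partitions P1 and
+ // P3, then P2 and P3 are merged into P1; duplicating the load into both P1
+ // and P3 could otherwise move it across the memory operations of P2.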
+
+ /// \brief Sets up the mapping from instructions to partitions. If the
+ /// instruction is duplicated across multiple partitions, set the entry to -1.
+ void setupPartitionIdOnInstructions() {
+ int PartitionID = 0;
+ for (const auto &Partition : PartitionContainer) {
+ for (Instruction *Inst : Partition) {
+ bool NewElt;
+ InstToPartitionIdT::iterator Iter;
+
+ std::tie(Iter, NewElt) =
+ InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
+ if (!NewElt)
+ Iter->second = -1;
+ }
+ ++PartitionID;
+ }
+ }
+
+ /// \brief Populates the partition with everything that the seeding
+ /// instructions require.
+ void populateUsedSet() {
+ for (auto &P : PartitionContainer)
+ P.populateUsedSet();
+ }
+
+ /// \brief This performs the main chunk of the work of cloning the loops for
+ /// the partitions.
+ void cloneLoops(Pass *P) {
+ BasicBlock *OrigPH = L->getLoopPreheader();
+ // At this point the predecessor of the preheader is either the memcheck
+ // block or the top part of the original preheader.
+ BasicBlock *Pred = OrigPH->getSinglePredecessor();
+ assert(Pred && "Preheader does not have a single predecessor");
+ BasicBlock *ExitBlock = L->getExitBlock();
+ assert(ExitBlock && "No single exit block");
+ Loop *NewLoop;
+
+ assert(!PartitionContainer.empty() && "at least two partitions expected");
+ // We're cloning the preheader along with the loop so we already made sure
+ // it was empty.
+ assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
+ "preheader not empty");
+
+ // Create a loop for each partition except the last. Clone the original
+ // loop before PH along with adding a preheader for the cloned loop. Then
+ // update PH to point to the newly added preheader.
+ BasicBlock *TopPH = OrigPH;
+ unsigned Index = getSize() - 1;
+ for (auto I = std::next(PartitionContainer.rbegin()),
+ E = PartitionContainer.rend();
+ I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
+ auto *Part = &*I;
+
+ NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
+
+ Part->getVMap()[ExitBlock] = TopPH;
+ Part->remapInstructions();
+ }
+ Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
+
+ // Now go in forward order and update the immediate dominator for the
+ // preheaders with the exiting block of the previous loop. Dominance
+ // within the loop is updated in cloneLoopWithPreheader.
+ for (auto Curr = PartitionContainer.cbegin(),
+ Next = std::next(PartitionContainer.cbegin()),
+ E = PartitionContainer.cend();
+ Next != E; ++Curr, ++Next)
+ DT->changeImmediateDominator(
+ Next->getDistributedLoop()->getLoopPreheader(),
+ Curr->getDistributedLoop()->getExitingBlock());
+ }
+
+ /// \brief Removes the dead instructions from the cloned loops.
+ void removeUnusedInsts() {
+ for (auto &Partition : PartitionContainer)
+ Partition.removeUnusedInsts();
+ }
+
+ /// \brief For each memory pointer, it computes the partitionId the pointer is
+ /// used in.
+ ///
+ /// This returns an array of int where the I-th entry corresponds to I-th
+ /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
+ /// partitions, its entry is set to -1.
+ SmallVector<int, 8>
+ computePartitionSetForPointers(const LoopAccessInfo &LAI) {
+ const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck =
+ LAI.getRuntimePointerCheck();
+
+ unsigned N = RtPtrCheck->Pointers.size();
+ SmallVector<int, 8> PtrToPartitions(N);
+ for (unsigned I = 0; I < N; ++I) {
+ Value *Ptr = RtPtrCheck->Pointers[I];
+ auto Instructions =
+ LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]);
+
+ int &Partition = PtrToPartitions[I];
+ // First set it to uninitialized.
+ Partition = -2;
+ for (Instruction *Inst : Instructions) {
+ // Note that this could be -1 if Inst is duplicated across multiple
+ // partitions.
+ int ThisPartition = this->InstToPartitionId[Inst];
+ if (Partition == -2)
+ Partition = ThisPartition;
+ // -1 means belonging to multiple partitions.
+ else if (Partition == -1)
+ break;
+ else if (Partition != (int)ThisPartition)
+ Partition = -1;
+ }
+ assert(Partition != -2 && "Pointer not belonging to any partition");
+ }
+
+ return PtrToPartitions;
+ }
+
+ void print(raw_ostream &OS) const {
+ unsigned Index = 0;
+ for (const auto &P : PartitionContainer) {
+ OS << "Partition " << Index++ << " (" << &P << "):\n";
+ P.print();
+ }
+ }
+
+ void dump() const { print(dbgs()); }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const InstPartitionContainer &Partitions) {
+ Partitions.print(OS);
+ return OS;
+ }
+#endif
+
+ void printBlocks() const {
+ unsigned Index = 0;
+ for (const auto &P : PartitionContainer) {
+ dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
+ P.printBlocks();
+ }
+ }
+
+private:
+ typedef std::list<InstPartition> PartitionContainerT;
+
+ /// \brief List of partitions.
+ PartitionContainerT PartitionContainer;
+
+ /// \brief Mapping from Instruction to partition Id. If the instruction
+ /// belongs to multiple partitions the entry contains -1.
+ InstToPartitionIdT InstToPartitionId;
+
+ Loop *L;
+ LoopInfo *LI;
+ DominatorTree *DT;
+
+ /// \brief The control structure to merge adjacent partitions if both satisfy
+ /// the \p Predicate.
+ template <class UnaryPredicate>
+ void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
+ InstPartition *PrevMatch = nullptr;
+ for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
+ auto DoesMatch = Predicate(&*I);
+ if (PrevMatch == nullptr && DoesMatch) {
+ PrevMatch = &*I;
+ ++I;
+ } else if (PrevMatch != nullptr && DoesMatch) {
+ I->moveTo(*PrevMatch);
+ I = PartitionContainer.erase(I);
+ } else {
+ PrevMatch = nullptr;
+ ++I;
+ }
+ }
+ }
+};
+
+/// \brief For each memory instruction, this class maintains the difference
+/// between the number of unsafe dependences that start out from this
+/// instruction and the number that end here.
+///
+/// By traversing the memory instructions in program order and accumulating this
+/// number, we know whether any unsafe dependence crosses over a program point.
+class MemoryInstructionDependences {
+ typedef MemoryDepChecker::Dependence Dependence;
+
+public:
+ struct Entry {
+ Instruction *Inst;
+ unsigned NumUnsafeDependencesStartOrEnd;
+
+ Entry(Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {}
+ };
+
+ typedef SmallVector<Entry, 8> AccessesType;
+
+ AccessesType::const_iterator begin() const { return Accesses.begin(); }
+ AccessesType::const_iterator end() const { return Accesses.end(); }
+
+ MemoryInstructionDependences(
+ const SmallVectorImpl<Instruction *> &Instructions,
+ const SmallVectorImpl<Dependence> &InterestingDependences) {
+ Accesses.append(Instructions.begin(), Instructions.end());
+
+ DEBUG(dbgs() << "Backward dependences:\n");
+ for (auto &Dep : InterestingDependences)
+ if (Dep.isPossiblyBackward()) {
+ // Note that the designations source and destination follow the program
+ // order, i.e. source is always first. (The direction is given by the
+ // DepType.)
+ ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
+ --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
+
+ DEBUG(Dep.print(dbgs(), 2, Instructions));
+ }
+ }
+
+private:
+ AccessesType Accesses;
+};
+
+/// \brief Handles the loop versioning based on memchecks.
+class RuntimeCheckEmitter {
+public:
+ RuntimeCheckEmitter(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
+ DominatorTree *DT)
+ : OrigLoop(L), NonDistributedLoop(nullptr), LAI(LAI), LI(LI), DT(DT) {}
+
+ /// \brief Given the \p Partitions formed by Loop Distribution, it determines
+ /// in which partition each pointer is used.
+ void partitionPointers(InstPartitionContainer &Partitions) {
+ // Set up partition id in PtrRtChecks. Ptr -> Access -> Instruction ->
+ // Partition.
+ PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
+
+ DEBUG(dbgs() << "\nPointers:\n");
+ DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
+ }
+
+ /// \brief Returns true if we need memchecks to distribute the loop.
+ bool needsRuntimeChecks() const {
+ return LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrToPartition);
+ }
+
+ /// \brief Performs the CFG manipulation part of versioning the loop including
+ /// the DominatorTree and LoopInfo updates.
+ void versionLoop(Pass *P) {
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ // Add the memcheck in the original preheader (this is empty initially).
+ BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ LAI.addRuntimeCheck(MemCheckBB->getTerminator(), &PtrToPartition);
+ assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
+
+ // Rename the block to make the IR more readable.
+ MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck");
+
+ // Create empty preheader for the loop (and after cloning for the
+ // original/nondist loop).
+ BasicBlock *PH =
+ SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+ PH->setName(OrigLoop->getHeader()->getName() + ".ph");
+
+ // Clone the loop including the preheader.
+ //
+ // FIXME: This does not currently preserve SimplifyLoop because the exit
+ // block is a join between the two loops.
+ SmallVector<BasicBlock *, 8> NonDistributedLoopBlocks;
+ NonDistributedLoop =
+ cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist",
+ LI, DT, NonDistributedLoopBlocks);
+ remapInstructionsInLoop(NonDistributedLoopBlocks, VMap);
+
+ // Insert the conditional branch based on the result of the memchecks.
+ Instruction *OrigTerm = MemCheckBB->getTerminator();
+ BranchInst::Create(NonDistributedLoop->getLoopPreheader(),
+ OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm);
+ OrigTerm->eraseFromParent();
+
+ // The loops merge in the original exit block. This is now dominated by the
+ // memchecking block.
+ DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB);
+ }
+
+ /// \brief Adds the necessary PHI nodes for the versioned loops based on the
+ /// loop-defined values used outside of the loop.
+ void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ BasicBlock *PHIBlock = OrigLoop->getExitBlock();
+ assert(PHIBlock && "No single successor to loop exit block");
+
+ for (auto *Inst : DefsUsedOutside) {
+ auto *NonDistInst = cast<Instruction>(VMap[Inst]);
+ PHINode *PN;
+
+ // First see if we have a single-operand PHI with the value defined by the
+ // original loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+ "Exit block should only have on predecessor");
+ if (PN->getIncomingValue(0) == Inst)
+ break;
+ }
+ // If not, create it.
+ if (!PN) {
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist",
+ PHIBlock->begin());
+ for (auto *User : Inst->users())
+ if (!OrigLoop->contains(cast<Instruction>(User)->getParent()))
+ User->replaceUsesOfWith(Inst, PN);
+ PN->addIncoming(Inst, OrigLoop->getExitingBlock());
+ }
+ // Add the new incoming value from the non-distributed loop.
+ PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock());
+ }
+ }
+
+private:
+ /// \brief The original loop. This becomes the "versioned" one, i.e. control
+ /// goes here if the memchecks all pass.
+ Loop *OrigLoop;
+ /// \brief The fall-back loop, i.e. if any of the memchecks fail.
+ Loop *NonDistributedLoop;
+
+ /// \brief For each memory pointer it contains the partitionId it is used in.
+ ///
+ /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
+ /// If the pointer is used in multiple partitions, the entry is set to -1.
+ SmallVector<int, 8> PtrToPartition;
+
+ /// \brief This maps the instructions from OrigLoop to their counterpart in
+ /// NonDistributedLoop.
+ ValueToValueMapTy VMap;
+
+ /// \brief Analyses used.
+ const LoopAccessInfo &LAI;
+ LoopInfo *LI;
+ DominatorTree *DT;
+};
+
+/// \brief Returns the instructions that use values defined in the loop.
+static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
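+// E.g. (illustrative): if "%sum = add ..." defined in the loop feeds a use in
+// the exit block, %sum is collected here so addPHINodes can later merge the
+// original and non-distributed values in the common exit.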
+
+/// \brief The pass class.
+class LoopDistribute : public FunctionPass {
+public:
+ LoopDistribute() : FunctionPass(ID) {
+ initializeLoopDistributePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LAA = &getAnalysis<LoopAccessAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Build up a worklist of inner loops to distribute. This is necessary as
+ // the act of distributing a loop creates new loops and can invalidate
+ // iterators across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist)
+ Changed |= processLoop(L);
+
+ // Process each loop nest in the function.
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ static char ID;
+
+private:
+ /// \brief Try to distribute an inner-most loop.
+ bool processLoop(Loop *L) {
+ assert(L->empty() && "Only process inner loops.");
+
+ DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
+ << "\" checking " << *L << "\n");
+
+ BasicBlock *PH = L->getLoopPreheader();
+ if (!PH) {
+ DEBUG(dbgs() << "Skipping; no preheader");
+ return false;
+ }
+ if (!L->getExitBlock()) {
+ DEBUG(dbgs() << "Skipping; multiple exit blocks");
+ return false;
+ }
+ // LAA will check that we only have a single exiting block.
+
+ const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+
+ // Currently, we only distribute to isolate the part of the loop with
+ // dependence cycles to enable partial vectorization.
+ if (LAI.canVectorizeMemory()) {
+ DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
+ return false;
+ }
+ auto *InterestingDependences =
+ LAI.getDepChecker().getInterestingDependences();
+ if (!InterestingDependences || InterestingDependences->empty()) {
+ DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
+ return false;
+ }
+
+ InstPartitionContainer Partitions(L, LI, DT);
+
+ // First, go through each memory operation and assign them to consecutive
+ // partitions (the order of partitions follows program order). Put those
+ // with unsafe dependences into the "cyclic" partition; otherwise put each
+ // store in its own "non-cyclic" partition (we'll merge these later).
+ //
+ // Note that a memory operation (e.g. Load2 below) at a program point that
+ // has an unsafe dependence (Store3->Load1) spanning over it must be
+ // included in the same cyclic partition as the dependent operations. This
+ // is to preserve the original program order after distribution. E.g.:
+ //
+ // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
+ // Load1 -. 1 0->1
+ // Load2 | /Unsafe/ 0 1
+ // Store3 -' -1 1->0
+ // Load4 0 0
+ //
+ // NumUnsafeDependencesActive > 0 indicates this situation and in this case
+ // we just keep assigning to the same cyclic partition until
+ // NumUnsafeDependencesActive reaches 0.
+ const MemoryDepChecker &DepChecker = LAI.getDepChecker();
+ MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
+ *InterestingDependences);
+
+ int NumUnsafeDependencesActive = 0;
+ for (auto &InstDep : MID) {
+ Instruction *I = InstDep.Inst;
+ // We update NumUnsafeDependencesActive post-instruction, catch the
+ // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
+ if (NumUnsafeDependencesActive ||
+ InstDep.NumUnsafeDependencesStartOrEnd > 0)
+ Partitions.addToCyclicPartition(I);
+ else
+ Partitions.addToNewNonCyclicPartition(I);
+ NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
+ assert(NumUnsafeDependencesActive >= 0 &&
+ "Negative number of dependences active");
+ }
+
+ // Add partitions for values used outside. These partitions can be out of
+ // order from the original program order. This is OK because if the
+ // partition uses a load we will merge this partition with the original
+ // partition of the load that we set up in the previous loop (see
+ // mergeToAvoidDuplicatedLoads).
+ auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
+ for (auto *Inst : DefsUsedOutside)
+ Partitions.addToNewNonCyclicPartition(Inst);
+
+ DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+
+ // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
+ // should be able to vectorize these together.
+ Partitions.mergeBeforePopulating();
+ DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+
+ // Now, populate the partitions with non-memory operations.
+ Partitions.populateUsedSet();
+ DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+
+ // In order to preserve original lexical order for loads, keep them in the
+ // partition that we set up in the MemoryInstructionDependences loop.
+ if (Partitions.mergeToAvoidDuplicatedLoads()) {
+ DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+ << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+ }
+
+ DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+ // We're done forming the partitions; set up the reverse mapping from
+ // instructions to partitions.
+ Partitions.setupPartitionIdOnInstructions();
+
+ // To keep things simple have an empty preheader before we version or clone
+ // the loop. (Also split if this has no predecessor, i.e. entry, because we
+ // rely on PH having a predecessor.)
+ if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
+ SplitBlock(PH, PH->getTerminator(), DT, LI);
+
+ // If we need run-time checks to disambiguate the pointers, version the
+ // loop now.
+ RuntimeCheckEmitter RtCheckEmitter(LAI, L, LI, DT);
+ RtCheckEmitter.partitionPointers(Partitions);
+ if (RtCheckEmitter.needsRuntimeChecks()) {
+ RtCheckEmitter.versionLoop(this);
+ RtCheckEmitter.addPHINodes(DefsUsedOutside);
+ }
+
+ // Create identical copies of the original loop for each partition and hook
+ // them up sequentially.
+ Partitions.cloneLoops(this);
+
+ // Now, remove from each loop the instructions that don't belong to its
+ // partition.
+ Partitions.removeUnusedInsts();
+ DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
+ DEBUG(Partitions.printBlocks());
+
+ if (LDistVerify) {
+ LI->verify();
+ DT->verifyDomTree();
+ }
+
+ ++NumLoopsDistributed;
+ return true;
+ }
+
+ // Analyses used.
+ LoopInfo *LI;
+ LoopAccessAnalysis *LAA;
+ DominatorTree *DT;
+};
+} // anonymous namespace
+
+char LoopDistribute::ID;
+static const char ldist_name[] = "Loop Distribution";
+
+INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopDistributePass() { return new LoopDistribute(); }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a12f5a7..f92ecd4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
@@ -56,7 +57,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -130,7 +130,6 @@ namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -139,7 +138,10 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
+ DT = nullptr;
+ SE = nullptr;
+ TLI = nullptr;
+ TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -163,8 +165,8 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -175,16 +177,8 @@ namespace {
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
- }
-
- const DataLayout *getDataLayout() {
- if (DL)
- return DL;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- return DL;
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
DominatorTree *getDominatorTree() {
@@ -197,11 +191,16 @@ namespace {
}
TargetLibraryInfo *getTargetLibraryInfo() {
- return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+ if (!TLI)
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ return TLI;
}
const TargetTransformInfo *getTargetTransformInfo() {
- return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
+ return TTI ? TTI
+ : (TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent()));
}
Loop *getLoop() const { return CurLoop; }
@@ -215,14 +214,14 @@ namespace {
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -232,44 +231,13 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+static void deleteDeadInstruction(Instruction *I,
const TargetLibraryInfo *TLI) {
- SmallVector<Instruction*, 32> NowDeadInsts;
-
- NowDeadInsts.push_back(I);
-
- // Before we touch this instruction, remove it from SE!
- do {
- Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
- // This instruction is dead, zap it, in stages. Start by removing it from
- // SCEV.
- SE.forgetValue(DeadInst);
-
- for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
- Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, nullptr);
-
- // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue;
-
- if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI, TLI))
- NowDeadInsts.push_back(OpI);
- }
-
- DeadInst->eraseFromParent();
-
- } while (!NowDeadInsts.empty());
-}
-
-/// deleteIfDeadInstruction - If the specified value is a dead instruction,
-/// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
- const TargetLibraryInfo *TLI) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (isInstructionTriviallyDead(I, TLI))
- deleteDeadInstruction(I, SE, TLI);
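+ // Replace all uses of I with undef, erase it (dropping its operand uses),
+ // and then recursively delete any operands that became trivially dead.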
+ SmallVector<Value *, 16> Operands(I->value_op_begin(), I->value_op_end());
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ for (Value *Op : Operands)
+ RecursivelyDeleteTriviallyDeadInstructions(Op, TLI);
}
//===----------------------------------------------------------------------===//
@@ -285,7 +253,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
// the concern of breaking data dependence.
bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
if (BranchInst *Br = getBranch(BB)) {
- return Br->isUnconditional() && BB->size() == 1;
+ return Br->isUnconditional() && Br == BB->begin();
}
return false;
}
@@ -542,7 +510,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
PreCond->replaceAllUsesWith(NewPreCond);
- deleteDeadInstruction(PreCond, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}
// Step 3: Note that the population count is exactly the trip count of the
@@ -592,15 +560,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// Step 4: All the references to the original population counter outside
// the loop are replaced with the NewCount -- the value returned from
// __builtin_ctpop().
- {
- SmallVector<Value *, 4> CntUses;
- for (User *U : CntInst->users())
- if (cast<Instruction>(U)->getParent() != Body)
- CntUses.push_back(U);
- for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
- (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
- }
- }
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
@@ -651,7 +611,9 @@ bool NclPopcountRecognize::recognize() {
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
- if (isa<SCEVCouldNotCompute>(BECount)) return false;
+ assert(!isa<SCEVCouldNotCompute>(BECount) &&
+ "runOnCountableLoop() called on a loop without a predictable "
+ "backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
@@ -659,15 +621,11 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
if (BECst->getValue()->getValue() == 0)
return false;
- // We require target data for now.
- if (!getDataLayout())
- return false;
-
// set DT
(void)getDominatorTree();
- LoopInfo &LI = getAnalysis<LoopInfo>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// set TLI
(void)getTargetLibraryInfo();
@@ -681,13 +639,12 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
- for (Loop::block_iterator BI = CurLoop->block_begin(),
- E = CurLoop->block_end(); BI != E; ++BI) {
+ for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
- if (LI.getLoopFor(*BI) != CurLoop)
+ if (LI.getLoopFor(BB) != CurLoop)
continue;
- MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}
return MadeChange;
}
@@ -776,7 +733,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StorePtr = SI->getPointerOperand();
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
@@ -951,7 +909,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
-
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -962,9 +920,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = nullptr;
- } else if (DestAS == 0 &&
- TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
+ } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
SplatValue = nullptr;
@@ -979,7 +936,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
@@ -997,7 +954,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(BasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
return false;
}
@@ -1039,12 +996,12 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
- GlobalValue::InternalLinkage,
+ GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
- NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
@@ -1053,7 +1010,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, *SE, TLI);
+ deleteDeadInstruction(TheStore, TLI);
++NumMemSet;
return true;
}
@@ -1076,7 +1033,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1094,7 +1052,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
@@ -1109,8 +1067,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
- deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
@@ -1143,7 +1101,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, *SE, TLI);
+ deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index d664f85..e125026 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -23,7 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -44,12 +44,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
}
@@ -58,9 +58,9 @@ char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
@@ -76,10 +76,9 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = &getAnalysis<LoopInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
@@ -109,6 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
WorklistItem Item = VisitStack.pop_back_val();
BasicBlock *BB = Item.getPointer();
bool IsSubloopHeader = Item.getInt();
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
new file mode 100644
index 0000000..f584018
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -0,0 +1,1300 @@
+//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass handles the loop interchange transform: it interchanges loops to
+// provide more cache-friendly memory access patterns.
+//
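+// For example, when legal and profitable,
+// for(int i=0;i<N;i++)
+// for(int j=0;j<M;j++)
+// A[j][i] += B[j][i];
+// becomes
+// for(int j=0;j<M;j++)
+// for(int i=0;i<N;i++)
+// A[j][i] += B[j][i];
+// so that the fastest-varying subscript is driven by the innermost loop,
+// giving unit-stride (cache-friendly) accesses for row-major arrays.
+//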
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-interchange"
+
+namespace {
+
+typedef SmallVector<Loop *, 8> LoopVector;
+
+// TODO: Check if we can use a sparse matrix here.
+typedef std::vector<std::vector<char>> CharMatrix;
+
+// Maximum number of dependencies that can be handled in the dependency matrix.
+static const unsigned MaxMemInstrCount = 100;
+
+// Maximum loop depth supported.
+static const unsigned MaxLoopNestDepth = 10;
+
+struct LoopInterchange;
+
+#ifdef DUMP_DEP_MATRICIES
+void printDepMatrix(CharMatrix &DepMatrix) {
+ for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
+ std::vector<char> Vec = *I;
+ for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
+ DEBUG(dbgs() << *II << " ");
+ DEBUG(dbgs() << "\n");
+ }
+}
+#endif
+
+static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
+ Loop *L, DependenceAnalysis *DA) {
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector MemInstr;
+
+ if (Level > MaxLoopNestDepth) {
+ DEBUG(dbgs() << "Cannot handle loops of depth greater than "
+ << MaxLoopNestDepth << "\n");
+ return false;
+ }
+
+ // For each block.
+ for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
+ BB != BE; ++BB) {
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
+ ++I) {
+ Instruction *Ins = dyn_cast<Instruction>(I);
+ if (!Ins)
+ return false;
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St && !Ld)
+ continue;
+ if (Ld && !Ld->isSimple())
+ return false;
+ if (St && !St->isSimple())
+ return false;
+ MemInstr.push_back(I);
+ }
+ }
+
+ DEBUG(dbgs() << "Found " << MemInstr.size()
+ << " Loads and Stores to analyze\n");
+
+ ValueVector::iterator I, IE, J, JE;
+
+ for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
+ for (J = I, JE = MemInstr.end(); J != JE; ++J) {
+ std::vector<char> Dep;
+ Instruction *Src = dyn_cast<Instruction>(*I);
+ Instruction *Des = dyn_cast<Instruction>(*J);
+ if (Src == Des)
+ continue;
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
+ continue;
+ if (auto D = DA->depends(Src, Des, true)) {
+ DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
+ << "\n");
+ if (D->isFlow()) {
+ // TODO: Handle Flow dependence. Check if it is sufficient to populate
+ // the Dependence Matrix with the direction reversed.
+ DEBUG(dbgs() << "Flow dependence not handled");
+ return false;
+ }
+ if (D->isAnti()) {
+ DEBUG(dbgs() << "Found Anti dependence \n");
+ unsigned Levels = D->getLevels();
+ char Direction;
+ for (unsigned II = 1; II <= Levels; ++II) {
+ const SCEV *Distance = D->getDistance(II);
+ const SCEVConstant *SCEVConst =
+ dyn_cast_or_null<SCEVConstant>(Distance);
+ if (SCEVConst) {
+ const ConstantInt *CI = SCEVConst->getValue();
+ if (CI->isNegative())
+ Direction = '<';
+ else if (CI->isZero())
+ Direction = '=';
+ else
+ Direction = '>';
+ Dep.push_back(Direction);
+ } else if (D->isScalar(II)) {
+ Direction = 'S';
+ Dep.push_back(Direction);
+ } else {
+ unsigned Dir = D->getDirection(II);
+ if (Dir == Dependence::DVEntry::LT ||
+ Dir == Dependence::DVEntry::LE)
+ Direction = '<';
+ else if (Dir == Dependence::DVEntry::GT ||
+ Dir == Dependence::DVEntry::GE)
+ Direction = '>';
+ else if (Dir == Dependence::DVEntry::EQ)
+ Direction = '=';
+ else
+ Direction = '*';
+ Dep.push_back(Direction);
+ }
+ }
+ while (Dep.size() != Level) {
+ Dep.push_back('I');
+ }
+
+ DepMatrix.push_back(Dep);
+ if (DepMatrix.size() > MaxMemInstrCount) {
+ DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
+ << " dependencies inside loop\n");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // We don't have a DepMatrix to check legality against; return false.
+ if (DepMatrix.size() == 0)
+ return false;
+ return true;
+}
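+
+// Each row of DepMatrix is a direction vector with one entry per loop level:
+// '<' for a negative dependence distance (or an LT/LE direction), '>' for a
+// positive distance (or GT/GE), '=' for a zero distance (or EQ), 'S' for a
+// scalar level, 'I' for levels the dependence does not cover, and '*' for an
+// unknown direction.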
+
+// A loop is moved from index 'from' to an index 'to'. Update the Dependence
+// matrix by exchanging the two columns.
+static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
+ unsigned ToIndx) {
+ unsigned numRows = DepMatrix.size();
+ for (unsigned i = 0; i < numRows; ++i) {
+ char TmpVal = DepMatrix[i][ToIndx];
+ DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
+ DepMatrix[i][FromIndx] = TmpVal;
+ }
+}
+
+// Checks if the outermost non-'=', non-'S', non-'I' dependence in the
+// dependence matrix is '>'.
+static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i <= Column; ++i) {
+ if (DepMatrix[Row][i] == '<')
+ return false;
+ if (DepMatrix[Row][i] == '>')
+ return true;
+ }
+ // All dependencies were '=','S' or 'I'
+ return false;
+}
+
+// Checks if no dependence exists in Row of the dependency matrix before Column.
+static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i < Column; ++i) {
+ if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
+ DepMatrix[Row][i] != 'I')
+ return false;
+ }
+ return true;
+}
+
+static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
+ unsigned OuterLoopId, char InnerDep,
+ char OuterDep) {
+
+ if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
+ return false;
+
+ if (InnerDep == OuterDep)
+ return true;
+
+ // It is legal to interchange if and only if after interchange no row has a
+ // '>' direction as the leftmost non-'='.
+
+ if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
+ return true;
+
+ if (InnerDep == '<')
+ return true;
+
+ if (InnerDep == '>') {
+ // If OuterLoopId represents the outermost loop, then interchanging will
+ // make the first dependency '>'.
+ if (OuterLoopId == 0)
+ return false;
+
+ // If all dependencies before OuterLoopId are '=', 'S' or 'I', then
+ // interchanging will result in this row having an outermost non-'='
+ // dependency of '>'.
+ if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
+ return true;
+ }
+
+ return false;
+}
+
+// Checks if it is legal to interchange 2 loops.
+// [Theorem] A permutation of the loops in a perfect nest is legal if and only
+// if the direction matrix, after the same permutation is applied to its
+// columns, has no '>' direction as the leftmost non-'=' direction in any row.
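+//
+// For example, the row "=<" (for loops i, j) becomes "<=" after interchanging
+// the two columns; its leftmost non-'=' direction is '<', so the interchange
+// is legal. The row "<>" would become "><", whose leftmost non-'=' direction
+// is '>', so that interchange is illegal.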
+static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
+ unsigned InnerLoopId,
+ unsigned OuterLoopId) {
+
+ unsigned NumRows = DepMatrix.size();
+ // For each row check if it is valid to interchange.
+ for (unsigned Row = 0; Row < NumRows; ++Row) {
+ char InnerDep = DepMatrix[Row][InnerLoopId];
+ char OuterDep = DepMatrix[Row][OuterLoopId];
+ if (InnerDep == '*' || OuterDep == '*')
+ return false;
+ else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep,
+ OuterDep))
+ return false;
+ }
+ return true;
+}
+
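+// Populate the worklist with a chain of tightly nested loops: for example,
+// for (i) { for (j) { body } } yields the entry [i-loop, j-loop], while a
+// nest whose outer loop contains two sibling subloops is discarded entirely,
+// since it cannot be tightly nested.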
+static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
+
+ DEBUG(dbgs() << "Calling populateWorklist called\n");
+ LoopVector LoopList;
+ Loop *CurrentLoop = &L;
+ std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector();
+ while (vec.size() != 0) {
+ // The current loop has multiple subloops in it, so it is not tightly
+ // nested. Discard all loops above it that were added to the worklist.
+ if (vec.size() != 1) {
+ LoopList.clear();
+ return;
+ }
+ LoopList.push_back(CurrentLoop);
+ CurrentLoop = *(vec.begin());
+ vec = CurrentLoop->getSubLoopsVector();
+ }
+ LoopList.push_back(CurrentLoop);
+ V.push_back(LoopList);
+}
+
+static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
+ PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
+ if (InnerIndexVar)
+ return InnerIndexVar;
+ if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
+ return nullptr;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PhiVar = cast<PHINode>(I);
+ Type *PhiTy = PhiVar->getType();
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy())
+ return nullptr;
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
+ if (!AddRec || !AddRec->isAffine())
+ continue;
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ continue;
+ // Found the induction variable.
+ // FIXME: Handle loops with more than one induction variable. Note that,
+ // currently, legality makes sure we have only one induction variable.
+ return PhiVar;
+ }
+ return nullptr;
+}
+
+/// LoopInterchangeLegality checks if it is legal to interchange the loops.
+class LoopInterchangeLegality {
+public:
+ LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInterchange *Pass)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
+ InnerLoopHasReduction(false) {}
+
+ /// Check if the loops can be interchanged.
+ bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+ /// Check if the loop structure is understood. We do not handle triangular
+ /// loops for now.
+ bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
+
+ bool currentLimitations();
+
+ bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
+
+private:
+ bool tightlyNested(Loop *Outer, Loop *Inner);
+ bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
+ bool areAllUsesReductions(Instruction *Ins, Loop *L);
+ bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
+ bool findInductionAndReductions(Loop *L,
+ SmallVector<PHINode *, 8> &Inductions,
+ SmallVector<PHINode *, 8> &Reductions);
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// SCEV analysis.
+ ScalarEvolution *SE;
+ LoopInterchange *CurrentPass;
+
+ bool InnerLoopHasReduction;
+};
+
+/// LoopInterchangeProfitability checks if it is profitable to interchange the
+/// loop.
+class LoopInterchangeProfitability {
+public:
+ LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
+
+ /// Check if the loop interchange is profitable.
+ bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+
+private:
+ int getInstrOrderCost();
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// SCEV analysis.
+ ScalarEvolution *SE;
+};
+
+/// LoopInterchangeTransform interchanges the loops.
+class LoopInterchangeTransform {
+public:
+ LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT,
+ LoopInterchange *Pass, BasicBlock *LoopNestExit,
+ bool InnerLoopContainsReductions)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ LoopExit(LoopNestExit),
+ InnerLoopHasReduction(InnerLoopContainsReductions) {}
+
+ /// Interchange OuterLoop and InnerLoop.
+ bool transform();
+ void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
+ void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
+
+private:
+ void splitInnerLoopLatch(Instruction *);
+ void splitOuterLoopLatch();
+ void splitInnerLoopHeader();
+ bool adjustLoopLinks();
+ void adjustLoopPreheaders();
+ void adjustOuterLoopPreheader();
+ void adjustInnerLoopPreheader();
+ bool adjustLoopBranches();
+ void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
+ BasicBlock *NewPred);
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// SCEV analysis.
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ BasicBlock *LoopExit;
+ bool InnerLoopHasReduction;
+};
+
+// Main LoopInterchange Pass
+struct LoopInterchange : public FunctionPass {
+ static char ID;
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DependenceAnalysis *DA;
+ DominatorTree *DT;
+ LoopInterchange()
+ : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
+ initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DependenceAnalysis>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ }
+
+ bool runOnFunction(Function &F) override {
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DA = &getAnalysis<DependenceAnalysis>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ // Build up a worklist of loop pairs to analyze.
+ SmallVector<LoopVector, 8> Worklist;
+
+ for (Loop *L : *LI)
+ populateWorklist(*L, Worklist);
+
+ DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ LoopVector LoopList = Worklist.pop_back_val();
+ Changed |= processLoopList(LoopList, F);
+ }
+ return Changed;
+ }
+
+ bool isComputableLoopNest(LoopVector LoopList) {
+ for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
+ Loop *L = *I;
+ const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
+ if (ExitCountOuter == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "Couldn't compute Backedge count\n");
+ return false;
+ }
+ if (L->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
+ return false;
+ }
+ if (!L->getExitingBlock()) {
+ DEBUG(dbgs() << "Loop Doesn't have unique exit block\n");
+ return false;
+ }
+ }
+ return true;
+ }
+
+ unsigned selectLoopForInterchange(LoopVector LoopList) {
+ // TODO: Add a better heuristic to select the loop to be interchanged based
+ // on the dependence matrix. Currently we select the innermost loop.
+ return LoopList.size() - 1;
+ }
+
+ bool processLoopList(LoopVector LoopList, Function &F) {
+
+ bool Changed = false;
+ CharMatrix DependencyMatrix;
+ if (LoopList.size() < 2) {
+ DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+ return false;
+ }
+ if (!isComputableLoopNest(LoopList)) {
+ DEBUG(dbgs() << "Not vaild loop candidate for interchange\n");
+ return false;
+ }
+ Loop *OuterMostLoop = *(LoopList.begin());
+
+ DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size()
+ << "\n");
+
+ if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
+ OuterMostLoop, DA)) {
+ DEBUG(dbgs() << "Populating Dependency matrix failed\n");
+ return false;
+ }
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence before inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+
+ BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
+ BranchInst *OuterMostLoopLatchBI =
+ dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
+ if (!OuterMostLoopLatchBI)
+ return false;
+
+ // Since we currently do not handle LCSSA PHIs, any failure in the loop
+ // condition will now branch to LoopNestExit.
+ // TODO: This should be removed once we handle LCSSA PHI nodes.
+
+ // Get the Outermost loop exit.
+ BasicBlock *LoopNestExit;
+ if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
+ else
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
+
+ if (isa<PHINode>(LoopNestExit->begin())) {
+ DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "
+ "since on failure all loops branch to loop nest exit.\n");
+ return false;
+ }
+
+ unsigned SelectLoopId = selectLoopForInterchange(LoopList);
+ // Move the selected loop outwards to the best possible position.
+ for (unsigned i = SelectLoopId; i > 0; i--) {
+ bool Interchanged =
+ processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
+ if (!Interchanged)
+ return Changed;
+ // Loops were interchanged; reflect the same in LoopList.
+ std::swap(LoopList[i - 1], LoopList[i]);
+
+ // Update the DependencyMatrix
+ interChangeDependencies(DependencyMatrix, i, i - 1);
+ DT->recalculate(F);
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence after inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+ Changed |= Interchanged;
+ }
+ return Changed;
+ }
+
+ bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
+ unsigned OuterLoopId, BasicBlock *LoopNestExit,
+ std::vector<std::vector<char>> &DependencyMatrix) {
+
+ DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId
+ << " and OuterLoopId = " << OuterLoopId << "\n");
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
+ return false;
+ }
+ DEBUG(dbgs() << "Loops are legal to interchange\n");
+ LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+ if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Interchanging Loops not profitable\n");
+ return false;
+ }
+
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopNestExit, LIL.hasInnerLoopReduction());
+ LIT.transform();
+ DEBUG(dbgs() << "Loops interchanged\n");
+ return true;
+ }
+};
+
+} // end of namespace
+
+bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
+ return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool {
+ PHINode *UserIns = dyn_cast<PHINode>(U);
+ ReductionDescriptor RD;
+ return !UserIns || !ReductionDescriptor::isReductionPHI(UserIns, L, RD);
+ });
+}
+
+bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
+ BasicBlock *BB) {
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Loads corresponding to reduction PHIs are safe when concluding whether
+ // the loops are tightly nested.
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ if (!areAllUsesReductions(L, InnerLoop))
+ return true;
+ } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ return true;
+ }
+ return false;
+}
+
+bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
+ BasicBlock *BB) {
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Stores corresponding to reductions are safe when concluding whether the
+ // loops are tightly nested.
+ if (StoreInst *L = dyn_cast<StoreInst>(I)) {
+ PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0));
+ if (!PHI)
+ return true;
+ } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ return true;
+ }
+ return false;
+}
+
+bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n");
+
+ // A perfectly nested loop will not have any branch in between the outer and
+ // inner block, i.e. the outer header will branch to either the inner
+ // preheader or the outer loop latch.
+ BranchInst *outerLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ if (!outerLoopHeaderBI)
+ return false;
+ unsigned num = outerLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < num; i++) {
+ if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
+ outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
+ return false;
+ }
+
+ DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
+ // We do not have any basic block in between; now make sure the outer header
+ // and the outer loop latch don't contain any unsafe instructions.
+ if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
+ containsUnsafeInstructionsInLatch(OuterLoopLatch))
+ return false;
+
+ DEBUG(dbgs() << "Loops are perfectly nested \n");
+ // We have a perfect loop nest.
+ return true;
+}
+
+bool LoopInterchangeLegality::isLoopStructureUnderstood(
+ PHINode *InnerInduction) {
+
+ unsigned Num = InnerInduction->getNumOperands();
+ BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
+ for (unsigned i = 0; i < Num; ++i) {
+ Value *Val = InnerInduction->getOperand(i);
+ if (isa<Constant>(Val))
+ continue;
+ Instruction *I = dyn_cast<Instruction>(Val);
+ if (!I)
+ return false;
+ // TODO: Handle triangular loops.
+ // e.g. for(int i=0;i<N;i++)
+ // for(int j=i;j<N;j++)
+ unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
+ if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
+ InnerLoopPreheader &&
+ !OuterLoop->isLoopInvariant(I)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool LoopInterchangeLegality::findInductionAndReductions(
+ Loop *L, SmallVector<PHINode *, 8> &Inductions,
+ SmallVector<PHINode *, 8> &Reductions) {
+ if (!L->getLoopLatch() || !L->getLoopPredecessor())
+ return false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ ReductionDescriptor RD;
+ PHINode *PHI = cast<PHINode>(I);
+ ConstantInt *StepValue = nullptr;
+ if (isInductionPHI(PHI, SE, StepValue))
+ Inductions.push_back(PHI);
+ else if (ReductionDescriptor::isReductionPHI(PHI, L, RD))
+ Reductions.push_back(PHI);
+ else {
+ DEBUG(
+ dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
+ for (auto I = Block->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ // A reduction LCSSA PHI will have only one incoming block, the loop latch.
+ if (PHI->getNumIncomingValues() > 1)
+ return false;
+ Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0));
+ if (!Ins)
+ return false;
+ // The incoming value for LCSSA PHIs in the outer loop exit can only be the
+ // inner loop exit's LCSSA PHI; otherwise the loops would not be tightly
+ // nested.
+ if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
+ return false;
+ }
+ return true;
+}
+
+static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
+ BasicBlock *LoopHeader) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
+ unsigned Num = BI->getNumSuccessors();
+ assert(Num == 2);
+ for (unsigned i = 0; i < Num; ++i) {
+ if (BI->getSuccessor(i) == LoopHeader)
+ continue;
+ return BI->getSuccessor(i);
+ }
+ }
+ return nullptr;
+}
+
+// This function indicates the current limitations in the transform as a result
+// of which we do not proceed.
+bool LoopInterchangeLegality::currentLimitations() {
+
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+
+ PHINode *InnerInductionVar;
+ SmallVector<PHINode *, 8> Inductions;
+ SmallVector<PHINode *, 8> Reductions;
+ if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
+ return true;
+
+ // TODO: Currently we handle only loops with 1 induction variable.
+ if (Inductions.size() != 1) {
+ DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
+ << "Failed to interchange due to current limitation\n");
+ return true;
+ }
+ if (!Reductions.empty())
+ InnerLoopHasReduction = true;
+
+ InnerInductionVar = Inductions.pop_back_val();
+ Reductions.clear();
+ if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
+ return true;
+
+ // The outer loop cannot have a reduction, because then the loops would not
+ // be tightly nested.
+ if (!Reductions.empty())
+ return true;
+ // TODO: Currently we handle only loops with 1 induction variable.
+ if (Inductions.size() != 1)
+ return true;
+
+ // TODO: Triangular loops are not handled for now.
+ if (!isLoopStructureUnderstood(InnerInductionVar)) {
+ DEBUG(dbgs() << "Loop structure not understood by pass\n");
+ return true;
+ }
+
+ // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
+ BasicBlock *LoopExitBlock =
+ getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
+ return true;
+
+ LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
+ return true;
+
+ // TODO: Current limitation: Since we split the inner loop latch at the point
+ // where the induction variable is incremented (induction.next), we cannot
+ // have more than one user of induction.next, since that would result in
+ // broken code after the split.
+ // e.g.
+ // for(i=0;i<N;i++) {
+ // for(j = 0;j<M;j++) {
+ // A[j+1][i+2] = A[j][i]+k;
+ // }
+ // }
+ bool FoundInduction = false;
+ Instruction *InnerIndexVarInc = nullptr;
+ if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
+ else
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
+
+ if (!InnerIndexVarInc)
+ return true;
+
+ // Since we split the inner loop latch on this induction variable, make sure
+ // we do not have any instruction between the induction variable and the
+ // branch instruction.
+
+ for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend();
+ I != E && !FoundInduction; ++I) {
+ if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I))
+ continue;
+ const Instruction &Ins = *I;
+ // We found an instruction. If it is not the induction variable, then it is
+ // not safe to split this loop latch.
+ if (!Ins.isIdenticalTo(InnerIndexVarInc))
+ return true;
+ else
+ FoundInduction = true;
+ }
+ // The loop latch ended and we didn't find the induction variable; return as
+ // a current limitation.
+ if (!FoundInduction)
+ return true;
+
+ return false;
+}
+
+bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+ if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
+ DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
+ << "and OuterLoopId = " << OuterLoopId
+ << "due to dependence\n");
+ return false;
+ }
+
+ // Create unique preheaders if we do not already have them.
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+
+ // Create a unique outer preheader -
+ // 1) If OuterLoop preheader is not present.
+ // 2) If OuterLoop Preheader is same as OuterLoop Header
+ // 3) If OuterLoop Preheader is same as Header of the previous loop.
+ // 4) If OuterLoop Preheader is Entry node.
+ if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
+ isa<PHINode>(OuterLoopPreHeader->begin()) ||
+ !OuterLoopPreHeader->getUniquePredecessor()) {
+ OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ }
+
+ if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
+ InnerLoopPreHeader == OuterLoop->getHeader()) {
+ InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ }
+
+ // TODO: The loops could not be interchanged due to current limitations in the
+ // transform module.
+ if (currentLimitations()) {
+ DEBUG(dbgs() << "Not legal because of current transform limitation\n");
+ return false;
+ }
+
+ // Check if the loops are tightly nested.
+ if (!tightlyNested(OuterLoop, InnerLoop)) {
+ DEBUG(dbgs() << "Loops not tightly nested\n");
+ return false;
+ }
+
+ return true;
+}
+
+int LoopInterchangeProfitability::getInstrOrderCost() {
+ unsigned GoodOrder = 0, BadOrder = 0;
+ for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
+ BI != BE; ++BI) {
+ for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
+ const Instruction &Ins = *I;
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
+ unsigned NumOp = GEP->getNumOperands();
+ bool FoundInnerInduction = false;
+ bool FoundOuterInduction = false;
+ for (unsigned i = 0; i < NumOp; ++i) {
+ const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
+ if (!AR)
+ continue;
+
+ // If we find the inner induction after an outer induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[i][j] = A[i-1][j-1]+k;
+ // then it is a good order.
+ if (AR->getLoop() == InnerLoop) {
+ // We found an InnerLoop induction after OuterLoop induction. It is
+ // a good order.
+ FoundInnerInduction = true;
+ if (FoundOuterInduction) {
+ GoodOrder++;
+ break;
+ }
+ }
+ // If we find the outer induction after an inner induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[j][i] = A[j-1][i-1]+k;
+ // then it is a bad order.
+ if (AR->getLoop() == OuterLoop) {
+ // We found an OuterLoop induction after InnerLoop induction. It is
+ // a bad order.
+ FoundOuterInduction = true;
+ if (FoundInnerInduction) {
+ BadOrder++;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ return GoodOrder - BadOrder;
+}
+
+static bool isProfitableForVectorization(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+ // TODO: Improve this heuristic to catch more cases.
+ // If the inner loop is loop independent or doesn't carry any dependency, it
+ // is profitable to move it to the outer position.
+ unsigned Row = DepMatrix.size();
+ for (unsigned i = 0; i < Row; ++i) {
+ if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
+ return false;
+ // TODO: We need to improve this heuristic.
+ if (DepMatrix[i][OuterLoopId] != '=')
+ return false;
+ }
+ // If the outer loop carries a dependence and the inner loop is loop
+ // independent, then it is profitable to interchange to enable parallelism.
+ return true;
+}
+
+bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+ // TODO: Add better profitability checks, e.g.:
+ // 1) Construct the dependency matrix and move the loop with no loop-carried
+ // dependence inside to enable vectorization.
+
+ // This is a rough cost estimation algorithm: it counts the good and bad
+ // orders of induction variables in the instructions and allows reordering if
+ // the number of bad orders is more than the number of good ones.
+ int Cost = 0;
+ Cost += getInstrOrderCost();
+ DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < 0)
+ return true;
+
+ // It is not profitable as per the current cache profitability model, but
+ // check if we can move this loop outside to improve parallelism.
+ bool ImprovesPar =
+ isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+ return ImprovesPar;
+}
+
+void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
+ Loop *InnerLoop) {
+ for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
+ ++I) {
+ if (*I == InnerLoop) {
+ OuterLoop->removeChildLoop(I);
+ return;
+ }
+ }
+ assert(false && "Couldn't find loop");
+}
+
+void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
+ Loop *OuterLoop) {
+ Loop *OuterLoopParent = OuterLoop->getParentLoop();
+ if (OuterLoopParent) {
+ // Remove the loop from its parent loop.
+ removeChildLoop(OuterLoopParent, OuterLoop);
+ removeChildLoop(OuterLoop, InnerLoop);
+ OuterLoopParent->addChildLoop(InnerLoop);
+ } else {
+ removeChildLoop(OuterLoop, InnerLoop);
+ LI->changeTopLevelLoop(OuterLoop, InnerLoop);
+ }
+
+ while (!InnerLoop->empty())
+ OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
+
+ InnerLoop->addChildLoop(OuterLoop);
+}
+
+bool LoopInterchangeTransform::transform() {
+
+ DEBUG(dbgs() << "transform\n");
+ bool Transformed = false;
+ Instruction *InnerIndexVar;
+
+ if (InnerLoop->getSubLoops().empty()) {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ DEBUG(dbgs() << "Calling Split Inner Loop\n");
+ PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
+ if (!InductionPHI) {
+ DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
+ return false;
+ }
+
+ if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
+ else
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
+
+ // Split at the place where the induction variable is
+ // incremented/decremented.
+ // TODO: This splitting logic may not always work. Fix this.
+ splitInnerLoopLatch(InnerIndexVar);
+ DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
+
+ // Split the inner loop's PHI nodes out into a separate basic block.
+ splitInnerLoopHeader();
+ DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
+ }
+
+ Transformed |= adjustLoopLinks();
+ if (!Transformed) {
+ DEBUG(dbgs() << "adjustLoopLinks Failed\n");
+ return false;
+ }
+
+ restructureLoops(InnerLoop, OuterLoop);
+ return true;
+}
+
+void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
+ InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+}
+
+void LoopInterchangeTransform::splitOuterLoopLatch() {
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
+ OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
+ OuterLoopLatch->getFirstNonPHI(), DT, LI);
+}
+
+void LoopInterchangeTransform::splitInnerLoopHeader() {
+
+ // Split the inner loop header out. Here, make sure that the reduction PHIs
+ // stay in the inner loop body.
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ if (InnerLoopHasReduction) {
+ // FIXME: Check if the induction PHI will always be the first PHI.
+ BasicBlock *New = InnerLoopHeader->splitBasicBlock(
+ ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
+ if (LI)
+ if (Loop *L = LI->getLoopFor(InnerLoopHeader))
+ L->addBasicBlockToLoop(New, *LI);
+
+ // Adjust reduction PHIs in the block.
+ SmallVector<PHINode *, 8> PHIVec;
+ for (auto I = New->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = dyn_cast<PHINode>(I);
+ Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader);
+ PHI->replaceAllUsesWith(V);
+ PHIVec.push_back(PHI);
+ }
+ for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) {
+ PHINode *P = *I;
+ P->eraseFromParent();
+ }
+ } else {
+ SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
+ }
+
+ DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
+ "InnerLoopHeader \n");
+}
+
+/// \brief Move all instructions except the terminator from FromBB right before
+/// InsertBefore.
+static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
+ auto &ToList = InsertBefore->getParent()->getInstList();
+ auto &FromList = FromBB->getInstList();
+
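+ // splice moves the list nodes in place, so instruction pointers and uses
+ // stay valid; only the parent basic block changes.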
+ ToList.splice(InsertBefore, FromList, FromList.begin(),
+ FromBB->getTerminator());
+}
+
+void LoopInterchangeTransform::adjustOuterLoopPreheader() {
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader();
+
+ moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator());
+}
+
+void LoopInterchangeTransform::adjustInnerLoopPreheader() {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterHeader = OuterLoop->getHeader();
+
+ moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator());
+}
+
+void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred) {
+ for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ unsigned Num = PHI->getNumIncomingValues();
+ for (unsigned i = 0; i < Num; ++i) {
+ if (PHI->getIncomingBlock(i) == OldPred)
+ PHI->setIncomingBlock(i, NewPred);
+ }
+ }
+}
+
+bool LoopInterchangeTransform::adjustLoopBranches() {
+
+ DEBUG(dbgs() << "adjustLoopBranches called\n");
+ // Adjust the loop preheader
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchPredecessor =
+ InnerLoopLatch->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchSuccessor;
+ BasicBlock *OuterLoopLatchSuccessor;
+
+ BranchInst *OuterLoopLatchBI =
+ dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
+ BranchInst *InnerLoopLatchBI =
+ dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
+ BranchInst *OuterLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ BranchInst *InnerLoopHeaderBI =
+ dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
+
+ if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
+ !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
+ !InnerLoopHeaderBI)
+ return false;
+
+ BranchInst *InnerLoopLatchPredecessorBI =
+ dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
+ BranchInst *OuterLoopPredecessorBI =
+ dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
+
+ if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
+ return false;
+ BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
+ if (!InnerLoopHeaderSuccessor)
+ return false;
+
+ // Adjust Loop Preheader and headers
+
+ unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
+ OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
+ }
+
+ NumSucc = OuterLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
+ OuterLoopHeaderBI->setSuccessor(i, LoopExit);
+ else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
+ OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
+ }
+
+ // Adjust reduction PHIs now that the incoming block has changed.
+ updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
+ OuterLoopHeader);
+
+ BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
+ InnerLoopHeaderBI->eraseFromParent();
+
+ // -------------Adjust loop latches-----------
+ if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
+ else
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
+
+ NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
+ InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
+ }
+
+ // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of each PHI
+ // with its incoming value and remove the PHI node from the inner loop.
+ SmallVector<PHINode *, 8> LcssaVec;
+ for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *LcssaPhi = cast<PHINode>(I);
+ LcssaVec.push_back(LcssaPhi);
+ }
+ for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) {
+ PHINode *P = *I;
+ Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
+ P->replaceAllUsesWith(Incoming);
+ P->eraseFromParent();
+ }
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
+ else
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
+
+ if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
+ InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
+ else
+ InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
+
+ updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
+ OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
+ } else {
+ OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
+ }
+
+ return true;
+}
+
+void LoopInterchangeTransform::adjustLoopPreheaders() {
+
+  // We have interchanged the preheaders, so we need to interchange the data
+  // in the preheaders as well. This is because the contents of the inner
+  // preheader were previously executed inside the outer loop.
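+  //
+  // An illustrative sketch (ours, not taken from the pass): given
+  //
+  //   for (i) {
+  //     t = ...;        // inner loop preheader contents
+  //     for (j)
+  //       use(t);
+  //   }
+  //
+  // 't' runs once per i-iteration. After interchange the i-loop becomes the
+  // inner loop, so 't' cannot stay in a once-executed preheader; the
+  // moveBBContents calls below place it back inside the loop nest.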
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BranchInst *InnerTermBI =
+ cast<BranchInst>(InnerLoopPreHeader->getTerminator());
+
+  // These instructions should now be executed inside the loop.
+  // Move them to the end of the old outer loop header, before its terminator.
+ moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
+  // These instructions were not previously executed inside the loop, so move
+  // them to the old inner loop preheader.
+ moveBBContents(OuterLoopPreHeader, InnerTermBI);
+}
+
+bool LoopInterchangeTransform::adjustLoopLinks() {
+
+ // Adjust all branches in the inner and outer loop.
+ bool Changed = adjustLoopBranches();
+ if (Changed)
+ adjustLoopPreheaders();
+ return Changed;
+}
+
+char LoopInterchange::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+
+INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+
+Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 8f12204..ed103e6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -21,6 +23,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -28,7 +31,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -43,6 +45,12 @@ static cl::opt<unsigned>
MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
cl::desc("The maximum increment for loop rerolling"));
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+ cl::Hidden,
+ cl::desc("The maximum number of failures to tolerate"
+ " during fuzzy matching. (default: 400)"));
+
// This loop re-rolling transformation aims to transform loops like this:
//
// int foo(int a);
@@ -119,6 +127,16 @@ MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
// br %cmp, header, exit
namespace {
+ enum IterationLimits {
+ /// The maximum number of iterations that we'll try and reroll. This
+ /// has to be less than 25 in order to fit into a SmallBitVector.
+ IL_MaxRerollIterations = 16,
+ /// The bitvector index used by loop induction variables and other
+ /// instructions that belong to all iterations.
+ IL_All,
+ IL_End
+ };
+
class LoopReroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
@@ -130,19 +148,18 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-protected:
+ protected:
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
@@ -311,26 +328,113 @@ protected:
DenseSet<int> Reds;
};
+ // A DAGRootSet models an induction variable being used in a rerollable
+ // loop. For example,
+ //
+ // x[i*3+0] = y1
+ // x[i*3+1] = y2
+ // x[i*3+2] = y3
+ //
+ // Base instruction -> i*3
+ // +---+----+
+ // / | \
+ // ST[y1] +1 +2 <-- Roots
+ // | |
+ // ST[y2] ST[y3]
+ //
+ // There may be multiple DAGRoots, for example:
+ //
+ // x[i*2+0] = ... (1)
+ // x[i*2+1] = ... (1)
+ // x[i*2+4] = ... (2)
+ // x[i*2+5] = ... (2)
+ // x[(i+1234)*2+5678] = ... (3)
+ // x[(i+1234)*2+5679] = ... (3)
+ //
+ // The loop will be rerolled by adding a new loop induction variable,
+ // one for the Base instruction in each DAGRootSet.
+ //
+ struct DAGRootSet {
+ Instruction *BaseInst;
+ SmallInstructionVector Roots;
+ // The instructions between IV and BaseInst (but not including BaseInst).
+ SmallInstructionSet SubsumedInsts;
+ };
+
+ // The set of all DAG roots, and state tracking of all roots
+ // for a particular induction variable.
+ struct DAGRootTracker {
+ DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
+ ScalarEvolution *SE, AliasAnalysis *AA,
+ TargetLibraryInfo *TLI)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
+
+ /// Stage 1: Find all the DAG roots for the induction variable.
+ bool findRoots();
+    /// Stage 2: Check that the found roots are valid.
+ bool validate(ReductionTracker &Reductions);
+ /// Stage 3: Assuming validate() returned true, perform the
+ /// replacement.
+ /// @param IterCount The maximum iteration count of L.
+ void replace(const SCEV *IterCount);
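+    //
+    // Typical use, mirroring LoopReroll::reroll() further down (a sketch):
+    //   DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
+    //   if (DAGRoots.findRoots() && DAGRoots.validate(Reductions))
+    //     DAGRoots.replace(IterCount);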
+
+ protected:
+ typedef MapVector<Instruction*, SmallBitVector> UsesTy;
+
+ bool findRootsRecursive(Instruction *IVU,
+ SmallInstructionSet SubsumedInsts);
+ bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
+ bool collectPossibleRoots(Instruction *Base,
+ std::map<int64_t,Instruction*> &Roots);
+
+ bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
+ void collectInLoopUserSet(const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ void collectInLoopUserSet(Instruction *Root,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+
+ UsesTy::iterator nextInstr(int Val, UsesTy &In,
+ const SmallInstructionSet &Exclude,
+                               UsesTy::iterator *StartI = nullptr);
+ bool isBaseInst(Instruction *I);
+ bool isRootInst(Instruction *I);
+ bool instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End);
+
+ LoopReroll *Parent;
+
+ // Members of Parent, replicated here for brevity.
+ Loop *L;
+ ScalarEvolution *SE;
+ AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
+
+ // The loop induction variable.
+ Instruction *IV;
+ // Loop step amount.
+ uint64_t Inc;
+    // Loop reroll count; if Inc == 1, this records the scaling applied
+    // to the indvar, e.g. a[i*2+0] = ...; a[i*2+1] = ...;
+    // If Inc is not 1, Scale = Inc.
+ uint64_t Scale;
+ // The roots themselves.
+ SmallVector<DAGRootSet,16> RootSets;
+ // All increment instructions for IV.
+ SmallInstructionVector LoopIncs;
+ // Map of all instructions in the loop (in order) to the iterations
+ // they are used in (or specially, IL_All for instructions
+ // used in the loop increment mechanism).
+ UsesTy Uses;
+ };
+
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
- void collectInLoopUserSet(Loop *L,
- const SmallInstructionVector &Roots,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
- void collectInLoopUserSet(Loop *L,
- Instruction * Root,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
- bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
- Instruction *&IV,
- SmallInstructionVector &LoopIncs);
- bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
- SmallVector<SmallInstructionVector, 32> &Roots,
- SmallInstructionSet &AllRoots,
- SmallInstructionVector &LoopIncs);
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
ReductionTracker &Reductions);
};
@@ -339,10 +443,10 @@ protected:
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
Pass *llvm::createLoopRerollPass() {
@@ -353,10 +457,10 @@ Pass *llvm::createLoopRerollPass() {
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
- for (User *U : I->users())
+ for (User *U : I->users()) {
if (!L->contains(cast<Instruction>(U)))
return true;
-
+ }
return false;
}
@@ -403,6 +507,8 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
// (including the PHI), except for the last value (which is used by the PHI
// and also outside the loop).
Instruction *C = Instructions.front();
+ if (C->user_empty())
+ return;
do {
C = cast<Instruction>(*C->user_begin());
@@ -424,11 +530,12 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
return;
// C is now the (potential) last instruction in the reduction chain.
- for (User *U : C->users())
+ for (User *U : C->users()) {
// The only in-loop user can be the initial PHI.
if (L->contains(cast<Instruction>(U)))
if (cast<Instruction>(U) != Instructions.front())
return;
+ }
Instructions.push_back(C);
Valid = true;
@@ -467,7 +574,7 @@ void LoopReroll::collectPossibleReductions(Loop *L,
// if they are users, but their users are not added. This is used, for
// example, to prevent a reduction update from forcing all later reduction
// updates into the use set.
-void LoopReroll::collectInLoopUserSet(Loop *L,
+void LoopReroll::DAGRootTracker::collectInLoopUserSet(
Instruction *Root, const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
@@ -504,14 +611,14 @@ void LoopReroll::collectInLoopUserSet(Loop *L,
// Collect all of the users of all of the provided root instructions (combined
// into a single set).
-void LoopReroll::collectInLoopUserSet(Loop *L,
+void LoopReroll::DAGRootTracker::collectInLoopUserSet(
const SmallInstructionVector &Roots,
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
for (SmallInstructionVector::const_iterator I = Roots.begin(),
IE = Roots.end(); I != IE; ++I)
- collectInLoopUserSet(L, *I, Exclude, Final, Users);
+ collectInLoopUserSet(*I, Exclude, Final, Users);
}
static bool isSimpleLoadStore(Instruction *I) {
@@ -524,130 +631,699 @@ static bool isSimpleLoadStore(Instruction *I) {
return false;
}
-// Recognize loops that are setup like this:
-//
-// %iv = phi [ (preheader, ...), (body, %iv.next) ]
-// %scaled.iv = mul %iv, scale
-// f(%scaled.iv)
-// %scaled.iv.1 = add %scaled.iv, 1
-// f(%scaled.iv.1)
-// %scaled.iv.2 = add %scaled.iv, 2
-// f(%scaled.iv.2)
-// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
-// f(%scaled.iv.scale_m_1)
-// ...
-// %iv.next = add %iv, 1
-// %cmp = icmp(%iv, ...)
-// br %cmp, header, exit
-//
-// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
-bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
- Instruction *&IV,
- SmallInstructionVector &LoopIncs) {
- // This is a special case: here we're looking for all uses (except for
- // the increment) to be multiplied by a common factor. The increment must
- // be by one. This is to capture loops like:
- // for (int i = 0; i < 500; ++i) {
- // foo(3*i); foo(3*i+1); foo(3*i+2);
- // }
- if (RealIV->getNumUses() != 2)
+/// Return true if IVU is a "simple" arithmetic operation.
+/// This is used for narrowing the search space for DAGRoots; only arithmetic
+/// and GEPs can be part of a DAGRoot.
+static bool isSimpleArithmeticOp(User *IVU) {
+ if (Instruction *I = dyn_cast<Instruction>(IVU)) {
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::GetElementPtr:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return true;
+ }
+ }
+ return false;
+}
+
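+/// Return true if U looks like the loop-increment add for IV: a binary Add
+/// whose result feeds back into the induction PHI.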
+static bool isLoopIncrement(User *U, Instruction *IV) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return false;
+
+ for (auto *UU : BO->users()) {
+ PHINode *PN = dyn_cast<PHINode>(UU);
+ if (PN && PN == IV)
+ return true;
+ }
+ return false;
+}
+
+bool LoopReroll::DAGRootTracker::
+collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
+ SmallInstructionVector BaseUsers;
+
+ for (auto *I : Base->users()) {
+ ConstantInt *CI = nullptr;
+
+ if (isLoopIncrement(I, IV)) {
+ LoopIncs.push_back(cast<Instruction>(I));
+ continue;
+ }
+
+ // The root nodes must be either GEPs, ORs or ADDs.
+ if (auto *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Or)
+ CI = dyn_cast<ConstantInt>(BO->getOperand(1));
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
+ CI = dyn_cast<ConstantInt>(LastOperand);
+ }
+
+ if (!CI) {
+ if (Instruction *II = dyn_cast<Instruction>(I)) {
+ BaseUsers.push_back(II);
+ continue;
+ } else {
+ DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
+ return false;
+ }
+ }
+
+ int64_t V = CI->getValue().getSExtValue();
+ if (Roots.find(V) != Roots.end())
+ // No duplicates, please.
+ return false;
+
+ // FIXME: Add support for negative values.
+ if (V < 0) {
+ DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
+ return false;
+ }
+
+ Roots[V] = cast<Instruction>(I);
+ }
+
+ if (Roots.empty())
return false;
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
- Instruction *User1 = cast<Instruction>(*RealIV->user_begin()),
- *User2 = cast<Instruction>(*std::next(RealIV->user_begin()));
- if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
+
+ // If we found non-loop-inc, non-root users of Base, assume they are
+ // for the zeroth root index. This is because "add %a, 0" gets optimized
+ // away.
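+  // For example (illustrative): in "x[i*2+0] = ...; x[i*2+1] = ...", the
+  // "+0" add is folded away, so the first store's GEP uses Base (i*2)
+  // directly, and those direct users stand in for root index 0.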
+ if (BaseUsers.size()) {
+ if (Roots.find(0) != Roots.end()) {
+ DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
+ return false;
+ }
+ Roots[0] = Base;
+ }
+
+ // Calculate the number of users of the base, or lowest indexed, iteration.
+ unsigned NumBaseUses = BaseUsers.size();
+ if (NumBaseUses == 0)
+ NumBaseUses = Roots.begin()->second->getNumUses();
+
+ // Check that every node has the same number of users.
+ for (auto &KV : Roots) {
+ if (KV.first == 0)
+ continue;
+ if (KV.second->getNumUses() != NumBaseUses) {
+ DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
+ << "#Base=" << NumBaseUses << ", #Root=" <<
+ KV.second->getNumUses() << "\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool LoopReroll::DAGRootTracker::
+findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
+ // Does the user look like it could be part of a root set?
+ // All its users must be simple arithmetic ops.
+ if (I->getNumUses() > IL_MaxRerollIterations)
return false;
- const SCEVAddRecExpr *User1SCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
- *User2SCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
- if (!User1SCEV || !User1SCEV->isAffine() ||
- !User2SCEV || !User2SCEV->isAffine())
+
+ if ((I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::PHI) &&
+ I != IV &&
+ findRootsBase(I, SubsumedInsts))
+ return true;
+
+ SubsumedInsts.insert(I);
+
+  for (User *V : I->users()) {
+    Instruction *UI = dyn_cast<Instruction>(V);
+    if (std::find(LoopIncs.begin(), LoopIncs.end(), UI) != LoopIncs.end())
+      continue;
+
+    if (!UI || !isSimpleArithmeticOp(UI) ||
+        !findRootsRecursive(UI, SubsumedInsts))
+      return false;
+  }
+ return true;
+}
+
+bool LoopReroll::DAGRootTracker::
+findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
+
+  // The base instruction needs to be a multiply (or the IV PHI itself)
+  // so that we can erase it.
+ if (IVU->getOpcode() != Instruction::Mul &&
+ IVU->getOpcode() != Instruction::PHI)
+ return false;
+
+ std::map<int64_t, Instruction*> V;
+ if (!collectPossibleRoots(IVU, V))
return false;
- // We assume below that User1 is the scale multiply and User2 is the
- // increment. If this can't be true, then swap them.
- if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
- std::swap(User1, User2);
- std::swap(User1SCEV, User2SCEV);
+ // If we didn't get a root for index zero, then IVU must be
+ // subsumed.
+ if (V.find(0) == V.end())
+ SubsumedInsts.insert(IVU);
+
+  // Partition the root indexes into runs of consecutive values.
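+  // E.g. (drawing on the DAGRootSet comment above): indexes {0,1,4,5}
+  // split into two runs, {0,1} and {4,5}, each becoming its own DAGRootSet
+  // with bases 0 and 4 respectively.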
+ DAGRootSet DRS;
+ DRS.BaseInst = nullptr;
+
+ for (auto &KV : V) {
+ if (!DRS.BaseInst) {
+ DRS.BaseInst = KV.second;
+ DRS.SubsumedInsts = SubsumedInsts;
+ } else if (DRS.Roots.empty()) {
+ DRS.Roots.push_back(KV.second);
+ } else if (V.find(KV.first - 1) != V.end()) {
+ DRS.Roots.push_back(KV.second);
+ } else {
+ // Linear sequence terminated.
+ RootSets.push_back(DRS);
+ DRS.BaseInst = KV.second;
+ DRS.SubsumedInsts = SubsumedInsts;
+ DRS.Roots.clear();
+ }
+ }
+ RootSets.push_back(DRS);
+
+ return true;
+}
+
+bool LoopReroll::DAGRootTracker::findRoots() {
+
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
+ Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
+ getValue()->getZExtValue();
+
+ assert(RootSets.empty() && "Unclean state!");
+ if (Inc == 1) {
+ for (auto *IVU : IV->users()) {
+ if (isLoopIncrement(IVU, IV))
+ LoopIncs.push_back(cast<Instruction>(IVU));
+ }
+ if (!findRootsRecursive(IV, SmallInstructionSet()))
+ return false;
+ LoopIncs.push_back(IV);
+ } else {
+ if (!findRootsBase(IV, SmallInstructionSet()))
+ return false;
}
- if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
+ // Ensure all sets have the same size.
+ if (RootSets.empty()) {
+ DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
return false;
- assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
- "Invalid non-unit step for multiplicative scaling");
- LoopIncs.push_back(User2);
-
- if (const SCEVConstant *MulScale =
- dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
- // Make sure that both the start and step have the same multiplier.
- if (RealIVSCEV->getStart()->getType() != MulScale->getType())
+ }
+ for (auto &V : RootSets) {
+ if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
+ DEBUG(dbgs()
+ << "LRR: Aborting because not all root sets have the same size\n");
return false;
- if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
- User1SCEV->getStart())
+ }
+ }
+
+ // And ensure all loop iterations are consecutive. We rely on std::map
+ // providing ordered traversal.
+ for (auto &V : RootSets) {
+ const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
+ if (!ADR)
return false;
- ConstantInt *MulScaleCI = MulScale->getValue();
- if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
+ // Consider a DAGRootSet with N-1 roots (so N different values including
+ // BaseInst).
+ // Define d = Roots[0] - BaseInst, which should be the same as
+ // Roots[I] - Roots[I-1] for all I in [1..N).
+    // Define D = BaseInst@J - BaseInst@(J-1), where "@J" means the value at
+    // loop iteration J.
+ //
+    // Now, for the loop iterations to be consecutive:
+ // D = d * N
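+    //
+    // Worked example (ours): for x[i*3+0..2], d = 1 and N = 3; the base
+    // i*3 steps by D = 3 = d * N per loop iteration, so the roots cover
+    // consecutive values and the check passes.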
+
+ unsigned N = V.Roots.size() + 1;
+ const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
+ const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
+ if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
+ DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
return false;
- Scale = MulScaleCI->getZExtValue();
- IV = User1;
- } else
+ }
+ }
+ Scale = RootSets[0].Roots.size() + 1;
+
+ if (Scale > IL_MaxRerollIterations) {
+ DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
+ << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
+ << "\n");
return false;
+ }
+
+ DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
- DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
return true;
}
-// Collect all root increments with respect to the provided induction variable
-// (normally the PHI, but sometimes a multiply). A root increment is an
-// instruction, normally an add, with a positive constant less than Scale. In a
-// rerollable loop, each of these increments is the root of an instruction
-// graph isomorphic to the others. Also, we collect the final induction
-// increment (the increment equal to the Scale), and its users in LoopIncs.
-bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
- Instruction *IV,
- SmallVector<SmallInstructionVector, 32> &Roots,
- SmallInstructionSet &AllRoots,
- SmallInstructionVector &LoopIncs) {
- for (User *U : IV->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (!SE->isSCEVable(UI->getType()))
- continue;
- if (UI->getType() != IV->getType())
- continue;
- if (!L->contains(UI))
- continue;
- if (hasUsesOutsideLoop(UI, L))
- continue;
+bool LoopReroll::DAGRootTracker::
+collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
+ // Populate the MapVector with all instructions in the block, in order first,
+ // so we can iterate over the contents later in perfect order.
+ for (auto &I : *L->getHeader()) {
+ Uses[&I].resize(IL_End);
+ }
+
+ SmallInstructionSet Exclude;
+ for (auto &DRS : RootSets) {
+ Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
+ Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
+ Exclude.insert(DRS.BaseInst);
+ }
+ Exclude.insert(LoopIncs.begin(), LoopIncs.end());
+
+ for (auto &DRS : RootSets) {
+ DenseSet<Instruction*> VBase;
+ collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
+ for (auto *I : VBase) {
+ Uses[I].set(0);
+ }
+
+ unsigned Idx = 1;
+ for (auto *Root : DRS.Roots) {
+ DenseSet<Instruction*> V;
+ collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
- if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
- SE->getSCEV(UI), SE->getSCEV(IV)))) {
- uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
- if (Idx > 0 && Idx < Scale) {
- Roots[Idx-1].push_back(UI);
- AllRoots.insert(UI);
- } else if (Idx == Scale && Inc > 1) {
- LoopIncs.push_back(UI);
+ // While we're here, check the use sets are the same size.
+ if (V.size() != VBase.size()) {
+ DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
+ return false;
+ }
+
+ for (auto *I : V) {
+ Uses[I].set(Idx);
}
+ ++Idx;
}
+
+ // Make sure our subsumed instructions are remembered too.
+ for (auto *I : DRS.SubsumedInsts) {
+ Uses[I].set(IL_All);
+ }
+ }
+
+ // Make sure the loop increments are also accounted for.
+
+ Exclude.clear();
+ for (auto &DRS : RootSets) {
+ Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
+ Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
+ Exclude.insert(DRS.BaseInst);
+ }
+
+ DenseSet<Instruction*> V;
+ collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
+ for (auto *I : V) {
+ Uses[I].set(IL_All);
+ }
+
+ return true;
+}
+
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
+LoopReroll::DAGRootTracker::UsesTy::iterator
+LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
+ const SmallInstructionSet &Exclude,
+ UsesTy::iterator *StartI) {
+ UsesTy::iterator I = StartI ? *StartI : In.begin();
+ while (I != In.end() && (I->second.test(Val) == 0 ||
+ Exclude.count(I->first) != 0))
+ ++I;
+ return I;
+}
+
+bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
+ for (auto &DRS : RootSets) {
+ if (DRS.BaseInst == I)
+ return true;
}
+ return false;
+}
- if (Roots[0].empty())
+bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
+ for (auto &DRS : RootSets) {
+ if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
+ return true;
+ }
+ return false;
+}
+
+/// Return true if instruction I depends on any instruction between
+/// Start and End.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End) {
+ for (auto *U : I->users()) {
+ for (auto It = Start; It != End; ++It)
+ if (U == It->first)
+ return true;
+ }
+ return false;
+}
+
+bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
+ // We now need to check for equivalence of the use graph of each root with
+ // that of the primary induction variable (excluding the roots). Our goal
+ // here is not to solve the full graph isomorphism problem, but rather to
+ // catch common cases without a lot of work. As a result, we will assume
+ // that the relative order of the instructions in each unrolled iteration
+ // is the same (although we will not make an assumption about how the
+ // different iterations are intermixed). Note that while the order must be
+ // the same, the instructions may not be in the same basic block.
+
+ // An array of just the possible reductions for this scale factor. When we
+ // collect the set of all users of some root instructions, these reduction
+ // instructions are treated as 'final' (their uses are not considered).
+ // This is important because we don't want the root use set to search down
+ // the reduction chain.
+ SmallInstructionSet PossibleRedSet;
+ SmallInstructionSet PossibleRedLastSet;
+ SmallInstructionSet PossibleRedPHISet;
+ Reductions.restrictToScale(Scale, PossibleRedSet,
+ PossibleRedPHISet, PossibleRedLastSet);
+
+ // Populate "Uses" with where each instruction is used.
+ if (!collectUsedInstructions(PossibleRedSet))
return false;
- bool AllSame = true;
- for (unsigned i = 1; i < Scale-1; ++i)
- if (Roots[i].size() != Roots[0].size()) {
- AllSame = false;
- break;
+
+ // Make sure we mark the reduction PHIs as used in all iterations.
+ for (auto *I : PossibleRedPHISet) {
+ Uses[I].set(IL_All);
+ }
+
+ // Make sure all instructions in the loop are in one and only one
+ // set.
+ for (auto &KV : Uses) {
+ if (KV.second.count() != 1) {
+ DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
+ << *KV.first << " (#uses=" << KV.second.count() << ")\n");
+ return false;
}
+ }
- if (!AllSame)
- return false;
+ DEBUG(
+ for (auto &KV : Uses) {
+ dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
+ }
+ );
+
+ for (unsigned Iter = 1; Iter < Scale; ++Iter) {
+ // In addition to regular aliasing information, we need to look for
+ // instructions from later (future) iterations that have side effects
+ // preventing us from reordering them past other instructions with side
+ // effects.
+ bool FutureSideEffects = false;
+ AliasSetTracker AST(*AA);
+ // The map between instructions in f(%iv.(i+1)) and f(%iv).
+ DenseMap<Value *, Value *> BaseMap;
+
+ // Compare iteration Iter to the base.
+ SmallInstructionSet Visited;
+ auto BaseIt = nextInstr(0, Uses, Visited);
+ auto RootIt = nextInstr(Iter, Uses, Visited);
+ auto LastRootIt = Uses.begin();
+
+ while (BaseIt != Uses.end() && RootIt != Uses.end()) {
+ Instruction *BaseInst = BaseIt->first;
+ Instruction *RootInst = RootIt->first;
+
+ // Skip over the IV or root instructions; only match their users.
+ bool Continue = false;
+ if (isBaseInst(BaseInst)) {
+ Visited.insert(BaseInst);
+ BaseIt = nextInstr(0, Uses, Visited);
+ Continue = true;
+ }
+ if (isRootInst(RootInst)) {
+ LastRootIt = RootIt;
+ Visited.insert(RootInst);
+ RootIt = nextInstr(Iter, Uses, Visited);
+ Continue = true;
+ }
+ if (Continue) continue;
+
+ if (!BaseInst->isSameOperationAs(RootInst)) {
+        // Last chance saloon. We don't try to solve the full isomorphism
+        // problem, but we do try to catch the case where two instructions
+        // *of different types* are the wrong way round. We won't be able to
+ // efficiently tell, given two ADD instructions, which way around we
+ // should match them, but given an ADD and a SUB, we can at least infer
+ // which one is which.
+ //
+ // This should allow us to deal with a greater subset of the isomorphism
+ // problem. It does however change a linear algorithm into a quadratic
+ // one, so limit the number of probes we do.
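+        //
+        // E.g. (illustrative): if the base iteration reads "add; sub" but
+        // this iteration reads "sub; add", probing forward finds the
+        // matching instruction rather than failing at the first mismatch.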
+ auto TryIt = RootIt;
+ unsigned N = NumToleratedFailedMatches;
+ while (TryIt != Uses.end() &&
+ !BaseInst->isSameOperationAs(TryIt->first) &&
+ N--) {
+ ++TryIt;
+ TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+ }
+
+ if (TryIt == Uses.end() || TryIt == RootIt ||
+ instrDependsOn(TryIt->first, RootIt, TryIt)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << "\n");
+ return false;
+ }
+
+ RootIt = TryIt;
+ RootInst = TryIt->first;
+ }
+
+ // All instructions between the last root and this root
+ // may belong to some other iteration. If they belong to a
+ // future iteration, then they're dangerous to alias with.
+ //
+ // Note that because we allow a limited amount of flexibility in the order
+      // that we visit nodes, LastRootIt might be *after* RootIt, in which
+ // case we've already checked this set of instructions so we shouldn't
+ // do anything.
+ for (; LastRootIt < RootIt; ++LastRootIt) {
+ Instruction *I = LastRootIt->first;
+ if (LastRootIt->second.find_first() < (int)Iter)
+ continue;
+ if (I->mayWriteToMemory())
+ AST.add(I);
+ // Note: This is specifically guarded by a check on isa<PHINode>,
+ // which while a valid (somewhat arbitrary) micro-optimization, is
+ // needed because otherwise isSafeToSpeculativelyExecute returns
+ // false on PHI nodes.
+ if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
+ !isSafeToSpeculativelyExecute(I))
+ // Intervening instructions cause side effects.
+ FutureSideEffects = true;
+ }
+
+ // Make sure that this instruction, which is in the use set of this
+ // root instruction, does not also belong to the base set or the set of
+ // some other root instruction.
+ if (RootIt->second.count() > 1) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (prev. case overlap)\n");
+ return false;
+ }
+
+ // Make sure that we don't alias with any instruction in the alias set
+ // tracker. If we do, then we depend on a future iteration, and we
+ // can't reroll.
+ if (RootInst->mayReadFromMemory())
+ for (auto &K : AST) {
+ if (K.aliasesUnknownInst(RootInst, *AA)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (depends on future store)\n");
+ return false;
+ }
+ }
+
+      // If we've passed an instruction from a future iteration that may have
+ // side effects, and this instruction might also, then we can't reorder
+ // them, and this matching fails. As an exception, we allow the alias
+ // set tracker to handle regular (simple) load/store dependencies.
+ if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+ !isSafeToSpeculativelyExecute(BaseInst)) ||
+ (!isSimpleLoadStore(RootInst) &&
+ !isSafeToSpeculativelyExecute(RootInst)))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst <<
+ " (side effects prevent reordering)\n");
+ return false;
+ }
+
+ // For instructions that are part of a reduction, if the operation is
+ // associative, then don't bother matching the operands (because we
+ // already know that the instructions are isomorphic, and the order
+ // within the iteration does not matter). For non-associative reductions,
+ // we do need to match the operands, because we need to reject
+ // out-of-order instructions within an iteration!
+ // For example (assume floating-point addition), we need to reject this:
+ // x += a[i]; x += b[i];
+ // x += a[i+1]; x += b[i+1];
+ // x += b[i+2]; x += a[i+2];
+ bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
+
+ if (!(InReduction && BaseInst->isAssociative())) {
+ bool Swapped = false, SomeOpMatched = false;
+ for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
+ Value *Op2 = RootInst->getOperand(j);
+
+ // If this is part of a reduction (and the operation is not
+          // associative), then we match all operands, but not those that are
+ // part of the reduction.
+ if (InReduction)
+ if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
+ if (Reductions.isPairInSame(RootInst, Op2I))
+ continue;
+
+ DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
+ if (BMI != BaseMap.end()) {
+ Op2 = BMI->second;
+ } else {
+ for (auto &DRS : RootSets) {
+ if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
+ Op2 = DRS.BaseInst;
+ break;
+ }
+ }
+ }
+
+ if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
+ if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
+ BaseInst->getOperand(!j) == Op2) {
+ Swapped = true;
+ } else {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
+ << " vs. " << *RootInst << " (operand " << j << ")\n");
+ return false;
+ }
+ }
+
+ SomeOpMatched = true;
+ }
+ }
+
+ if ((!PossibleRedLastSet.count(BaseInst) &&
+ hasUsesOutsideLoop(BaseInst, L)) ||
+ (!PossibleRedLastSet.count(RootInst) &&
+ hasUsesOutsideLoop(RootInst, L))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (uses outside loop)\n");
+ return false;
+ }
+
+ Reductions.recordPair(BaseInst, RootInst, Iter);
+ BaseMap.insert(std::make_pair(RootInst, BaseInst));
+
+ LastRootIt = RootIt;
+ Visited.insert(BaseInst);
+ Visited.insert(RootInst);
+ BaseIt = nextInstr(0, Uses, Visited);
+ RootIt = nextInstr(Iter, Uses, Visited);
+ }
+    assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
+           "Mismatched set sizes!");
+ }
+
+ DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
+ *IV << "\n");
return true;
}
+void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
+ BasicBlock *Header = L->getHeader();
+ // Remove instructions associated with non-base iterations.
+ for (BasicBlock::reverse_iterator J = Header->rbegin();
+ J != Header->rend();) {
+ unsigned I = Uses[&*J].find_first();
+ if (I > 0 && I < IL_All) {
+ Instruction *D = &*J;
+ DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
+ D->eraseFromParent();
+ continue;
+ }
+
+ ++J;
+ }
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+
+ // We need to create a new induction variable for each different BaseInst.
+ for (auto &DRS : RootSets) {
+ // Insert the new induction variable.
+ const SCEVAddRecExpr *RealIVSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
+ const SCEV *Start = RealIVSCEV->getStart();
+ const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
+ (SE->getAddRecExpr(Start,
+ SE->getConstant(RealIVSCEV->getType(), 1),
+ L, SCEV::FlagAnyWrap));
+ { // Limit the lifetime of SCEVExpander.
+ SCEVExpander Expander(*SE, DL, "reroll");
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
+ for (auto &KV : Uses) {
+ if (KV.second.find_first() == 0)
+ KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
+ }
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ // FIXME: Why do we need this check?
+ if (Uses[BI].find_first() == IL_All) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, Parent);
+
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
+ }
+
+ Value *Cond =
+ new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
+ }
+ }
+ }
+ }
+
+ SimplifyInstructionsInBlock(Header, TLI);
+ DeleteDeadPHIs(Header, TLI);
+}
+
// Validate the selected reductions. All iterations must have an isomorphic
// part of the reduction chain and, for non-associative reductions, the chain
// entries must appear in order.
@@ -711,8 +1387,9 @@ void LoopReroll::ReductionTracker::replaceSelected() {
// Replace users with the new end-of-chain value.
SmallInstructionVector Users;
- for (User *U : PossibleReds[i].getReducedValue()->users())
+ for (User *U : PossibleReds[i].getReducedValue()->users()) {
Users.push_back(cast<Instruction>(U));
+ }
for (SmallInstructionVector::iterator J = Users.begin(),
JE = Users.end(); J != JE; ++J)
@@ -767,359 +1444,23 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
- uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
- getValue()->getZExtValue();
- // The collection of loop increment instructions.
- SmallInstructionVector LoopIncs;
- uint64_t Scale = Inc;
-
- // The effective induction variable, IV, is normally also the real induction
- // variable. When we're dealing with a loop like:
- // for (int i = 0; i < 500; ++i)
- // x[3*i] = ...;
- // x[3*i+1] = ...;
- // x[3*i+2] = ...;
- // then the real IV is still i, but the effective IV is (3*i).
- Instruction *RealIV = IV;
- if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
- return false;
-
- assert(Scale <= MaxInc && "Scale is too large");
- assert(Scale > 1 && "Scale must be at least 2");
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
- // The set of increment instructions for each increment value.
- SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
- SmallInstructionSet AllRoots;
- if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
+ if (!DAGRoots.findRoots())
return false;
-
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
- *RealIV << "\n");
-
- // An array of just the possible reductions for this scale factor. When we
- // collect the set of all users of some root instructions, these reduction
- // instructions are treated as 'final' (their uses are not considered).
- // This is important because we don't want the root use set to search down
- // the reduction chain.
- SmallInstructionSet PossibleRedSet;
- SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
- Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
- PossibleRedLastSet);
-
- // We now need to check for equivalence of the use graph of each root with
- // that of the primary induction variable (excluding the roots). Our goal
- // here is not to solve the full graph isomorphism problem, but rather to
- // catch common cases without a lot of work. As a result, we will assume
- // that the relative order of the instructions in each unrolled iteration
- // is the same (although we will not make an assumption about how the
- // different iterations are intermixed). Note that while the order must be
- // the same, the instructions may not be in the same basic block.
- SmallInstructionSet Exclude(AllRoots);
- Exclude.insert(LoopIncs.begin(), LoopIncs.end());
-
- DenseSet<Instruction *> BaseUseSet;
- collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
-
- DenseSet<Instruction *> AllRootUses;
- std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
-
- bool MatchFailed = false;
- for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
- DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
- collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
- PossibleRedSet, RootUseSet);
-
- DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
- " vs. iteration increment " << (i+1) <<
- " use set size: " << RootUseSet.size() << "\n");
-
- if (BaseUseSet.size() != RootUseSet.size()) {
- MatchFailed = true;
- break;
- }
-
- // In addition to regular aliasing information, we need to look for
- // instructions from later (future) iterations that have side effects
- // preventing us from reordering them past other instructions with side
- // effects.
- bool FutureSideEffects = false;
- AliasSetTracker AST(*AA);
-
- // The map between instructions in f(%iv.(i+1)) and f(%iv).
- DenseMap<Value *, Value *> BaseMap;
-
- assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
- for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
- JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
- if (cast<Instruction>(J1) == RealIV)
- continue;
- if (cast<Instruction>(J1) == IV)
- continue;
- if (!BaseUseSet.count(J1))
- continue;
- if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
- continue;
-
- while (J2 != JE && (!RootUseSet.count(J2) ||
- std::find(Roots[i].begin(), Roots[i].end(), J2) !=
- Roots[i].end())) {
- // As we iterate through the instructions, instructions that don't
- // belong to previous iterations (or the base case), must belong to
- // future iterations. We want to track the alias set of writes from
- // previous iterations.
- if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
- !AllRootUses.count(J2)) {
- if (J2->mayWriteToMemory())
- AST.add(J2);
-
- // Note: This is specifically guarded by a check on isa<PHINode>,
- // which while a valid (somewhat arbitrary) micro-optimization, is
- // needed because otherwise isSafeToSpeculativelyExecute returns
- // false on PHI nodes.
- if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
- FutureSideEffects = true;
- }
-
- ++J2;
- }
-
- if (!J1->isSameOperationAs(J2)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << "\n");
- MatchFailed = true;
- break;
- }
-
- // Make sure that this instruction, which is in the use set of this
- // root instruction, does not also belong to the base set or the set of
- // some previous root instruction.
- if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (prev. case overlap)\n");
- MatchFailed = true;
- break;
- }
-
- // Make sure that we don't alias with any instruction in the alias set
- // tracker. If we do, then we depend on a future iteration, and we
- // can't reroll.
- if (J2->mayReadFromMemory()) {
- for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
- K != KE && !MatchFailed; ++K) {
- if (K->aliasesUnknownInst(J2, *AA)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (depends on future store)\n");
- MatchFailed = true;
- break;
- }
- }
- }
-
- // If we've past an instruction from a future iteration that may have
- // side effects, and this instruction might also, then we can't reorder
- // them, and this matching fails. As an exception, we allow the alias
- // set tracker to handle regular (simple) load/store dependencies.
- if (FutureSideEffects &&
- ((!isSimpleLoadStore(J1) &&
- !isSafeToSpeculativelyExecute(J1, DL)) ||
- (!isSimpleLoadStore(J2) &&
- !isSafeToSpeculativelyExecute(J2, DL)))) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 <<
- " (side effects prevent reordering)\n");
- MatchFailed = true;
- break;
- }
-
- // For instructions that are part of a reduction, if the operation is
- // associative, then don't bother matching the operands (because we
- // already know that the instructions are isomorphic, and the order
- // within the iteration does not matter). For non-associative reductions,
- // we do need to match the operands, because we need to reject
- // out-of-order instructions within an iteration!
- // For example (assume floating-point addition), we need to reject this:
- // x += a[i]; x += b[i];
- // x += a[i+1]; x += b[i+1];
- // x += b[i+2]; x += a[i+2];
- bool InReduction = Reductions.isPairInSame(J1, J2);
-
- if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;
- for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
- Value *Op2 = J2->getOperand(j);
-
- // If this is part of a reduction (and the operation is not
- // associatve), then we match all operands, but not those that are
- // part of the reduction.
- if (InReduction)
- if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
- if (Reductions.isPairInSame(J2, Op2I))
- continue;
-
- DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
- if (BMI != BaseMap.end())
- Op2 = BMI->second;
- else if (std::find(Roots[i].begin(), Roots[i].end(),
- (Instruction*) Op2) != Roots[i].end())
- Op2 = IV;
-
- if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
- // If we've not already decided to swap the matched operands, and
- // we've not already matched our first operand (note that we could
- // have skipped matching the first operand because it is part of a
- // reduction above), and the instruction is commutative, then try
- // the swapped match.
- if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
- J1->getOperand(!j) == Op2) {
- Swapped = true;
- } else {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (operand " << j << ")\n");
- MatchFailed = true;
- break;
- }
- }
-
- SomeOpMatched = true;
- }
- }
-
- if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
- (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (uses outside loop)\n");
- MatchFailed = true;
- break;
- }
-
- if (!MatchFailed)
- BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
-
- AllRootUses.insert(J2);
- Reductions.recordPair(J1, J2, i+1);
-
- ++J2;
- }
- }
-
- if (MatchFailed)
- return false;
-
- DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
- *RealIV << "\n");
-
- DenseSet<Instruction *> LoopIncUseSet;
- collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
- SmallInstructionSet(), LoopIncUseSet);
- DEBUG(dbgs() << "LRR: Loop increment set size: " <<
- LoopIncUseSet.size() << "\n");
-
- // Make sure that all instructions in the loop have been included in some
- // use set.
- for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
- J != JE; ++J) {
- if (isa<DbgInfoIntrinsic>(J))
- continue;
- if (cast<Instruction>(J) == RealIV)
- continue;
- if (cast<Instruction>(J) == IV)
- continue;
- if (BaseUseSet.count(J) || AllRootUses.count(J) ||
- (LoopIncUseSet.count(J) && (J->isTerminator() ||
- isSafeToSpeculativelyExecute(J, DL))))
- continue;
-
- if (AllRoots.count(J))
- continue;
-
- if (Reductions.isSelectedPHI(J))
- continue;
-
- DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
- " unprocessed instruction found: " << *J << "\n");
- MatchFailed = true;
- break;
- }
-
- if (MatchFailed)
+ *IV << "\n");
+
+ if (!DAGRoots.validate(Reductions))
return false;
-
- DEBUG(dbgs() << "LRR: all instructions processed from " <<
- *RealIV << "\n");
-
if (!Reductions.validateSelected())
return false;
-
// At this point, we've validated the rerolling, and we're committed to
// making changes!
Reductions.replaceSelected();
+ DAGRoots.replace(IterCount);
- // Remove instructions associated with non-base iterations.
- for (BasicBlock::reverse_iterator J = Header->rbegin();
- J != Header->rend();) {
- if (AllRootUses.count(&*J)) {
- Instruction *D = &*J;
- DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
- D->eraseFromParent();
- continue;
- }
-
- ++J;
- }
-
- // Insert the new induction variable.
- const SCEV *Start = RealIVSCEV->getStart();
- if (Inc == 1)
- Start = SE->getMulExpr(Start,
- SE->getConstant(Start->getType(), Scale));
- const SCEVAddRecExpr *H =
- cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
- SE->getConstant(RealIVSCEV->getType(), 1),
- L, SCEV::FlagAnyWrap));
- { // Limit the lifetime of SCEVExpander.
- SCEVExpander Expander(*SE, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
-
- for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
- JE = BaseUseSet.end(); J != JE; ++J)
- (*J)->replaceUsesOfWith(IV, NewIV);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
- if (LoopIncUseSet.count(BI)) {
- const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
- if (Inc == 1)
- ICSCEV =
- SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- // Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV =
- SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
-
- Value *ICMinus1; // Iteration count minus 1
- if (isa<SCEVConstant>(ICMinus1SCEV)) {
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
- } else {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, this);
-
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
- Preheader->getTerminator());
- }
-
- Value *Cond =
- new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
- BI->setCondition(Cond);
-
- if (BI->getSuccessor(1) != Header)
- BI->swapSuccessors();
- }
- }
- }
-
- SimplifyInstructionsInBlock(Header, DL, TLI);
- DeleteDeadPHIs(Header, TLI);
++NumRerolledLoops;
return true;
}
@@ -1129,11 +1470,9 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
AA = &getAnalysis<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- TLI = &getAnalysis<TargetLibraryInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 267cb99..a675e12 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,8 +24,10 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -56,14 +58,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -75,14 +77,15 @@ namespace {
LoopInfo *LI;
const TargetTransformInfo *TTI;
AssumptionCache *AC;
+ DominatorTree *DT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
@@ -100,10 +103,13 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Save the loop metadata.
MDNode *LoopMD = L->getLoopID();
- LI = &getAnalysis<LoopInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
+ Function &F = *L->getHeader()->getParent();
+
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@@ -226,20 +232,17 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr: {
- Value *IVOpnd = nullptr;
- if (isa<ConstantInt>(I->getOperand(0)))
- IVOpnd = I->getOperand(1);
-
- if (isa<ConstantInt>(I->getOperand(1))) {
- if (IVOpnd)
- return false;
-
- IVOpnd = I->getOperand(0);
- }
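+      // Pick the first non-constant operand as the candidate IV operand;
+      // if both operands are constants, bail out below.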
+ Value *IVOpnd = !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1))
+ ? I->getOperand(1)
+ : nullptr;
+ if (!IVOpnd)
+ return false;
// If increment operand is used outside of the loop, this speculation
// could cause extra live range interference.
- if (MultiExitLoop && IVOpnd) {
+ if (MultiExitLoop) {
for (User *UseI : IVOpnd->users()) {
auto *UserInst = cast<Instruction>(UseI);
if (!L->contains(UserInst))
@@ -308,9 +311,8 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
// Nuke the Latch block.
assert(Latch->empty() && "unable to evacuate Latch");
LI->removeBlock(Latch);
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().eraseNode(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
Latch->eraseFromParent();
return true;
}
@@ -412,6 +414,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
@@ -442,8 +446,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
 // otherwise simplifiable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- // FIXME: Provide DL, TLI, DT, AC to SimplifyInstruction.
- Value *V = SimplifyInstruction(C);
+ // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
+ Value *V = SimplifyInstruction(C, DL);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -495,31 +499,31 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT) {
// Everything that was dominated by the old loop header is now dominated
// by the original loop preheader. Conceptually the header was merged
// into the preheader, even though we reuse the actual block as a new
// loop latch.
- DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
- DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
- DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
- assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
- assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
- DT.changeImmediateDominator(OrigHeader, OrigLatch);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
// thus is not a preheader anymore.
// Split the edge to form a real preheader.
- BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ BasicBlock *NewPH = SplitCriticalEdge(
+ OrigPreheader, NewHeader,
+ CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
@@ -538,7 +542,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (isa<IndirectBrInst>((*PI)->getTerminator()))
continue;
SplitLatchEdge |= L->getLoopLatch() == *PI;
- BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
+ BasicBlock *ExitSplit = SplitCriticalEdge(
+ *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -552,17 +557,15 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT) {
// Update OrigHeader to be dominated by the new header block.
- DT.changeImmediateDominator(NewHeader, OrigPreheader);
- DT.changeImmediateDominator(OrigHeader, OrigLatch);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
// Brute force incremental dominator tree update. Call
// findNearestCommonDominator on all CFG predecessors of each child of the
// original header.
- DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
bool Changed;
@@ -575,11 +578,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
pred_iterator PI = pred_begin(BB);
BasicBlock *NearestDom = *PI;
for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
- NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
// Remember if this changes the DomTree.
if (Node->getIDom()->getBlock() != NearestDom) {
- DT.changeImmediateDominator(BB, NearestDom);
+ DT->changeImmediateDominator(BB, NearestDom);
Changed = true;
}
}
@@ -597,7 +600,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// the OrigHeader block into OrigLatch. This will succeed if they are
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
- MergeBlockIntoPredecessor(OrigHeader, this);
+ MergeBlockIntoPredecessor(OrigHeader, DT, LI);
DEBUG(dbgs() << "LoopRotation: into "; L->dump());
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7b60373..584c7ae 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -28,7 +28,7 @@
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using // it after. In this
+// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
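To make the pre/post-increment distinction concrete, a hedged C++-level sketch of the loop shape this comment describes (not part of the patch):

// A[i] indexes with the value *before* the increment, so the store is a
// pre-increment user of the IV ({0,+,1}<%L>). The backedge compare is
// lowered against i+1 (%i.next, {1,+,1}<%L>), making it a post-increment
// user.
void zeroFill(int *A, int N) {
  for (int i = 0; i != N; ++i)
    A[i] = 0;
}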
@@ -68,6 +68,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -111,8 +112,6 @@ public:
/// a particular register.
SmallBitVector UsedByIndices;
- RegSortData() {}
-
void print(raw_ostream &OS) const;
void dump() const;
};
@@ -186,9 +185,8 @@ RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
// Update RegUses. The data structure is not optimized for this purpose;
// we must iterate through it and update each of the bit vectors.
- for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
- I != E; ++I) {
- SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ for (auto &Pair : RegUsesMap) {
+ SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
@@ -298,9 +296,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
// Look at add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE);
+ for (const SCEV *S : Add->operands())
+ DoInitialMatch(S, L, Good, Bad, SE);
return;
}
@@ -327,12 +324,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
- for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
- E = MyGood.end(); I != E; ++I)
- Good.push_back(SE.getMulExpr(NegOne, *I));
- for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
- E = MyBad.end(); I != E; ++I)
- Bad.push_back(SE.getMulExpr(NegOne, *I));
+ for (const SCEV *S : MyGood)
+ Good.push_back(SE.getMulExpr(NegOne, S));
+ for (const SCEV *S : MyBad)
+ Bad.push_back(SE.getMulExpr(NegOne, S));
return;
}
@@ -444,9 +439,8 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
- E = BaseRegs.end(); I != E; ++I)
- if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ for (const SCEV *BaseReg : BaseRegs)
+ if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
return true;
return false;
}
@@ -461,10 +455,9 @@ void Formula::print(raw_ostream &OS) const {
if (!First) OS << " + "; else First = false;
OS << BaseOffset;
}
- for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
- E = BaseRegs.end(); I != E; ++I) {
+ for (const SCEV *BaseReg : BaseRegs) {
if (!First) OS << " + "; else First = false;
- OS << "reg(" << **I << ')';
+ OS << "reg(" << *BaseReg << ')';
}
if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
@@ -577,10 +570,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
SmallVector<const SCEV *, 8> Ops;
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Op = getExactSDiv(*I, RHS, SE,
- IgnoreSignificantBits);
+ for (const SCEV *S : Add->operands()) {
+ const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
if (!Op) return nullptr;
Ops.push_back(Op);
}
@@ -594,9 +585,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
- for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
- I != E; ++I) {
- const SCEV *S = *I;
+ for (const SCEV *S : Mul->operands()) {
if (!Found)
if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {
@@ -766,9 +755,8 @@ static bool isHighCostExpansion(const SCEV *S,
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, Processed, SE))
+ for (const SCEV *S : Add->operands()) {
+ if (isHighCostExpansion(S, Processed, SE))
return true;
}
return false;
@@ -819,9 +807,9 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
if (!I || !isInstructionTriviallyDead(I))
continue;
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = nullptr;
+ for (Use &O : I->operands())
+ if (Instruction *U = dyn_cast<Instruction>(O)) {
+ O = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -1002,9 +990,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
if (isLoser())
return;
}
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I) {
- const SCEV *BaseReg = *I;
+ for (const SCEV *BaseReg : F.BaseRegs) {
if (VisitedRegs.count(BaseReg)) {
Lose();
return;
@@ -1027,9 +1013,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
- for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
- E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ for (int64_t O : Offsets) {
+ int64_t Offset = (uint64_t)O + F.BaseOffset;
if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
@@ -1152,10 +1137,9 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", OperandValToReplace=";
OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
- for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
- E = PostIncLoops.end(); I != E; ++I) {
+ for (const Loop *PIL : PostIncLoops) {
OS << ", PostIncLoop=";
- (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1301,9 +1285,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
- for (SmallVectorImpl<const SCEV *>::const_iterator I =
- F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
- assert(!(*I)->isZero() && "Zero allocated in a base register!");
+ for (const SCEV *BaseReg : F.BaseRegs)
+ assert(!BaseReg->isZero() && "Zero allocated in a base register!");
#endif
// Add the formula to the list.
@@ -1327,11 +1310,9 @@ void LSRUse::DeleteFormula(Formula &F) {
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
- SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
Regs.clear();
- for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
- E = Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : Formulae) {
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
@@ -1357,11 +1338,11 @@ void LSRUse::print(raw_ostream &OS) const {
}
OS << ", Offsets={";
- for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
- E = Offsets.end(); I != E; ++I) {
- OS << *I;
- if (std::next(I) != E)
- OS << ',';
+ bool NeedComma = false;
+ for (int64_t O : Offsets) {
+ if (NeedComma) OS << ',';
+ OS << O;
+ NeedComma = true;
}
OS << '}';
@@ -1386,9 +1367,6 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
- // Otherwise, just guess that reg+reg addressing is legal.
- //return ;
-
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
@@ -1928,12 +1906,12 @@ void LSRInstance::OptimizeShadowIV() {
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
- for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
- if (UI->getUser() == Cond) {
+ for (IVStrideUse &U : IU)
+ if (U.getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
- CondUse = UI;
+ CondUse = &U;
return true;
}
return false;
@@ -2108,8 +2086,7 @@ LSRInstance::OptimizeLoopTermCond() {
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
+ for (BasicBlock *ExitingBlock : ExitingBlocks) {
// Get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
@@ -2352,9 +2329,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
// Scan through this use's formulae.
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : LU.Formulae) {
// Check to see if this formula has the same registers and symbols
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
@@ -2382,8 +2357,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
// Collect interesting types and strides.
SmallVector<const SCEV *, 4> Worklist;
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- const SCEV *Expr = IU.getExpr(*UI);
+ for (const IVStrideUse &U : IU) {
+ const SCEV *Expr = IU.getExpr(U);
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
@@ -2586,25 +2561,23 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
- for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
- I != E; ++I) {
-
- if (I->IncExpr->isZero())
+ for (const IVInc &Inc : Chain) {
+ if (Inc.IncExpr->isZero())
continue;
// Incrementing by zero or some constant is neutral. We assume constants can
// be folded into an addressing mode or an add's immediate operand.
- if (isa<SCEVConstant>(I->IncExpr)) {
+ if (isa<SCEVConstant>(Inc.IncExpr)) {
++NumConstIncrements;
continue;
}
- if (I->IncExpr == LastIncExpr)
+ if (Inc.IncExpr == LastIncExpr)
++NumReusedIncrements;
else
++NumVarIncrements;
- LastIncExpr = I->IncExpr;
+ LastIncExpr = Inc.IncExpr;
}
// An IV chain with a single increment is handled by LSR's postinc
// uses. However, a chain with multiple increments requires keeping the IV's
@@ -2839,12 +2812,11 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
- for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
- I != E; ++I) {
- DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
- User::op_iterator UseI =
- std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
- assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ for (const IVInc &Inc : Chain) {
+ DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
+ auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
+ Inc.IVOperand);
+ assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
}
}
@@ -2907,20 +2879,18 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = nullptr;
- for (IVChain::const_iterator IncI = Chain.begin(),
- IncE = Chain.end(); IncI != IncE; ++IncI) {
-
- Instruction *InsertPt = IncI->UserInst;
+ for (const IVInc &Inc : Chain) {
+ Instruction *InsertPt = Inc.UserInst;
if (isa<PHINode>(InsertPt))
InsertPt = L->getLoopLatch()->getTerminator();
// IVOper will replace the current IV User's operand. IVSrc is the IV
// value currently held in a register.
Value *IVOper = IVSrc;
- if (!IncI->IncExpr->isZero()) {
+ if (!Inc.IncExpr->isZero()) {
// IncExpr was the result of subtraction of two narrow values, so must
// be signed.
- const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
@@ -2933,22 +2903,21 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
// If an IV increment can't be folded, use it as the next IV value.
- if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TTI)) {
+ if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = nullptr;
}
}
- Type *OperTy = IncI->IVOperand->getType();
+ Type *OperTy = Inc.IVOperand->getType();
if (IVTy != OperTy) {
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
"cannot extend a chained IV");
IRBuilder<> Builder(InsertPt);
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
}
- IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
- DeadInsts.push_back(IncI->IVOperand);
+ Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
+ DeadInsts.push_back(Inc.IVOperand);
}
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
@@ -2976,11 +2945,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- Instruction *UserInst = UI->getUser();
+ for (const IVStrideUse &U : IU) {
+ Instruction *UserInst = U.getUser();
// Skip IV users that are part of profitable IV Chains.
User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
- UI->getOperandValToReplace());
+ U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI))
continue;
@@ -2988,8 +2957,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UserInst;
- LF.OperandValToReplace = UI->getOperandValToReplace();
- LF.PostIncLoops = UI->getPostIncLoops();
+ LF.OperandValToReplace = U.getOperandValToReplace();
+ LF.PostIncLoops = U.getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
Type *AccessTy = nullptr;
@@ -2998,7 +2967,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
AccessTy = getAccessType(LF.UserInst);
}
- const SCEV *S = IU.getExpr(*UI);
+ const SCEV *S = IU.getExpr(U);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -3090,9 +3059,8 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I)
- RegUses.CountRegister(*I, LUIdx);
+ for (const SCEV *BaseReg : F.BaseRegs)
+ RegUses.CountRegister(BaseReg, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
@@ -3213,9 +3181,8 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ for (const SCEV *S : Add->operands()) {
+ const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
@@ -3373,9 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
- for (SmallVectorImpl<const SCEV *>::const_iterator
- I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
- const SCEV *BaseReg = *I;
+ for (const SCEV *BaseReg : Base.BaseRegs) {
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
!SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
@@ -3432,15 +3397,13 @@ void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end();
- I != E; ++I) {
+ for (int64_t Offset : Worklist) {
Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+ if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
if (IsScaledReg) {
@@ -3506,10 +3469,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
- int64_t Factor = *I;
-
+ for (int64_t Factor : Factors) {
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
@@ -3593,10 +3553,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
 assert(Base.Scale == 0 && "Unscale did not do its job!");
// Check each interesting stride.
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
- int64_t Factor = *I;
-
+ for (int64_t Factor : Factors) {
Base.Scale = Factor;
Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
@@ -3652,16 +3609,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
- for (SmallSetVector<Type *, 4>::const_iterator
- I = Types.begin(), E = Types.end(); I != E; ++I) {
- Type *SrcTy = *I;
+ for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
- if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
- for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J)
- *J = SE.getAnyExtendExpr(*J, SrcTy);
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+ for (const SCEV *&BaseReg : F.BaseRegs)
+ BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
@@ -3708,20 +3662,17 @@ void WorkItem::dump() const {
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
- typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
- RegMapTy Map;
+ DenseMap<const SCEV *, ImmMapTy> Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
- for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
- I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Use : RegUses) {
+ const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
int64_t Imm = ExtractImmediate(Reg, SE);
- std::pair<RegMapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Reg, ImmMapTy()));
+ auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
- Pair.first->second.insert(std::make_pair(Imm, *I));
- UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ Pair.first->second.insert(std::make_pair(Imm, Use));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
}
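A worked illustration of the grouping above, with hypothetical registers: if RegUses contains {A, A+4, A+8, B}, ExtractImmediate strips the constant offsets, so the map ends up as

// Map[A] = {0 -> A, 4 -> A+4, 8 -> A+8}  // several offsets: reuse candidates
// Map[B] = {0 -> B}                      // single offset: skipped just below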
// Now examine each set of registers with the same base value. Build up
@@ -3729,9 +3680,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
- E = Sequence.end(); I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : Sequence) {
const ImmMapTy &Imms = Map.find(Reg)->second;
// It's not worthwhile looking for reuse if there's only one offset.
@@ -3739,9 +3688,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
- for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
- J != JE; ++J)
- dbgs() << ' ' << J->first;
+ for (const auto &Entry : Imms)
+ dbgs() << ' ' << Entry.first;
dbgs() << '\n');
// Examine each offset.
@@ -3786,9 +3734,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
UniqueItems.clear();
// Now iterate through the worklist and add new formulae.
- for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
- E = WorkItems.end(); I != E; ++I) {
- const WorkItem &WI = *I;
+ for (const WorkItem &WI : WorkItems) {
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
@@ -3827,7 +3773,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (C->getValue()->isNegative() !=
(NewF.BaseOffset < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(abs64(NewF.BaseOffset)))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
@@ -3853,12 +3799,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// If the new formula has a constant in a register, and adding the
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
- for (SmallVectorImpl<const SCEV *>::const_iterator
- J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
- J != JE; ++J)
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ for (const SCEV *NewReg : NewF.BaseRegs)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- abs64(NewF.BaseOffset)) &&
+ std::abs(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
@@ -3959,9 +3903,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
}
else {
SmallVector<const SCEV *, 4> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
+ for (const SCEV *Reg : F.BaseRegs) {
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
@@ -4023,9 +3965,8 @@ static const size_t ComplexityLimit = UINT16_MAX;
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- size_t FSize = I->Formulae.size();
+ for (const LSRUse &LU : Uses) {
+ size_t FSize = LU.Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
@@ -4116,9 +4057,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : LU.Formulae) {
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
@@ -4135,9 +4074,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
+ for (LSRFixup &Fixup : Fixups) {
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.BaseOffset;
@@ -4218,9 +4155,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// to be a good reuse register candidate.
const SCEV *Best = nullptr;
unsigned BestNum = 0;
- for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
- I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : RegUses) {
if (Taken.count(Reg))
continue;
if (!Best)
@@ -4308,17 +4243,12 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
-
+ for (const Formula &F : LU.Formulae) {
// Ignore formulae which may not be ideal in terms of register reuse of
// ReqRegs. The formula should use all required registers before
// introducing new ones.
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
- for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
- JE = ReqRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
+ for (const SCEV *Reg : ReqRegs) {
if ((F.ScaledReg && F.ScaledReg == Reg) ||
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
@@ -4426,9 +4356,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
bool AllDominate = true;
Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
- for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
- E = Inputs.end(); I != E; ++I) {
- Instruction *Inst = *I;
+ for (Instruction *Inst : Inputs) {
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
@@ -4475,9 +4403,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
}
// The expansion must also be dominated by the increment positions of any
 // loops for which it is using post-inc mode.
- for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
- E = LF.PostIncLoops.end(); I != E; ++I) {
- const Loop *PIL = *I;
+ for (const Loop *PIL : LF.PostIncLoops) {
if (PIL == L) continue;
// Be dominated by the loop exit.
@@ -4552,9 +4478,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SmallVector<const SCEV *, 8> Ops;
// Expand the BaseRegs portion.
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : F.BaseRegs) {
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
@@ -4728,12 +4652,14 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// Split the critical edge.
BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
- NewBB = SplitCriticalEdge(BB, Parent, P,
- /*MergeIdenticalEdges=*/true,
- /*DontDeleteUselessPhis=*/true);
+ NewBB = SplitCriticalEdge(BB, Parent,
+ CriticalEdgeSplittingOptions(&DT, &LI)
+ .setMergeIdenticalEdges()
+ .setDontDeleteUselessPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
+ /*AliasAnalysis*/ nullptr, &DT, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -4823,7 +4749,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
+ SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -4832,25 +4759,20 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Mark phi nodes that terminate chains so the expander tries to reuse them.
- for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
- ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
+ for (const IVChain &Chain : IVChainVec) {
+ if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
Rewriter.setChainedPhi(PN);
}
// Expand the new value definitions and update the users.
- for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- const LSRFixup &Fixup = *I;
-
+ for (const LSRFixup &Fixup : Fixups) {
Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
- for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
- ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ for (const IVChain &Chain : IVChainVec) {
+ GenerateIVChain(Chain, Rewriter, DeadInsts);
Changed = true;
}
// Clean up after ourselves. This must be done before deleting any
@@ -4863,9 +4785,10 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
- LI(P->getAnalysis<LoopInfo>()),
- TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(nullptr) {
+ LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
+ TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent())),
+ L(L), Changed(false), IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4876,10 +4799,10 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
// If there's too much analysis to be done, bail early. We won't be able to
// model the problem anyway.
unsigned NumUsers = 0;
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ for (const IVStrideUse &U : IU) {
if (++NumUsers > MaxIVUsers) {
- DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
- << "\n");
+ (void)U;
+ DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
return;
}
}
@@ -4948,14 +4871,10 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
- I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end();
- J != JE; ++J)
+ for (const LSRUse &LU : Uses) {
+ for (const Formula &F : LU.Formulae)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- *J) && "Illegal formula generated!");
+ F) && "Illegal formula generated!");
 }
#endif
@@ -4969,44 +4888,38 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ for (int64_t Factor : Factors) {
if (!First) OS << ", ";
First = false;
- OS << '*' << *I;
+ OS << '*' << Factor;
}
- for (SmallSetVector<Type *, 4>::const_iterator
- I = Types.begin(), E = Types.end(); I != E; ++I) {
+ for (Type *Ty : Types) {
if (!First) OS << ", ";
First = false;
- OS << '(' << **I << ')';
+ OS << '(' << *Ty << ')';
}
OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
- for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
+ for (const LSRFixup &LF : Fixups) {
 OS << " ";
- I->print(OS);
+ LF.print(OS);
OS << '\n';
}
}
void LSRInstance::print_uses(raw_ostream &OS) const {
OS << "LSR is examining the following uses:\n";
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
+ for (const LSRUse &LU : Uses) {
 OS << " ";
LU.print(OS);
OS << '\n';
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J) {
+ for (const Formula &F : LU.Formulae) {
OS << " ";
- J->print(OS);
+ F.print(OS);
OS << '\n';
}
}
@@ -5041,11 +4954,11 @@ private:
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
@@ -5064,8 +4977,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -5076,7 +4989,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
@@ -5092,13 +5005,15 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.replaceCongruentIVs(
L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
- &getAnalysis<TargetTransformInfo>());
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent()));
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fef5210..ccafd10 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -13,15 +13,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
@@ -38,6 +41,22 @@ static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
+static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
+ "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
+ cl::desc("Don't allow loop unrolling to simulate more than this number of"
+ "iterations when checking full unroll profitability"));
+
+static cl::opt<unsigned> UnrollMinPercentOfOptimized(
+ "unroll-percent-of-optimized-for-complete-unroll", cl::init(20), cl::Hidden,
+ cl::desc("If complete unrolling could trigger further optimizations, and, "
+ "by that, remove the given percent of instructions, perform the "
+ "complete unroll even if it's beyond the threshold"));
+
+static cl::opt<unsigned> UnrollAbsoluteThreshold(
+ "unroll-absolute-threshold", cl::init(2000), cl::Hidden,
+ cl::desc("Don't unroll if the unrolled size is bigger than this threshold,"
+ " even if we can remove big portion of instructions later."));
+
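Taken together, the three knobs form a two-tier limit. The committed check lives in canUnrollCompletely (declared further down but not shown in these hunks), so the following is an assumption-labeled sketch of the intended combination, not the actual implementation:

// Sketch only: assumed interplay of the unrolling thresholds.
static bool canUnrollSketch(uint64_t UnrolledSize, unsigned Threshold,
                            unsigned AbsoluteThreshold,
                            unsigned NumberOfOptimizedInstructions,
                            unsigned MinPercentOfOptimized) {
  if (UnrolledSize <= Threshold)
    return true; // Small enough: unroll unconditionally.
  // Past the soft threshold, unroll only if simulation shows that at least
  // MinPercentOfOptimized percent of the instructions would be optimized
  // away, and the result still fits under the hard absolute threshold.
  return UnrolledSize <= AbsoluteThreshold &&
         (uint64_t)NumberOfOptimizedInstructions * 100 >=
             (uint64_t)MinPercentOfOptimized * UnrolledSize;
}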
static cl::opt<unsigned>
UnrollCount("unroll-count", cl::init(0), cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
@@ -63,11 +82,16 @@ namespace {
static char ID; // Pass ID, replacement for typeid
LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentAbsoluteThreshold = UnrollAbsoluteThreshold;
+ CurrentMinPercentOfOptimized = UnrollMinPercentOfOptimized;
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+ UserAbsoluteThreshold = (UnrollAbsoluteThreshold.getNumOccurrences() > 0);
+ UserPercentOfOptimized =
+ (UnrollMinPercentOfOptimized.getNumOccurrences() > 0);
UserAllowPartial = (P != -1) ||
(UnrollAllowPartial.getNumOccurrences() > 0);
UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
@@ -91,10 +115,16 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
+ unsigned CurrentAbsoluteThreshold;
+ unsigned CurrentMinPercentOfOptimized;
bool CurrentAllowPartial;
bool CurrentRuntime;
bool UserCount; // CurrentCount is user-specified.
bool UserThreshold; // CurrentThreshold is user-specified.
+ bool UserAbsoluteThreshold; // CurrentAbsoluteThreshold is
+ // user-specified.
+ bool UserPercentOfOptimized; // CurrentMinPercentOfOptimized is
+ // user-specified.
bool UserAllowPartial; // CurrentAllowPartial is user-specified.
bool UserRuntime; // CurrentRuntime is user-specified.
@@ -105,16 +135,15 @@ namespace {
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
- AU.addRequired<FunctionTargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
@@ -124,9 +153,11 @@ namespace {
// Fill in the UnrollingPreferences parameter with values from the
// TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const FunctionTargetTransformInfo &FTTI,
+ void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
TargetTransformInfo::UnrollingPreferences &UP) {
UP.Threshold = CurrentThreshold;
+ UP.AbsoluteThreshold = CurrentAbsoluteThreshold;
+ UP.MinPercentOfOptimized = CurrentMinPercentOfOptimized;
UP.OptSizeThreshold = OptSizeUnrollThreshold;
UP.PartialThreshold = CurrentThreshold;
UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
@@ -134,7 +165,8 @@ namespace {
UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
- FTTI.getUnrollingPreferences(L, UP);
+ UP.AllowExpensiveTripCount = false;
+ TTI.getUnrollingPreferences(L, UP);
}
// Select and return an unroll count based on parameters from
@@ -153,7 +185,9 @@ namespace {
// unrolled loops respectively.
void selectThresholds(const Loop *L, bool HasPragma,
const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold) {
+ unsigned &Threshold, unsigned &PartialThreshold,
+ unsigned &AbsoluteThreshold,
+ unsigned &PercentOfOptimizedForCompleteUnroll) {
// Determine the current unrolling threshold. While this is
// normally set from UnrollThreshold, it is overridden to a
// smaller value if the current function is marked as
@@ -161,10 +195,15 @@ namespace {
// specified.
Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
+ AbsoluteThreshold = UserAbsoluteThreshold ? CurrentAbsoluteThreshold
+ : UP.AbsoluteThreshold;
+ PercentOfOptimizedForCompleteUnroll = UserPercentOfOptimized
+ ? CurrentMinPercentOfOptimized
+ : UP.MinPercentOfOptimized;
+
if (!UserThreshold &&
- L->getHeader()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize)) {
+ L->getHeader()->getParent()->hasFnAttribute(
+ Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
@@ -180,15 +219,18 @@ namespace {
std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
}
}
+ bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned AbsoluteThreshold, uint64_t UnrolledSize,
+ unsigned NumberOfOptimizedInstructions,
+ unsigned PercentOfOptimizedForCompleteUnroll);
};
}
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(FunctionTargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -203,6 +245,407 @@ Pass *llvm::createSimpleLoopUnrollPass() {
return llvm::createLoopUnrollPass(-1, -1, 0, 0);
}
+namespace {
+/// \brief SCEV expressions visitor used for finding expressions that would
+/// become constants if the loop L is unrolled.
+struct FindConstantPointers {
+ /// \brief Whether the expression is of the form ConstAddress+Constant.
+ bool IndexIsConstant;
+
+ /// \brief Used for filtering out SCEV expressions with two or more AddRec
+ /// subexpressions.
+ ///
+ /// We don't handle such expressions because unrolling one loop would
+ /// replace only one of those inductions with a constant, and consequently
+ /// the expression would remain non-constant.
+ bool HaveSeenAR;
+
+ /// \brief If the SCEV expression becomes ConstAddress+Constant, this value
+ /// holds ConstAddress. Otherwise, it's nullptr.
+ Value *BaseAddress;
+
+ /// \brief The loop we are trying to completely unroll.
+ const Loop *L;
+
+ ScalarEvolution &SE;
+
+ FindConstantPointers(const Loop *L, ScalarEvolution &SE)
+ : IndexIsConstant(true), HaveSeenAR(false), BaseAddress(nullptr),
+ L(L), SE(SE) {}
+
+ /// Examine the given expression S and figure out whether it can be part of
+ /// an expression that would become a constant after the loop is unrolled.
+ /// The routine sets IndexIsConstant and HaveSeenAR according to the analysis
+ /// results.
+ /// \returns true if we need to examine subexpressions, and false otherwise.
+ bool follow(const SCEV *S) {
+ if (const SCEVUnknown *SC = dyn_cast<SCEVUnknown>(S)) {
+ // We've reached a leaf node of the SCEV; it's most probably just a
+ // variable. If it's the only SCEV sub-expression, it might be the base
+ // address of an index expression. If we've already recorded a base
+ // address, give up on this SCEV - it's too complicated.
+ if (BaseAddress) {
+ IndexIsConstant = false;
+ return false;
+ }
+ BaseAddress = SC->getValue();
+ return false;
+ }
+ if (isa<SCEVConstant>(S))
+ return false;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If the current SCEV expression is AddRec, and its loop isn't the loop
+ // we are about to unroll, then we won't get a constant address after
+ // unrolling, and thus, won't be able to eliminate the load.
+ if (AR->getLoop() != L) {
+ IndexIsConstant = false;
+ return false;
+ }
+ // We don't handle multiple AddRecs here, so give up in this case.
+ if (HaveSeenAR) {
+ IndexIsConstant = false;
+ return false;
+ }
+ HaveSeenAR = true;
+ }
+
+ // Continue traversal.
+ return true;
+ }
+ bool isDone() const { return !IndexIsConstant; }
+};
+} // End anonymous namespace.
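Some hedged examples of how this visitor classifies address expressions, assuming a global array a and a loop L with induction variable i (illustrative only):

// &a[i]                 -> BaseAddress = a, one AddRec on L: the index
//                          becomes a constant on each unrolled iteration.
// &a[i + n] (n a free variable) -> two SCEVUnknowns (a and n): rejected,
//                          IndexIsConstant is cleared.
// &a[j] (j the IV of an enclosing loop) -> AddRec on a loop other than L:
//                          rejected, since unrolling L would not fold it.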
+
+namespace {
+/// \brief A cache of SCEV results used to optimize repeated queries to SCEV on
+/// the same set of instructions.
+///
+/// The primary cost this saves is the cost of checking the validity of a SCEV
+/// every time it is looked up. However, in some cases we can provide a reduced
+/// and especially useful model for an instruction based upon SCEV that is
+/// non-trivial to compute but more useful to clients.
+class SCEVCache {
+public:
+ /// \brief Struct to represent a GEP whose start and step are known fixed
+ /// offsets from a base address due to SCEV's analysis.
+ struct GEPDescriptor {
+ Value *BaseAddr = nullptr;
+ unsigned Start = 0;
+ unsigned Step = 0;
+ };
+
+ Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP);
+
+ SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {}
+
+private:
+ const Loop &L;
+ ScalarEvolution &SE;
+
+ SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors;
+};
+} // End anonymous namespace.
+
+/// \brief Get a simplified descriptor for a GEP instruction.
+///
+/// Where possible, this produces a simplified descriptor for a GEP instruction
+/// using SCEV analysis of the containing loop. If this isn't possible, it
+/// returns an empty optional.
+///
+/// The model is a base address, an initial offset, and a per-iteration step.
+/// This fits very common patterns of GEPs inside loops and is something we can
+/// use to simulate the behavior of a particular iteration of a loop.
+///
+/// This is a cached interface. The first call may do non-trivial work to
+/// compute the result, but all subsequent calls will return a fast answer
+/// based on a cached result. This includes caching negative results.
+Optional<SCEVCache::GEPDescriptor>
+SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) {
+ decltype(GEPDescriptors)::iterator It;
+ bool Inserted;
+
+ std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}});
+
+ if (!Inserted) {
+ if (!It->second.BaseAddr)
+ return None;
+
+ return It->second;
+ }
+
+ // We've inserted a new record into the cache, so compute the GEP descriptor
+ // if possible.
+ Value *V = cast<Value>(GEP);
+ if (!SE.isSCEVable(V->getType()))
+ return None;
+ const SCEV *S = SE.getSCEV(V);
+
+ // FIXME: It'd be nice if the worklist and set used by the
+ // SCEVTraversal could be re-used between loop iterations, but the
+ // interface doesn't support that. There is no way to clear the visited
+ // sets between uses.
+ FindConstantPointers Visitor(&L, SE);
+ SCEVTraversal<FindConstantPointers> T(Visitor);
+
+ // Try to find (BaseAddress+Step+Offset) tuple.
+ // If succeeded, save it to the cache - it might help in folding
+ // loads.
+ T.visitAll(S);
+ if (!Visitor.IndexIsConstant || !Visitor.BaseAddress)
+ return None;
+
+ const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress);
+ if (BaseAddrSE->getType() != S->getType())
+ return None;
+ const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE);
+
+ if (!AR)
+ return None;
+
+ const SCEVConstant *StepSE =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
+ const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart());
+ if (!StepSE || !StartSE)
+ return None;
+
+ // Check and skip caching if doing so would require lots of bits to
+ // avoid overflow.
+ APInt Start = StartSE->getValue()->getValue();
+ APInt Step = StepSE->getValue()->getValue();
+ if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32)
+ return None;
+
+ // We found a cacheable SCEV model for the GEP.
+ It->second.BaseAddr = Visitor.BaseAddress;
+ It->second.Start = Start.getLimitedValue();
+ It->second.Step = Step.getLimitedValue();
+ return It->second;
+}
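A worked (hypothetical) example of the descriptor model: for a 4-byte element access a[2*i + 3] inside L, the address SCEV is a + 12 + 8*i, so the cached descriptor is BaseAddr = a, Start = 12, Step = 8, and the byte offset on simulated iteration It is Start + Step * It. A query then looks like this sketch, where SC, GEP and Iteration stand for a cache instance, a GEP instruction and the simulated iteration number:

if (auto D = SC.getGEPDescriptor(GEP)) {
  uint64_t ByteOffset = (uint64_t)D->Start + (uint64_t)D->Step * Iteration;
  // ... fold the load at ByteOffset from D->BaseAddr, as visitLoad does below.
}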
+
+namespace {
+// This class is used to get an estimate of the optimization effects that we
+// could get from complete loop unrolling. It comes from the fact that some
+// loads might be replaced with concrete constant values and that could trigger
+// a chain of instruction simplifications.
+//
+// E.g. we might have:
+// int a[] = {0, 1, 0};
+// v = 0;
+// for (i = 0; i < 3; i++)
+// v += b[i]*a[i];
+// If we completely unroll the loop, we would get:
+// v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2]
+// Which then will be simplified to:
+// v = b[0]* 0 + b[1]* 1 + b[2]* 0
+// And finally:
+// v = b[1]
+class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
+ typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
+ friend class InstVisitor<UnrolledInstAnalyzer, bool>;
+
+public:
+ UnrolledInstAnalyzer(unsigned Iteration,
+ DenseMap<Value *, Constant *> &SimplifiedValues,
+ SCEVCache &SC)
+ : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {}
+
+ // Allow access to the initial visit method.
+ using Base::visit;
+
+private:
+ /// \brief The number of the iteration currently being simulated.
+ ///
+ /// If an expression is ConstAddress+Constant, then the Constant is
+ /// Start + Iteration*Step, where Start and Step can be obtained from the
+ /// SCEVCache.
+ unsigned Iteration;
+
+ // While we walk the loop instructions, we build up and maintain a mapping
+ // of simplified values specific to this iteration. The idea is to propagate
+ // any special information we have about loads that can be replaced with
+ // constants after complete unrolling, and account for likely simplifications
+ // post-unrolling.
+ DenseMap<Value *, Constant *> &SimplifiedValues;
+
+ // We use a cache to wrap all our SCEV queries.
+ SCEVCache &SC;
+
+ /// Base case for the instruction visitor.
+ bool visitInstruction(Instruction &I) { return false; }
+
+ /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
+
+ /// Try to simplify binary operator I.
+ ///
+ /// TODO: It's probably worth hoisting the code for estimating the
+ /// simplification effects into a separate class, since we already have very
+ /// similar code in InlineCost.
+ bool visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+ SimplifiedValues[&I] = C;
+
+ return SimpleV;
+ }
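+  // E.g. if SimplifiedValues already maps %x to i32 2 and %y to i32 3,
+  // visiting
+  //   %z = add i32 %x, %y
+  // records %z -> i32 5, so later users of %z can fold as well.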
+
+ /// Try to fold load I.
+ bool visitLoad(LoadInst &I) {
+ Value *AddrOp = I.getPointerOperand();
+ if (!isa<Constant>(AddrOp))
+ if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp))
+ AddrOp = SimplifiedAddrOp;
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp);
+ if (!GEP)
+ return false;
+ auto OptionalGEPDesc = SC.getGEPDescriptor(GEP);
+ if (!OptionalGEPDesc)
+ return false;
+
+ auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr);
+ // We're only interested in loads that can be completely folded to a
+ // constant.
+ if (!GV || !GV->hasInitializer())
+ return false;
+
+ ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(GV->getInitializer());
+ if (!CDS)
+ return false;
+
+ // This calculation should never overflow because we bound Iteration quite
+ // low and both the start and step are 32-bit integers. We use signed
+    // integers so that UBSan will catch it if a bug sneaks into the code.
+ int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
+ int64_t Index = ((int64_t)OptionalGEPDesc->Start +
+ (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) /
+ ElemSize;
+ if (Index >= CDS->getNumElements()) {
+ // FIXME: For now we conservatively ignore out of bound accesses, but
+ // we're allowed to perform the optimization in this case.
+ return false;
+ }
+
+ Constant *CV = CDS->getElementAsConstant(Index);
+ assert(CV && "Constant expected.");
+ SimplifiedValues[&I] = CV;
+
+ return true;
+ }
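+  // E.g. given @a = constant [3 x i32] [i32 0, i32 1, i32 0] and a GEP
+  // descriptor {BaseAddr = @a, Start = 0, Step = 4}, iteration 1 computes
+  // Index = (0 + 4 * 1) / 4 = 1 and folds the load to i32 1.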
+};
+} // namespace
+
+namespace {
+struct EstimatedUnrollCost {
+ /// \brief Count the number of optimized instructions.
+ unsigned NumberOfOptimizedInstructions;
+
+ /// \brief Count the total number of instructions.
+ unsigned UnrolledLoopSize;
+};
+}
+
+/// \brief Figure out if the loop is worth full unrolling.
+///
+/// Complete loop unrolling can make some loads constant, and we need to know
+/// if that would expose any further optimization opportunities. This routine
+/// estimates this optimization effect. It assigns the number of instructions
+/// that could potentially be optimized away to NumberOfOptimizedInstructions,
+/// and the total number of instructions to UnrolledLoopSize (not counting
+/// blocks that won't be reached, if we were able to compute the condition).
+/// \returns None if we can't analyze the loop, or if we discovered that
+/// unrolling won't give anything. Otherwise, returns the estimated cost.
+Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ unsigned MaxUnrolledLoopSize) {
+ // We want to be able to scale offsets by the trip count and add more offsets
+ // to them without checking for overflows, and we already don't want to
+ // analyze *massive* trip counts, so we force the max to be reasonably small.
+ assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
+ "The unroll iterations max is too large!");
+
+  // Don't simulate loops with a big or unknown trip count.
+ if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
+ TripCount > UnrollMaxIterationsCountToAnalyze)
+ return None;
+
+ SmallSetVector<BasicBlock *, 16> BBWorklist;
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Use a cache to access SCEV expressions so that we don't pay the cost on
+ // each iteration. This cache is lazily self-populating.
+ SCEVCache SC(*L, SE);
+
+ unsigned NumberOfOptimizedInstructions = 0;
+ unsigned UnrolledLoopSize = 0;
+
+  // Simulate execution of each iteration of the loop, counting the
+  // instructions that would be simplified.
+  // Since the same load can take different values on different iterations,
+  // we literally have to go through all of the loop's iterations.
+ for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+ SimplifiedValues.clear();
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC);
+
+ BBWorklist.clear();
+ BBWorklist.insert(L->getHeader());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ BasicBlock *BB = BBWorklist[Idx];
+
+ // Visit all instructions in the given basic block and try to simplify
+      // them. We don't change the actual IR, just count optimization
+ // opportunities.
+ for (Instruction &I : *BB) {
+ UnrolledLoopSize += TTI.getUserCost(&I);
+
+ // Visit the instruction to analyze its loop cost after unrolling,
+ // and if the visitor returns true, then we can optimize this
+ // instruction away.
+ if (Analyzer.visit(I))
+ NumberOfOptimizedInstructions += TTI.getUserCost(&I);
+
+        // If the unrolled body turns out to be too big, bail out.
+ if (UnrolledLoopSize - NumberOfOptimizedInstructions >
+ MaxUnrolledLoopSize)
+ return None;
+ }
+
+ // Add BB's successors to the worklist.
+ for (BasicBlock *Succ : successors(BB))
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ }
+
+ // If we found no optimization opportunities on the first iteration, we
+    // won't find them on later ones either.
+ if (!NumberOfOptimizedInstructions)
+ return None;
+ }
+ return {{NumberOfOptimizedInstructions, UnrolledLoopSize}};
+}
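+// E.g. a loop with TripCount = 3 whose body costs 4 per iteration, and in
+// which 2 instructions fold away on every iteration, comes back as
+// {NumberOfOptimizedInstructions = 6, UnrolledLoopSize = 12}.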
+
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
@@ -234,44 +677,31 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// Returns the loop hint metadata node with the given name (for example,
// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
// returned.
-static const MDNode *GetUnrollMetadata(const Loop *L, StringRef Name) {
- MDNode *LoopID = L->getLoopID();
- if (!LoopID)
- return nullptr;
-
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
-
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
- continue;
-
- if (Name.equals(S->getString()))
- return MD;
- }
+static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
+ if (MDNode *LoopID = L->getLoopID())
+ return GetUnrollMetadata(LoopID, Name);
return nullptr;
}
// Returns true if the loop has an unroll(full) pragma.
static bool HasUnrollFullPragma(const Loop *L) {
- return GetUnrollMetadata(L, "llvm.loop.unroll.full");
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
- return GetUnrollMetadata(L, "llvm.loop.unroll.disable");
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
+}
+
+// Returns true if the loop has a runtime unroll(disable) pragma.
+static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
}
// If the loop has an unroll_count pragma, return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) {
- const MDNode *MD = GetUnrollMetadata(L, "llvm.loop.unroll.count");
+ MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
if (MD) {
assert(MD->getNumOperands() == 2 &&
"Unroll count hint metadata should have two operands.");
@@ -319,6 +749,49 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
+bool LoopUnroll::canUnrollCompletely(
+ Loop *L, unsigned Threshold, unsigned AbsoluteThreshold,
+ uint64_t UnrolledSize, unsigned NumberOfOptimizedInstructions,
+ unsigned PercentOfOptimizedForCompleteUnroll) {
+
+ if (Threshold == NoThreshold) {
+ DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
+ return true;
+ }
+
+ if (UnrolledSize <= Threshold) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolled size: "
+ << UnrolledSize << "<" << Threshold << "\n");
+ return true;
+ }
+
+ assert(UnrolledSize && "UnrolledSize can't be 0 at this point.");
+ unsigned PercentOfOptimizedInstructions =
+ (uint64_t)NumberOfOptimizedInstructions * 100ull / UnrolledSize;
+
+ if (UnrolledSize <= AbsoluteThreshold &&
+ PercentOfOptimizedInstructions >= PercentOfOptimizedForCompleteUnroll) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolling will help removing "
+ << PercentOfOptimizedInstructions
+ << "% instructions (threshold: "
+ << PercentOfOptimizedForCompleteUnroll << "%)\n");
+ DEBUG(dbgs() << " Unrolled size (" << UnrolledSize
+ << ") is less than the threshold (" << AbsoluteThreshold
+ << ").\n");
+ return true;
+ }
+
+ DEBUG(dbgs() << " Too large to fully unroll:\n");
+ DEBUG(dbgs() << " Unrolled size: " << UnrolledSize << "\n");
+ DEBUG(dbgs() << " Estimated number of optimized instructions: "
+ << NumberOfOptimizedInstructions << "\n");
+ DEBUG(dbgs() << " Absolute threshold: " << AbsoluteThreshold << "\n");
+ DEBUG(dbgs() << " Minimum percent of removed instructions: "
+ << PercentOfOptimizedForCompleteUnroll << "\n");
+ DEBUG(dbgs() << " Threshold for small loops: " << Threshold << "\n");
+ return false;
+}
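+// E.g. with UnrolledSize = 200 and NumberOfOptimizedInstructions = 120, 60%
+// of the unrolled body is expected to fold away, so unrolling proceeds iff
+// 200 <= AbsoluteThreshold and 60 >= PercentOfOptimizedForCompleteUnroll
+// (unless UnrolledSize already fit under the plain Threshold).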
+
unsigned LoopUnroll::selectUnrollCount(
const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
@@ -363,13 +836,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
- LoopInfo *LI = &getAnalysis<LoopInfo>();
+ Function &F = *L->getHeader()->getParent();
+
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
- const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- const FunctionTargetTransformInfo &FTTI =
- getAnalysis<FunctionTargetTransformInfo>();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -383,7 +856,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, FTTI, UP);
+ getUnrollingPreferences(L, TTI, UP);
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -426,20 +899,33 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
unsigned Threshold, PartialThreshold;
- selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);
+ unsigned AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll;
+ selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
+ AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll);
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
int Unrolling;
if (TripCount && Count == TripCount) {
- if (Threshold != NoThreshold && UnrolledSize > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << UnrolledSize << ">" << Threshold
- << "\n");
- Unrolling = Partial;
- } else {
+ Unrolling = Partial;
+ // If the loop is really small, we don't need to run an expensive analysis.
+    if (canUnrollCompletely(L, Threshold, AbsoluteThreshold, UnrolledSize, 0,
+                            100)) {
Unrolling = Full;
+ } else {
+      // The loop isn't that small, but we can still fully unroll it if doing
+      // so helps remove a significant number of instructions.
+      // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost =
+ analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold))
+ if (canUnrollCompletely(L, Threshold, AbsoluteThreshold,
+ Cost->UnrolledLoopSize,
+ Cost->NumberOfOptimizedInstructions,
+ PercentOfOptimizedForCompleteUnroll)) {
+ Unrolling = Full;
+ }
}
} else if (TripCount && Count < TripCount) {
Unrolling = Partial;
@@ -450,6 +936,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ AllowRuntime = false;
+ }
if (Unrolling == Partial) {
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
@@ -518,8 +1007,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
- &LPM, &AC))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
+ TripMultiple, LI, this, &LPM, &AC))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9f4c122..988d2af 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -170,13 +171,13 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -333,10 +334,10 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -387,7 +388,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
@@ -432,8 +433,10 @@ bool LoopUnswitch::processCurrentLoop() {
  // We have probably reached the quota of branches for this loop. If so,
  // stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
- AC))
+ if (!BranchesInfo.countLoop(
+ currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *currentLoop->getHeader()->getParent()),
+ AC))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
@@ -655,9 +658,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
@@ -675,7 +676,7 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
@@ -706,8 +707,9 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
- SplitCriticalEdge(BI, 0, this, false, false, true);
- SplitCriticalEdge(BI, 1, this, false, false, true);
+ auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA();
+ SplitCriticalEdge(BI, 0, Options);
+ SplitCriticalEdge(BI, 1, Options);
}
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
@@ -726,7 +728,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI);
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
@@ -737,7 +739,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -768,13 +770,9 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
- this, NewBBs);
- }
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa",
+ /*AliasAnalysis*/ nullptr, DT, LI,
+ /*PreserveLCSSA*/ true);
}
}
@@ -797,7 +795,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
- BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, DT, LI);
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
@@ -850,14 +848,14 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
// as well.
- ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
}
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
- ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+ ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
@@ -1043,7 +1041,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
// loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, this);
+ SplitEdge(Switch, SISucc, DT, LI);
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
@@ -1085,6 +1083,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
@@ -1107,7 +1106,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I))
+ if (Value *V = SimplifyInstruction(I, DL))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index ff89e74..0c47cbd 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -24,13 +25,14 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include <vector>
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "lower-expect-intrinsic"
-STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+STATISTIC(ExpectIntrinsicsHandled,
+ "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
@@ -39,27 +41,8 @@ static cl::opt<uint32_t>
UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
-namespace {
-
- class LowerExpectIntrinsic : public FunctionPass {
-
- bool HandleSwitchExpect(SwitchInst *SI);
-
- bool HandleIfExpect(BranchInst *BI);
-
- public:
- static char ID;
- LowerExpectIntrinsic() : FunctionPass(ID) {
- initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- };
-}
-
-
-bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
- CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+static bool handleSwitchExpect(SwitchInst &SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI.getCondition());
if (!CI)
return false;
@@ -72,26 +55,24 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
if (!ExpectedValue)
return false;
- SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
- unsigned n = SI->getNumCases(); // +1 for default case.
- std::vector<uint32_t> Weights(n + 1);
+ SwitchInst::CaseIt Case = SI.findCaseValue(ExpectedValue);
+ unsigned n = SI.getNumCases(); // +1 for default case.
+ SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
- Weights[0] = Case == SI->case_default() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
- for (unsigned i = 0; i != n; ++i)
- Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
+ if (Case == SI.case_default())
+ Weights[0] = LikelyBranchWeight;
+ else
+ Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(CI->getContext()).createBranchWeights(Weights));
+ SI.setMetadata(LLVMContext::MD_prof,
+ MDBuilder(CI->getContext()).createBranchWeights(Weights));
- SI->setCondition(ArgValue);
+ SI.setCondition(ArgValue);
return true;
}
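+// E.g. for a switch with three cases where the expected value selects case 1,
+// the weights become {default: 4, case 0: 4, case 1: 64, case 2: 4} under the
+// default likely/unlikely branch weight settings.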
-
-bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
- if (BI->isUnconditional())
+static bool handleBranchExpect(BranchInst &BI) {
+ if (BI.isUnconditional())
return false;
// Handle non-optimized IR code like:
@@ -105,9 +86,9 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
CallInst *CI;
- ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI.getCondition());
if (!CmpI) {
- CI = dyn_cast<CallInst>(BI->getCondition());
+ CI = dyn_cast<CallInst>(BI.getCondition());
} else {
if (CmpI->getPredicate() != CmpInst::ICMP_NE)
return false;
@@ -136,32 +117,30 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
else
Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
- BI->setMetadata(LLVMContext::MD_prof, Node);
+ BI.setMetadata(LLVMContext::MD_prof, Node);
if (CmpI)
CmpI->setOperand(0, ArgValue);
else
- BI->setCondition(ArgValue);
+ BI.setCondition(ArgValue);
return true;
}
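+// For the branch case above, e.g. 'if (__builtin_expect(x != 0, 1))' ends up
+// with !prof branch weights {64, 4} on its true/false successors under the
+// default weight settings.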
+static bool lowerExpectIntrinsic(Function &F) {
+ bool Changed = false;
-bool LowerExpectIntrinsic::runOnFunction(Function &F) {
- for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
-
+ for (BasicBlock &BB : F) {
// Create "block_weights" metadata.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (HandleIfExpect(BI))
- IfHandled++;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (HandleSwitchExpect(SI))
- IfHandled++;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+ if (handleBranchExpect(*BI))
+ ExpectIntrinsicsHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+ if (handleSwitchExpect(*SI))
+ ExpectIntrinsicsHandled++;
}
    // Remove llvm.expect intrinsics.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ) {
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
CallInst *CI = dyn_cast<CallInst>(BI++);
if (!CI)
continue;
@@ -171,17 +150,42 @@ bool LowerExpectIntrinsic::runOnFunction(Function &F) {
Value *Exp = CI->getArgOperand(0);
CI->replaceAllUsesWith(Exp);
CI->eraseFromParent();
+ Changed = true;
}
}
}
- return false;
+ return Changed;
}
+PreservedAnalyses LowerExpectIntrinsicPass::run(Function &F) {
+ if (lowerExpectIntrinsic(F))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+/// \brief Legacy pass for lowering expect intrinsics out of the IR.
+///
+/// When this pass is run over a function it uses expect intrinsics which feed
+/// branches and switches to provide branch weight metadata for those
+/// terminators. It then removes the expect intrinsics from the IR so the rest
+/// of the optimizer can ignore them.
+class LowerExpectIntrinsic : public FunctionPass {
+public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
+};
+}
char LowerExpectIntrinsic::ID = 0;
-INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
- "Intrinsics", false, false)
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
+ "Lower 'expect' Intrinsics", false, false)
FunctionPass *llvm::createLowerExpectIntrinsicPass() {
return new LowerExpectIntrinsic();
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3eea3d4..66d6ac6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -41,7 +41,8 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
- bool &VariableIdxFound, const DataLayout &TD){
+ bool &VariableIdxFound,
+ const DataLayout &DL) {
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -57,13 +58,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
// Handle struct indices, which add their field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*OpC->getSExtValue();
}
@@ -74,7 +75,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- const DataLayout &TD) {
+ const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
@@ -92,12 +93,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
- Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
- Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
@@ -115,8 +116,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
- int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
if (VariableIdxFound) return false;
Offset = Offset2-Offset1;
@@ -150,12 +151,11 @@ struct MemsetRange {
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
- bool isProfitableToUseMemset(const DataLayout &TD) const;
-
+ bool isProfitableToUseMemset(const DataLayout &DL) const;
};
} // end anon namespace
-bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
+bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End-Start >= 16) return true;
@@ -183,7 +183,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
if (MaxIntSize == 0)
MaxIntSize = 1;
unsigned NumPointerStores = Bytes / MaxIntSize;
@@ -314,14 +314,12 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const DataLayout *DL;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = nullptr;
TLI = nullptr;
- DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -334,7 +332,7 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -346,8 +344,9 @@ namespace {
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
- bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
- uint64_t MSize);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
+ bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
+ bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
bool processByValArgument(CallSite CS, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
@@ -366,7 +365,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
@@ -377,13 +376,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (!DL) return nullptr;
+ const DataLayout &DL = StartInst->getModule()->getDataLayout();
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(*DL);
+ MemsetRanges Ranges(DL);
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -406,8 +405,8 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
- Offset, *DL))
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
+ DL))
break;
Ranges.addStore(Offset, NextStore);
@@ -420,7 +419,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
break;
Ranges.addMemSet(Offset, MSI);
@@ -452,7 +451,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(*DL))
+ if (!Range.isProfitableToUseMemset(DL))
continue;
// Otherwise, we do want to transform this! Create a new memset.
@@ -464,7 +463,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Alignment == 0) {
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = DL->getABITypeAlignment(EltType);
+ Alignment = DL.getABITypeAlignment(EltType);
}
AMemSet =
@@ -494,8 +493,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
-
- if (!DL) return false;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -525,16 +523,16 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
unsigned storeAlign = SI->getAlignment();
if (!storeAlign)
- storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType());
+ storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
unsigned loadAlign = LI->getAlignment();
if (!loadAlign)
- loadAlign = DL->getABITypeAlignment(LI->getType());
+ loadAlign = DL.getABITypeAlignment(LI->getType());
- bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL->getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(storeAlign, loadAlign), C);
+ bool changed = performCallSlotOptzn(
+ LI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -606,15 +604,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!srcAlloca)
return false;
- // Check that all of src is copied to dest.
- if (!DL) return false;
-
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
- srcArraySize->getZExtValue();
+ const DataLayout &DL = cpy->getModule()->getDataLayout();
+ uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
if (cpyLen < srcSize)
return false;
@@ -628,8 +624,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!destArraySize)
return false;
- uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
- destArraySize->getZExtValue();
+ uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
@@ -648,7 +644,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
- uint64_t destSize = DL->getTypeAllocSize(StructTy);
+ uint64_t destSize = DL.getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
}
@@ -659,7 +655,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)
- srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
+ srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
// If dest is not aligned enough and we can't increase its alignment then
// bail out.
@@ -769,10 +765,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can. MSize is the size of M's copy.
+/// copy from MDep's input if we can.
///
-bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
- uint64_t MSize) {
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
@@ -844,6 +839,103 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
return true;
}
+/// We've found that the (upward scanning) memory dependence of \p MemCpy is
+/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
+/// weren't copied over by \p MemCpy.
+///
+/// In other words, transform:
+/// \code
+/// memset(dst, c, dst_size);
+/// memcpy(dst, src, src_size);
+/// \endcode
+/// into:
+/// \code
+/// memcpy(dst, src, src_size);
+/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+/// \endcode
+bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
+ // We can only transform memset/memcpy with the same destination.
+ if (MemSet->getDest() != MemCpy->getDest())
+ return false;
+
+ // Check that there are no other dependencies on the memset destination.
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForDest(MemSet),
+ false, MemCpy, MemCpy->getParent());
+ if (DstDepInfo.getInst() != MemSet)
+ return false;
+
+ // Use the same i8* dest as the memcpy, killing the memset dest if different.
+ Value *Dest = MemCpy->getRawDest();
+ Value *DestSize = MemSet->getLength();
+ Value *SrcSize = MemCpy->getLength();
+
+ // By default, create an unaligned memset.
+ unsigned Align = 1;
+  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
+  // of the sum Dest + SrcSize for the trailing memset.
+ const unsigned DestAlign =
+ std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+ if (DestAlign > 1)
+ if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
+ Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+
+ IRBuilder<> Builder(MemCpy);
+
+ // If the sizes have different types, zext the smaller one.
+ if (DestSize->getType() != SrcSize->getType()) {
+ if (DestSize->getType()->getIntegerBitWidth() >
+ SrcSize->getType()->getIntegerBitWidth())
+ SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
+ else
+ DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
+ }
+
+ Value *MemsetLen =
+ Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
+ ConstantInt::getNullValue(DestSize->getType()),
+ Builder.CreateSub(DestSize, SrcSize));
+ Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
+ MemsetLen, Align);
+
+ MD->removeInstruction(MemSet);
+ MemSet->eraseFromParent();
+ return true;
+}
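+// E.g. memset(p, 0, 100) followed by memcpy(p, q, 60) becomes
+// memcpy(p, q, 60) followed by memset(p + 60, 0, 40); if the copy covers the
+// whole memset (dst_size <= src_size), the new memset length is simply 0.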
+
+/// Transform memcpy to memset when its source was just memset.
+/// In other words, turn:
+/// \code
+/// memset(dst1, c, dst1_size);
+/// memcpy(dst2, dst1, dst2_size);
+/// \endcode
+/// into:
+/// \code
+/// memset(dst1, c, dst1_size);
+/// memset(dst2, c, dst2_size);
+/// \endcode
+/// When dst2_size <= dst1_size.
+///
+/// The \p MemCpy must have a Constant length.
+bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
+ // This only makes sense on memcpy(..., memset(...), ...).
+ if (MemSet->getRawDest() != MemCpy->getRawSource())
+ return false;
+
+ ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+ // Make sure the memcpy doesn't read any more than what the memset wrote.
+ // Don't worry about sizes larger than i64.
+ if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
+ return false;
+
+ IRBuilder<> Builder(MemCpy);
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
+ CopySize, MemCpy->getAlignment());
+ return true;
+}
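+// E.g. memset(a, 7, 16) followed by memcpy(b, a, 8) lets the caller replace
+// the memcpy with memset(b, 7, 8), since the copy reads only bytes that the
+// memset wrote.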
/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
@@ -874,17 +966,26 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return true;
}
+ MemDepResult DepInfo = MD->getDependency(M);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ if (DepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
if (!CopySize) return false;
- // The are three possible optimizations we can do for memcpy:
+ // There are four possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started its
// lifetime copies undefined data, and we can therefore eliminate the
// memcpy in favor of the data that was already at the destination.
- MemDepResult DepInfo = MD->getDependency(M);
+ // d) memcpy from a just-memset'd source can be turned into memset.
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
@@ -900,9 +1001,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
+
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
Instruction *I = SrcDepInfo.getInst();
bool hasUndefContents = false;
@@ -924,6 +1026,15 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
}
+ if (SrcDepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
return false;
}
@@ -959,12 +1070,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (!DL) return false;
-
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
- uint64_t ByValSize = DL->getTypeAllocSize(ByValTy);
+ uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
true, CS.getInstruction(),
@@ -997,8 +1107,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
*CS->getParent()->getParent());
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &AC,
- CS.getInstruction(), &DT) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
+ CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1051,7 +1161,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemCpy(M);
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
- else if (CallSite CS = (Value*)I) {
+ else if (auto CS = CallSite(I)) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
@@ -1077,9 +1187,7 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 1f73cbc..611a941 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -81,12 +81,13 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
@@ -115,9 +116,9 @@ public:
private:
// This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<AliasAnalysis>();
}
@@ -168,7 +169,7 @@ FunctionPass *llvm::createMergedLoadStoreMotionPass() {
INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
@@ -579,7 +580,7 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
/// \brief Run the transformation for each function
///
bool MergedLoadStoreMotion::runOnFunction(Function &F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
AA = &getAnalysis<AliasAnalysis>();
bool Changed = false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
new file mode 100644
index 0000000..5b370e0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -0,0 +1,481 @@
+//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+// void foo(int a, int b) {
+// bar(a + b);
+// bar((a + 2) + b);
+// }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+// int t = a + b;
+// bar(t);
+// bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
+// (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of (a +
+// b) + c, it checks whether a + c or b + c is already computed by a dominating
+// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
+// c) + a and removes the redundancy accordingly. To efficiently look up whether
+// an expression is computed before, we store each instruction seen and its SCEV
+// into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the function
+// in depth-first order. For example, given
+//
+// (a + c) + d
+// ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to run for multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+// ac = a + c
+// ab = a + b
+// abc = ab + c
+// ab2 = ab + b
+// ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
+// result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds for now. This should be extended and
+// generalized.
+//
+// 2) Besides arithmetic operations, similar reassociation can be applied to
+// GEPs. For example, if
+// X = &arr[a]
+// dominates
+// Y = &arr[a + b]
+// we may rewrite Y into X + b.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "nary-reassociate"
+
+namespace {
+class NaryReassociate : public FunctionPass {
+public:
+ static char ID;
+
+ NaryReassociate(): FunctionPass(ID) {
+ initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ // Runs only one iteration of the dominator-based algorithm. See the header
+ // comments for why we need multiple iterations.
+ bool doOneIteration(Function &F);
+
+ // Reassociates I for better CSE.
+ Instruction *tryReassociate(Instruction *I);
+
+ // Reassociate GEP for better CSE.
+ Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
+ // Try splitting GEP at the I-th index and see whether either part can be
+ // CSE'ed. This is a helper function for tryReassociateGEP.
+ //
+ // \p IndexedType The element type indexed by GEP's I-th index. This is
+ // equivalent to
+ // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
+ // ..., i-th index).
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Type *IndexedType);
+ // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
+ // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Value *LHS,
+ Value *RHS, Type *IndexedType);
+
+ // Reassociate Add for better CSE.
+ Instruction *tryReassociateAdd(BinaryOperator *I);
+ // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
+ Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
+ // Rewrites I to LHS + RHS if LHS is computed already.
+ Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+
+ // Returns the closest dominator of \c Dominatee that computes
+ // \c CandidateExpr. Returns null if not found.
+ Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee);
+ // GetElementPtrInst implicitly sign-extends an index if the index is shorter
+ // than the pointer size. This function returns whether Index is shorter than
+ // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
+ // to be an index of GEP.
+ bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ // A lookup table quickly telling which instructions compute the given SCEV.
+ // Note that there can be multiple instructions at different locations
+ // computing the same SCEV, so we map a SCEV to an instruction list. For
+ // example,
+ //
+ // if (p1)
+ // foo(a + b);
+ // if (p2)
+ // bar(a + b);
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+};
+} // anonymous namespace
+
+char NaryReassociate::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+
+FunctionPass *llvm::createNaryReassociatePass() {
+ return new NaryReassociate();
+}
+
+bool NaryReassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool Changed = false, ChangedInThisIteration;
+ do {
+ ChangedInThisIteration = doOneIteration(F);
+ Changed |= ChangedInThisIteration;
+ } while (ChangedInThisIteration);
+ return Changed;
+}
+
+// Whitelist the instruction types NaryReassociate handles for now.
+static bool isPotentiallyNaryReassociable(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool NaryReassociate::doOneIteration(Function &F) {
+ bool Changed = false;
+ SeenExprs.clear();
+ // Process the basic blocks in pre-order of the dominator tree. This order
+ // ensures that all bases of a candidate are already in SeenExprs when we
+ // process the candidate.
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ++I) {
+ if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
+ if (Instruction *NewI = tryReassociate(I)) {
+ Changed = true;
+ SE->forgetValue(I);
+ I->replaceAllUsesWith(NewI);
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ I = NewI;
+ }
+ // Add the rewritten instruction to SeenExprs; the original instruction
+ // is deleted.
+ SeenExprs[SE->getSCEV(I)].push_back(I);
+ }
+ }
+ }
+ return Changed;
+}
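+
+// Why pre-order suffices: every basic block is visited after all blocks that
+// dominate it, so a dominating computation of a SCEV is already recorded in
+// SeenExprs (and sits on its per-SCEV stack) before any dominated candidate
+// with the same SCEV is examined.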
+
+Instruction *NaryReassociate::tryReassociate(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return tryReassociateAdd(cast<BinaryOperator>(I));
+ case Instruction::GetElementPtr:
+ return tryReassociateGEP(cast<GetElementPtrInst>(I));
+ default:
+ llvm_unreachable("should be filtered out by isPotentiallyNaryReassociable");
+ }
+}
+
+// FIXME: extract this method into TTI->getGEPCost.
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ GlobalVariable *BaseGV = nullptr;
+ int64_t BaseOffset = 0;
+ bool HasBaseReg = false;
+ int64_t Scale = 0;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
+ BaseGV = GV;
+ else
+ HasBaseReg = true;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return false;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+ return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
+ BaseOffset, HasBaseReg, Scale);
+}
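+
+// Worked example (field offsets and sizes assumed for illustration): for a
+// GEP equivalent to &p[i].f, where the struct is 8 bytes and field f is at
+// offset 4, the loop above produces HasBaseReg = true (non-global base),
+// Scale = 8 (one variable index over 8-byte elements) and BaseOffset = 4
+// (from the constant field index), and the final query asks the target
+// whether "base + 4 + 8 * reg" is a legal addressing mode.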
+
+Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
+ // Not worth reassociating GEP if it is foldable.
+ if (isGEPFoldable(GEP, TTI, DL))
+ return nullptr;
+
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+ if (isa<SequentialType>(*GTI++)) {
+ if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1, *GTI)) {
+ return NewGEP;
+ }
+ }
+ }
+ return nullptr;
+}
+
+bool NaryReassociate::requiresSignExtension(Value *Index,
+ GetElementPtrInst *GEP) {
+ unsigned PointerSizeInBits =
+ DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
+ return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
+}
+
+GetElementPtrInst *
+NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
+ Type *IndexedType) {
+ Value *IndexToSplit = GEP->getOperand(I + 1);
+ if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit))
+ IndexToSplit = SExt->getOperand(0);
+
+ if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
+ // If the I-th index needs sext and the underlying add is not equipped with
+ // nsw, we cannot split the add because
+ // sext(LHS + RHS) != sext(LHS) + sext(RHS).
+ if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap())
+ return nullptr;
+ Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
+ // IndexToSplit = LHS + RHS.
+ if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
+ return NewGEP;
+ // Symmetrically, try IndexToSplit = RHS + LHS.
+ if (LHS != RHS) {
+ if (auto *NewGEP =
+ tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
+ return NewGEP;
+ }
+ }
+ return nullptr;
+}
+
+GetElementPtrInst *
+NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
+ Value *LHS, Value *RHS,
+ Type *IndexedType) {
+ // Look for GEP's closest dominator that has the same SCEV as GEP except that
+ // the I-th index is replaced with LHS.
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ IndexExprs.push_back(SE->getSCEV(*Index));
+ // Replace the I-th index with LHS.
+ IndexExprs[I] = SE->getSCEV(LHS);
+ const SCEV *CandidateExpr = SE->getGEPExpr(
+ GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
+ IndexExprs, GEP->isInBounds());
+
+ auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+ if (Candidate == nullptr)
+ return nullptr;
+
+ PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
+ // Pretty rare but theoretically possible, e.g. when a non-pointer value
+ // happens to have the same SCEV as CandidateExpr.
+ if (TypeOfCandidate == nullptr)
+ return nullptr;
+
+ // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
+ uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
+ Type *ElementType = TypeOfCandidate->getElementType();
+ uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
+ // Another less rare case: because I is not necessarily the last index of the
+ // GEP, the size of the type at the I-th index (IndexedSize) is not
+ // necessarily divisible by ElementSize. For example,
+ //
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+ // sizeof(S) = 76 is indivisible by sizeof(int64) = 8.
+ //
+ // We bail out on this case for now. TODO: emit an uglygep instead.
+ if (IndexedSize % ElementSize != 0)
+ return nullptr;
+
+ // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))]
+ IRBuilder<> Builder(GEP);
+ Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+ if (RHS->getType() != IntPtrTy)
+ RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
+ if (IndexedSize != ElementSize) {
+ RHS = Builder.CreateMul(
+ RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
+ }
+ GetElementPtrInst *NewGEP =
+ cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ NewGEP->takeName(GEP);
+ return NewGEP;
+}
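+
+// Worked example of the multiplier above (sizes assumed for illustration):
+// if Candidate has type i32* (ElementSize = 4) and IndexedType is a 12-byte
+// struct (IndexedSize = 12), RHS is multiplied by 12 / 4 = 3 and the result
+// is NewGEP = &Candidate[RHS * 3]; each step of the original index spans
+// three i32 elements of Candidate.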
+
+Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ return NewI;
+ if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ return NewI;
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
+ Instruction *I) {
+ Value *A = nullptr, *B = nullptr;
+ // To be conservative, we reassociate I only when it is the only user of A+B.
+ if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
+ // I = (A + B) + RHS
+ // = (A + RHS) + B or (B + RHS) + A
+ const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+ if (BExpr != RHSExpr) {
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ return NewI;
+ }
+ if (AExpr != RHSExpr) {
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ return NewI;
+ }
+ }
+ return nullptr;
+}
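+
+// For example (names invented for illustration): given a dominating
+// "ac = a + c" and I = (a + b) + c, the first call above is made with
+// LHSExpr = SCEV(a + c) and RHS = b, so I is rewritten into "ac + b" and the
+// existing computation of a + c is reused.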
+
+Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
+ Value *RHS, Instruction *I) {
+ auto Pos = SeenExprs.find(LHSExpr);
+ // Bail out if LHSExpr is not previously seen.
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ // Look for the closest dominator LHS of I that computes LHSExpr, and replace
+ // I with LHS + RHS.
+ auto *LHS = findClosestMatchingDominator(LHSExpr, I);
+ if (LHS == nullptr)
+ return nullptr;
+
+ Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ NewI->takeName(I);
+ return NewI;
+}
+
+Instruction *
+NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee) {
+ auto Pos = SeenExprs.find(CandidateExpr);
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ auto &Candidates = Pos->second;
+ // Because we process the basic blocks in pre-order of the dominator tree, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!Candidates.empty()) {
+ Instruction *Candidate = Candidates.back();
+ if (DT->dominates(Candidate, Dominatee))
+ return Candidate;
+ Candidates.pop_back();
+ }
+ return nullptr;
+}
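+
+// Amortized-cost note: each candidate instruction is pushed onto its per-SCEV
+// stack once (in doOneIteration) and popped here at most once, which is what
+// makes the overall lookup work linear in the number of candidates.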
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 5c8bed5..31d7df3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -18,7 +18,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -52,16 +52,18 @@ INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
"Partially inline calls to library functions", false, false)
void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
bool Changed = false;
Function::iterator CurrBB;
- TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
CurrBB = BB++;
@@ -126,7 +128,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
// Move all instructions following Call to newly created block JoinBB.
// Create phi and replace all uses.
- BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
+ BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode());
IRBuilder<> Builder(JoinBB, JoinBB->begin());
PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
Call->replaceAllUsesWith(Phi);
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
new file mode 100644
index 0000000..3e7deeb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -0,0 +1,993 @@
+//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Place garbage collection safepoints at appropriate locations in the IR. This
+// does not make relocation semantics or variable liveness explicit. That's
+// done by RewriteStatepointsForGC.
+//
+// Terminology:
+// - A call is said to be "parseable" if there is a stack map generated for the
+// return PC of the call. A runtime can determine where values listed in the
+// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
+// on the stack when the code is suspended inside such a call. Every parse
+// point is represented by a call wrapped in a gc.statepoint intrinsic.
+// - A "poll" is an explicit check in the generated code to determine if the
+// runtime needs the generated code to cooperate by calling a helper routine
+// and thus suspending its execution at a known state. The call to the helper
+// routine will be parseable. The (gc & runtime specific) logic of a poll is
+// assumed to be provided in a function of the name "gc.safepoint_poll".
+//
+// We aim to insert polls such that running code can quickly be brought to a
+// well defined state for inspection by the collector. In the current
+// implementation, this is done via the insertion of poll sites at method entry
+// and the backedge of most loops. We try to avoid inserting more polls than
+// are necessary to ensure a finite period between poll sites. This is not
+// because the poll itself is expensive in the generated code; it's not. Polls
+// do tend to impact the optimizer itself in negative ways; we'd like to avoid
+// perturbing the optimization of the method as much as we can.
+//
+// We also need to make most call sites parseable. The callee might execute a
+// poll (or otherwise be inspected by the GC). If so, the entire stack
+// (including the suspended frame of the current method) must be parseable.
+//
+// This pass will insert:
+// - Call parse points ("call safepoints") for any call which may need to
+// reach a safepoint during the execution of the callee function.
+// - Backedge safepoint polls and entry safepoint polls to ensure that
+// executing code reaches a safepoint poll in a finite amount of time.
+//
+// We do not currently support return statepoints, but adding them would not
+// be hard. They are not required for correctness - entry safepoints are an
+// alternative - but some GCs may prefer them. Patches welcome.
+//
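+// For illustration only (a hypothetical poll body; the real implementation is
+// supplied by the GC/runtime, not by this pass), gc.safepoint_poll might look
+// like:
+//
+//   define void @gc.safepoint_poll() {
+//   entry:
+//     %f = load i8, i8* @__safepoint_flag
+//     %c = icmp ne i8 %f, 0
+//     br i1 %c, label %slow, label %done
+//   slow:
+//     call void @do_safepoint()   ; the slow-path call that becomes parseable
+//     br label %done
+//   done:
+//     ret void
+//   }
+//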
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#define DEBUG_TYPE "safepoint-placement"
+STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
+STATISTIC(NumCallSafepoints, "Number of call safepoints inserted");
+STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");
+
+STATISTIC(CallInLoop, "Number of loops w/o safepoints due to calls in loop");
+STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
+
+using namespace llvm;
+
+// Ignore opportunities to avoid placing safepoints on backedges; useful for
+// validation.
+static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
+ cl::init(false));
+
+/// If true, do not place backedge safepoints in counted loops.
+static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+
+// If true, split the backedge of a loop when placing the safepoint, otherwise
+// split the latch block itself. Supporting both is useful for
+// experimentation, but in practice, it looks like splitting the backedge
+// optimizes better.
+static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
+ cl::init(false));
+
+// Print tracing output
+static cl::opt<bool> TraceLSP("spp-trace", cl::Hidden, cl::init(false));
+
+namespace {
+
+/// An analysis pass whose purpose is to identify each of the backedges in
+/// the function which require a safepoint poll to be inserted.
+struct PlaceBackedgeSafepointsImpl : public FunctionPass {
+ static char ID;
+
+ /// The output of the pass - the list of backedges (each identified by its
+ /// terminating branch) which need a poll inserted.
+ std::vector<TerminatorInst *> PollLocations;
+
+ /// True unless we're running with spp-no-call, in which case we need to
+ /// disable the call-dependent placement optimizations.
+ bool CallSafepointsEnabled;
+
+ ScalarEvolution *SE = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
+
+ PlaceBackedgeSafepointsImpl(bool CallSafepoints = false)
+ : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
+ initializePlaceBackedgeSafepointsImplPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *);
+ void runOnLoopAndSubLoops(Loop *L) {
+ // Visit all the subloops
+ for (auto I = L->begin(), E = L->end(); I != E; I++)
+ runOnLoopAndSubLoops(*I);
+ runOnLoop(L);
+ }
+
+ bool runOnFunction(Function &F) override {
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
+ runOnLoopAndSubLoops(*I);
+ }
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ // We no longer modify the IR at all in this pass. Thus all
+ // analyses are preserved.
+ AU.setPreservesAll();
+ }
+};
+}
+
+static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
+static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
+static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden, cl::init(false));
+
+namespace {
+struct PlaceSafepoints : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ PlaceSafepoints() : FunctionPass(ID) {
+ initializePlaceSafepointsPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We modify the graph wholesale (inlining, block insertion, etc). We
+ // preserve nothing at the moment. We could potentially preserve the
+ // dominator tree if that were worth doing.
+ }
+};
+}
+
+// Insert a safepoint poll immediately before the given instruction. Does
+// not handle the parseability of state at the runtime call; that's the
+// caller's job.
+static void
+InsertSafepointPoll(Instruction *InsertBefore,
+ std::vector<CallSite> &ParsePointsNeeded /*rval*/);
+
+static bool isGCLeafFunction(const CallSite &CS);
+
+static bool needsStatepoint(const CallSite &CS) {
+ if (isGCLeafFunction(CS))
+ return false;
+ if (CS.isCall()) {
+ CallInst *call = cast<CallInst>(CS.getInstruction());
+ if (call->isInlineAsm())
+ return false;
+ }
+ if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS)) {
+ return false;
+ }
+ return true;
+}
+
+static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+
+/// Returns true if this loop is known to contain a call safepoint which
+/// must unconditionally execute on any iteration of the loop which returns
+/// to the loop header via an edge from Pred. Returns a conservatively
+/// correct answer; i.e. false is always a valid answer.
+static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
+ BasicBlock *Pred,
+ DominatorTree &DT) {
+ // In general, we're looking for any cut of the graph which ensures
+ // there's a call safepoint along every edge between Header and Pred.
+ // For the moment, we look only for the 'cuts' that consist of a single call
+ // instruction in a block which is dominated by the Header and dominates the
+ // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
+// of such dominating blocks gets substantially more occurrences than just
+ // checking the Pred and Header blocks themselves. This may be due to the
+ // density of loop exit conditions caused by range and null checks.
+ // TODO: structure this as an analysis pass, cache the result for subloops,
+ // avoid dom tree recalculations
+ assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?");
+
+ BasicBlock *Current = Pred;
+ while (true) {
+ for (Instruction &I : *Current) {
+ if (auto CS = CallSite(&I))
+ // Note: Technically, needing a safepoint isn't quite the right
+ // condition here. We should instead be checking if the target method
+ // has an unconditional poll. In practice, this is only a theoretical
+ // concern since we don't have any methods with conditional-only
+ // safepoint polls.
+ if (needsStatepoint(CS))
+ return true;
+ }
+
+ if (Current == Header)
+ break;
+ Current = DT.getNode(Current)->getIDom()->getBlock();
+ }
+
+ return false;
+}
+
+/// Returns true if this loop is known to terminate in a finite number of
+/// iterations. Note that this function may return false for a loop which
+/// does actually terminate in a finite constant number of iterations, due to
+/// conservatism in the analysis.
+static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
+ BasicBlock *Pred) {
+ // Only used when SkipCounted is off
+ const unsigned upperTripBound = 8192;
+
+ // A conservative bound on the loop as a whole.
+ const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
+ if (MaxTrips != SE->getCouldNotCompute()) {
+ if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
+ return true;
+ if (SkipCounted &&
+ SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
+ return true;
+ }
+
+ // If this is a conditional branch to the header with the alternate path
+ // being outside the loop, we can ask questions about the execution frequency
+ // of the exit block.
+ if (L->isLoopExiting(Pred)) {
+ // This returns an exact expression only. TODO: We really only need an
+ // upper bound here, but SE doesn't expose that.
+ const SCEV *MaxExec = SE->getExitCount(L, Pred);
+ if (MaxExec != SE->getCouldNotCompute()) {
+ if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
+ return true;
+ if (SkipCounted &&
+ SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
+ return true;
+ }
+ }
+
+ return /* not finite */ false;
+}
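+
+// For example, a loop whose maximum backedge-taken count is provably below
+// 8192 is treated as finite by the first check above, and with the default
+// spp-counted behavior any loop whose trip count provably fits in 32 bits
+// is treated as finite as well.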
+
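+/// Scan the instructions from start toward end, collecting every call
+/// encountered. Successor blocks reached before end is found are queued on
+/// worklist so the caller can continue the scan across block boundaries.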
+static void scanOneBB(Instruction *start, Instruction *end,
+ std::vector<CallInst *> &calls,
+ std::set<BasicBlock *> &seen,
+ std::vector<BasicBlock *> &worklist) {
+ for (BasicBlock::iterator itr(start);
+ itr != start->getParent()->end() && itr != BasicBlock::iterator(end);
+ itr++) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*itr)) {
+ calls.push_back(CI);
+ }
+ // FIXME: This code does not handle invokes
+ assert(!dyn_cast<InvokeInst>(&*itr) &&
+ "support for invokes in poll code needed");
+ // Only add the successor blocks if we reach the terminator instruction
+ // without encountering end first
+ if (itr->isTerminator()) {
+ BasicBlock *BB = itr->getParent();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (seen.count(Succ) == 0) {
+ worklist.push_back(Succ);
+ seen.insert(Succ);
+ }
+ }
+ }
+ }
+}
+static void scanInlinedCode(Instruction *start, Instruction *end,
+ std::vector<CallInst *> &calls,
+ std::set<BasicBlock *> &seen) {
+ calls.clear();
+ std::vector<BasicBlock *> worklist;
+ seen.insert(start->getParent());
+ scanOneBB(start, end, calls, seen, worklist);
+ while (!worklist.empty()) {
+ BasicBlock *BB = worklist.back();
+ worklist.pop_back();
+ scanOneBB(&*BB->begin(), end, calls, seen, worklist);
+ }
+}
+
+bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
+ // Loop through all loop latches (branches controlling backedges). We need
+ // to place a safepoint on every backedge (potentially).
+ // Note: In common usage, there will be only one edge due to LoopSimplify
+ // having run sometime earlier in the pipeline, but this code must be correct
+ // w.r.t. loops with multiple backedges.
+ BasicBlock *header = L->getHeader();
+ SmallVector<BasicBlock*, 16> LoopLatches;
+ L->getLoopLatches(LoopLatches);
+ for (BasicBlock *pred : LoopLatches) {
+ assert(L->contains(pred));
+
+ // Make a policy decision about whether this loop needs a safepoint or
+ // not. Note that this is about unburdening the optimizer in loops, not
+ // avoiding the runtime cost of the actual safepoint.
+ if (!AllBackedges) {
+ if (mustBeFiniteCountedLoop(L, SE, pred)) {
+ if (TraceLSP)
+ errs() << "skipping safepoint placement in finite loop\n";
+ FiniteExecution++;
+ continue;
+ }
+ if (CallSafepointsEnabled &&
+ containsUnconditionalCallSafepoint(L, header, pred, *DT)) {
+ // Note: This is only semantically legal since we won't do any further
+ // IPO or inlining before the actual call insertion. Otherwise, we
+ // might later lose this call safepoint.
+ if (TraceLSP)
+ errs() << "skipping safepoint placement due to unconditional call\n";
+ CallInLoop++;
+ continue;
+ }
+ }
+
+ // TODO: We can create an inner loop which runs a finite number of
+ // iterations with an outer loop which contains a safepoint. This would
+ // not help runtime performance that much, but it might help our ability to
+ // optimize the inner loop.
+
+ // Safepoint insertion would involve creating a new basic block (as the
+ // target of the current backedge) which does the safepoint (of all live
+ // variables) and branches to the true header
+ TerminatorInst *term = pred->getTerminator();
+
+ if (TraceLSP) {
+ errs() << "[LSP] terminator instruction: ";
+ term->dump();
+ }
+
+ PollLocations.push_back(term);
+ }
+
+ return false;
+}
+
+/// Returns true if an entry safepoint is not required before this callsite in
+/// the caller function.
+static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
+ Instruction *Inst = CS.getInstruction();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_statepoint:
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ // These can wrap an actual call which may grow the stack by an unbounded
+ // amount or run forever.
+ return false;
+ default:
+ // Most LLVM intrinsics are things which do not expand to actual calls, or
+ // at least if they do, are leaf functions that cause only finite stack
+ // growth. In particular, the optimizer likes to form things like memsets
+ // out of stores in the original IR. Another important example is
+ // llvm.frameescape which must occur in the entry block. Inserting a
+ // safepoint before it is not legal since it could push the frameescape
+ // out of the entry block.
+ return true;
+ }
+ }
+ return false;
+}
+
+static Instruction *findLocationForEntrySafepoint(Function &F,
+ DominatorTree &DT) {
+
+ // Conceptually, this poll needs to be on method entry, but in
+ // practice, we place it as late in the entry block as possible. We
+ // can place it as late as we want as long as it dominates all calls
+ // that can grow the stack. This, combined with backedge polls,
+ // give us all the progress guarantees we need.
+
+ // hasNextInstruction and nextInstruction are used to iterate
+ // through a "straight line" execution sequence.
+
+ auto hasNextInstruction = [](Instruction *I) {
+ if (!I->isTerminator()) {
+ return true;
+ }
+ BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
+ return nextBB && (nextBB->getUniquePredecessor() != nullptr);
+ };
+
+ auto nextInstruction = [&hasNextInstruction](Instruction *I) {
+ assert(hasNextInstruction(I) &&
+ "first check if there is a next instruction!");
+ if (I->isTerminator()) {
+ return I->getParent()->getUniqueSuccessor()->begin();
+ } else {
+ return std::next(BasicBlock::iterator(I));
+ }
+ };
+
+ Instruction *cursor = nullptr;
+ for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+ cursor = nextInstruction(cursor)) {
+
+ // We need to ensure a safepoint poll occurs before any 'real' call. The
+ // easiest way to ensure finite execution between safepoints in the face of
+ // recursive and mutually recursive functions is to enforce that each take
+ // a safepoint. Additionally, we need to ensure a poll before any call
+ // which can grow the stack by an unbounded amount. This isn't required
+ // for GC semantics per se, but is a common requirement for languages
+ // which detect stack overflow via guard pages and then throw exceptions.
+ if (auto CS = CallSite(cursor)) {
+ if (doesNotRequireEntrySafepointBefore(CS))
+ continue;
+ break;
+ }
+ }
+
+ assert((hasNextInstruction(cursor) || cursor->isTerminator()) &&
+ "either we stopped because of a call, or because of terminator");
+
+ return cursor;
+}
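+
+// For example (hypothetical entry block): given "%a = alloca i32" followed by
+// a load and then "call void @foo()", the cursor above walks past the alloca
+// and the load and stops at the call, so the entry poll lands immediately
+// before the first instruction that could grow the stack or itself poll.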
+
+/// Identify the list of call sites which need to have parseable state.
+static void findCallSafepoints(Function &F,
+ std::vector<CallSite> &Found /*rval*/) {
+ assert(Found.empty() && "must be empty!");
+ for (Instruction &I : inst_range(F)) {
+ Instruction *inst = &I;
+ if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
+ CallSite CS(inst);
+
+ // No safepoint needed or wanted
+ if (!needsStatepoint(CS)) {
+ continue;
+ }
+
+ Found.push_back(CS);
+ }
+ }
+}
+
+/// Implement a unique function which doesn't require that we sort the input
+/// vector. Doing so has the effect of changing the output of a couple of
+/// tests in ways which make them less useful in testing fused safepoints.
+template <typename T> static void unique_unsorted(std::vector<T> &vec) {
+ std::set<T> seen;
+ std::vector<T> tmp;
+ std::swap(tmp, vec);
+ vec.reserve(tmp.size()); // reserve after the swap, so the capacity sticks
+ for (auto V : tmp) {
+ if (seen.insert(V).second) {
+ vec.push_back(V);
+ }
+ }
+}
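+
+// For example, {a, b, a, c, b} becomes {a, b, c}: duplicates are dropped
+// while first-occurrence order is preserved, which keeps test output stable
+// without sorting.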
+
+static std::string GCSafepointPollName("gc.safepoint_poll");
+
+static bool isGCSafepointPoll(Function &F) {
+ return F.getName().equals(GCSafepointPollName);
+}
+
+/// Returns true if this function should be rewritten to include safepoint
+/// polls and parseable call sites. The main point of this function is to be
+/// an extension point for custom logic.
+static bool shouldRewriteFunction(Function &F) {
+ // TODO: This should check the GCStrategy
+ if (F.hasGC()) {
+ const char *FunctionGCName = F.getGC();
+ const StringRef StatepointExampleName("statepoint-example");
+ const StringRef CoreCLRName("coreclr");
+ return (StatepointExampleName == FunctionGCName) ||
+ (CoreCLRName == FunctionGCName);
+ } else
+ return false;
+}
+
+// TODO: These should become properties of the GCStrategy, possibly with
+// command line overrides.
+static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
+static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
+static bool enableCallSafepoints(Function &F) { return !NoCall; }
+
+// Normalize the basic block to make it ready to be the target of an invoke
+// statepoint. Ensure that 'BB' does not have phi nodes; this may require
+// splitting it.
+static BasicBlock *normalizeForInvokeSafepoint(BasicBlock *BB,
+ BasicBlock *InvokeParent) {
+ BasicBlock *ret = BB;
+
+ if (!BB->getUniquePredecessor()) {
+ ret = SplitBlockPredecessors(BB, InvokeParent, "");
+ }
+
+ // Now that 'ret' has a unique predecessor, we can safely remove all phi
+ // nodes from it.
+ FoldSingleEntryPHINodes(ret);
+ assert(!isa<PHINode>(ret->begin()));
+
+ return ret;
+}
+
+bool PlaceSafepoints::runOnFunction(Function &F) {
+ if (F.isDeclaration() || F.empty()) {
+ // This is a declaration, nothing to do. Must exit early to avoid crash in
+ // dom tree calculation
+ return false;
+ }
+
+ if (isGCSafepointPoll(F)) {
+ // Given that we inline this function during safepoint poll insertion,
+ // rewriting it as well doesn't make any sense. Note that we do make any
+ // calls it contains parseable after we inline a poll.
+ return false;
+ }
+
+ if (!shouldRewriteFunction(F))
+ return false;
+
+ bool modified = false;
+
+ // In various bits below, we rely on the fact that uses are reachable from
+ // defs. When there are basic blocks unreachable from the entry, dominance
+ // and reachability queries return nonsensical results. Thus, we preprocess
+ // the function to ensure these properties hold.
+ modified |= removeUnreachableBlocks(F);
+
+ // STEP 1 - Insert the safepoint polling locations. We do not need to
+ // actually insert parse points yet. That will be done for all polls and
+ // calls in a single pass.
+
+ DominatorTree DT;
+ DT.recalculate(F);
+
+ SmallVector<Instruction *, 16> PollsNeeded;
+ std::vector<CallSite> ParsePointNeeded;
+
+ if (enableBackedgeSafepoints(F)) {
+ // Construct a pass manager to run the LoopPass backedge logic. We
+ // need the pass manager to handle scheduling all the loop passes
+ // appropriately. Doing this by hand is painful and just not worth messing
+ // with for the moment.
+ legacy::FunctionPassManager FPM(F.getParent());
+ bool CanAssumeCallSafepoints = enableCallSafepoints(F);
+ PlaceBackedgeSafepointsImpl *PBS =
+ new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
+ FPM.add(PBS);
+ FPM.run(F);
+
+ // We preserve dominance information when inserting the poll, otherwise
+ // we'd have to recalculate this on every insert
+ DT.recalculate(F);
+
+ auto &PollLocations = PBS->PollLocations;
+
+ auto OrderByBBName = [](Instruction *a, Instruction *b) {
+ return a->getParent()->getName() < b->getParent()->getName();
+ };
+ // We need the order of list to be stable so that naming ends up stable
+ // when we split edges. This makes test cases much easier to write.
+ std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);
+
+ // We can sometimes end up with duplicate poll locations. This happens if
+ // a single loop is visited more than once. The fact this happens seems
+ // wrong, but it does happen for the split-backedge.ll test case.
+ PollLocations.erase(std::unique(PollLocations.begin(),
+ PollLocations.end()),
+ PollLocations.end());
+
+ // Insert a poll at each point the analysis pass identified
+ // The poll location must be the terminator of a loop latch block.
+ for (TerminatorInst *Term : PollLocations) {
+ // We are inserting a poll, the function is modified
+ modified = true;
+
+ if (SplitBackedge) {
+ // Split the backedge of the loop and insert the poll within that new
+ // basic block. This creates a loop with two latches per original
+ // latch (which is non-ideal), but this appears to be easier to
+ // optimize in practice than inserting the poll immediately before the
+ // latch test.
+
+ // Since this is a latch, at least one of the successors must dominate
+ // it. It's possible that we have a) duplicate edges to the same header
+ // and b) edges to distinct loop headers. We need to insert polls on
+ // each.
+ SetVector<BasicBlock *> Headers;
+ for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (DT.dominates(Succ, Term->getParent())) {
+ Headers.insert(Succ);
+ }
+ }
+ assert(!Headers.empty() && "poll location is not a loop latch?");
+
+ // The split loop structure here is so that we only need to recalculate
+ // the dominator tree once. Alternatively, we could just keep it up to
+ // date and use a more natural merged loop.
+ SetVector<BasicBlock *> SplitBackedges;
+ for (BasicBlock *Header : Headers) {
+ BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
+ PollsNeeded.push_back(NewBB->getTerminator());
+ NumBackedgeSafepoints++;
+ }
+ } else {
+ // Split the latch block itself, right before the terminator.
+ PollsNeeded.push_back(Term);
+ NumBackedgeSafepoints++;
+ }
+ }
+ }
+
+ if (enableEntrySafepoints(F)) {
+ Instruction *Location = findLocationForEntrySafepoint(F, DT);
+ if (!Location) {
+ // policy choice not to insert?
+ } else {
+ PollsNeeded.push_back(Location);
+ modified = true;
+ NumEntrySafepoints++;
+ }
+ }
+
+ // Now that we've identified all the needed safepoint poll locations, insert
+ // safepoint polls themselves.
+ for (Instruction *PollLocation : PollsNeeded) {
+ std::vector<CallSite> RuntimeCalls;
+ InsertSafepointPoll(PollLocation, RuntimeCalls);
+ ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
+ RuntimeCalls.end());
+ }
+ PollsNeeded.clear(); // make sure we don't accidentally use it again
+ // The dominator tree has been invalidated by the inlining performed in the
+ // above loop. TODO: Teach the inliner how to update the dom tree?
+ DT.recalculate(F);
+
+ if (enableCallSafepoints(F)) {
+ std::vector<CallSite> Calls;
+ findCallSafepoints(F, Calls);
+ NumCallSafepoints += Calls.size();
+ ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
+ }
+
+ // Unique the vectors since we can end up with duplicates if we scan the call
+ // site for call safepoints after we add it for entry or backedge. The
+ // only reason we need tracking at all is that some functions might have
+ // polls but not call safepoints and thus we might miss marking the runtime
+ // calls for the polls. (This is useful in test cases!)
+ unique_unsorted(ParsePointNeeded);
+
+ // Any parse point (no matter what source) will be handled here
+
+ // We're about to start modifying the function
+ if (!ParsePointNeeded.empty())
+ modified = true;
+
+ // Now run through and insert the safepoints, but do _NOT_ update or remove
+ // any existing uses. We have references to live variables that need to
+ // survive to the last iteration of this loop.
+ std::vector<Value *> Results;
+ Results.reserve(ParsePointNeeded.size());
+ for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
+ CallSite &CS = ParsePointNeeded[i];
+
+ // For invoke statepoints we need to remove all phi nodes at the normal
+ // destination block.
+ // The reason is that a gc_result can be placed only after the last phi
+ // node of a basic block, and we would get malformed code after RAUW of
+ // the gc_result if one of those phi nodes used the result of the invoke.
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ normalizeForInvokeSafepoint(Invoke->getNormalDest(),
+ Invoke->getParent());
+ }
+
+ Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+ Results.push_back(GCResult);
+ }
+ assert(Results.size() == ParsePointNeeded.size());
+
+ // Adjust all users of the old call sites to use the new ones instead
+ for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
+ CallSite &CS = ParsePointNeeded[i];
+ Value *GCResult = Results[i];
+ if (GCResult) {
+ // Cannot RAUW for the invoke's gc result if phi nodes are present.
+ assert(CS.isCall() || !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));
+
+ // Replace all uses with the new call
+ CS.getInstruction()->replaceAllUsesWith(GCResult);
+ }
+
+ // Now that we've handled all uses, remove the original call itself
+ // Note: The insert point can't be the deleted instruction!
+ CS.getInstruction()->eraseFromParent();
+ }
+ return modified;
+}
+
+char PlaceBackedgeSafepointsImpl::ID = 0;
+char PlaceSafepoints::ID = 0;
+
+FunctionPass *llvm::createPlaceSafepointsPass() {
+ return new PlaceSafepoints();
+}
+
+INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
+
+INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
+ false, false)
+INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
+ false, false)
+
+static bool isGCLeafFunction(const CallSite &CS) {
+ Instruction *inst = CS.getInstruction();
+ if (isa<IntrinsicInst>(inst)) {
+ // Most LLVM intrinsics are things which can never take a safepoint.
+ // As a result, we don't need to have the stack parseable at the
+ // callsite. This is a highly useful optimization since intrinsic
+ // calls are fairly prevalent, particularly in debug builds.
+ return true;
+ }
+
+ // If this function is marked explicitly as a leaf call, we don't need to
+ // place a safepoint for it. In fact, for correctness we *can't* in many
+ // cases. Note: Indirect calls return null for the called function; these
+ // obviously aren't runtime functions with attributes.
+ // TODO: Support attributes on the call site as well.
+ const Function *F = CS.getCalledFunction();
+ bool isLeaf =
+ F &&
+ F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
+ if (isLeaf) {
+ return true;
+ }
+ return false;
+}
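+
+// For example (illustrative IR; the attribute spelling is the one checked
+// above), a callee declared as
+//   declare void @runtime_helper() "gc-leaf-function"="true"
+// is treated as a leaf here, so no call safepoint is placed at its call
+// sites.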
+
+static void
+InsertSafepointPoll(Instruction *InsertBefore,
+ std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Module *M = InsertBefore->getModule();
+ assert(M && "must be part of a module");
+
+ // Inline the safepoint poll implementation - this will get all the branch,
+ // control flow, etc.. Most importantly, it will introduce the actual slow
+ // path call - where we need to insert a safepoint (parsepoint).
+
+ auto *F = M->getFunction(GCSafepointPollName);
+ assert(F->getType()->getElementType() ==
+ FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
+ "gc.safepoint_poll declared with wrong type");
+ assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
+ CallInst *PollCall = CallInst::Create(F, "", InsertBefore);
+
+ // Record some information about the call site we're replacing
+ BasicBlock::iterator before(PollCall), after(PollCall);
+ bool isBegin(false);
+ if (before == OrigBB->begin()) {
+ isBegin = true;
+ } else {
+ before--;
+ }
+ after++;
+ assert(after != OrigBB->end() && "must have successor");
+
+ // do the actual inlining
+ InlineFunctionInfo IFI;
+ bool InlineStatus = InlineFunction(PollCall, IFI);
+ assert(InlineStatus && "inline must succeed");
+ (void)InlineStatus; // suppress warning in release-asserts
+
+ // Check post conditions
+ assert(IFI.StaticAllocas.empty() && "can't have allocs");
+
+ std::vector<CallInst *> calls; // new calls
+ std::set<BasicBlock *> BBs; // new BBs + insertee
+ // Include only the newly inserted instructions. Note: 'before' is not
+ // valid if we inserted at the beginning of the basic block.
+ BasicBlock::iterator start;
+ if (isBegin) {
+ start = OrigBB->begin();
+ } else {
+ start = before;
+ start++;
+ }
+
+ // If your poll function includes an unreachable at the end, that's not
+ // valid. Bugpoint likes to create this, so check for it.
+ assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
+ "malformed poll function");
+
+ scanInlinedCode(&*(start), &*(after), calls, BBs);
+ assert(!calls.empty() && "slow path not found for safepoint poll");
+
+ // Record the fact we need a parsable state at the runtime call contained in
+ // the poll function. This is required so that the runtime knows how to
+ // parse the last frame when we actually take the safepoint (i.e. execute
+ // the slow path)
+ assert(ParsePointsNeeded.empty());
+ for (size_t i = 0; i < calls.size(); i++) {
+
+ // No safepoint needed or wanted
+ if (!needsStatepoint(calls[i])) {
+ continue;
+ }
+
+ // These are likely runtime calls. Should we assert that via calling
+ // convention or something?
+ ParsePointsNeeded.push_back(CallSite(calls[i]));
+ }
+ assert(ParsePointsNeeded.size() <= calls.size());
+}
+
+/// Replaces the given call site (Call or Invoke) with a gc.statepoint
+/// intrinsic with an empty deoptimization arguments list. This does
+/// NOT do explicit relocation for GC support.
+static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
+ Pass *P) {
+ assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
+ "must be set");
+
+ // TODO: technically, a pass is not allowed to get functions from within a
+ // function pass since it might trigger a new function addition. Refactor
+ // this logic out to the initialization of the pass. Doesn't appear to
+ // matter in practice.
+
+ // Then go ahead and use the builder to actually do the inserts. We insert
+ // immediately before the previous instruction under the assumption that all
+ // arguments will be available here. We can't insert afterwards since we may
+ // be replacing a terminator.
+ IRBuilder<> Builder(CS.getInstruction());
+
+ // Note: The gc args are not filled in at this time, that's handled by
+ // RewriteStatepointsForGC (which is currently under review).
+
+ // Create the statepoint given all the arguments
+ Instruction *Token = nullptr;
+
+ uint64_t ID;
+ uint32_t NumPatchBytes;
+
+ AttributeSet OriginalAttrs = CS.getAttributes();
+ Attribute AttrID =
+ OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
+ Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
+ AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
+
+ AttrBuilder AttrsToRemove;
+ bool HasID = AttrID.isStringAttribute() &&
+ !AttrID.getValueAsString().getAsInteger(10, ID);
+
+ if (HasID)
+ AttrsToRemove.addAttribute("statepoint-id");
+ else
+ ID = 0xABCDEF00;
+
+ bool HasNumPatchBytes =
+ AttrNumPatchBytes.isStringAttribute() &&
+ !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
+
+ if (HasNumPatchBytes)
+ AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
+ else
+ NumPatchBytes = 0;
+
+ OriginalAttrs = OriginalAttrs.removeAttributes(
+ CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
+ AttrsToRemove);
+
+ Value *StatepointTarget = NumPatchBytes == 0
+ ? CS.getCalledValue()
+ : ConstantPointerNull::get(cast<PointerType>(
+ CS.getCalledValue()->getType()));
+
+ if (CS.isCall()) {
+ CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *Call = Builder.CreateGCStatepointCall(
+ ID, NumPatchBytes, StatepointTarget,
+ makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
+ "safepoint_token");
+ Call->setTailCall(ToReplace->isTailCall());
+ Call->setCallingConv(ToReplace->getCallingConv());
+
+ // If we can handle this set of attributes, set the function attributes
+ // directly on the statepoint and set the return attributes on the
+ // gc_result intrinsic later.
+ Call->setAttributes(OriginalAttrs.getFnAttributes());
+
+ Token = Call;
+
+ // Put the following gc_result and gc_relocate calls immediately after
+ // the old call (which we're about to delete).
+ assert(ToReplace->getNextNode() && "not a terminator, must have next");
+ Builder.SetInsertPoint(ToReplace->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
+ } else if (CS.isInvoke()) {
+ InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
+
+ // Insert the new invoke into the old block. We'll remove the old one in a
+ // moment at which point this will become the new terminator for the
+ // original block.
+ Builder.SetInsertPoint(ToReplace->getParent());
+ InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
+ ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+ ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
+ None, None, "safepoint_token");
+
+ Invoke->setCallingConv(ToReplace->getCallingConv());
+
+ // If we can handle this set of attributes, set the function attributes
+ // directly on the statepoint and set the return attributes on the
+ // gc_result intrinsic later.
+ Invoke->setAttributes(OriginalAttrs.getFnAttributes());
+
+ Token = Invoke;
+
+ // We'll insert the gc.result into the normal block
+ BasicBlock *NormalDest = ToReplace->getNormalDest();
+ // Cannot insert gc.result if phi nodes are present; such cases should
+ // have been removed prior to running this function.
+ assert(!isa<PHINode>(NormalDest->begin()));
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+ } else {
+ llvm_unreachable("unexpect type of CallSite");
+ }
+ assert(Token);
+
+ // Handle the return value of the original call - update all uses to use a
+ // gc_result hanging off the statepoint node we just inserted
+
+ // Only add the gc_result if there is actually a used result.
+ if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+ std::string TakenName =
+ CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
+ GCResult->setAttributes(OriginalAttrs.getRetAttributes());
+ return GCResult;
+ } else {
+ // No return value for the call.
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 4e02255..b677523 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -59,7 +59,7 @@ namespace {
}
#ifndef NDEBUG
-/// PrintOps - Print out the expression identified in the Ops list.
+/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getParent()->getParent()->getParent();
@@ -233,8 +233,8 @@ INITIALIZE_PASS(Reassociate, "reassociate",
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
-/// isReassociableOp - Return true if V is an instruction of the specified
-/// opcode and if it only has one use.
+/// Return true if V is an instruction of the specified opcode and if it
+/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode &&
@@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- Type *Ty = V->getType();
- if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
- (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
- !BinaryOperator::isFNeg(I)))
+ if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+ !BinaryOperator::isFNeg(I))
++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
@@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) {
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -384,12 +382,11 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
}
}
-/// LowerNegateToMultiply - Replace 0-X with X*-1.
-///
+/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType();
- Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
- : ConstantFP::get(Ty, -1.0);
+ Constant *NegOne = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -399,8 +396,8 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
return Res;
}
-/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
-/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
+/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
+/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
@@ -410,7 +407,7 @@ static unsigned CarmichaelShift(unsigned Bitwidth) {
return Bitwidth - 2;
}
-/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
+/// Add the extra weight 'RHS' to the existing weight 'LHS',
/// reducing the combined weight using any special properties of the operation.
/// The existing weight LHS represents the computation X op X op ... op X where
/// X occurs LHS times. The combined weight represents X op X op ... op X with
@@ -492,7 +489,7 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
typedef std::pair<Value*, APInt> RepeatedValue;
-/// LinearizeExprTree - Given an associative binary expression, return the leaf
+/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
@@ -742,8 +739,8 @@ static bool LinearizeExprTree(BinaryOperator *I,
return Changed;
}
-// RewriteExprTree - Now that the operands for this expression tree are
-// linearized and optimized, emit them in-order.
+/// Now that the operands for this expression tree are
+/// linearized and optimized, emit them in-order.
void Reassociate::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
@@ -872,7 +869,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
- if (NewOp->getType()->isFloatingPointTy())
+ if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
@@ -912,15 +909,18 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
RedoInsts.insert(NodesToRewrite[i]);
}
-/// NegateValue - Insert instructions before the instruction pointed to by BI,
+/// Insert instructions before the instruction pointed to by BI,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
static Value *NegateValue(Value *V, Instruction *BI) {
- if (ConstantFP *C = dyn_cast<ConstantFP>(V))
- return ConstantExpr::getFNeg(C);
- if (Constant *C = dyn_cast<Constant>(V))
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->getType()->isFPOrFPVectorTy()) {
+ return ConstantExpr::getFNeg(C);
+ }
return ConstantExpr::getNeg(C);
+ }
+
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
@@ -984,8 +984,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
return CreateNeg(V, V->getName() + ".neg", BI, BI);
}
-/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
-/// X-Y into (X + -Y).
+/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
@@ -1014,9 +1013,8 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
return false;
}
-/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
-/// only used by an add, transform this into (X+(0-Y)) to promote better
-/// reassociation.
+/// If we have (X-Y), and if either X is an add, or if this is only used by an
+/// add, transform this into (X+(0-Y)) to promote better reassociation.
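+/// For example (illustrative): ((A + B) - C) becomes ((A + B) + (0 - C)),
+/// after which the adds can be reassociated and the negation pushed inward.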
static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
@@ -1038,9 +1036,8 @@ static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
return New;
}
-/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
-/// by one, change this into a multiply by a constant to assist with further
-/// reassociation.
+/// If this is a shift of a reassociable multiply or is used by one, change
+/// this into a multiply by a constant to assist with further reassociation.
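+/// For example (illustrative IR): "%r = shl i32 %x, 3" becomes
+/// "%r = mul i32 %x, 8".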
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
@@ -1065,10 +1062,9 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
return Mul;
}
-/// FindInOperandList - Scan backwards and forwards among values with the same
-/// rank as element i to see if X exists. If X does not exist, return i. This
-/// is useful when scanning for 'x' when we see '-x' because they both get the
-/// same rank.
+/// Scan backwards and forwards among values with the same rank as element i
+/// to see if X exists. If X does not exist, return i. This is useful when
+/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
Value *X) {
unsigned XRank = Ops[i].Rank;
@@ -1093,7 +1089,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
return i;
}
-/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakVH> &Ops){
@@ -1105,8 +1101,8 @@ static Value *EmitAddTreeOfValues(Instruction *I,
return CreateAdd(V2, V1, "tmp", I, I);
}
-/// RemoveFactorFromExpression - If V is an expression tree that is a
-/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// If V is an expression tree that is a multiplication sequence,
+/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
@@ -1178,8 +1174,8 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
return V;
}
-/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
-/// add its operands as factors, otherwise add V to the list of factors.
+/// If V is a single-use multiply, recursively add its operands as factors,
+/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
@@ -1196,10 +1192,9 @@ static void FindSingleUseMultiplyFactors(Value *V,
FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
}
-/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
-/// instruction. This optimizes based on identities. If it can be reduced to
-/// a single Value, it is returned, otherwise the Ops list is mutated as
-/// necessary.
+/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
+/// This optimizes based on identities. If it can be reduced to a single Value,
+/// it is returned, otherwise the Ops list is mutated as necessary.
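+/// For example: (X & ~X) simplifies to 0, (X | ~X) to all-ones, and duplicate
+/// operands of a xor cancel in pairs.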
static Value *OptimizeAndOrXor(unsigned Opcode,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
@@ -1489,7 +1484,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return nullptr;
}
-/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
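/// For example (illustrative): X + X + X + Y becomes X*3 + Y, and common
/// factors are pulled out, e.g. A*B + A*C becomes A*(B + C).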
Value *Reassociate::OptimizeAdd(Instruction *I,
@@ -1517,8 +1512,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Insert a new multiply.
Type *Ty = TheOp->getType();
- Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
- : ConstantFP::get(Ty, NumFound);
+ Constant *C = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
@@ -1658,7 +1653,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst =
- I->getType()->isIntegerTy()
+ I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
@@ -1789,7 +1784,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
Value *LHS = Ops.pop_back_val();
do {
- if (LHS->getType()->isIntegerTy())
+ if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -1942,8 +1937,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
return nullptr;
}
-/// EraseInst - Zap the given instruction, adding interesting operands to the
-/// work list.
+/// Zap the given instruction, adding interesting operands to the work list.
void Reassociate::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
@@ -1988,7 +1982,7 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
Constant *C = C0 ? C0 : C1;
unsigned ConstIdx = C0 ? 0 : 1;
if (auto *CI = dyn_cast<ConstantInt>(C)) {
- if (!CI->isNegative())
+ if (!CI->isNegative() || CI->isMinValue(true))
return nullptr;
} else if (auto *CF = dyn_cast<ConstantFP>(C)) {
if (!CF->isNegative())
@@ -2057,7 +2051,7 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
return NI;
}
-/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
+/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void Reassociate::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
@@ -2087,8 +2081,9 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (I->isCommutative())
canonicalizeOperands(I);
- // Don't optimize vector instructions.
- if (I->getType()->isVectorTy())
+ // TODO: We should optimize vector Xor instructions, but they are
+ // currently unsupported.
+ if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
@@ -2167,9 +2162,6 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
void Reassociate::ReassociateExpression(BinaryOperator *I) {
- assert(!I->getType()->isVectorTy() &&
- "Reassociation of vector instructions is not supported.");
-
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
@@ -2192,7 +2184,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
// the vector.
std::stable_sort(Ops.begin(), Ops.end());
- // OptimizeExpression - Now that we have the expression tree in a convenient
+ // Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
if (V == I)
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
new file mode 100644
index 0000000..6cf765a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -0,0 +1,2506 @@
+//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Rewrite an existing set of gc.statepoints such that they make potential
+// relocations performed by the garbage collector explicit in the IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+#define DEBUG_TYPE "rewrite-statepoints-for-gc"
+
+using namespace llvm;
+
+// Print tracing output
+static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
+ cl::init(false));
+
+// Print the liveset found at the insert location
+static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
+ cl::init(false));
+// Print out the base pointers for debugging
+static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
+ cl::init(false));
+
+// Cost threshold measuring when it is profitable to rematerialize a value
+// instead of relocating it.
+static cl::opt<unsigned>
+RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
+ cl::init(6));
+
+#ifdef XDEBUG
+static bool ClobberNonLive = true;
+#else
+static bool ClobberNonLive = false;
+#endif
+static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
+ cl::location(ClobberNonLive),
+ cl::Hidden);
+
+namespace {
+struct RewriteStatepointsForGC : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ RewriteStatepointsForGC() : FunctionPass(ID) {
+ initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We add and rewrite a bunch of instructions, but don't really do much
+ // else. We could in theory preserve a lot more analyses here.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+};
+} // namespace
+
+char RewriteStatepointsForGC::ID = 0;
+
+FunctionPass *llvm::createRewriteStatepointsForGCPass() {
+ return new RewriteStatepointsForGC();
+}
+
+INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
+ "Make relocations explicit at statepoints", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
+ "Make relocations explicit at statepoints", false, false)
+
+namespace {
+struct GCPtrLivenessData {
+ /// Values defined in this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+  /// Values used in this block (and thus live); does not include values
+ /// killed within this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+
+ /// Values live into this basic block (i.e. used by any
+ /// instruction in this basic block or ones reachable from here)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+
+ /// Values live out of this basic block (i.e. live into
+ /// any successor block)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+};
+
+// The type of the internal cache used inside the findBasePointers family
+// of functions. From the caller's perspective, this is an opaque type and
+// should not be inspected.
+//
+// In the actual implementation this caches two relations:
+// - The base relation itself (i.e. this pointer is based on that one)
+// - The base defining value relation (i.e. before base_phi insertion)
+// Generally, after the execution of a full findBasePointer call, only the
+// base relation will remain. Internally, we add a mixture of the two
+// types, then update all entries of the second type to the first.
+typedef DenseMap<Value *, Value *> DefiningValueMapTy;
+typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
+typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+
+struct PartiallyConstructedSafepointRecord {
+  /// The set of values known to be live across this safepoint
+ StatepointLiveSetTy liveset;
+
+ /// Mapping from live pointers to a base-defining-value
+ DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+
+ /// The *new* gc.statepoint instruction itself. This produces the token
+ /// that normal path gc.relocates and the gc.result are tied to.
+ Instruction *StatepointToken;
+
+ /// Instruction to which exceptional gc relocates are attached
+ /// Makes it easier to iterate through them during relocationViaAlloca.
+ Instruction *UnwindToken;
+
+  /// Record live values that we rematerialized instead of relocating.
+  /// They are not included in the 'liveset' field.
+  /// Maps each rematerialized copy to its original value.
+ RematerializedValueMapTy RematerializedValues;
+};
+}
+
+/// Compute the live-in set for every basic block in the function
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data);
+
+/// Given results from the dataflow liveness computation, find the set of live
+/// Values at a particular instruction.
+static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &out);
+
+// TODO: Once we can get to the GCStrategy, this becomes
+// Optional<bool> isGCManagedPointer(const Value *V) const override {
+
+static bool isGCPointerType(const Type *T) {
+ if (const PointerType *PT = dyn_cast<PointerType>(T))
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+ // updated and that no other pointer does.
+ return (1 == PT->getAddressSpace());
+ return false;
+}
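+// For example (illustrative): an "i8 addrspace(1)*" value is treated as a
+// GC-managed pointer here, while a plain "i8*" is not.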
+
+// Return true if this type is one which a) is a gc pointer or contains a GC
+// pointer and b) is of a type this code expects to encounter as a live value.
+// (The insertion code will assert that a type which matches (a) and not (b)
+// is not encountered.)
+static bool isHandledGCPointerType(Type *T) {
+ // We fully support gc pointers
+ if (isGCPointerType(T))
+ return true;
+ // We partially support vectors of gc pointers. The code will assert if it
+ // can't handle something.
+ if (auto VT = dyn_cast<VectorType>(T))
+ if (isGCPointerType(VT->getElementType()))
+ return true;
+ return false;
+}
+
+#ifndef NDEBUG
+/// Returns true if this type contains a gc pointer whether we know how to
+/// handle that type or not.
+static bool containsGCPtrType(Type *Ty) {
+ if (isGCPointerType(Ty))
+ return true;
+ if (VectorType *VT = dyn_cast<VectorType>(Ty))
+ return isGCPointerType(VT->getScalarType());
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
+ return containsGCPtrType(AT->getElementType());
+ if (StructType *ST = dyn_cast<StructType>(Ty))
+ return std::any_of(
+ ST->subtypes().begin(), ST->subtypes().end(),
+ [](Type *SubType) { return containsGCPtrType(SubType); });
+ return false;
+}
+
+// Returns true if this is a type which a) is a gc pointer or contains a GC
+// pointer and b) is of a type which the code doesn't expect (i.e. first class
+// aggregates). Used to trip assertions.
+static bool isUnhandledGCPointerType(Type *Ty) {
+ return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
+}
+#endif
+
+static bool order_by_name(llvm::Value *a, llvm::Value *b) {
+ if (a->hasName() && b->hasName()) {
+ return -1 == a->getName().compare(b->getName());
+ } else if (a->hasName() && !b->hasName()) {
+ return true;
+ } else if (!a->hasName() && b->hasName()) {
+ return false;
+ } else {
+ // Better than nothing, but not stable
+ return a < b;
+ }
+}
+
+// Conservatively identifies any definitions which might be live at the
+// given instruction. The analysis is performed immediately before the
+// given instruction. Values defined by that instruction are not considered
+// live. Values used by that instruction are considered live.
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
+ const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
+ Instruction *inst = CS.getInstruction();
+
+ StatepointLiveSetTy liveset;
+ findLiveSetAtInst(inst, OriginalLivenessData, liveset);
+
+ if (PrintLiveSet) {
+ // Note: This output is used by several of the test cases
+    // The order of elements in a set is not stable, so put them in a vector
+    // and sort by name.
+ SmallVector<Value *, 64> temp;
+ temp.insert(temp.end(), liveset.begin(), liveset.end());
+ std::sort(temp.begin(), temp.end(), order_by_name);
+ errs() << "Live Variables:\n";
+ for (Value *V : temp) {
+ errs() << " " << V->getName(); // no newline
+ V->dump();
+ }
+ }
+ if (PrintLiveSetSize) {
+ errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+ errs() << "Number live values: " << liveset.size() << "\n";
+ }
+ result.liveset = liveset;
+}
+
+static Value *findBaseDefiningValue(Value *I);
+
+/// If we can trivially determine that the index specified in the given vector
+/// is a base pointer, return it. In cases where the entire vector is known to
+/// consist of base pointers, the entire vector will be returned. This
+/// indicates that the relevant extractelement is a valid base pointer and
+/// should be used directly.
+static Value *findBaseOfVector(Value *I, Value *Index) {
+ assert(I->getType()->isVectorTy() &&
+ cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // Each case parallels findBaseDefiningValue below, see that code for
+ // detailed motivation.
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+ return I;
+
+ // We shouldn't see the address of a global as a vector value?
+ assert(!isa<GlobalVariable>(I) &&
+ "unexpected global variable found in base of vector");
+
+  // Inlining can introduce a phi node that contains undef if the callee has
+  // multiple returns.
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with partially optimized
+ // code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ assert(Con->isNullValue() && "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (isa<LoadInst>(I))
+ return I;
+
+ // For an insert element, we might be able to look through it if we know
+ // something about the indexes, but if the indices are arbitrary values, we
+ // can't without much more extensive scalarization.
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
+ Value *InsertIndex = IEI->getOperand(2);
+    // This index is inserting the value; look for its base.
+ if (InsertIndex == Index)
+ return findBaseDefiningValue(IEI->getOperand(1));
+ // Both constant, and can't be equal per above. This insert is definitely
+ // not relevant, look back at the rest of the vector and keep trying.
+ if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+ return findBaseOfVector(IEI->getOperand(0), Index);
+ }
+
+ // Note: This code is currently rather incomplete. We are essentially only
+ // handling cases where the vector element is trivially a base pointer. We
+ // need to update the entire base pointer construction algorithm to know how
+ // to track vector elements and potentially scalarize, but the case which
+ // would motivate the work hasn't shown up in real workloads yet.
+ llvm_unreachable("no base found for vector element");
+}
+
+/// Helper function for findBasePointer - Will return a value which either a)
+/// defines the base pointer for the input or b) blocks the simple search
+/// (i.e. a PHI or Select of two derived pointers)
+static Value *findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // This case is a bit of a hack - it only handles extracts from vectors which
+ // trivially contain only base pointers or cases where we can directly match
+ // the index of the original extract element to an insertion into the vector.
+ // See note inside the function for how to improve this.
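+  // For example (illustrative IR; the names are made up):
+  //   %vec = insertelement <2 x i64 addrspace(1)*> %v, i64 addrspace(1)* %p, i32 0
+  //   %val = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+  // Here the search can look through the vector and continue from %p.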
+ if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
+ Value *VectorOperand = EEI->getVectorOperand();
+ Value *Index = EEI->getIndexOperand();
+ Value *VectorBase = findBaseOfVector(VectorOperand, Index);
+ // If the result returned is a vector, we know the entire vector must
+ // contain base pointers. In that case, the extractelement is a valid base
+ // for this value.
+ if (VectorBase->getType()->isVectorTy())
+ return EEI;
+ // Otherwise, we needed to look through the vector to find the base for
+ // this particular element.
+ assert(VectorBase->getType()->isPointerTy());
+ return VectorBase;
+ }
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+    // We should never have reached here if this argument isn't a gc value.
+ return I;
+
+ if (isa<GlobalVariable>(I))
+ // base case
+ return I;
+
+  // Inlining can introduce a phi node that contains undef if the callee has
+  // multiple returns.
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with
+ // partially optimized code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ // Note: Finding a constant base for something marked for relocation
+    // doesn't really make sense. The most likely case is either a) someone
+    // screwed up the address space usage or b) you're validating against
+    // compiled C++ code without the proper separation. The only real
+    // exception is a null pointer. You could have generic code written to
+    // index off a potentially null value and have proven it null. We also use
+ // null pointers in dead paths of relocation phis (which we might later
+ // want to find a base pointer for).
+ assert(isa<ConstantPointerNull>(Con) &&
+ "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Value *Def = CI->stripPointerCasts();
+ // If we find a cast instruction here, it means we've found a cast which is
+ // not simply a pointer cast (i.e. an inttoptr). We don't know how to
+ // handle int->ptr conversion.
+ assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
+ return findBaseDefiningValue(Def);
+ }
+
+ if (isa<LoadInst>(I))
+    return I; // The value loaded is a gc base itself
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ // The base of this GEP is the base
+ return findBaseDefiningValue(GEP->getPointerOperand());
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_result_ptr:
+ default:
+ // fall through to general call handling
+ break;
+ case Intrinsic::experimental_gc_statepoint:
+ case Intrinsic::experimental_gc_result_float:
+ case Intrinsic::experimental_gc_result_int:
+ llvm_unreachable("these don't produce pointers");
+ case Intrinsic::experimental_gc_relocate: {
+ // Rerunning safepoint insertion after safepoints are already
+ // inserted is not supported. It could probably be made to work,
+ // but why are you doing this? There's no good reason.
+ llvm_unreachable("repeat safepoint insertion is not supported");
+ }
+ case Intrinsic::gcroot:
+ // Currently, this mechanism hasn't been extended to work with gcroot.
+ // There's no reason it couldn't be, but I haven't thought about the
+ // implications much.
+ llvm_unreachable(
+ "interaction with the gcroot mechanism is not supported");
+ }
+ }
+ // We assume that functions in the source language only return base
+ // pointers. This should probably be generalized via attributes to support
+ // both source language and internal functions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ return I;
+
+ // I have absolutely no idea how to implement this part yet. It's not
+  // necessarily hard, I just haven't really looked at it yet.
+ assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
+
+ if (isa<AtomicCmpXchgInst>(I))
+    // A CAS is effectively an atomic store and load combined under a
+ // predicate. From the perspective of base pointers, we just treat it
+ // like a load.
+ return I;
+
+ assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
+ "binary ops which don't apply to pointers");
+
+ // The aggregate ops. Aggregates can either be in the heap or on the
+ // stack, but in either case, this is simply a field load. As a result,
+  // this defines the base just like a load does.
+ if (isa<ExtractValueInst>(I))
+ return I;
+
+ // We should never see an insert vector since that would require we be
+ // tracing back a struct value not a pointer value.
+ assert(!isa<InsertValueInst>(I) &&
+ "Base pointer for a struct is meaningless");
+
+ // The last two cases here don't return a base pointer. Instead, they
+  // return a value which dynamically selects from among several derived
+  // pointers (each potentially with its own base). It's the job of
+ // the caller to resolve these.
+ assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+ "missing instruction case in findBaseDefiningValing");
+ return I;
+}
+
+/// Returns the base defining value for this value.
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
+ Value *&Cached = Cache[I];
+ if (!Cached) {
+ Cached = findBaseDefiningValue(I);
+ }
+ assert(Cache[I] != nullptr);
+
+ if (TraceLSP) {
+ dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
+ << "\n";
+ }
+ return Cached;
+}
+
+/// Return a base pointer for this value if known. Otherwise, return its
+/// base defining value.
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
+ Value *Def = findBaseDefiningValueCached(I, Cache);
+ auto Found = Cache.find(Def);
+ if (Found != Cache.end()) {
+ // Either a base-of relation, or a self reference. Caller must check.
+ return Found->second;
+ }
+ // Only a BDV available
+ return Def;
+}
+
+/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
+/// is it known to be a base pointer, or do we need to continue searching?
+static bool isKnownBaseResult(Value *V) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
+ // no recursion possible
+ return true;
+ }
+ if (isa<Instruction>(V) &&
+ cast<Instruction>(V)->getMetadata("is_base_value")) {
+ // This is a previously inserted base phi or select. We know
+ // that this is a base value.
+ return true;
+ }
+
+ // We need to keep searching
+ return false;
+}
+
+// TODO: find a better name for this
+namespace {
+class PhiState {
+public:
+ enum Status { Unknown, Base, Conflict };
+
+ PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
+ assert(status != Base || b);
+ }
+ PhiState(Value *b) : status(Base), base(b) {}
+ PhiState() : status(Unknown), base(nullptr) {}
+
+ Status getStatus() const { return status; }
+ Value *getBase() const { return base; }
+
+ bool isBase() const { return getStatus() == Base; }
+ bool isUnknown() const { return getStatus() == Unknown; }
+ bool isConflict() const { return getStatus() == Conflict; }
+
+ bool operator==(const PhiState &other) const {
+ return base == other.base && status == other.status;
+ }
+
+ bool operator!=(const PhiState &other) const { return !(*this == other); }
+
+ void dump() {
+ errs() << status << " (" << base << " - "
+ << (base ? base->getName() : "nullptr") << "): ";
+ }
+
+private:
+ Status status;
+ Value *base; // non null only if status == base
+};
+
+typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
+// Values of type PhiState form a lattice, and this is a helper
+// class that implements the meet operation. The meat of the meet
+// operation is implemented in MeetPhiStates::pureMeet
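+// For example: meet(Unknown, s) = s; meet(Base(b1), Base(b2)) is Base(b1) when
+// b1 == b2 and Conflict otherwise; and meet(Conflict, s) = Conflict.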
+class MeetPhiStates {
+public:
+ // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
+ explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
+ : phiStates(phiStates) {}
+
+ // Destructively meet the current result with the base V. V can
+ // either be a merge instruction (SelectInst / PHINode), in which
+ // case its status is looked up in the phiStates map; or a regular
+ // SSA value, in which case it is assumed to be a base.
+ void meetWith(Value *V) {
+ PhiState otherState = getStateForBDV(V);
+ assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
+ MeetPhiStates::pureMeet(currentResult, otherState)) &&
+ "math is wrong: meet does not commute!");
+ currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+ }
+
+ PhiState getResult() const { return currentResult; }
+
+private:
+ const ConflictStateMapTy &phiStates;
+ PhiState currentResult;
+
+ /// Return a phi state for a base defining value. We'll generate a new
+ /// base state for known bases and expect to find a cached state otherwise
+ PhiState getStateForBDV(Value *baseValue) {
+ if (isKnownBaseResult(baseValue)) {
+ return PhiState(baseValue);
+ } else {
+ return lookupFromMap(baseValue);
+ }
+ }
+
+ PhiState lookupFromMap(Value *V) {
+ auto I = phiStates.find(V);
+ assert(I != phiStates.end() && "lookup failed!");
+ return I->second;
+ }
+
+ static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
+ switch (stateA.getStatus()) {
+ case PhiState::Unknown:
+ return stateB;
+
+ case PhiState::Base:
+ assert(stateA.getBase() && "can't be null");
+ if (stateB.isUnknown())
+ return stateA;
+
+ if (stateB.isBase()) {
+ if (stateA.getBase() == stateB.getBase()) {
+ assert(stateA == stateB && "equality broken!");
+ return stateA;
+ }
+ return PhiState(PhiState::Conflict);
+ }
+ assert(stateB.isConflict() && "only three states!");
+ return PhiState(PhiState::Conflict);
+
+ case PhiState::Conflict:
+ return stateA;
+ }
+ llvm_unreachable("only three states!");
+ }
+};
+}
+/// For a given value or instruction, figure out what base ptr it's derived
+/// from. For gc objects, this is simply itself. On success, returns a value
+/// which is the base pointer. (This is reliable and can be used for
+/// relocation.) On failure, returns nullptr.
+static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
+ Value *def = findBaseOrBDV(I, cache);
+
+ if (isKnownBaseResult(def)) {
+ return def;
+ }
+
+ // Here's the rough algorithm:
+ // - For every SSA value, construct a mapping to either an actual base
+ // pointer or a PHI which obscures the base pointer.
+ // - Construct a mapping from PHI to unknown TOP state. Use an
+ // optimistic algorithm to propagate base pointer information. Lattice
+ // looks like:
+ // UNKNOWN
+ // b1 b2 b3 b4
+ // CONFLICT
+ // When algorithm terminates, all PHIs will either have a single concrete
+ // base or be in a conflict state.
+ // - For every conflict, insert a dummy PHI node without arguments. Add
+ // these to the base[Instruction] = BasePtr mapping. For every
+ // non-conflict, add the actual base.
+ // - For every conflict, add arguments for the base[a] of each input
+ // arguments.
+ //
+ // Note: A simpler form of this would be to add the conflict form of all
+ // PHIs without running the optimistic algorithm. This would be
+  //   analogous to pessimistic data flow and would likely lead to an
+ // overall worse solution.
+
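+  // For example (illustrative): for "%p = phi ... [ %a, %L ], [ %b, %R ]" where
+  // %a and %b have distinct bases, %p ends in the Conflict state and a new
+  // base_phi is inserted for it below.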
+ ConflictStateMapTy states;
+ states[def] = PhiState();
+ // Recursively fill in all phis & selects reachable from the initial one
+ // for which we don't already know a definite base value for
+ // TODO: This should be rewritten with a worklist
+ bool done = false;
+ while (!done) {
+ done = true;
+ // Since we're adding elements to 'states' as we run, we can't keep
+ // iterators into the set.
+ SmallVector<Value *, 16> Keys;
+ Keys.reserve(states.size());
+ for (auto Pair : states) {
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ for (Value *v : Keys) {
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ if (PHINode *phi = dyn_cast<PHINode>(v)) {
+ assert(phi->getNumIncomingValues() > 0 &&
+ "zero input phis are illegal");
+ for (Value *InVal : phi->incoming_values()) {
+ Value *local = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ }
+ } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
+ Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ local = findBaseOrBDV(sel->getFalseValue(), cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ }
+ }
+ }
+
+ if (TraceLSP) {
+ errs() << "States after initialization:\n";
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+ state.dump();
+ v->dump();
+ }
+ }
+
+ // TODO: come back and revisit the state transitions around inputs which
+ // have reached conflict state. The current version seems too conservative.
+
+ bool progress = true;
+ while (progress) {
+#ifndef NDEBUG
+ size_t oldSize = states.size();
+#endif
+ progress = false;
+ // We're only changing keys in this loop, thus safe to keep iterators
+ for (auto Pair : states) {
+ MeetPhiStates calculateMeet(states);
+ Value *v = Pair.first;
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ if (SelectInst *select = dyn_cast<SelectInst>(v)) {
+ calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
+ calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
+ } else
+ for (Value *Val : cast<PHINode>(v)->incoming_values())
+ calculateMeet.meetWith(findBaseOrBDV(Val, cache));
+
+ PhiState oldState = states[v];
+ PhiState newState = calculateMeet.getResult();
+ if (oldState != newState) {
+ progress = true;
+ states[v] = newState;
+ }
+ }
+
+ assert(oldSize <= states.size());
+ assert(oldSize == states.size() || progress);
+ }
+
+ if (TraceLSP) {
+ errs() << "States after meet iteration:\n";
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+ state.dump();
+ v->dump();
+ }
+ }
+
+ // Insert Phis for all conflicts
+ // We want to keep naming deterministic in the loop that follows, so
+ // sort the keys before iteration. This is useful in allowing us to
+ // write stable tests. Note that there is no invalidation issue here.
+ SmallVector<Value *, 16> Keys;
+ Keys.reserve(states.size());
+ for (auto Pair : states) {
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ std::sort(Keys.begin(), Keys.end(), order_by_name);
+ // TODO: adjust naming patterns to avoid this order of iteration dependency
+ for (Value *V : Keys) {
+ Instruction *v = cast<Instruction>(V);
+ PhiState state = states[V];
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!state.isConflict())
+ continue;
+
+ if (isa<PHINode>(v)) {
+ int num_preds =
+ std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
+ assert(num_preds > 0 && "how did we reach here");
+ PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ phi->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, phi);
+ } else {
+ SelectInst *sel = cast<SelectInst>(v);
+ // The undef will be replaced later
+ UndefValue *undef = UndefValue::get(sel->getType());
+ SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
+ undef, "base_select", sel);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ basesel->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, basesel);
+ }
+ }
+
+ // Fixup all the inputs of the new PHIs
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!state.isConflict())
+ continue;
+
+ if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
+ PHINode *phi = cast<PHINode>(v);
+ unsigned NumPHIValues = phi->getNumIncomingValues();
+ for (unsigned i = 0; i < NumPHIValues; i++) {
+ Value *InVal = phi->getIncomingValue(i);
+ BasicBlock *InBB = phi->getIncomingBlock(i);
+
+ // If we've already seen InBB, add the same incoming value
+ // we added for it earlier. The IR verifier requires phi
+ // nodes with multiple entries from the same basic block
+ // to have the same incoming value for each of those
+ // entries. If we don't do this check here and basephi
+ // has a different type than base, we'll end up adding two
+ // bitcasts (and hence two distinct values) as incoming
+ // values for the same basic block.
+
+ int blockIndex = basephi->getBasicBlockIndex(InBB);
+ if (blockIndex != -1) {
+ Value *oldBase = basephi->getIncomingValue(blockIndex);
+ basephi->addIncoming(oldBase, InBB);
+#ifndef NDEBUG
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+
+          // In essence, this assert states: the only way two
+ // values incoming from the same basic block may be
+ // different is by being different bitcasts of the same
+ // value. A cleanup that remains TODO is changing
+ // findBaseOrBDV to return an llvm::Value of the correct
+ // type (and still remain pure). This will remove the
+ // need to add bitcasts.
+ assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ "sanity -- findBaseOrBDV should be pure!");
+#endif
+ continue;
+ }
+
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basephi->getType()) {
+ base = new BitCastInst(base, basephi->getType(), "cast",
+ InBB->getTerminator());
+ }
+ basephi->addIncoming(base, InBB);
+ }
+ assert(basephi->getNumIncomingValues() == NumPHIValues);
+ } else {
+ SelectInst *basesel = cast<SelectInst>(state.getBase());
+ SelectInst *sel = cast<SelectInst>(v);
+ // Operand 1 & 2 are true, false path respectively. TODO: refactor to
+ // something more safe and less hacky.
+ for (int i = 1; i <= 2; i++) {
+ Value *InVal = sel->getOperand(i);
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basesel->getType()) {
+ base = new BitCastInst(base, basesel->getType(), "cast", basesel);
+ }
+ basesel->setOperand(i, base);
+ }
+ }
+ }
+
+ // Cache all of our results so we can cheaply reuse them
+ // NOTE: This is actually two caches: one of the base defining value
+ // relation and one of the base pointer relation! FIXME
+ for (auto item : states) {
+ Value *v = item.first;
+ Value *base = item.second.getBase();
+ assert(v && base);
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+
+ if (TraceLSP) {
+ std::string fromstr =
+ cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
+ : "none";
+ errs() << "Updating base value cache"
+ << " for: " << (v->hasName() ? v->getName() : "")
+ << " from: " << fromstr
+ << " to: " << (base->hasName() ? base->getName() : "") << "\n";
+ }
+
+ assert(isKnownBaseResult(base) &&
+ "must be something we 'know' is a base pointer");
+ if (cache.count(v)) {
+      // Once we transition from the BDV relation being stored in the cache to
+ // the base relation being stored, it must be stable
+ assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+ "base relation should be stable");
+ }
+ cache[v] = base;
+ }
+ assert(cache.find(def) != cache.end());
+ return cache[def];
+}
+
+// For a set of live pointers (base and/or derived), identify the base
+// pointer of the object which they are derived from. This routine will
+// mutate the IR graph as needed to make the 'base' pointer live at the
+// definition site of 'derived'. This ensures that any use of 'derived' can
+// also use 'base'. This may involve the insertion of a number of
+// additional PHI nodes.
+//
+// preconditions: live is a set of pointer type Values
+//
+// side effects: may insert PHI nodes into the existing CFG, will preserve
+// CFG, will not remove or mutate any existing nodes
+//
+// post condition: PointerToBase contains one (derived, base) pair for every
+// pointer in live. Note that derived can be equal to base if the original
+// pointer was a base pointer.
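+// For example (illustrative): for "%d = getelementptr i8, i8 addrspace(1)* %b, i64 8"
+// with both %b and %d live, the map gains the pairs (%d -> %b) and (%b -> %b).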
+static void
+findBasePointers(const StatepointLiveSetTy &live,
+ DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DominatorTree *DT, DefiningValueMapTy &DVCache) {
+ // For the naming of values inserted to be deterministic - which makes for
+ // much cleaner and more stable tests - we need to assign an order to the
+ // live values. DenseSets do not provide a deterministic order across runs.
+ SmallVector<Value *, 64> Temp;
+ Temp.insert(Temp.end(), live.begin(), live.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *ptr : Temp) {
+ Value *base = findBasePointer(ptr, DVCache);
+ assert(base && "failed to find base pointer");
+ PointerToBase[ptr] = base;
+ assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
+ DT->dominates(cast<Instruction>(base)->getParent(),
+ cast<Instruction>(ptr)->getParent())) &&
+ "The base we found better dominate the derived pointer");
+
+ // If you see this trip and like to live really dangerously, the code should
+ // be correct, just with idioms the verifier can't handle. You can try
+    // disabling the verifier at your own substantial risk.
+ assert(!isa<ConstantPointerNull>(base) &&
+ "the relocation code needs adjustment to handle the relocation of "
+ "a null pointer constant without causing false positives in the "
+ "safepoint ir verifier.");
+ }
+}
+
+/// Find the required base pointers (and adjust the live set) for the given
+/// parse point.
+static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result) {
+ DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+ findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
+
+ if (PrintBasePointers) {
+ // Note: Need to print these in a stable order since this is checked in
+ // some tests.
+ errs() << "Base Pairs (w/o Relocation):\n";
+ SmallVector<Value *, 64> Temp;
+ Temp.reserve(PointerToBase.size());
+ for (auto Pair : PointerToBase) {
+ Temp.push_back(Pair.first);
+ }
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *Ptr : Temp) {
+ Value *Base = PointerToBase[Ptr];
+ errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
+ << "\n";
+ }
+ }
+
+ result.PointerToBase = PointerToBase;
+}
+
+/// Given an updated version of the dataflow liveness results, update the
+/// liveset and base pointer maps for the call site CS.
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result);
+
+static void recomputeLiveInValues(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ // TODO-PERF: reuse the original liveness, then simply run the dataflow
+  // again. The old values are still live and will help it stabilize quickly.
+ GCPtrLivenessData RevisedLivenessData;
+ computeLiveInValues(DT, F, RevisedLivenessData);
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ const CallSite &CS = toUpdate[i];
+ recomputeLiveInValues(RevisedLivenessData, CS, info);
+ }
+}
+
+// When inserting gc.relocate calls, we need to ensure there are no uses
+// of the original value between the gc.statepoint and the gc.relocate call.
+// One case which can arise is a phi node at the start of a successor block.
+// We also need to be able to insert the gc.relocates only on the path which
+// goes through the statepoint. We might need to split an edge to make this
+// possible.
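+// For example (illustrative): if the invoke's normal destination has another
+// predecessor, SplitBlockPredecessors below gives the statepoint path its own
+// block, into which the gc.relocates can then be inserted.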
+static BasicBlock *
+normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
+ DominatorTree *DT = nullptr;
+ if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTP->getDomTree();
+
+ BasicBlock *Ret = BB;
+ if (!BB->getUniquePredecessor()) {
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
+ }
+
+  // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
+ // from it
+ FoldSingleEntryPHINodes(Ret);
+ assert(!isa<PHINode>(Ret->begin()));
+
+ // At this point, we can safely insert a gc.relocate as the first instruction
+ // in Ret if needed.
+ return Ret;
+}
+
+static int find_index(ArrayRef<Value *> livevec, Value *val) {
+ auto itr = std::find(livevec.begin(), livevec.end(), val);
+ assert(livevec.end() != itr);
+ size_t index = std::distance(livevec.begin(), itr);
+ assert(index < livevec.size());
+ return index;
+}
+
+// Create a new attribute set containing only attributes which can be
+// transferred from the original call to the safepoint.
+static AttributeSet legalizeCallAttributes(AttributeSet AS) {
+ AttributeSet ret;
+
+ for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
+ unsigned index = AS.getSlotIndex(Slot);
+
+ if (index == AttributeSet::ReturnIndex ||
+ index == AttributeSet::FunctionIndex) {
+
+ for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
+ ++it) {
+ Attribute attr = *it;
+
+ // Do not allow certain attributes - just skip them
+ // Safepoint can not be read only or read none.
+ if (attr.hasAttribute(Attribute::ReadNone) ||
+ attr.hasAttribute(Attribute::ReadOnly))
+ continue;
+
+ ret = ret.addAttributes(
+ AS.getContext(), index,
+ AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
+ }
+ }
+
+ // Just skip parameter attributes for now
+ }
+
+ return ret;
+}
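+// For example: a "readonly" attribute on the original call is dropped above,
+// since a safepoint may relocate heap pointers and so cannot be treated as
+// read-only.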
+
+/// Helper function to place all gc relocates necessary for the given
+/// statepoint.
+/// Inputs:
+/// liveVariables - list of variables to be relocated.
+/// liveStart - index of the first live variable.
+/// basePtrs - base pointers.
+/// statepointToken - statepoint instruction to which relocates should be
+/// bound.
+/// Builder - LLVM IR builder to be used to construct new calls.
+static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
+ const int LiveStart,
+ ArrayRef<llvm::Value *> BasePtrs,
+ Instruction *StatepointToken,
+ IRBuilder<> Builder) {
+ SmallVector<Instruction *, 64> NewDefs;
+ NewDefs.reserve(LiveVariables.size());
+
+ Module *M = StatepointToken->getParent()->getParent()->getParent();
+
+ for (unsigned i = 0; i < LiveVariables.size(); i++) {
+ // We generate a (potentially) unique declaration for every pointer type
+    // combination. This results in some blowup in the function declarations in
+    // the IR, but removes the need for argument bitcasts, which shrinks the IR
+ // greatly and makes it much more readable.
+ SmallVector<Type *, 1> Types; // one per 'any' type
+    // All gc_relocates are set to the i8 addrspace(1)* type. This could help avoid
+ // cases where the actual value's type mangling is not supported by llvm. A
+ // bitcast is added later to convert gc_relocate to the actual value's type.
+ Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
+ Value *GCRelocateDecl = Intrinsic::getDeclaration(
+ M, Intrinsic::experimental_gc_relocate, Types);
+
+ // Generate the gc.relocate call and save the result
+ Value *BaseIdx =
+ ConstantInt::get(Type::getInt32Ty(M->getContext()),
+ LiveStart + find_index(LiveVariables, BasePtrs[i]));
+ Value *LiveIdx = ConstantInt::get(
+ Type::getInt32Ty(M->getContext()),
+ LiveStart + find_index(LiveVariables, LiveVariables[i]));
+
+ // only specify a debug name if we can give a useful one
+ Value *Reloc = Builder.CreateCall(
+ GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
+ LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
+ : "");
+ // Trick CodeGen into thinking there are lots of free registers at this
+ // fake call.
+ cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
+
+ NewDefs.push_back(cast<Instruction>(Reloc));
+ }
+ assert(NewDefs.size() == LiveVariables.size() &&
+ "missing or extra redefinition at safepoint");
+}
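+// For example (illustrative; the indices are made up), each live pointer %p
+// gets a call roughly of the form:
+//   %p.relocated = call coldcc i8 addrspace(1)*
+//       @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token, i32 10, i32 11)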
+
+static void
+makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
+ const SmallVectorImpl<llvm::Value *> &basePtrs,
+ const SmallVectorImpl<llvm::Value *> &liveVariables,
+ Pass *P,
+ PartiallyConstructedSafepointRecord &result) {
+ assert(basePtrs.size() == liveVariables.size());
+ assert(isStatepoint(CS) &&
+ "This method expects to be rewriting a statepoint");
+
+ BasicBlock *BB = CS.getInstruction()->getParent();
+ assert(BB);
+ Function *F = BB->getParent();
+ assert(F && "must be set");
+ Module *M = F->getParent();
+ (void)M;
+ assert(M && "must be set");
+
+ // We're not changing the function signature of the statepoint since the gc
+ // arguments go into the var args section.
+ Function *gc_statepoint_decl = CS.getCalledFunction();
+
+  // Then go ahead and use the builder to actually do the inserts. We insert
+ // immediately before the previous instruction under the assumption that all
+ // arguments will be available here. We can't insert afterwards since we may
+ // be replacing a terminator.
+ Instruction *insertBefore = CS.getInstruction();
+ IRBuilder<> Builder(insertBefore);
+ // Copy all of the arguments from the original statepoint - this includes the
+ // target, call args, and deopt args
+ SmallVector<llvm::Value *, 64> args;
+ args.insert(args.end(), CS.arg_begin(), CS.arg_end());
+ // TODO: Clear the 'needs rewrite' flag
+
+ // add all the pointers to be relocated (gc arguments)
+ // Capture the start of the live variable list for use in the gc_relocates
+ const int live_start = args.size();
+ args.insert(args.end(), liveVariables.begin(), liveVariables.end());
+
+ // Create the statepoint given all the arguments
+ Instruction *token = nullptr;
+ AttributeSet return_attributes;
+ if (CS.isCall()) {
+ CallInst *toReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *call =
+ Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
+ call->setTailCall(toReplace->isTailCall());
+ call->setCallingConv(toReplace->getCallingConv());
+
+ // Currently we will fail on parameter attributes and on certain
+ // function attributes.
+ AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
+    // If we can handle this set of attributes, set up the function attrs
+    // directly on the statepoint and return attrs later for the gc_result intrinsic.
+ call->setAttributes(new_attrs.getFnAttributes());
+ return_attributes = new_attrs.getRetAttributes();
+
+ token = call;
+
+    // Put the following gc_result and gc_relocate calls immediately after
+    // the old call (which we're about to delete).
+ BasicBlock::iterator next(toReplace);
+ assert(BB->end() != next && "not a terminator, must have next");
+ next++;
+ Instruction *IP = &*(next);
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+
+ } else {
+ InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
+
+ // Insert the new invoke into the old block. We'll remove the old one in a
+ // moment at which point this will become the new terminator for the
+ // original block.
+ InvokeInst *invoke = InvokeInst::Create(
+ gc_statepoint_decl, toReplace->getNormalDest(),
+ toReplace->getUnwindDest(), args, "", toReplace->getParent());
+ invoke->setCallingConv(toReplace->getCallingConv());
+
+ // Currently we will fail on parameter attributes and on certain
+ // function attributes.
+ AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
+    // If we can handle this set of attributes, set up the function attrs
+    // directly on the statepoint and return attrs later for the gc_result intrinsic.
+ invoke->setAttributes(new_attrs.getFnAttributes());
+ return_attributes = new_attrs.getRetAttributes();
+
+ token = invoke;
+
+ // Generate gc relocates in exceptional path
+ BasicBlock *unwindBlock = toReplace->getUnwindDest();
+ assert(!isa<PHINode>(unwindBlock->begin()) &&
+ unwindBlock->getUniquePredecessor() &&
+ "can't safely insert in this block!");
+
+ Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
+
+ // Extract second element from landingpad return value. We will attach
+ // exceptional gc relocates to it.
+ const unsigned idx = 1;
+ Instruction *exceptional_token =
+ cast<Instruction>(Builder.CreateExtractValue(
+ unwindBlock->getLandingPadInst(), idx, "relocate_token"));
+ result.UnwindToken = exceptional_token;
+
+ // Just throw away return value. We will use the one we got for normal
+ // block.
+ (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
+ exceptional_token, Builder);
+
+ // Generate gc relocates and returns for the normal block
+ BasicBlock *normalDest = toReplace->getNormalDest();
+ assert(!isa<PHINode>(normalDest->begin()) &&
+ normalDest->getUniquePredecessor() &&
+ "can't safely insert in this block!");
+
+ IP = &*(normalDest->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+
+ // gc relocates will be generated later as if it were a regular call
+ // statepoint.
+ }
+ assert(token);
+
+ // Take the name of the original call if it had one.
+ token->takeName(CS.getInstruction());
+
+// The GCResult is already inserted, we just need to find it
+#ifndef NDEBUG
+ Instruction *toReplace = CS.getInstruction();
+ assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
+ "only valid use before rewrite is gc.result");
+ assert(!toReplace->hasOneUse() ||
+ isGCResult(cast<Instruction>(*toReplace->user_begin())));
+#endif
+
+ // Update the gc.result of the original statepoint (if any) to use the newly
+ // inserted statepoint. This is safe to do here since the token can't be
+ // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(token);
+
+ result.StatepointToken = token;
+
+ // Second, create a gc.relocate for every live variable
+ CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
+}
+
+namespace {
+struct name_ordering {
+ Value *base;
+ Value *derived;
+ bool operator()(name_ordering const &a, name_ordering const &b) {
+ return a.derived->getName().compare(b.derived->getName()) < 0;
+ }
+};
+}
+static void stabilize_order(SmallVectorImpl<Value *> &basevec,
+ SmallVectorImpl<Value *> &livevec) {
+ assert(basevec.size() == livevec.size());
+
+ SmallVector<name_ordering, 64> temp;
+ for (size_t i = 0; i < basevec.size(); i++) {
+ name_ordering v;
+ v.base = basevec[i];
+ v.derived = livevec[i];
+ temp.push_back(v);
+ }
+ std::sort(temp.begin(), temp.end(), name_ordering());
+ for (size_t i = 0; i < basevec.size(); i++) {
+ basevec[i] = temp[i].base;
+ livevec[i] = temp[i].derived;
+ }
+}
+
+// Replace an existing gc.statepoint with a new one and a set of gc.relocates
+// which make the relocations happening at this safepoint explicit.
+//
+// WARNING: Does not do any fixup to adjust users of the original live
+// values. That's the caller's responsibility.
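+//
+// As a rough sketch only (names invented, not taken from an actual test), a
+// statepoint with one live derived pointer
+//   %tok = statepoint(...)                   ; %obj live across the call
+// becomes, approximately,
+//   %tok = statepoint(..., %obj.base, %obj)  ; live values appended
+//   %obj.relocated = gc.relocate(%tok, <base idx>, <derived idx>)
+// with later uses of %obj rewritten (elsewhere) to use %obj.relocated.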
+static void
+makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
+ PartiallyConstructedSafepointRecord &result) {
+ auto liveset = result.liveset;
+ auto PointerToBase = result.PointerToBase;
+
+ // Convert to vector for efficient cross referencing.
+ SmallVector<Value *, 64> basevec, livevec;
+ livevec.reserve(liveset.size());
+ basevec.reserve(liveset.size());
+ for (Value *L : liveset) {
+ livevec.push_back(L);
+
+ assert(PointerToBase.find(L) != PointerToBase.end());
+ Value *base = PointerToBase[L];
+ basevec.push_back(base);
+ }
+ assert(livevec.size() == basevec.size());
+
+ // To make the output IR slightly more stable (for use in diffs), ensure a
+ // fixed order of the values in the safepoint (by sorting by value name).
+ // The order is otherwise meaningless.
+ stabilize_order(basevec, livevec);
+
+ // Do the actual rewriting and delete the old statepoint
+ makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
+ CS.getInstruction()->eraseFromParent();
+}
+
+// Helper function for relocationViaAlloca. It receives an iterator range
+// over a statepoint's gc relocates and emits a store to the assigned
+// location (via AllocaMap) for each of them. Visited values are added to
+// the VisitedLiveValues set, which we later use for a sanity check.
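+//
+// Illustrative sketch of the emitted pattern (names invented):
+//   %obj.casted = bitcast i8 addrspace(1)* %obj.relocated to <alloca type>
+//   store <alloca type> %obj.casted, <alloca type>* %obj.alloca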
+static void
+insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
+
+ for (User *U : GCRelocs) {
+ if (!isa<IntrinsicInst>(U))
+ continue;
+
+ IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
+
+ // We only care about relocates
+ if (RelocatedValue->getIntrinsicID() !=
+ Intrinsic::experimental_gc_relocate) {
+ continue;
+ }
+
+ GCRelocateOperands RelocateOperands(RelocatedValue);
+ Value *OriginalValue =
+ const_cast<Value *>(RelocateOperands.getDerivedPtr());
+ assert(AllocaMap.count(OriginalValue));
+ Value *Alloca = AllocaMap[OriginalValue];
+
+ // Emit a store into the related alloca. All gc_relocates are typed
+ // i8 addrspace(1)*, so the relocated value must be bitcast to the
+ // alloca's allocated type first.
+ assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+ IRBuilder<> Builder(RelocatedValue->getNextNode());
+ Value *CastedRelocatedValue =
+ Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
+ RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+
+ StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
+ Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
+
+#ifndef NDEBUG
+ VisitedLiveValues.insert(OriginalValue);
+#endif
+ }
+}
+
+// Helper function for the "relocationViaAlloca". Similar to the
+// "insertRelocationStores" but works for rematerialized values.
+static void
+insertRematerializationStores(
+ const RematerializedValueMapTy &RematerializedValues,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
+
+ for (auto RematerializedValuePair: RematerializedValues) {
+ Instruction *RematerializedValue = RematerializedValuePair.first;
+ Value *OriginalValue = RematerializedValuePair.second;
+
+ assert(AllocaMap.count(OriginalValue) &&
+ "Can not find alloca for rematerialized value");
+ Value *Alloca = AllocaMap[OriginalValue];
+
+ StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
+ Store->insertAfter(RematerializedValue);
+
+#ifndef NDEBUG
+ VisitedLiveValues.insert(OriginalValue);
+#endif
+ }
+}
+
+/// Do all the relocation updates via allocas and mem2reg.
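+///
+/// The overall shape, as a rough sketch: for each live value %v we emit
+///   %v.alloca = alloca <type of %v>     ; in the entry block
+///   store %v, %v.alloca                 ; after the original def
+///   store %v.relocated, %v.alloca       ; after each relevant statepoint
+///   %v.reload = load %v.alloca          ; before each use
+/// and then let mem2reg (PromoteMemToReg) rebuild clean SSA form.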
+static void relocationViaAlloca(
+ Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
+ ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+#ifndef NDEBUG
+ // record initial number of (static) allocas; we'll check we have the same
+ // number when we get done.
+ int InitialAllocaNum = 0;
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum++;
+#endif
+
+ // TODO-PERF: change data structures, reserve
+ DenseMap<Value *, Value *> AllocaMap;
+ SmallVector<AllocaInst *, 200> PromotableAllocas;
+ // Used later to check that we have enough allocas to store all values.
+ std::size_t NumRematerializedValues = 0;
+ PromotableAllocas.reserve(Live.size());
+
+ // Emit alloca for "LiveValue" and record it in "allocaMap" and
+ // "PromotableAllocas"
+ auto emitAllocaFor = [&](Value *LiveValue) {
+ AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+ F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[LiveValue] = Alloca;
+ PromotableAllocas.push_back(Alloca);
+ };
+
+ // emit alloca for each live gc pointer
+ for (unsigned i = 0; i < Live.size(); i++) {
+ emitAllocaFor(Live[i]);
+ }
+
+ // emit allocas for rematerialized values
+ for (size_t i = 0; i < Records.size(); i++) {
+ const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+
+ for (auto RematerializedValuePair : Info.RematerializedValues) {
+ Value *OriginalValue = RematerializedValuePair.second;
+ if (AllocaMap.count(OriginalValue) != 0)
+ continue;
+
+ emitAllocaFor(OriginalValue);
+ ++NumRematerializedValues;
+ }
+ }
+
+ // The next two loops are part of the same conceptual operation. We need to
+ // insert a store to the alloca after the original def and at each
+ // redefinition. We need to insert a load before each use. These are split
+ // into distinct loops for performance reasons.
+
+ // Update the gc pointer after each statepoint: either store a relocated
+ // value or null (if no relocated value was found for this gc pointer and
+ // it is not a gc_result). This must happen before we update the
+ // statepoint with a load from the alloca; otherwise we would lose the
+ // link between the statepoint and the old def.
+ for (size_t i = 0; i < Records.size(); i++) {
+ const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ Value *Statepoint = Info.StatepointToken;
+
+ // This will be used for a consistency check.
+ DenseSet<Value *> VisitedLiveValues;
+
+ // Insert stores for normal statepoint gc relocates
+ insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
+
+ // If it was an invoke statepoint, we also insert stores for the
+ // exceptional-path gc relocates.
+ if (isa<InvokeInst>(Statepoint)) {
+ insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
+ VisitedLiveValues);
+ }
+
+ // Do the same for rematerialized values.
+ insertRematerializationStores(Info.RematerializedValues, AllocaMap,
+ VisitedLiveValues);
+
+ if (ClobberNonLive) {
+ // As a debugging aid, pretend that an unrelocated pointer becomes null
+ // at the gc.statepoint. This will turn some subtle GC problems into
+ // slightly easier to debug SEGVs. Note that on large IR files with
+ // lots of gc.statepoints this is extremely costly in both memory and
+ // time.
+ SmallVector<AllocaInst *, 64> ToClobber;
+ for (auto Pair : AllocaMap) {
+ Value *Def = Pair.first;
+ AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+
+ // This value was relocated
+ if (VisitedLiveValues.count(Def)) {
+ continue;
+ }
+ ToClobber.push_back(Alloca);
+ }
+
+ auto InsertClobbersAt = [&](Instruction *IP) {
+ for (auto *AI : ToClobber) {
+ auto AIType = cast<PointerType>(AI->getType());
+ auto PT = cast<PointerType>(AIType->getElementType());
+ Constant *CPN = ConstantPointerNull::get(PT);
+ StoreInst *Store = new StoreInst(CPN, AI);
+ Store->insertBefore(IP);
+ }
+ };
+
+ // Insert the clobbering stores. These may get intermixed with the
+ // gc.results and gc.relocates, but that's fine.
+ if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
+ InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ } else {
+ BasicBlock::iterator Next(cast<CallInst>(Statepoint));
+ Next++;
+ InsertClobbersAt(Next);
+ }
+ }
+ }
+ // Update uses with loads from the allocas and add a store for the
+ // initial def of each gc pointer.
+ for (auto Pair : AllocaMap) {
+ Value *Def = Pair.first;
+ Value *Alloca = Pair.second;
+
+ // We pre-record the uses so that we don't have to worry about later
+ // updates that change the user information.
+ SmallVector<Instruction *, 20> Uses;
+ // PERF: trade a linear scan for repeated reallocation
+ Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
+ for (User *U : Def->users()) {
+ if (!isa<ConstantExpr>(U)) {
+ // If the def has a ConstantExpr use, then the def is either a
+ // ConstantExpr use itself or null. In either case
+ // (recursively in the first, directly in the second), the oop
+ // it is ultimately dependent on is null and this particular
+ // use does not need to be fixed up.
+ Uses.push_back(cast<Instruction>(U));
+ }
+ }
+
+ std::sort(Uses.begin(), Uses.end());
+ auto Last = std::unique(Uses.begin(), Uses.end());
+ Uses.erase(Last, Uses.end());
+
+ for (Instruction *Use : Uses) {
+ if (isa<PHINode>(Use)) {
+ PHINode *Phi = cast<PHINode>(Use);
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+ if (Def == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", Use);
+ Use->replaceUsesOfWith(Def, Load);
+ }
+ }
+
+ // Emit the store for the initial gc value. The store must be inserted
+ // after the loads, otherwise the store would appear in the alloca's use
+ // list and an extra load would be inserted before it.
+ StoreInst *Store = new StoreInst(Def, Alloca);
+ if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
+ // InvokeInst is a TerminatorInst, so the store needs to be inserted
+ // into its normal destination block.
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ Store->insertBefore(NormalDest->getFirstNonPHI());
+ } else {
+ assert(!Inst->isTerminator() &&
+ "The only TerminatorInst that can produce a value is "
+ "InvokeInst which is handled above.");
+ Store->insertAfter(Inst);
+ }
+ } else {
+ assert(isa<Argument>(Def));
+ Store->insertAfter(cast<Instruction>(Alloca));
+ }
+ }
+
+ assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
+ "we must have the same allocas with lives");
+ if (!PromotableAllocas.empty()) {
+ // apply mem2reg to promote alloca to SSA
+ PromoteMemToReg(PromotableAllocas, DT);
+ }
+
+#ifndef NDEBUG
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum--;
+ assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
+#endif
+}
+
+/// Implement a unique function which doesn't require us to sort the input
+/// vector. Doing so would change the output of a couple of tests in ways
+/// which make them less useful for testing fused safepoints.
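+///
+/// For example (hypothetical input): {b, a, b, c} becomes {b, a, c},
+/// keeping the first occurrence of each element in its original position.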
+template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
+ DenseSet<T> Seen;
+ SmallVector<T, 128> TempVec;
+ TempVec.reserve(Vec.size());
+ for (auto Element : Vec)
+ TempVec.push_back(Element);
+ Vec.clear();
+ for (auto V : TempVec) {
+ if (Seen.insert(V).second) {
+ Vec.push_back(V);
+ }
+ }
+}
+
+/// Insert holders so that each Value is obviously live through the entire
+/// lifetime of the call.
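+///
+/// Roughly, this emits (sketch only; "__tmp_use" is the dummy vararg
+/// function created below):
+///   call void (...) @__tmp_use(i8 addrspace(1)* %v1, i8 addrspace(1)* %v2)
+/// immediately after the safepoint, or in both destination blocks for an
+/// invoke.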
+static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
+ SmallVectorImpl<CallInst *> &Holders) {
+ if (Values.empty())
+ // No values to hold live, might as well not insert the empty holder
+ return;
+
+ Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ // Use a dummy vararg function to actually hold the values live
+ Function *Func = cast<Function>(M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
+ if (CS.isCall()) {
+ // For call safepoints insert dummy calls right after safepoint
+ BasicBlock::iterator Next(CS.getInstruction());
+ Next++;
+ Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ return;
+ }
+ // For invoke safepoints insert dummy calls in both the normal and
+ // exceptional destination blocks.
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
+}
+
+static void findLiveReferences(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ GCPtrLivenessData OriginalLivenessData;
+ computeLiveInValues(DT, F, OriginalLivenessData);
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ const CallSite &CS = toUpdate[i];
+ analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
+ }
+}
+
+/// Remove any vector of pointers from the liveset by scalarizing them over
+/// the statepoint instruction. Adds the scalarized pieces to the liveset.
+/// It would be preferable to include the vector in the statepoint itself,
+/// but the lowering code currently does not handle that. Extending it would
+/// be slightly non-trivial since it requires a format change. Given how
+/// rare such cases are (for the moment?) scalarizing is an acceptable
+/// compromise.
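+///
+/// Sketch of the rewrite for a hypothetical <2 x i8 addrspace(1)*> %vec:
+///   %e0 = extractelement <2 x i8 addrspace(1)*> %vec, i32 0
+///   %e1 = extractelement <2 x i8 addrspace(1)*> %vec, i32 1
+///   ... statepoint (%e0 and %e1 live instead of %vec) ...
+///   %vec.new = insertelement sequence reforming the vector from the
+///              (relocated) scalars in each successor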
+static void splitVectorValues(Instruction *StatepointInst,
+ StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+ SmallVector<Value *, 16> ToSplit;
+ for (Value *V : LiveSet)
+ if (isa<VectorType>(V->getType()))
+ ToSplit.push_back(V);
+
+ if (ToSplit.empty())
+ return;
+
+ Function &F = *(StatepointInst->getParent()->getParent());
+
+ DenseMap<Value *, AllocaInst *> AllocaMap;
+ // First is normal return, second is exceptional return (invoke only)
+ DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
+ for (Value *V : ToSplit) {
+ LiveSet.erase(V);
+
+ AllocaInst *Alloca =
+ new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[V] = Alloca;
+
+ VectorType *VT = cast<VectorType>(V->getType());
+ IRBuilder<> Builder(StatepointInst);
+ SmallVector<Value *, 16> Elements;
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
+ LiveSet.insert(Elements.begin(), Elements.end());
+
+ auto InsertVectorReform = [&](Instruction *IP) {
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+ Value *ResultVec = UndefValue::get(VT);
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
+ Builder.getInt32(i));
+ return ResultVec;
+ };
+
+ if (isa<CallInst>(StatepointInst)) {
+ BasicBlock::iterator Next(StatepointInst);
+ Next++;
+ Instruction *IP = &*(Next);
+ Replacements[V].first = InsertVectorReform(IP);
+ Replacements[V].second = nullptr;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
+ // We've already normalized - check that we don't have shared destination
+ // blocks
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()));
+ BasicBlock *UnwindDest = Invoke->getUnwindDest();
+ assert(!isa<PHINode>(UnwindDest->begin()));
+ // Insert insertelement sequences in both successors.
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Replacements[V].first = InsertVectorReform(IP);
+ IP = &*(UnwindDest->getFirstInsertionPt());
+ Replacements[V].second = InsertVectorReform(IP);
+ }
+ }
+ for (Value *V : ToSplit) {
+ AllocaInst *Alloca = AllocaMap[V];
+
+ // Capture all users before we start mutating use lists
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : V->users())
+ Users.push_back(cast<Instruction>(U));
+
+ for (Instruction *I : Users) {
+ if (auto Phi = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
+ if (V == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", I);
+ I->replaceUsesOfWith(V, Load);
+ }
+ }
+
+ // Store the original value and the replacement value into the alloca
+ StoreInst *Store = new StoreInst(V, Alloca);
+ if (auto I = dyn_cast<Instruction>(V))
+ Store->insertAfter(I);
+ else
+ Store->insertAfter(Alloca);
+
+ // Normal return for invoke, or call return
+ Instruction *Replacement = cast<Instruction>(Replacements[V].first);
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ // Unwind return for invoke only
+ Replacement = cast_or_null<Instruction>(Replacements[V].second);
+ if (Replacement)
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ }
+
+ // apply mem2reg to promote alloca to SSA
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Value *V : ToSplit)
+ Allocas.push_back(AllocaMap[V]);
+ PromoteMemToReg(Allocas, DT);
+}
+
+// Helper function for the "rematerializeLiveValues". It walks use chain
+// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
+// values are visited (currently it is GEP's and casts). Returns true if it
+// sucessfully reached "BaseValue" and false otherwise.
+// Fills "ChainToBase" array with all visited values. "BaseValue" is not
+// recorded.
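+//
+// For example (hypothetical chain): with %derived = gep %cast, ... and
+// %cast = bitcast %base, walking from %derived records [%derived, %cast]
+// and returns true once %base is reached.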
+static bool findRematerializableChainToBasePointer(
+ SmallVectorImpl<Instruction*> &ChainToBase,
+ Value *CurrentValue, Value *BaseValue) {
+
+ // We have found a base value
+ if (CurrentValue == BaseValue) {
+ return true;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
+ ChainToBase.push_back(GEP);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ GEP->getPointerOperand(),
+ BaseValue);
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
+ Value *Def = CI->stripPointerCasts();
+
+ // These two checks are basically similar. The first one is here for
+ // consistency with the findBasePointers logic.
+ assert(!isa<CastInst>(Def) && "not a pointer cast found");
+ if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
+ return false;
+
+ ChainToBase.push_back(CI);
+ return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ }
+
+ // Unsupported instruction in the chain.
+ return false;
+}
+
+// Helper function for the "rematerializeLiveValues". Compute cost of the use
+// chain we are going to rematerialize.
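+//
+// Worked example under the model below (assumed, not measured): for a
+// chain of one no-op bitcast and one GEP with a variable index, the cast
+// is typically free, so the total is roughly the target's address
+// computation cost plus 2 for the non-constant indices.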
+static unsigned
+chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
+ TargetTransformInfo &TTI) {
+ unsigned Cost = 0;
+
+ for (Instruction *Instr : Chain) {
+ if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
+ assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
+ "non noop cast is found during rematerialization");
+
+ Type *SrcTy = CI->getOperand(0)->getType();
+ Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
+ // Cost of the address calculation
+ Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
+ Cost += TTI.getAddressComputationCost(ValTy);
+
+ // And cost of the GEP itself
+ // TODO: Use TTI->getGEPCost here (it exists, but appears not to be
+ // allowed for external usage)
+ if (!GEP->hasAllConstantIndices())
+ Cost += 2;
+
+ } else {
+ llvm_unreachable("unsupported instruciton type during rematerialization");
+ }
+ }
+
+ return Cost;
+}
+
+// From the statepoint live set, pick values that are cheaper to recompute
+// than to relocate. Remove these values from the live set, rematerialize
+// them after the statepoint and record them in the "Info" structure. Note
+// that similar to relocated values we don't do any user adjustments here.
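+//
+// For instance (sketch only): if %derived = gep %base, 16 is live across a
+// statepoint, it can be cheaper to redo the gep on the relocated %base
+// after the statepoint than to relocate both %base and %derived.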
+static void rematerializeLiveValues(CallSite CS,
+ PartiallyConstructedSafepointRecord &Info,
+ TargetTransformInfo &TTI) {
+ const unsigned int ChainLengthThreshold = 10;
+
+ // Record values we are going to delete from this statepoint live set.
+ // We cannot do this in the following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue: Info.liveset) {
+ // For each live pointer find its defining chain.
+ SmallVector<Instruction *, 3> ChainToBase;
+ assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ bool FoundChain =
+ findRematerializableChainToBasePointer(ChainToBase,
+ LiveValue,
+ Info.PointerToBase[LiveValue]);
+ // Nothing to do, or chain is too long
+ if (!FoundChain ||
+ ChainToBase.size() == 0 ||
+ ChainToBase.size() > ChainLengthThreshold)
+ continue;
+
+ // Compute cost of this chain
+ unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
+ // TODO: We can also account for cases when we will be able to remove some
+ // of the rematerialized values by later optimization passes. I.e., if
+ // we rematerialized several intersecting chains. Or if original values
+ // don't have any uses besides this statepoint.
+
+ // For invokes we need to rematerialize each chain twice - for normal and
+ // for unwind basic blocks. Model this by multiplying cost by two.
+ if (CS.isInvoke()) {
+ Cost *= 2;
+ }
+ // If it's too expensive, skip it.
+ if (Cost >= RematerializationThreshold)
+ continue;
+
+ // Remove value from the live set
+ LiveValuesToBeDeleted.push_back(LiveValue);
+
+ // Clone instructions and record them inside "Info" structure
+
+ // Walk backwards to visit top-most instructions first
+ std::reverse(ChainToBase.begin(), ChainToBase.end());
+
+ // Utility function which clones all instructions from "ChainToBase"
+ // and inserts them before "InsertBefore". Returns rematerialized value
+ // which should be used after statepoint.
+ auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
+ Instruction *LastClonedValue = nullptr;
+ Instruction *LastValue = nullptr;
+ for (Instruction *Instr: ChainToBase) {
+ // Only GEPs and casts are supported, as we need to be careful not to
+ // introduce any new uses of pointers not in the liveset.
+ // Note that it's fine to introduce new uses of pointers which were
+ // otherwise not used after this statepoint.
+ assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
+
+ Instruction *ClonedValue = Instr->clone();
+ ClonedValue->insertBefore(InsertBefore);
+ ClonedValue->setName(Instr->getName() + ".remat");
+
+ // If it is not the first instruction in the chain then it uses the
+ // previously cloned value. We should update it to use the cloned value.
+ if (LastClonedValue) {
+ assert(LastValue);
+ ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
+#ifndef NDEBUG
+ // Assert that cloned instruction does not use any instructions from
+ // this chain other than LastClonedValue
+ for (auto OpValue : ClonedValue->operand_values()) {
+ assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
+ ChainToBase.end() &&
+ "incorrect use in rematerialization chain");
+ }
+#endif
+ }
+
+ LastClonedValue = ClonedValue;
+ LastValue = Instr;
+ }
+ assert(LastClonedValue);
+ return LastClonedValue;
+ };
+
+ // Different cases for calls and invokes. For invokes we need to clone
+ // instructions both on normal and unwind path.
+ if (CS.isCall()) {
+ Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+ assert(InsertBefore);
+ Instruction *RematerializedValue = rematerializeChain(InsertBefore);
+ Info.RematerializedValues[RematerializedValue] = LiveValue;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+
+ Instruction *NormalInsertBefore =
+ Invoke->getNormalDest()->getFirstInsertionPt();
+ Instruction *UnwindInsertBefore =
+ Invoke->getUnwindDest()->getFirstInsertionPt();
+
+ Instruction *NormalRematerializedValue =
+ rematerializeChain(NormalInsertBefore);
+ Instruction *UnwindRematerializedValue =
+ rematerializeChain(UnwindInsertBefore);
+
+ Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
+ Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+ }
+ }
+
+ // Remove rematerialized values from the live set.
+ for (auto LiveValue: LiveValuesToBeDeleted) {
+ Info.liveset.erase(LiveValue);
+ }
+}
+
+static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
+ SmallVectorImpl<CallSite> &toUpdate) {
+#ifndef NDEBUG
+ // sanity check the input
+ std::set<CallSite> uniqued;
+ uniqued.insert(toUpdate.begin(), toUpdate.end());
+ assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ CallSite &CS = toUpdate[i];
+ assert(CS.getInstruction()->getParent()->getParent() == &F);
+ assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+ }
+#endif
+
+ // When inserting gc.relocates for invokes, we need to be able to insert at
+ // the top of the successor blocks. See the comment on
+ // normalizeForInvokeSafepoint on exactly what is needed. Note that this step
+ // may restructure the CFG.
+ for (CallSite CS : toUpdate) {
+ if (!CS.isInvoke())
+ continue;
+ InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
+ P);
+ normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
+ P);
+ }
+
+ // A list of dummy calls added to the IR to keep various values obviously
+ // live. We'll remove all of these when done.
+ SmallVector<CallInst *, 64> holders;
+
+ // Insert a dummy call with all of the arguments to the vm_state we'll need
+ // for the actual safepoint insertion. This ensures reference arguments in
+ // the deopt argument list are considered live through the safepoint (and
+ // thus makes sure they get relocated).
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ CallSite &CS = toUpdate[i];
+ Statepoint StatepointCS(CS);
+
+ SmallVector<Value *, 64> DeoptValues;
+ for (Use &U : StatepointCS.vm_state_args()) {
+ Value *Arg = cast<Value>(&U);
+ assert(!isUnhandledGCPointerType(Arg->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(Arg->getType()))
+ DeoptValues.push_back(Arg);
+ }
+ insertUseHolderAfter(CS, DeoptValues, holders);
+ }
+
+ SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
+ records.reserve(toUpdate.size());
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ struct PartiallyConstructedSafepointRecord info;
+ records.push_back(info);
+ }
+ assert(records.size() == toUpdate.size());
+
+ // A) Identify all gc pointers which are statically live at the given call
+ // site.
+ findLiveReferences(F, DT, P, toUpdate, records);
+
+ // Do a limited scalarization of any live-at-safepoint vector values which
+ // contain pointers. This enables this pass to run after vectorization at
+ // the cost of some possible performance loss. TODO: it would be nice to
+ // natively support vectors all the way through the backend so we don't need
+ // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ Instruction *statepoint = toUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
+ }
+
+ // B) Find the base pointers for each live pointer
+ /* scope for caching */ {
+ // Cache the 'defining value' relation used in the computation and
+ // insertion of base phis and selects. This ensures that we don't insert
+ // large numbers of duplicate base_phis.
+ DefiningValueMapTy DVCache;
+
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+ findBasePointers(DT, DVCache, CS, info);
+ }
+ } // end of cache scope
+
+ // The base phi insertion logic (for any safepoint) may have inserted new
+ // instructions which are now live at some safepoint. The simplest such
+ // example is:
+ // loop:
+ // phi a <-- will be a new base_phi here
+ // safepoint 1 <-- that needs to be live here
+ // gep a + 1
+ // safepoint 2
+ // br loop
+ // We insert some dummy calls after each safepoint to definitely hold live
+ // the base pointers which were identified for that safepoint. We'll then
+ // ask liveness for _every_ base inserted to see what is now live. Then we
+ // remove the dummy calls.
+ holders.reserve(holders.size() + records.size());
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+
+ SmallVector<Value *, 128> Bases;
+ for (auto Pair : info.PointerToBase) {
+ Bases.push_back(Pair.second);
+ }
+ insertUseHolderAfter(CS, Bases, holders);
+ }
+
+ // By selecting base pointers, we've effectively inserted new uses. Thus, we
+ // need to rerun liveness. We may *also* have inserted new defs, but that's
+ // not the key issue.
+ recomputeLiveInValues(F, DT, P, toUpdate, records);
+
+ if (PrintBasePointers) {
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ errs() << "Base Pairs: (w/Relocation)\n";
+ for (auto Pair : info.PointerToBase) {
+ errs() << " derived %" << Pair.first->getName() << " base %"
+ << Pair.second->getName() << "\n";
+ }
+ }
+ }
+ for (size_t i = 0; i < holders.size(); i++) {
+ holders[i]->eraseFromParent();
+ holders[i] = nullptr;
+ }
+ holders.clear();
+
+ // In order to reduce the live set of a statepoint we might choose to
+ // rematerialize some values instead of relocating them. This is purely an
+ // optimization and does not influence correctness.
+ TargetTransformInfo &TTI =
+ P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+
+ rematerializeLiveValues(CS, info, TTI);
+ }
+
+ // Now run through and replace the existing statepoints with new ones with
+ // the live variables listed. We do not yet update uses of the values being
+ // relocated. We have references to live variables that need to
+ // survive to the last iteration of this loop. (By construction, the
+ // previous statepoint can not be a live variable, thus we can remove the
+ // old statepoint calls as we go.)
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+ makeStatepointExplicit(DT, CS, P, info);
+ }
+ toUpdate.clear(); // prevent accidental use of invalid CallSites
+
+ // Do all the fixups of the original live variables to their relocated selves
+ SmallVector<Value *, 128> live;
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ // We can't simply save the live set from the original insertion. One of
+ // the live values might be the result of a call which needs a safepoint.
+ // That Value* no longer exists and we need to use the new gc_result.
+ // Thankfully, the liveset is embedded in the statepoint (and updated), so
+ // we just grab that.
+ Statepoint statepoint(info.StatepointToken);
+ live.insert(live.end(), statepoint.gc_args_begin(),
+ statepoint.gc_args_end());
+#ifndef NDEBUG
+ // Do some basic sanity checks on our liveness results before performing
+ // relocation. Relocation can and will turn mistakes in liveness results
+ // into nonsensical code which is much harder to debug.
+ // TODO: It would be nice to test consistency as well
+ assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ "statepoint must be reachable or liveness is meaningless");
+ for (Value *V : statepoint.gc_args()) {
+ if (!isa<Instruction>(V))
+ // Non-instruction values trivially dominate all possible uses.
+ continue;
+ auto LiveInst = cast<Instruction>(V);
+ assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+ "unreachable values should never be live");
+ assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+#endif
+ }
+ unique_unsorted(live);
+
+#ifndef NDEBUG
+ // sanity check
+ for (auto ptr : live) {
+ assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
+ }
+#endif
+
+ relocationViaAlloca(F, DT, live, records);
+ return !records.empty();
+}
+
+/// Returns true if this function should be rewritten by this pass. The main
+/// point of this function is as an extension point for custom logic.
+static bool shouldRewriteStatepointsIn(Function &F) {
+ // TODO: This should check the GCStrategy
+ if (F.hasGC()) {
+ const char *FunctionGCName = F.getGC();
+ const StringRef StatepointExampleName("statepoint-example");
+ const StringRef CoreCLRName("coreclr");
+ return (StatepointExampleName == FunctionGCName) ||
+ (CoreCLRName == FunctionGCName);
+ } else
+ return false;
+}
+
+bool RewriteStatepointsForGC::runOnFunction(Function &F) {
+ // Nothing to do for declarations.
+ if (F.isDeclaration() || F.empty())
+ return false;
+
+ // Policy choice says not to rewrite - the most common reason is that we're
+ // compiling code without a GCStrategy.
+ if (!shouldRewriteStatepointsIn(F))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Gather all the statepoints which need to be rewritten. Be careful to only
+ // consider those in reachable code since we need to ask dominance queries
+ // when rewriting. We'll delete the unreachable ones in a moment.
+ SmallVector<CallSite, 64> ParsePointNeeded;
+ bool HasUnreachableStatepoint = false;
+ for (Instruction &I : inst_range(F)) {
+ // TODO: only the ones with the flag set!
+ if (isStatepoint(I)) {
+ if (DT.isReachableFromEntry(I.getParent()))
+ ParsePointNeeded.push_back(CallSite(&I));
+ else
+ HasUnreachableStatepoint = true;
+ }
+ }
+
+ bool MadeChange = false;
+
+ // Delete any unreachable statepoints so that we don't have unrewritten
+ // statepoints surviving this pass. This makes testing easier and the
+ // resulting IR less confusing to human readers. Rather than be fancy, we
+ // just reuse a utility function which removes the unreachable blocks.
+ if (HasUnreachableStatepoint)
+ MadeChange |= removeUnreachableBlocks(F);
+
+ // Return early if no work to do.
+ if (ParsePointNeeded.empty())
+ return MadeChange;
+
+ // As a prepass, go ahead and aggressively destroy single entry phi nodes.
+ // These are created by LCSSA. They have the effect of increasing the size
+ // of liveness sets for no good reason. It may be harder to do this post
+ // insertion since relocations and base phis can confuse things.
+ for (BasicBlock &BB : F)
+ if (BB.getUniquePredecessor()) {
+ MadeChange = true;
+ FoldSingleEntryPHINodes(&BB);
+ }
+
+ MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ return MadeChange;
+}
+
+// liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness, the set of potentially
+// interesting values should be small and easy to pre-compute.
+
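+// The fixed point computed below is the standard backwards dataflow:
+//   LiveOut(BB) = union of LiveIn(S) for each successor S of BB (plus the
+//                 phi-use seed from computeLiveOutSeed)
+//   LiveIn(BB)  = (LiveOut(BB) U LiveSet(BB)) \ KillSet(BB)
+// where LiveSet holds the upward-exposed uses and KillSet the defs of BB.
+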
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+ BasicBlock::reverse_iterator rend,
+ DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+ Instruction *I = &*ritr;
+
+ // KILL/Def - Remove this definition from LiveIn
+ LiveTmp.erase(I);
+
+ // Don't consider *uses* in PHI nodes; we handle their contribution to
+ // predecessor blocks when we seed the LiveOut sets.
+ if (isa<PHINode>(I))
+ continue;
+
+ // USE - Add to the LiveIn set for this instruction
+ for (Value *V : I->operands()) {
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ // The choice to exclude all things constant here is slightly subtle.
+ // There are two independent reasons:
+ // - We assume that things which are constant (from LLVM's definition)
+ // do not move at runtime. For example, the address of a global
+ // variable is fixed, even though its contents may not be.
+ // - Second, we can't disallow arbitrary inttoptr constants even
+ // if the language frontend does. Optimization passes are free to
+ // locally exploit facts without respect to global reachability. This
+ // can create sections of code which are dynamically unreachable and
+ // contain just about anything. (see constants.ll in tests)
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock *Succ : successors(BB)) {
+ const BasicBlock::iterator E(Succ->getFirstNonPHI());
+ for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+ PHINode *Phi = cast<PHINode>(&*I);
+ Value *V = Phi->getIncomingValueForBlock(BB);
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+ DenseSet<Value *> KillSet;
+ for (Instruction &I : *BB)
+ if (isHandledGCPointerType(I.getType()))
+ KillSet.insert(&I);
+ return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+ TerminatorInst *TI, bool TermOkay = false) {
+ for (Value *V : Live) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // The terminator can be a member of the LiveOut set. LLVM's definition
+ // of instruction dominance states that V does not dominate itself. As
+ // such, we need to special case this to allow it.
+ if (TermOkay && TI == I)
+ continue;
+ assert(DT.dominates(I, TI) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+ }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+ BasicBlock &BB) {
+ checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+ checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+ checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data) {
+
+ SmallSetVector<BasicBlock *, 200> Worklist;
+ auto AddPredsToWorklist = [&](BasicBlock *BB) {
+ // We use a SetVector so that we don't have duplicates in the worklist.
+ Worklist.insert(pred_begin(BB), pred_end(BB));
+ };
+ auto NextItem = [&]() {
+ BasicBlock *BB = Worklist.back();
+ Worklist.pop_back();
+ return BB;
+ };
+
+ // Seed the liveness for each individual block
+ for (BasicBlock &BB : F) {
+ Data.KillSet[&BB] = computeKillSet(&BB);
+ Data.LiveSet[&BB].clear();
+ computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+ for (Value *Kill : Data.KillSet[&BB])
+ assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+ Data.LiveOut[&BB] = DenseSet<Value *>();
+ computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+ Data.LiveIn[&BB] = Data.LiveSet[&BB];
+ set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+ set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+ if (!Data.LiveIn[&BB].empty())
+ AddPredsToWorklist(&BB);
+ }
+
+ // Propagate that liveness until stable
+ while (!Worklist.empty()) {
+ BasicBlock *BB = NextItem();
+
+ // Compute our new liveout set, then exit early if it hasn't changed
+ // despite the contribution of our successor.
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ const auto OldLiveOutSize = LiveOut.size();
+ for (BasicBlock *Succ : successors(BB)) {
+ assert(Data.LiveIn.count(Succ));
+ set_union(LiveOut, Data.LiveIn[Succ]);
+ }
+ // assert: OldLiveOut is a subset of LiveOut
+ if (OldLiveOutSize == LiveOut.size()) {
+ // If the sets are the same size, then we didn't actually add anything
+ // when unioning our successors' LiveIn. Thus, the LiveOut of this block
+ // hasn't changed.
+ continue;
+ }
+ Data.LiveOut[BB] = LiveOut;
+
+ // Apply the effects of this basic block
+ DenseSet<Value *> LiveTmp = LiveOut;
+ set_union(LiveTmp, Data.LiveSet[BB]);
+ set_subtract(LiveTmp, Data.KillSet[BB]);
+
+ assert(Data.LiveIn.count(BB));
+ const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+ // assert: OldLiveIn is a subset of LiveTmp
+ if (OldLiveIn.size() != LiveTmp.size()) {
+ Data.LiveIn[BB] = LiveTmp;
+ AddPredsToWorklist(BB);
+ }
+ } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+ // Sanity check our output against SSA properties. This helps catch any
+ // missing kills during the above iteration.
+ for (BasicBlock &BB : F) {
+ checkBasicSSA(DT, Data, BB);
+ }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &Out) {
+
+ BasicBlock *BB = Inst->getParent();
+
+ // Note: The copy is intentional and required
+ assert(Data.LiveOut.count(BB));
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+ // We want to handle the statepoint itself oddly. Its call result is
+ // not live (normal), nor are its arguments (unless they're used
+ // again later). This adjustment is specifically what we need to
+ // relocate.
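+ // For example, for "%tok = statepoint(... %p ...)" the token %tok is not
+ // live across the call, and %p is live only if it has uses after the
+ // statepoint; the recomputation and the erase of Inst below encode that.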
+ BasicBlock::reverse_iterator rend(Inst);
+ computeLiveInValues(BB->rbegin(), rend, LiveOut);
+ LiveOut.erase(Inst);
+ Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Info) {
+ Instruction *Inst = CS.getInstruction();
+ StatepointLiveSetTy Updated;
+ findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+ DenseSet<Value *> Bases;
+ for (auto KVPair : Info.PointerToBase) {
+ Bases.insert(KVPair.second);
+ }
+#endif
+ // We may have base pointers which are now live that weren't before. We need
+ // to update the PointerToBase structure to reflect this.
+ for (auto V : Updated)
+ if (!Info.PointerToBase.count(V)) {
+ assert(Bases.count(V) && "can't find base for unexpected live value");
+ Info.PointerToBase[V] = V;
+ continue;
+ }
+
+#ifndef NDEBUG
+ for (auto V : Updated) {
+ assert(Info.PointerToBase.count(V) &&
+ "must be able to find base for live value");
+ }
+#endif
+
+ // Remove any stale base mappings - this can happen since our liveness is
+ // more precise than the one inherent in the base pointer analysis.
+ DenseSet<Value *> ToErase;
+ for (auto KVPair : Info.PointerToBase)
+ if (!Updated.count(KVPair.first))
+ ToErase.insert(KVPair.first);
+ for (auto V : ToErase)
+ Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+ for (auto KVPair : Info.PointerToBase)
+ assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+ Info.liveset = Updated;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index cfc9a8e..bc068f7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -154,7 +154,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli)
- : DL(DL), TLI(tli) {}
+ SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
+ : DL(DL), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+ markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+ Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1504,7 +1505,7 @@ namespace {
///
struct SCCP : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
@@ -1561,9 +1562,9 @@ bool SCCP::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
@@ -1637,7 +1638,7 @@ namespace {
///
struct IPSCCP : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID;
IPSCCP() : ModulePass(ID) {
@@ -1651,7 +1652,7 @@ char IPSCCP::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1690,9 +1691,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = M.getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index ed161fd..056dd11 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -247,7 +247,7 @@ public:
/// hold.
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
- std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices));
+ Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
std::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
@@ -701,6 +701,7 @@ private:
// by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
+ const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
@@ -750,6 +751,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
+ const DataLayout &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -761,6 +763,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
@@ -898,6 +901,7 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
@@ -1084,7 +1088,8 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
+ AllocaPromoter(ArrayRef<const Instruction *> Insts,
+ SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
: LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
@@ -1092,8 +1097,8 @@ public:
// Retain the debug information attached to the alloca for use when
// rewriting loads and stores.
if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DebugNode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DebugNode->users())
+ if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
+ for (User *U : DINode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
@@ -1162,10 +1167,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ Inst);
}
}
};
@@ -1194,7 +1198,6 @@ class SROA : public FunctionPass {
const bool RequiresDomTree;
LLVMContext *C;
- const DataLayout *DL;
DominatorTree *DT;
AssumptionCache *AC;
@@ -1243,7 +1246,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DL(nullptr), DT(nullptr) {
+ DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1257,8 +1260,8 @@ private:
friend class AllocaSliceRewriter;
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
+ AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
@@ -1349,7 +1352,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
-static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
+static bool isSafePHIToSpeculate(PHINode &PN) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1381,6 +1384,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
if (!HaveLoad)
return false;
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
@@ -1402,8 +1407,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
+ if (isDereferenceablePointer(InVal, DL) ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
continue;
return false;
@@ -1468,12 +1473,12 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI,
- const DataLayout *DL = nullptr) {
+static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer(DL);
- bool FDerefable = FValue->isDereferenceablePointer(DL);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
+ bool TDerefable = isDereferenceablePointer(TValue, DL);
+ bool FDerefable = isDereferenceablePointer(FValue, DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1484,10 +1489,10 @@ static bool isSafeSelectToSpeculate(SelectInst &SI,
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!TDerefable &&
- !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
return false;
if (!FDerefable &&
- !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
return false;
}
@@ -1547,7 +1552,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+ NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1798,7 +1804,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
- : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
+ IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
@@ -3245,7 +3252,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *GEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3278,7 +3286,7 @@ private:
// Extract the single value and store it using the indices.
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
@@ -3699,6 +3707,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
@@ -3724,10 +3733,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
LoadInst *PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, BasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, BasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
// Append this load onto the list of split loads so we can find it later
@@ -3777,10 +3786,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
(void)PStore;
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -3857,20 +3866,20 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
} else {
IRB.SetInsertPoint(BasicBlock::iterator(LI));
PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, LoadBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, LoadBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
// And store this partition.
IRB.SetInsertPoint(BasicBlock::iterator(SI));
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
@@ -3964,31 +3973,32 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and, if it was successful, queues the alloca to be
/// promoted.
-bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
- if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
+ if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
SliceTy = CommonUseTy;
if (!SliceTy)
- if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+ if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
- DL->isLegalInteger(P.size() * 8))
+ DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
- assert(DL->getTypeAllocSize(SliceTy) >= P.size());
+ assert(DL.getTypeAllocSize(SliceTy) >= P.size());
- bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL);
+ bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
- IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL);
+ IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
@@ -4003,18 +4013,19 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
+ // FIXME: return nullptr;
} else {
unsigned Alignment = AI.getAlignment();
if (!Alignment) {
// The minimum alignment which users can rely on when the explicit
// alignment is omitted or zero is that required by the ABI for this
// type.
- Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+ Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL->getABITypeAlignment(SliceTy))
+ if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
SliceTy, nullptr, Alignment,
@@ -4034,7 +4045,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
- AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
@@ -4056,7 +4067,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
- if (!isSafePHIToSpeculate(**I, DL)) {
+ if (!isSafePHIToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4065,7 +4076,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
- if (!isSafeSelectToSpeculate(**I, DL)) {
+ if (!isSafeSelectToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4098,7 +4109,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
PostPromotionWorklist.pop_back();
}
- return true;
+ return NewAI;
}
/// \brief Walks the slices of an alloca and forms partitions based on them,
@@ -4109,6 +4120,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
unsigned NumPartitions = 0;
bool Changed = false;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
@@ -4126,7 +4138,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// confident that the above handling of splittable loads and stores is
// completely sufficient before we forcibly disable the remaining handling.
if (S.beginOffset() == 0 &&
- S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType()))
+ S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
@@ -4137,9 +4149,29 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (!IsSorted)
std::sort(AS.begin(), AS.end());
+ /// \brief Describes the allocas introduced by rewritePartition
+ /// in order to migrate the debug info.
+ struct Piece {
+ AllocaInst *Alloca;
+ uint64_t Offset;
+ uint64_t Size;
+ Piece(AllocaInst *AI, uint64_t O, uint64_t S)
+ : Alloca(AI), Offset(O), Size(S) {}
+ };
+ SmallVector<Piece, 4> Pieces;
+
// Rewrite each partition.
for (auto &P : AS.partitions()) {
- Changed |= rewritePartition(AI, AS, P);
+ if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
+ Changed = true;
+ if (NewAI != &AI) {
+ uint64_t SizeOfByte = 8;
+ uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
+ // Don't include any padding.
+ uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
+ Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
+ }
+ }
++NumPartitions;
}
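// Worked example of the clamp above, under an assumed target where the new
// partition's type is {i32, i8} padded to 8 bytes: the type stores 64 bits,
// but a 5-byte partition yields min(64, 40) == 40, so the recorded piece
// never covers the 3 bytes of tail padding.
uint64_t AllocaSize = 64;       // DL.getTypeSizeInBits({i32, i8}) with padding
uint64_t PartitionBits = 5 * 8; // P.size() * SizeOfByte
uint64_t PieceSize = std::min(AllocaSize, PartitionBits); // == 40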
@@ -4147,6 +4179,42 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
MaxPartitionsPerAlloca =
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ // Migrate debug information from the old alloca to the new alloca(s)
+ // and the individual partitions.
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
+ auto *Var = DbgDecl->getVariable();
+ auto *Expr = DbgDecl->getExpression();
+ DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
+ /*AllowUnresolved*/ false);
+ bool IsSplit = Pieces.size() > 1;
+ for (auto Piece : Pieces) {
+ // Create a piece expression describing the new partition or reuse AI's
+ // expression if there is only one partition.
+ auto *PieceExpr = Expr;
+ if (IsSplit || Expr->isBitPiece()) {
+ // If this alloca is already a scalar replacement of a larger aggregate,
+ // Piece.Offset describes the offset inside the scalar.
+ uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
+ uint64_t Start = Offset + Piece.Offset;
+ uint64_t Size = Piece.Size;
+ if (Expr->isBitPiece()) {
+ uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
+ if (Start >= AbsEnd)
+ // No need to describe padding introduced by SROA.
+ continue;
+ Size = std::min(Size, AbsEnd - Start);
+ }
+ PieceExpr = DIB.createBitPieceExpression(Start, Size);
+ }
+
+ // Remove any existing dbg.declare intrinsic describing the same alloca.
+ if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
+ OldDDI->eraseFromParent();
+
+ DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+ &AI);
+ }
+ }
return Changed;
}
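// Worked example of the re-slicing above, with assumed values: if the old
// alloca was already described as bits [32, 96) of the source variable
// (piece offset 32, size 64) and the new partition begins 48 bits into it
// with a 32-bit size, the emitted piece is clipped to the described range.
uint64_t Offset = 32, PieceOffset = 48, PieceSizeIn = 32;
uint64_t Start = Offset + PieceOffset;                 // 80
uint64_t AbsEnd = 32 + 64;                             // 96
uint64_t Size = std::min(PieceSizeIn, AbsEnd - Start); // min(32, 16) == 16
// i.e. DIB.createBitPieceExpression(80, 16); Start >= AbsEnd would be skipped.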
@@ -4179,21 +4247,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
AI.eraseFromParent();
return true;
}
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(*DL);
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
- AllocaSlices AS(*DL, AI);
+ AllocaSlices AS(DL, AI);
DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
@@ -4258,8 +4327,11 @@ void SROA::deleteDeadInstructions(
DeadInsts.insert(U);
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
+ DbgDecl->eraseFromParent();
+ }
++NumDeleted;
I->eraseFromParent();
@@ -4363,12 +4435,6 @@ bool SROA::runOnFunction(Function &F) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -4376,9 +4442,10 @@ bool SROA::runOnFunction(Function &F) {
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
- I != E; ++I)
+ I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
+ }
bool Changed = false;
// A set of deleted alloca instruction pointers which should be removed from
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
index 179bbf7..3480cd4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
@@ -95,7 +95,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTree>();
}
@@ -217,13 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
/// \returns The profiled weight of I.
unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
+ if (!DLoc)
+ return 0;
+
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
return 0;
- DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ const DILocation *DIL = DLoc;
int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Discriminator = DIL->getDiscriminator();
unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
<< " (line offset: " << LOffset << "." << Discriminator
@@ -577,6 +580,10 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned i = 0;
+ // Add an entry count to the function using the samples gathered
+ // at the function entry.
+ F.setEntryCount(Samples->getHeadSamples());
+
// Before propagation starts, build, for each block, a list of
// unique predecessors and successors. This is necessary to handle
// identical edges in multiway branches. Since we visit all blocks and all
@@ -639,9 +646,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- DISubprogram S = getDISubprogram(&F);
- if (S.isSubprogram())
- return S.getLineNumber();
+ if (DISubprogram *S = getDISubprogram(&F))
+ return S->getLine();
// If we could not find the start of \p F, emit a diagnostic to inform the user
// about the missed opportunity.
@@ -731,7 +737,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
@@ -762,7 +768,7 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PDT = &getAnalysis<PostDominatorTree>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
Ctx = &F.getParent()->getContext();
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index a16e9e2..d5d3605 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -20,7 +20,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;
@@ -28,6 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
+ initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
@@ -38,13 +39,16 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeScalarizerPass(Registry);
initializeDSEPass(Registry);
initializeGVNPass(Registry);
- initializeEarlyCSEPass(Registry);
+ initializeEarlyCSELegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
+ initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLICMPass(Registry);
initializeLoopDeletionPass(Registry);
+ initializeLoopAccessAnalysisPass(Registry);
initializeLoopInstSimplifyPass(Registry);
+ initializeLoopInterchangePass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
@@ -55,9 +59,11 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeMergedLoadStoreMotionPass(Registry);
+ initializeNaryReassociatePass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
+ initializeRewriteStatepointsForGCPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
initializeSROAPass(Registry);
@@ -68,7 +74,13 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
+ initializeSpeculativeExecutionPass(Registry);
+ initializeStraightLineStrengthReducePass(Registry);
initializeLoadCombinePass(Registry);
+ initializePlaceBackedgeSafepointsImplPass(Registry);
+ initializePlaceSafepointsPass(Registry);
+ initializeFloat2IntPass(Registry);
+ initializeLoopDistributePass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -79,6 +91,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBitTrackingDCEPass());
+}
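// Hypothetical client-side use of the new C API binding, assuming the usual
// llvm-c pass manager lifecycle:
LLVMPassManagerRef PM = LLVMCreatePassManager();
LLVMAddBitTrackingDCEPass(PM); // schedules the BDCE pass registered above
LLVMDisposePassManager(PM);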
+
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAlignmentFromAssumptionsPass());
}
@@ -198,7 +214,6 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
- // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5c49a55..d955da7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -89,7 +89,6 @@ namespace {
private:
bool HasDomTree;
- const DataLayout *DL;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -159,9 +158,10 @@ namespace {
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -699,9 +699,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
- if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -717,7 +717,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -1032,17 +1032,8 @@ bool SROA::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
bool Changed = performPromotion(F);
- // FIXME: ScalarRepl currently depends on DataLayout more than it
- // theoretically needs to. It should be refactored in order to support
- // target-independent IR. Until this is done, just skip the actual
- // scalar-replacement portion of this pass.
- if (!DL) return Changed;
-
while (1) {
bool LocalChange = performScalarRepl(F);
if (!LocalChange) break; // No need to repromote if no scalarrepl
@@ -1061,7 +1052,7 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgDeclareInst *, 4> DDIs;
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaPromoter(ArrayRef<Instruction*> Insts, SSAUpdater &S,
DIBuilder *DB)
: LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
@@ -1069,8 +1060,8 @@ public:
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
if (auto *L = LocalAsMetadata::getIfExists(AI)) {
- if (auto *DebugNode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
- for (User *U : DebugNode->users())
+ if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
+ for (User *U : DINode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
@@ -1126,10 +1117,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
- Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ Inst);
}
}
};
@@ -1148,9 +1138,10 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
- bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
- bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
+static bool isSafeSelectToSpeculate(SelectInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
+ bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1158,11 +1149,13 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
// Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
- if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment(), DL))
+ if (!TDerefable &&
+ !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment()))
return false;
- if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment(), DL))
+ if (!FDerefable &&
+ !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment()))
return false;
}
@@ -1185,7 +1178,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
+static bool isSafePHIToSpeculate(PHINode *PN) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1209,6 +1202,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
MaxAlign = std::max(MaxAlign, LI->getAlignment());
}
+ const DataLayout &DL = PN->getModule()->getDataLayout();
+
// Okay, we know that we have one or more loads in the same block as the PHI.
// We can transform this if it is safe to push the loads into the predecessor
// blocks. The only thing to watch out for is that we can't put a possibly
@@ -1233,8 +1228,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
- if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
+ if (isDereferenceablePointer(InVal, DL) ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
continue;
return false;
@@ -1248,7 +1243,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
for (User *U : AI->users()) {
@@ -1279,7 +1274,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (select c, AI, ptr)" into a select of two
// loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI, DL))
+ if (!isSafeSelectToSpeculate(SI))
return false;
InstsToRewrite.insert(SI);
@@ -1294,7 +1289,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN, DL))
+ if (!isSafePHIToSpeculate(PN))
return false;
InstsToRewrite.insert(PN);
@@ -1416,6 +1411,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1479,6 +1475,7 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
@@ -1508,7 +1505,7 @@ bool SROA::performScalarRepl(Function &F) {
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
- uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
@@ -1531,8 +1528,9 @@ bool SROA::performScalarRepl(Function &F) {
// promoted itself. If so, we don't want to transform it needlessly. Note
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
- if (AllocaInst *NewAI = ConvertToScalarInfo(
- (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
+ .TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1610,6 +1608,7 @@ void SROA::DeleteDeadInstructions() {
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -1632,8 +1631,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1642,8 +1641,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -1675,6 +1674,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!Info.CheckedPHIs.insert(PN).second)
return;
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
@@ -1691,8 +1691,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, UI);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
@@ -1701,8 +1701,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, UI);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
isSafePHISelectUseForScalarRepl(UI, Offset, Info);
@@ -1746,9 +1746,11 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// constant part of the offset.
if (NonConstant)
Indices.pop_back();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
- NonConstantIdxSize))
+
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
+ DL))
MarkUnsafe(Info, GEPI);
}
@@ -1803,9 +1805,10 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
+ const DataLayout &DL = TheAccess->getModule()->getDataLayout();
// Check if this is a load/store of the entire alloca.
if (Offset == 0 && AllowWholeAccess &&
- MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
// regard to copying padding between elements). But, if an alloca is
@@ -1828,7 +1831,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
// Check if the offset/size correspond to a component within the alloca type.
Type *T = Info.AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize)) {
+ if (TypeHasComponent(T, Offset, MemSize, DL)) {
Info.hasSubelementAccess = true;
return;
}
@@ -1838,24 +1841,25 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL) {
Type *EltTy;
uint64_t EltSize;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
} else if (VectorType *VT = dyn_cast<VectorType>(T)) {
EltTy = VT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= VT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
@@ -1867,7 +1871,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
// Check if the component spans multiple elements.
if (Offset + Size > EltSize)
return false;
- return TypeHasComponent(EltTy, Offset, Size);
+ return TypeHasComponent(EltTy, Offset, Size, DL);
}
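// Worked example of the offset math above for an assumed padding-free
// {i32, [4 x i16]} queried with Offset == 6, Size == 2:
uint64_t Offset = 6;
unsigned EltIdx = 1;  // getElementContainingOffset(6) selects the array field
Offset -= 4;          // Layout->getElementOffset(1) == 4, so Offset == 2
uint64_t EltSize = 2; // DL.getTypeAllocSize(i16)
Offset %= EltSize;    // array step: Offset == 0
bool Fits = (Offset + 2 <= EltSize); // true; recursion ends inside one i16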
/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
@@ -1876,6 +1880,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
Use &TheUse = *UI++;
Instruction *User = cast<Instruction>(TheUse.getUser());
@@ -1893,8 +1898,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
- if (Offset == 0 &&
- MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
+ if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
@@ -1930,8 +1934,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
} else if (LIType->isIntegerTy() &&
- DL->getTypeAllocSize(LIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(LIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
@@ -1957,8 +1961,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
- DL->getTypeAllocSize(SIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(SIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
@@ -2001,7 +2005,8 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
Type *T = AI->getAllocatedType();
uint64_t EltOffset = 0;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
+ BC->getModule()->getDataLayout());
Instruction *Val = NewElts[Idx];
if (Val->getType() != BC->getDestTy()) {
Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
@@ -2016,11 +2021,12 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL) {
uint64_t Idx = 0;
+
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
Offset -= Layout->getElementOffset(Idx);
@@ -2028,7 +2034,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
return Idx;
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
T = AT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2036,7 +2042,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
}
VectorType *VT = cast<VectorType>(T);
T = VT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
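// Worked example of the array branch above with assumed sizes: indexing
// [8 x i32] at byte Offset 13 lands 1 byte into element 3.
uint64_t EltSize = 4;                 // DL.getTypeAllocSize(i32)
uint64_t Idx = 13 / EltSize;          // 3
uint64_t Offset = 13 - Idx * EltSize; // 1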
@@ -2049,6 +2055,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
uint64_t OldOffset = Offset;
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
// If the GEP was dynamic then it must have been a dynamic vector lookup.
// In this case, it must be the last GEP operand which is dynamic so keep that
@@ -2057,19 +2064,19 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
Type *T = AI->getAllocatedType();
Type *IdxTy;
- uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
T = AI->getAllocatedType();
uint64_t EltOffset = Offset;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
// If this GEP does not move the pointer across elements of the alloca
// being split, then it does not need to be rewritten.
@@ -2080,7 +2087,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
- uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
}
if (NonConstantIdx) {
@@ -2114,9 +2121,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// Put matching lifetime markers on everything from Offset up to
// Offset+OldSize.
Type *AIType = AI->getAllocatedType();
+ const DataLayout &DL = II->getModule()->getDataLayout();
uint64_t NewOffset = Offset;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
IRBuilder<> Builder(II);
uint64_t Size = OldSize->getLimitedValue();
@@ -2126,10 +2134,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// split the alloca again later.
unsigned AS = AI->getType()->getAddressSpace();
Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
- V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+ V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2145,7 +2153,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
for (; Idx != NewElts.size() && Size; ++Idx) {
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2221,6 +2229,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
bool SROADest = MI->getRawDest() == Inst;
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+ const DataLayout &DL = MI->getModule()->getDataLayout();
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -2237,10 +2246,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
Type *OtherTy = OtherPtrTy->getElementType();
if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
+ EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
} else {
Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = DL->getTypeAllocSize(EltTy)*i;
+ EltOffset = DL.getTypeAllocSize(EltTy) * i;
}
// The alignment of the other pointer is the guaranteed alignment of the
@@ -2281,7 +2290,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
- unsigned EltSize = DL->getTypeSizeInBits(ValTy);
+ unsigned EltSize = DL.getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
APInt TotalVal(OneVal);
// Set each byte.
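// Sketch of the byte-splat this rewrite performs (the loop body is elided by
// the hunk): replicate the fill byte across EltSize bits, so a memset value
// of 0xAB over a 32-bit element becomes 0xABABABAB.
APInt OneVal(32, 0xAB);
APInt TotalVal(OneVal);
for (unsigned Shift = 8; Shift != 32; Shift += 8)
  TotalVal |= OneVal.shl(Shift); // 0xABABABAB after the final iteration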
@@ -2311,7 +2320,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// this element.
}
- unsigned EltSize = DL->getTypeAllocSize(EltTy);
+ unsigned EltSize = DL.getTypeAllocSize(EltTy);
if (!EltSize)
continue;
@@ -2345,12 +2354,13 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
- if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits));
@@ -2360,15 +2370,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = DL->getStructLayout(EltSTy);
+ const StructLayout *Layout = DL.getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
- if (DL->isBigEndian())
- Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
+ if (DL.isBigEndian())
+ Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
Value *EltVal = SrcVal;
if (Shift) {
@@ -2377,7 +2387,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
// Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2402,12 +2412,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
} else {
ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
+ uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
else
Shift = 0;
@@ -2441,7 +2451,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
new StoreInst(EltVal, DestField, SI);
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift -= ElementOffset;
else
Shift += ElementOffset;
@@ -2459,7 +2469,8 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
@@ -2469,10 +2480,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = DL->getStructLayout(EltSTy);
+ Layout = DL.getStructLayout(EltSTy);
} else {
Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+ ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
}
Value *ResultVal =
@@ -2484,7 +2495,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
Value *SrcField = NewElts[i];
Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2515,7 +2526,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
else // Array case.
Shift = i*ArrayEltBitOffset;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
@@ -2532,7 +2543,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
// Handle tail padding by truncating the result
- if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
@@ -2589,13 +2600,15 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), *DL))
+ HasPadding(AI->getAllocatedType(), DL))
return false;
// If the alloca never has an access to just *part* of it, but is accessed
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 6036c09..d55dc6a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -165,7 +165,7 @@ private:
void gather(Instruction *, const ValueVector &);
bool canTransferMetadata(unsigned Kind);
void transferMetadata(Instruction *, const ValueVector &);
- bool getVectorLayout(Type *, unsigned, VectorLayout &);
+ bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
bool finish();
template<typename T> bool splitBinary(Instruction &, const T &);
@@ -173,7 +173,6 @@ private:
ScatterMap Scattered;
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
- const DataLayout *DL;
bool ScalarizeLoadStore;
};
@@ -214,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) {
CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -248,8 +247,6 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
@@ -345,10 +342,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
- VectorLayout &Layout) {
- if (!DL)
- return false;
-
+ VectorLayout &Layout, const DataLayout &DL) {
// Make sure we're dealing with a vector.
Layout.VecTy = dyn_cast<VectorType>(Ty);
if (!Layout.VecTy)
@@ -356,15 +350,15 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
// Check that we're dealing with full-byte elements.
Layout.ElemTy = Layout.VecTy->getElementType();
- if (DL->getTypeSizeInBits(Layout.ElemTy) !=
- DL->getTypeStoreSizeInBits(Layout.ElemTy))
+ if (DL.getTypeSizeInBits(Layout.ElemTy) !=
+ DL.getTypeStoreSizeInBits(Layout.ElemTy))
return false;
if (Alignment)
Layout.VecAlign = Alignment;
else
- Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
- Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+ Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
+ Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
return true;
}
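// Why the full-byte check above matters, assuming a Type *ElemTy and the
// module's DataLayout DL: an element qualifies only when its bit size equals
// its store size in bits, so i16 (16 == 16) is scalarized while i1 (1 != 8)
// rejects the whole <N x i1> vector.
bool ElemOK =
    DL.getTypeSizeInBits(ElemTy) == DL.getTypeStoreSizeInBits(ElemTy);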
@@ -456,7 +450,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
Indices.resize(NumIndices);
for (unsigned J = 0; J < NumIndices; ++J)
Indices[J] = Ops[J][I];
- Res[I] = Builder.CreateGEP(Base[I], Indices,
+ Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
GEPI.getName() + ".i" + Twine(I));
if (GEPI.isInBounds())
if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
@@ -595,7 +589,8 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
VectorLayout Layout;
- if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+ if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
+ LI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
@@ -619,7 +614,8 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
VectorLayout Layout;
Value *FullValue = SI.getValueOperand();
- if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+ if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+ SI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 6157746..3a782d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -160,6 +160,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -167,6 +168,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -177,6 +179,13 @@ static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
cl::desc("Do not separate the constant offset from a GEP instruction"),
cl::Hidden);
+// Setting this flag may emit false positives when the input module already
+// contains dead instructions. Therefore, we set it only in unit tests that are
+// free of dead code.
+static cl::opt<bool>
+ VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
+ cl::desc("Verify this pass produces no dead code"),
+ cl::Hidden);
namespace {
@@ -194,23 +203,26 @@ namespace {
/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5); however, in this case,
/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
- public:
+public:
/// Extracts a constant offset from the given GEP index. It returns the
/// new index representing the remainder (equal to the original index minus
/// the constant offset), or nullptr if we cannot extract a constant offset.
- /// \p Idx The given GEP index
- /// \p DL The datalayout of the module
- /// \p GEP The given GEP
- static Value *Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP);
+ /// \p Idx The given GEP index
+ /// \p GEP The given GEP
+ /// \p UserChainTail Outputs the tail of UserChain so that we can
+ /// garbage-collect unused instructions in UserChain.
+ static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
+ User *&UserChainTail, const DominatorTree *DT);
/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
/// failed). The meaning of the arguments is the same as in Extract.
- static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+ static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
+ const DominatorTree *DT);
- private:
- ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
- : DL(Layout), IP(InsertionPt) {}
+private:
+ ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
+ : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
+ }
/// Searches the expression that computes V for a non-zero constant C s.t.
/// V can be reassociated into the form V' + C. If the search is
/// successful, returns C and updates UserChain as a def-use chain from C to V;
@@ -268,12 +280,6 @@ class ConstantOffsetExtractor {
/// returns "sext i32 (zext i16 V to i32) to i64".
Value *applyExts(Value *V);
- /// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0.
- bool NoCommonBits(Value *LHS, Value *RHS) const;
- /// Computes which bits are known to be one or zero.
- /// \p KnownOne Mask of all bits that are known to be one.
- /// \p KnownZero Mask of all bits that are known to be zero.
- void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
/// A helper function that returns whether we can trace into the operands
/// of binary operator BO for a constant offset.
///
@@ -294,39 +300,36 @@ class ConstantOffsetExtractor {
/// A data structure used in rebuildWithoutConstOffset. Contains all
/// sext/zext instructions along UserChain.
SmallVector<CastInst *, 16> ExtInsts;
- /// The data layout of the module. Used in ComputeKnownBits.
- const DataLayout *DL;
Instruction *IP; /// Insertion position of cloned instructions.
+ const DataLayout &DL;
+ const DominatorTree *DT;
};
/// \brief A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
- public:
+public:
static char ID;
SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
bool LowerGEP = false)
- : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
+ : FunctionPass(ID), DL(nullptr), DT(nullptr), TM(TM), LowerGEP(LowerGEP) {
initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
}
bool doInitialization(Module &M) override {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (DLP == nullptr)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ DL = &M.getDataLayout();
return false;
}
-
bool runOnFunction(Function &F) override;
- private:
+private:
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
@@ -370,8 +373,11 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// Verify F is free of dead code.
+ void verifyNoDeadCode(Function &F);
const DataLayout *DL;
+ const DominatorTree *DT;
const TargetMachine *TM;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
@@ -384,8 +390,8 @@ INITIALIZE_PASS_BEGIN(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -413,7 +419,8 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
// Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
// don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
- if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
+ if (BO->getOpcode() == Instruction::Or &&
+ !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
return false;
// In addition, tracing into BO requires that its surrounding s/zext (if
@@ -498,9 +505,8 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
ConstantOffset = CI->getValue();
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
// Trace into subexpressions for more hoisting opportunities.
- if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
+ if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
- }
} else if (isa<SExtInst>(V)) {
ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
ZeroExtended, NonNegative).sext(BitWidth);
@@ -597,6 +603,11 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}
BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
+ assert(BO->getNumUses() <= 1 &&
+ "distributeExtsAndCloneChain clones each BinaryOperator in "
+ "UserChain, so no one should be used more than "
+ "once");
+
unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
Value *NextInChain = removeConstOffset(ChainIndex - 1);
@@ -609,6 +620,7 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
return TheOther;
}
+ BinaryOperator::BinaryOps NewOp = BO->getOpcode();
if (BO->getOpcode() == Instruction::Or) {
// Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
@@ -623,68 +635,46 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
//
// Replacing the "or" with "add" is fine, because
// a | (b + 5) = a + (b + 5) = (a + b) + 5
- if (OpNo == 0) {
- return BinaryOperator::CreateAdd(NextInChain, TheOther, BO->getName(),
- IP);
- } else {
- return BinaryOperator::CreateAdd(TheOther, NextInChain, BO->getName(),
- IP);
- }
+ NewOp = Instruction::Add;
}
- // We can reuse BO in this case, because the new expression shares the same
- // instruction type and BO is used at most once.
- assert(BO->getNumUses() <= 1 &&
- "distributeExtsAndCloneChain clones each BinaryOperator in "
- "UserChain, so no one should be used more than "
- "once");
- BO->setOperand(OpNo, NextInChain);
- BO->setHasNoSignedWrap(false);
- BO->setHasNoUnsignedWrap(false);
- // Make sure it appears after all instructions we've inserted so far.
- BO->moveBefore(IP);
- return BO;
+ BinaryOperator *NewBO;
+ if (OpNo == 0) {
+ NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
+ } else {
+ NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
+ }
+ NewBO->takeName(BO);
+ return NewBO;
}
-Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
- ConstantOffsetExtractor Extractor(DL, GEP);
+Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
+ User *&UserChainTail,
+ const DominatorTree *DT) {
+ ConstantOffsetExtractor Extractor(GEP, DT);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds());
- if (ConstantOffset == 0)
+ if (ConstantOffset == 0) {
+ UserChainTail = nullptr;
return nullptr;
+ }
// Separates the constant offset from the GEP index.
- return Extractor.rebuildWithoutConstOffset();
+ Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
+ UserChainTail = Extractor.UserChain.back();
+ return IdxWithoutConstOffset;
}
-int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
+ const DominatorTree *DT) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
- return ConstantOffsetExtractor(DL, GEP)
+ return ConstantOffsetExtractor(GEP, DT)
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
}
-void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
- APInt &KnownZero) const {
- IntegerType *IT = cast<IntegerType>(V->getType());
- KnownOne = APInt(IT->getBitWidth(), 0);
- KnownZero = APInt(IT->getBitWidth(), 0);
- llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
-}
-
-bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
- assert(LHS->getType() == RHS->getType() &&
- "LHS and RHS should have the same type");
- APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
- ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
- ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
- return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
-}
-
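The hunks above swap the pass-local NoCommonBits/ComputeKnownBits helpers for llvm::haveNoCommonBitsSet. The rewrite they guard rests on a simple identity: when two values share no set bits, bitwise-or behaves exactly like addition, so an "or" may be traced into and later rebuilt as an "add". A minimal standalone check of that identity (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check: if LHS & RHS == 0, then (LHS | RHS) == LHS + RHS,
// which is why the pass may treat "or" as "add" for offset extraction.
int main() {
  for (uint32_t LHS = 0; LHS < 256; ++LHS)
    for (uint32_t RHS = 0; RHS < 256; ++RHS)
      if ((LHS & RHS) == 0)
        assert((LHS | RHS) == LHS + RHS);
  return 0;
}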
bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
GetElementPtrInst *GEP) {
bool Changed = false;
@@ -713,7 +703,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
if (isa<SequentialType>(*GTI)) {
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
- ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
@@ -770,14 +760,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
}
}
// Create an ugly GEP with a single index for each index.
- ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
}
}
// Create a GEP with the constant offset index.
if (AccumulativeByteOffset != 0) {
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
- ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
}
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
@@ -857,7 +849,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// of variable indices. Therefore, we don't check for addressing modes in that
// case.
if (!LowerGEP) {
- TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *GEP->getParent()->getParent());
if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
/*BaseGV=*/nullptr, AccumulativeByteOffset,
/*HasBaseReg=*/true, /*Scale=*/0)) {
@@ -877,10 +871,17 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (isa<SequentialType>(*GTI)) {
// Splits this GEP index into a variadic part and a constant offset, and
// uses the variadic part as the new index.
+ Value *OldIdx = GEP->getOperand(I);
+ User *UserChainTail;
Value *NewIdx =
- ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+ ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
if (NewIdx != nullptr) {
+ // Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
+ // After switching to the new index, we can garbage-collect UserChain
+ // and the old index if they are not used.
+ RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
+ RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
}
}
}
@@ -910,7 +911,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (LowerGEP) {
// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
// arithmetic operations if the target uses alias analysis in codegen.
- if (TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())
+ if (TM && TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA())
lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
else
lowerToArithmetics(GEP, AccumulativeByteOffset);
@@ -962,8 +963,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
- NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
+ ConstantInt::get(IntPtrTy, Index, true),
+ GEP->getName(), GEP);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -983,8 +985,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
GEP->getPointerAddressSpace());
NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
- "uglygep", GEP);
+ Type::getInt8Ty(GEP->getContext()), NewGEP,
+ ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
+ GEP);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
@@ -996,9 +999,14 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
}
bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
if (DisableSeparateConstOffsetFromGEP)
return false;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
@@ -1009,5 +1017,22 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
// already.
}
}
+
+ if (VerifyNoDeadCode)
+ verifyNoDeadCode(F);
+
return Changed;
}
+
+void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (isInstructionTriviallyDead(&I)) {
+ std::string ErrMessage;
+ raw_string_ostream RSO(ErrMessage);
+ RSO << "Dead instruction detected!\n" << I << "\n";
+ llvm_unreachable(RSO.str().c_str());
+ }
+ }
+ }
+}
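For reference, the new verification is meant to be driven from dead-code-free unit tests; with the standard opt driver that would look something like opt -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -S (an illustrative invocation, not part of the patch), and any trivially dead instruction left behind then aborts through llvm_unreachable.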
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 2e317f9..8566cd9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,7 +21,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -37,6 +37,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "simplifycfg"
@@ -47,36 +48,6 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
STATISTIC(NumSimpl, "Number of blocks simplified");
-namespace {
-struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- unsigned BonusInstThreshold;
- CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
- BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfo>();
- }
-};
-}
-
-char CFGSimplifyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
- false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
- false)
-
-// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
- return new CFGSimplifyPass(Threshold);
-}
-
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return block, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
@@ -156,7 +127,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *DL, AssumptionCache *AC,
+ AssumptionCache *AC,
unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
@@ -166,7 +137,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AC)) {
+ if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -176,21 +147,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
return Changed;
}
-// It is possible that we may require multiple passes over the code to fully
-// simplify the CFG.
-//
-bool CFGSimplifyPass::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- AssumptionCache *AC =
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+ AssumptionCache *AC, int BonusInstThreshold) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -204,9 +165,66 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
return true;
}
+
+SimplifyCFGPass::SimplifyCFGPass()
+ : BonusInstThreshold(UserBonusInstThreshold) {}
+
+SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
+ : BonusInstThreshold(BonusInstThreshold) {}
+
+PreservedAnalyses SimplifyCFGPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &TTI = AM->getResult<TargetIRAnalysis>(F);
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
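For orientation, a minimal sketch of driving the ported pass under the new pass manager (hypothetical driver code, not part of the patch; it assumes TargetIRAnalysis and AssumptionAnalysis were registered with the analysis manager elsewhere):

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

// Hypothetical sketch: run the ported pass over one function. The analysis
// manager is passed by pointer, matching the run() signature above.
llvm::PreservedAnalyses runOnce(llvm::Function &F,
                                llvm::AnalysisManager<llvm::Function> &AM) {
  llvm::SimplifyCFGPass P; // uses the -bonus-inst-threshold default
  return P.run(F, &AM);
}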
+namespace {
+struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned BonusInstThreshold;
+ CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
+ BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+};
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
+ return new CFGSimplifyPass(Threshold);
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 903b675..b169d56 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,7 +36,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
- const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -50,9 +50,9 @@ namespace {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
}
private:
bool ProcessBlock(BasicBlock &BB);
@@ -64,7 +64,7 @@ namespace {
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
@@ -98,10 +98,8 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AliasAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -196,7 +194,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst, DL))
+ if (!isSafeToSpeculativelyExecute(Inst))
return false;
// We don't want to sink across a critical edge if we don't dominate the
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
new file mode 100644
index 0000000..ff3f00a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -0,0 +1,243 @@
+//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists instructions to enable speculative execution on
+// targets where branches are expensive. This is aimed at GPUs. It
+// currently works on simple if-then and if-then-else
+// patterns.
+//
+// Removing branches is not the only motivation for this
+// pass. E.g. consider this code and assume that there is no
+// addressing mode for multiplying by sizeof(*a):
+//
+// if (b > 0)
+// c = a[i + 1]
+// if (d > 0)
+// e = a[i + 2]
+//
+// turns into
+//
+// p = &a[i + 1];
+// if (b > 0)
+// c = *p;
+// q = &a[i + 2];
+// if (d > 0)
+// e = *q;
+//
+// which could later be optimized to
+//
+// r = &a[i];
+// if (b > 0)
+// c = r[1];
+// if (d > 0)
+// e = r[2];
+//
+// Later passes sink back much of the speculated code that did not enable
+// further optimization.
+//
+// This pass is more aggressive than the function SpeculativelyExecuteBB in
+// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
+// it will speculate at most one instruction. It also will not speculate if
+// there is a value defined in the if-block that is only used in the then-block.
+// These restrictions make sense since the speculation in SimplifyCFG seems
+// aimed at introducing cheap selects, while this pass is intended to do more
+// aggressive speculation while counting on later passes to either capitalize on
+// that or clean it up.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "speculative-execution"
+
+// The risk that speculation will not pay off increases with the
+// number of instructions speculated, so we put a limit on that.
+static cl::opt<unsigned> SpecExecMaxSpeculationCost(
+ "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
+ cl::desc("Speculative execution is not applied to basic blocks where "
+ "the cost of the instructions to speculatively execute "
+ "exceeds this limit."));
+
+// Speculating just a few instructions from a larger block tends not
+// to be profitable and this limit prevents that. A reason for that is
+// that small basic blocks are more likely to be candidates for
+// further optimization.
+static cl::opt<unsigned> SpecExecMaxNotHoisted(
+ "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
+ cl::desc("Speculative execution is not applied to basic blocks where the "
+ "number of instructions that would not be speculatively executed "
+ "exceeds this limit."));
+
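A compact model of the bookkeeping in considerHoistingFromTo below (an illustrative sketch, not part of the patch): hoistable instructions accumulate cost against the first limit, everything else counts against the second, and nothing happens unless some cost was actually accumulated.

#include <cstdint>

// Illustrative model of the two limits above (defaults: cost 7, leftover 5).
bool worthHoisting(const uint32_t *Costs, const bool *Hoistable, int N) {
  uint32_t TotalCost = 0, NotHoisted = 0;
  for (int I = 0; I < N; ++I) {
    if (Hoistable[I]) {
      if ((TotalCost += Costs[I]) > 7)
        return false; // too much to hoist
    } else if (++NotHoisted > 5) {
      return false;   // too much left behind
    }
  }
  return TotalCost > 0; // something must actually be hoisted
}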
+namespace {
+class SpeculativeExecution : public FunctionPass {
+ public:
+ static char ID;
+ SpeculativeExecution(): FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool runOnBasicBlock(BasicBlock &B);
+ bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
+
+ const TargetTransformInfo *TTI = nullptr;
+};
+} // namespace
+
+char SpeculativeExecution::ID = 0;
+INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
+ "Speculatively execute instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
+ "Speculatively execute instructions", false, false)
+
+void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+}
+
+bool SpeculativeExecution::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool Changed = false;
+ for (auto& B : F) {
+ Changed |= runOnBasicBlock(B);
+ }
+ return Changed;
+}
+
+bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
+ BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
+ if (BI == nullptr)
+ return false;
+
+ if (BI->getNumSuccessors() != 2)
+ return false;
+ BasicBlock &Succ0 = *BI->getSuccessor(0);
+ BasicBlock &Succ1 = *BI->getSuccessor(1);
+
+ if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) {
+ return false;
+ }
+
+ // Hoist from if-then (triangle).
+ if (Succ0.getSinglePredecessor() != nullptr &&
+ Succ0.getSingleSuccessor() == &Succ1) {
+ return considerHoistingFromTo(Succ0, B);
+ }
+
+ // Hoist from if-else (triangle).
+ if (Succ1.getSinglePredecessor() != nullptr &&
+ Succ1.getSingleSuccessor() == &Succ0) {
+ return considerHoistingFromTo(Succ1, B);
+ }
+
+ // Hoist from if-then-else (diamond), but only if it is equivalent to
+ // an if-else or if-then due to one of the branches doing nothing.
+ if (Succ0.getSinglePredecessor() != nullptr &&
+ Succ1.getSinglePredecessor() != nullptr &&
+ Succ1.getSingleSuccessor() != nullptr &&
+ Succ1.getSingleSuccessor() != &B &&
+ Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) {
+ // If a block has only one instruction, then that is a terminator
+    // instruction, meaning the block does nothing. This does happen.
+ if (Succ1.size() == 1) // equivalent to if-then
+ return considerHoistingFromTo(Succ0, B);
+ if (Succ0.size() == 1) // equivalent to if-else
+ return considerHoistingFromTo(Succ1, B);
+ }
+
+ return false;
+}
+
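To make the diamond case above concrete, here is a hypothetical source-level shape whose else arm is empty (its block holds only a terminator), which the pass treats exactly like an if-then triangle (illustrative, not part of the patch):

// Hypothetical C++ shape for the empty-arm diamond handled above.
int diamondWithEmptyElse(bool B, const int *A, int I) {
  int C = 0;
  if (B)
    C = A[I + 1]; // then-block: instructions considered for hoisting
  else {
    // else-block: empty, only an unconditional branch
  }
  return C;
}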
+static unsigned ComputeSpeculationCost(const Instruction *I,
+ const TargetTransformInfo &TTI) {
+ switch (Operator::getOpcode(I)) {
+ case Instruction::GetElementPtr:
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Select:
+ case Instruction::Shl:
+ case Instruction::Sub:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Xor:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return TTI.getUserCost(I);
+
+ default:
+ return UINT_MAX; // Disallow anything not whitelisted.
+ }
+}
+
+bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
+ BasicBlock &ToBlock) {
+ SmallSet<const Instruction *, 8> NotHoisted;
+ const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
+ for (Value* V : U->operand_values()) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (NotHoisted.count(I) > 0)
+ return false;
+ }
+ }
+ return true;
+ };
+
+ unsigned TotalSpeculationCost = 0;
+ for (auto& I : FromBlock) {
+ const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
+ if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
+ AllPrecedingUsesFromBlockHoisted(&I)) {
+ TotalSpeculationCost += Cost;
+ if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
+ return false; // too much to hoist
+ } else {
+ NotHoisted.insert(&I);
+ if (NotHoisted.size() > SpecExecMaxNotHoisted)
+ return false; // too much left behind
+ }
+ }
+
+ if (TotalSpeculationCost == 0)
+ return false; // nothing to hoist
+
+ for (auto I = FromBlock.begin(); I != FromBlock.end();) {
+ // We have to increment I before moving Current as moving Current
+ // changes the list that I is iterating through.
+ auto Current = I;
+ ++I;
+ if (!NotHoisted.count(Current)) {
+ Current->moveBefore(ToBlock.getTerminator());
+ }
+ }
+ return true;
+}
+
+namespace llvm {
+
+FunctionPass *createSpeculativeExecutionPass() {
+ return new SpeculativeExecution();
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
new file mode 100644
index 0000000..453503a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -0,0 +1,710 @@
+//===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements straight-line strength reduction (SLSR). Unlike loop
+// strength reduction, this algorithm is designed to reduce arithmetic
+// redundancy in straight-line code instead of loops. It has proven to be
+// effective in simplifying arithmetic statements derived from an unrolled loop.
+// It can also simplify the logic of SeparateConstOffsetFromGEP.
+//
+// There are many optimizations we can perform in the domain of SLSR. This file
+// for now contains only an initial step. Specifically, we look for strength
+// reduction candidates in the following forms:
+//
+// Form 1: B + i * S
+// Form 2: (B + i) * S
+// Form 3: &B[i * S]
+//
+// where S is an integer variable, and i is a constant integer. If we find two
+// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
+// in a simpler way with respect to S1. For example,
+//
+// S1: X = B + i * S
+// S2: Y = B + i' * S => X + (i' - i) * S
+//
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S => X + (i' - i) * S
+//
+// S1: X = &B[i * S]
+// S2: Y = &B[i' * S] => &X[(i' - i) * S]
+//
+// Note: (i' - i) * S is folded to the extent possible.
+//
+// This rewriting is in general a good idea. The code patterns we focus on
+// usually come from loop unrolling, so (i' - i) * S is likely the same
+// across iterations and can be reused. When that happens, the optimized form
+// takes only one add starting from the second iteration.
+//
+// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
+// multiple bases, we choose to rewrite S2 with respect to its "immediate"
+// basis, the basis that is the closest ancestor in the dominator tree.
+//
+// TODO:
+//
+// - Floating point arithmetic when fast math is enabled.
+//
+// - SLSR may decrease ILP at the architecture level. Targets that are very
+// sensitive to ILP may want to disable it. Having SLSR to consider ILP is
+// left as future work.
+//
+// - When (i' - i) is constant but i and i' are not, we could still perform
+// SLSR.
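A tiny standalone check of the Form 1 rewrite described above (illustrative only, not part of the patch): given a basis X = B + i * S, the candidate Y = B + i' * S can be recomputed as X + (i' - i) * S.

#include <cassert>
#include <cstdint>

// Illustrative check of the Form 1 rewrite for sample values.
int main() {
  const int64_t B = 100, S = 7, I = 2, IPrime = 5;
  const int64_t X = B + I * S;       // S1: the basis
  const int64_t Y = B + IPrime * S;  // S2: the candidate
  assert(Y == X + (IPrime - I) * S); // the strength-reduced form
  return 0;
}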
+#include <vector>
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+namespace {
+
+class StraightLineStrengthReduce : public FunctionPass {
+public:
+ // SLSR candidate. Such a candidate must be in one of the forms described in
+ // the header comments.
+ struct Candidate : public ilist_node<Candidate> {
+ enum Kind {
+ Invalid, // reserved for the default constructor
+ Add, // B + i * S
+ Mul, // (B + i) * S
+ GEP, // &B[..][i * S][..]
+ };
+
+ Candidate()
+ : CandidateKind(Invalid), Base(nullptr), Index(nullptr),
+ Stride(nullptr), Ins(nullptr), Basis(nullptr) {}
+ Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I)
+ : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
+ Basis(nullptr) {}
+ Kind CandidateKind;
+ const SCEV *Base;
+ // Note that Index and Stride of a GEP candidate do not necessarily have the
+ // same integer type. In that case, during rewriting, Stride will be
+ // sign-extended or truncated to Index's type.
+ ConstantInt *Index;
+ Value *Stride;
+ // The instruction this candidate corresponds to. It helps us to rewrite a
+ // candidate with respect to its immediate basis. Note that one instruction
+ // can correspond to multiple candidates depending on how you associate the
+ // expression. For instance,
+ //
+ // (a + 1) * (b + 2)
+ //
+ // can be treated as
+ //
+ // <Base: a, Index: 1, Stride: b + 2>
+ //
+ // or
+ //
+ // <Base: b, Index: 2, Stride: a + 1>
+ Instruction *Ins;
+ // Points to the immediate basis of this candidate, or nullptr if we cannot
+ // find any basis for this candidate.
+ Candidate *Basis;
+ };
+
+ static char ID;
+
+ StraightLineStrengthReduce()
+ : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) {
+ initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // We do not modify the shape of the CFG.
+ AU.setPreservesCFG();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
+ // share the same base and stride.
+ bool isBasisFor(const Candidate &Basis, const Candidate &C);
+ // Returns whether the candidate can be folded into an addressing mode.
+ bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
+ const DataLayout *DL);
+  // Returns true if C is already in its simplest form and is not worth being
+  // rewritten.
+ bool isSimplestForm(const Candidate &C);
+ // Checks whether I is in a candidate form. If so, adds all the matching forms
+ // to Candidates, and tries to find the immediate basis for each of them.
+ void allocateCandidatesAndFindBasis(Instruction *I);
+ // Allocate candidates and find bases for Add instructions.
+ void allocateCandidatesAndFindBasisForAdd(Instruction *I);
+ // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
+ // candidate.
+ void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
+ Instruction *I);
+ // Allocate candidates and find bases for Mul instructions.
+ void allocateCandidatesAndFindBasisForMul(Instruction *I);
+  // Splits LHS into Base + Index and, if that succeeds, calls
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
+ Instruction *I);
+ // Allocate candidates and find bases for GetElementPtr instructions.
+ void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
+ // A helper function that scales Idx with ElementSize before invoking
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
+ // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
+ // basis.
+ void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
+ ConstantInt *Idx, Value *S,
+ Instruction *I);
+ // Rewrites candidate C with respect to Basis.
+ void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
+  // A helper function that factors ArrayIdx into a product of a stride and a
+ // constant index, and invokes allocateCandidatesAndFindBasis with the
+ // factorings.
+ void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
+ GetElementPtrInst *GEP);
+ // Emit code that computes the "bump" from Basis to C. If the candidate is a
+ // GEP and the bump is not divisible by the element size of the GEP, this
+ // function sets the BumpWithUglyGEP flag to notify its caller to bump the
+ // basis using an ugly GEP.
+ static Value *emitBump(const Candidate &Basis, const Candidate &C,
+ IRBuilder<> &Builder, const DataLayout *DL,
+ bool &BumpWithUglyGEP);
+
+ const DataLayout *DL;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetTransformInfo *TTI;
+ ilist<Candidate> Candidates;
+ // Temporarily holds all instructions that are unlinked (but not deleted) by
+ // rewriteCandidateWithBasis. These instructions will be actually removed
+ // after all rewriting finishes.
+ std::vector<Instruction *> UnlinkedInstructions;
+};
+} // anonymous namespace
+
+char StraightLineStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
+ "Straight line strength reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
+ "Straight line strength reduction", false, false)
+
+FunctionPass *llvm::createStraightLineStrengthReducePass() {
+ return new StraightLineStrengthReduce();
+}
+
+bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
+ const Candidate &C) {
+ return (Basis.Ins != C.Ins && // skip the same instruction
+ // Basis must dominate C in order to rewrite C with respect to Basis.
+ DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
+ // They share the same base, stride, and candidate kind.
+ Basis.Base == C.Base &&
+ Basis.Stride == C.Stride &&
+ Basis.CandidateKind == C.CandidateKind);
+}
+
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ GlobalVariable *BaseGV = nullptr;
+ int64_t BaseOffset = 0;
+ bool HasBaseReg = false;
+ int64_t Scale = 0;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
+ BaseGV = GV;
+ else
+ HasBaseReg = true;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return false;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+ return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
+ BaseOffset, HasBaseReg, Scale);
+}
+
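As a worked example of the accumulation above (illustrative, not part of the patch): for a GEP equivalent to &A[3][i] over rows of ten i32s, the constant index contributes 3 * 40 bytes to BaseOffset and the variable index supplies a Scale of 4; the target is then asked whether base + 120 + 4 * reg is a legal addressing mode.

#include <cassert>
#include <cstdint>

// Illustrative re-run of isGEPFoldable's bookkeeping for &A[3][i],
// where A has rows of 10 x i32 (40 bytes per row, 4 bytes per element).
int main() {
  const int64_t RowSize = 40, ElemSize = 4;
  int64_t BaseOffset = 0, Scale = 0;
  BaseOffset += 3 * RowSize; // constant index folds into the offset
  Scale = ElemSize;          // variable index needs a scale register
  assert(BaseOffset == 120 && Scale == 4);
  return 0;
}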
+// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
+static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
+ TargetTransformInfo *TTI) {
+ return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
+ Index->getSExtValue());
+}
+
+bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
+ TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ if (C.CandidateKind == Candidate::Add)
+ return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
+ if (C.CandidateKind == Candidate::GEP)
+ return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
+ return false;
+}
+
+// Returns true if GEP has zero or one non-zero index.
+static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
+ unsigned NumNonZeroIndices = 0;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
+ ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
+ if (ConstIdx == nullptr || !ConstIdx->isZero())
+ ++NumNonZeroIndices;
+ }
+ return NumNonZeroIndices <= 1;
+}
+
+bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
+ if (C.CandidateKind == Candidate::Add) {
+ // B + 1 * S or B + (-1) * S
+ return C.Index->isOne() || C.Index->isMinusOne();
+ }
+ if (C.CandidateKind == Candidate::Mul) {
+ // (B + 0) * S
+ return C.Index->isZero();
+ }
+ if (C.CandidateKind == Candidate::GEP) {
+ // (char*)B + S or (char*)B - S
+ return ((C.Index->isOne() || C.Index->isMinusOne()) &&
+ hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
+ }
+ return false;
+}
+
+// TODO: We currently implement an algorithm whose time complexity is linear in
+// the number of existing candidates. However, we could do better by using
+// ScopedHashTable. Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table look up.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I) {
+ Candidate C(CT, B, Idx, S, I);
+ // SLSR can complicate an instruction in two cases:
+ //
+ // 1. If we can fold I into an addressing mode, computing I is likely free or
+ // takes only one instruction.
+ //
+  // 2. I is already in its simplest form. For example, when
+ // X = B + 8 * S
+ // Y = B + S,
+ // rewriting Y to X - 7 * S is probably a bad idea.
+ //
+ // In the above cases, we still add I to the candidate list so that I can be
+ // the basis of other candidates, but we leave I's basis blank so that I
+ // won't be rewritten.
+ if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+ // Try to compute the immediate basis of C.
+ unsigned NumIterations = 0;
+    // Limit the scan radius to avoid running in quadratic time.
+ static const unsigned MaxNumIterations = 50;
+ for (auto Basis = Candidates.rbegin();
+ Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+ ++Basis, ++NumIterations) {
+ if (isBasisFor(*Basis, C)) {
+ C.Basis = &(*Basis);
+ break;
+ }
+ }
+ }
+ // Regardless of whether we find a basis for C, we need to push C to the
+ // candidate list so that it can be the basis of other candidates.
+ Candidates.push_back(C);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ allocateCandidatesAndFindBasisForAdd(I);
+ break;
+ case Instruction::Mul:
+ allocateCandidatesAndFindBasisForMul(I);
+ break;
+ case Instruction::GetElementPtr:
+ allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+ break;
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Instruction *I) {
+ // Try matching B + i * S.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I an add?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
+ if (LHS != RHS)
+ allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *S = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + Idx * S
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
+ APInt One(Idx->getBitWidth(), 1);
+ Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else {
+ // At least, I = LHS + 1 * RHS
+ ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
+ I);
+ }
+}
+
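The shl arm above relies on the identity LHS + (S << Idx) == LHS + S * (1 << Idx), which turns a shift into the canonical "B + i * S" Add form; a one-line sanity check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative: a left shift by Idx is a multiplication by (1 << Idx).
int main() {
  const int64_t LHS = 11, S = 5, Idx = 3;
  assert(LHS + (S << Idx) == LHS + S * (int64_t(1) << Idx));
  return 0;
}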
+// Returns true if A matches B + C where C is constant.
+static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
+ return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
+ match(A, m_Add(m_ConstantInt(C), m_Value(B))));
+}
+
+// Returns true if A matches B | C where C is constant.
+static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
+ return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
+ match(A, m_Or(m_ConstantInt(C), m_Value(B))));
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *B = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (matchesAdd(LHS, B, Idx)) {
+ // If LHS is in the form of "Base + Index", then I is in the form of
+ // "(Base + Index) * RHS".
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
+ // If LHS is in the form of "Base | Index" and Base and Index have no common
+ // bits set, then
+ // Base | Index = Base + Index
+ // and I is thus in the form of "(Base + Index) * RHS".
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ } else {
+ // Otherwise, at least try the form (LHS + 0) * RHS.
+ ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
+ I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
+ Instruction *I) {
+ // Try matching (B + i) * S.
+ // TODO: we could extend SLSR to float and vector types.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I a mul?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
+ if (LHS != RHS) {
+ // Symmetrically, try to split RHS to Base + Index.
+ allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
+ const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
+ Instruction *I) {
+ // I = B + sext(Idx *nsw S) * ElementSize
+ // = B + (sext(Idx) * sext(S)) * ElementSize
+ // = B + (sext(Idx) * ElementSize) * sext(S)
+ // Casting to IntegerType is safe because we skipped vector GEPs.
+ IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
+ ConstantInt *ScaledIdx = ConstantInt::get(
+ IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
+ allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+}
+
+void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
+ const SCEV *Base,
+ uint64_t ElementSize,
+ GetElementPtrInst *GEP) {
+ // At least, ArrayIdx = ArrayIdx *nsw 1.
+ allocateCandidatesAndFindBasisForGEP(
+ Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
+ ArrayIdx, ElementSize, GEP);
+ Value *LHS = nullptr;
+ ConstantInt *RHS = nullptr;
+ // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
+ // itself. This would allow us to handle the shl case for free. However,
+ // matching SCEVs has two issues:
+ //
+ // 1. this would complicate rewriting because the rewriting procedure
+ // would have to translate SCEVs back to IR instructions. This translation
+ // is difficult when LHS is further evaluated to a composite SCEV.
+ //
+ // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
+ // to strip nsw/nuw flags which are critical for SLSR to trace into
+ // sext'ed multiplication.
+ if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // SLSR is currently unsafe if i * S may overflow.
+ // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+ allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+ // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+ APInt One(RHS->getBitWidth(), 1);
+ ConstantInt *PowerOf2 =
+ ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+ allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
+ GetElementPtrInst *GEP) {
+ // TODO: handle vector GEPs
+ if (GEP->getType()->isVectorTy())
+ return;
+
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ IndexExprs.push_back(SE->getSCEV(*I));
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+ if (!isa<SequentialType>(*GTI++))
+ continue;
+
+ const SCEV *OrigIndexExpr = IndexExprs[I - 1];
+ IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0);
+
+ // The base of this candidate is GEP's base plus the offsets of all
+ // indices except this current one.
+ const SCEV *BaseExpr = SE->getGEPExpr(GEP->getSourceElementType(),
+ SE->getSCEV(GEP->getPointerOperand()),
+ IndexExprs, GEP->isInBounds());
+ Value *ArrayIdx = GEP->getOperand(I);
+ uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
+ factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
+ // When ArrayIdx is the sext of a value, we try to factor that value as
+ // well. Handling this case is important because array indices are
+ // typically sign-extended to the pointer size.
+ Value *TruncatedArrayIdx = nullptr;
+ if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))))
+ factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
+
+ IndexExprs[I - 1] = OrigIndexExpr;
+ }
+}
+
+// A helper function that unifies the bitwidth of A and B.
+static void unifyBitWidth(APInt &A, APInt &B) {
+ if (A.getBitWidth() < B.getBitWidth())
+ A = A.sext(B.getBitWidth());
+ else if (A.getBitWidth() > B.getBitWidth())
+ B = B.sext(A.getBitWidth());
+}
+
+Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
+ const Candidate &C,
+ IRBuilder<> &Builder,
+ const DataLayout *DL,
+ bool &BumpWithUglyGEP) {
+ APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
+ unifyBitWidth(Idx, BasisIdx);
+ APInt IndexOffset = Idx - BasisIdx;
+
+ BumpWithUglyGEP = false;
+ if (Basis.CandidateKind == Candidate::GEP) {
+ APInt ElementSize(
+ IndexOffset.getBitWidth(),
+ DL->getTypeAllocSize(
+ cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType()));
+ APInt Q, R;
+ APInt::sdivrem(IndexOffset, ElementSize, Q, R);
+ if (R.getSExtValue() == 0)
+ IndexOffset = Q;
+ else
+ BumpWithUglyGEP = true;
+ }
+
+ // Compute Bump = C - Basis = (i' - i) * S.
+ // Common case 1: if (i' - i) is 1, Bump = S.
+ if (IndexOffset.getSExtValue() == 1)
+ return C.Stride;
+ // Common case 2: if (i' - i) is -1, Bump = -S.
+ if (IndexOffset.getSExtValue() == -1)
+ return Builder.CreateNeg(C.Stride);
+
+ // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+ // have different bit widths.
+ IntegerType *DeltaType =
+ IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+ Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+ if (IndexOffset.isPowerOf2()) {
+ // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+ ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+ return Builder.CreateShl(ExtendedStride, Exponent);
+ }
+ if ((-IndexOffset).isPowerOf2()) {
+    // If (i - i') is a power of 2, Bump = -(sext/trunc(S) << log(i - i')).
+ ConstantInt *Exponent =
+ ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
+ return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
+ }
+ Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
+ return Builder.CreateMul(ExtendedStride, Delta);
+}
+
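emitBump's shift shortcuts above can be checked in isolation (illustrative, not part of the patch): when i' - i is a power of two the bump is a left shift of the stride, and when i - i' is a power of two it is the negation of such a shift.

#include <cassert>
#include <cstdint>

// Illustrative check of the power-of-two fast paths in emitBump.
int main() {
  const int64_t S = 13;
  assert(8 * S == (S << 3));   // i' - i = 8  -> Bump = S << 3
  assert(-8 * S == -(S << 3)); // i' - i = -8 -> Bump = -(S << 3)
  return 0;
}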
+void StraightLineStrengthReduce::rewriteCandidateWithBasis(
+ const Candidate &C, const Candidate &Basis) {
+ assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
+ C.Stride == Basis.Stride);
+  // We run rewriteCandidateWithBasis on all candidates in post-order, so the
+  // basis of a candidate cannot be unlinked before the candidate itself.
+ assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
+
+ // An instruction can correspond to multiple candidates. Therefore, instead of
+ // simply deleting an instruction when we rewrite it, we mark its parent as
+ // nullptr (i.e. unlink it) so that we can skip the candidates whose
+ // instruction is already rewritten.
+ if (!C.Ins->getParent())
+ return;
+
+ IRBuilder<> Builder(C.Ins);
+ bool BumpWithUglyGEP;
+ Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
+ Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
+ switch (C.CandidateKind) {
+ case Candidate::Add:
+ case Candidate::Mul:
+ // C = Basis + Bump
+ if (BinaryOperator::isNeg(Bump)) {
+ // If Bump is a neg instruction, emit C = Basis - (-Bump).
+ Reduced =
+ Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ // We only use the negative argument of Bump, and Bump itself may be
+ // trivially dead.
+ RecursivelyDeleteTriviallyDeadInstructions(Bump);
+ } else {
+ Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ }
+ break;
+ case Candidate::GEP:
+ {
+ Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
+ if (BumpWithUglyGEP) {
+ // C = (char *)Basis + Bump
+ unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
+ Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
+ Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
+ if (InBounds)
+ Reduced =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
+ else
+ Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
+ Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
+ } else {
+ // C = gep Basis, Bump
+ // Canonicalize bump to pointer size.
+ Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ else
+ Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
+ }
+ }
+ break;
+ default:
+ llvm_unreachable("C.CandidateKind is invalid");
+ };
+ Reduced->takeName(C.Ins);
+ C.Ins->replaceAllUsesWith(Reduced);
+ // Unlink C.Ins so that we can skip other candidates also corresponding to
+ // C.Ins. The actual deletion is postponed to the end of runOnFunction.
+ C.Ins->removeFromParent();
+ UnlinkedInstructions.push_back(C.Ins);
+}
+
+bool StraightLineStrengthReduce::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+  // Traverse the dominator tree in depth-first order. This order makes sure
+ // all bases of a candidate are in Candidates when we process it.
+ for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
+ for (auto &I : *node->getBlock())
+ allocateCandidatesAndFindBasis(&I);
+ }
+
+  // Rewrite candidates in reverse depth-first order. This order makes sure
+ // a candidate being rewritten is not a basis for any other candidate.
+ while (!Candidates.empty()) {
+ const Candidate &C = Candidates.back();
+ if (C.Basis != nullptr) {
+ rewriteCandidateWithBasis(C, *C.Basis);
+ }
+ Candidates.pop_back();
+ }
+
+  // Delete all unlinked instructions.
+ for (auto *UnlinkedInst : UnlinkedInstructions) {
+ for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
+ Value *Op = UnlinkedInst->getOperand(I);
+ UnlinkedInst->setOperand(I, nullptr);
+ RecursivelyDeleteTriviallyDeadInstructions(Op);
+ }
+ delete UnlinkedInst;
+ }
+ bool Ret = !UnlinkedInstructions.empty();
+ UnlinkedInstructions.clear();
+ return Ret;
+}
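
The pass above rewrites a candidate C = B + Index * S against a dominating
basis that shares B and S, emitting only the basis plus a cheap "bump"
(e.g. with x = b + 2*s dominating y = b + 3*s, y becomes x + s). A minimal
sketch of that rewrite using only public IRBuilder calls -- the helper name
and parameters here are illustrative, not part of the patch:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Rewrite candidate C as Basis + Bump and redirect all users of C.
    static Value *rewriteAgainstBasis(Instruction *Basis, Instruction *C,
                                      Value *Bump) {
      IRBuilder<> Builder(C);            // insert right before the candidate
      Value *Reduced = Builder.CreateAdd(Basis, Bump);
      Reduced->takeName(C);              // keep the candidate's name
      C->replaceAllUsesWith(Reduced);    // users now compute basis + bump
      return Reduced;
    }

As in the pass, the rewritten instruction should then only be unlinked
(removeFromParent) rather than erased on the spot, so that later candidates
sharing the same instruction can be detected and skipped; the real deletion
happens in one sweep at the end of runOnFunction.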
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 7fe87f9..4f23e20 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -9,6 +9,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
@@ -16,6 +17,8 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -249,7 +252,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
}
@@ -281,11 +284,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- scc_iterator<Region *> I = scc_begin(ParentRegion);
- for (Order.clear(); !I.isAtEnd(); ++I) {
- const std::vector<RegionNode *> &Nodes = *I;
- Order.append(Nodes.begin(), Nodes.end());
+ RNVector TempOrder;
+ ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
+ TempOrder.append(RPOT.begin(), RPOT.end());
+
+ std::map<Loop*, unsigned> LoopBlocks;
+
+ // The reverse post-order traversal of the list gives us an ordering close
+ // to what we want. The only problem with it is that sometimes backedges
+ // for outer loops will be visited before backedges for inner loops.
+ for (RegionNode *RN : TempOrder) {
+ BasicBlock *BB = RN->getEntry();
+ Loop *Loop = LI->getLoopFor(BB);
+ if (!LoopBlocks.count(Loop)) {
+ LoopBlocks[Loop] = 1;
+ continue;
+ }
+ LoopBlocks[Loop]++;
+ }
+
+ unsigned CurrentLoopDepth = 0;
+ Loop *CurrentLoop = nullptr;
+ BBSet TempVisited;
+ for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
+ BasicBlock *BB = (*I)->getEntry();
+ unsigned LoopDepth = LI->getLoopDepth(BB);
+
+ if (std::find(Order.begin(), Order.end(), *I) != Order.end())
+ continue;
+
+ if (LoopDepth < CurrentLoopDepth) {
+ // Make sure we have visited all blocks in this loop before moving back to
+ // the outer loop.
+
+ RNVector::iterator LoopI = I;
+      while (LoopBlocks[CurrentLoop]) {
+ LoopI++;
+ BasicBlock *LoopBB = (*LoopI)->getEntry();
+ if (LI->getLoopFor(LoopBB) == CurrentLoop) {
+ LoopBlocks[CurrentLoop]--;
+ Order.push_back(*LoopI);
+ }
+ }
+ }
+
+ CurrentLoop = LI->getLoopFor(BB);
+ if (CurrentLoop) {
+ LoopBlocks[CurrentLoop]--;
+ }
+
+ CurrentLoopDepth = LoopDepth;
+ Order.push_back(*I);
}
+
+  // This pass originally used a post-order traversal and then operated on
+  // the list in reverse. Now that we are using a reverse post-order traversal
+  // instead, rather than reworking the whole pass to operate on the list in
+  // order, we simply reverse the list and continue to operate on it in
+  // reverse.
+ std::reverse(Order.begin(), Order.end());
}
/// \brief Determine the end of the loops
@@ -304,7 +361,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = Term->getSuccessor(i);
- if (Visited.count(Succ) && LI->isLoopHeader(Succ) ) {
+ if (Visited.count(Succ)) {
Loops[Succ] = BB;
}
}
@@ -441,6 +498,10 @@ void StructurizeCFG::collectInfos() {
for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
OI != OE; ++OI) {
+ DEBUG(dbgs() << "Visiting: " <<
+ ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
+ (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
+
// Analyze all the conditions leading to a node
gatherPredicates(*OI);
@@ -826,7 +887,7 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really necessary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (const auto &BB : ParentRegion->blocks())
+ for (auto *BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
@@ -866,7 +927,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
ParentRegion = R;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
orderNodes();
collectInfos();
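
The new orderNodes() above builds on a reverse post-order traversal and then
patches up the one case where plain RPO is not good enough: back edges of
outer loops can be reached before an inner loop has been fully emitted. The
RPOT building block itself is tiny; a sketch over a plain Function (the patch
iterates a Region the same way, and the loop body here is a placeholder):

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void visitInRPO(Function &F) {
      // Each block is visited after all of its forward-edge predecessors.
      ReversePostOrderTraversal<Function *> RPOT(&F);
      for (BasicBlock *BB : RPOT)
        outs() << BB->getName() << "\n";
    }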
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f3c3e30..9eef132 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,8 +54,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -87,7 +87,6 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
const TargetTransformInfo *TTI;
- const DataLayout *DL;
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
@@ -126,7 +125,7 @@ namespace {
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
"Tail Call Elimination", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
"Tail Call Elimination", false, false)
@@ -136,7 +135,7 @@ FunctionPass *llvm::createTailCallEliminationPass() {
}
void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
/// \brief Scan the specified function for alloca instructions.
@@ -159,8 +158,6 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DL = F.getParent()->getDataLayout();
-
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -386,16 +383,15 @@ bool TailCallElim::runTRE(Function &F) {
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
- // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
- // marked with the 'tail' attribute, because doing so would cause the stack
- // size to increase (real TRE would deallocate variable sized allocas, TRE
- // doesn't).
+ // If false, we cannot perform TRE on tail calls marked with the 'tail'
+ // attribute, because doing so would cause the stack size to increase (real
+ // TRE would deallocate variable sized allocas, TRE doesn't).
bool CanTRETailMarkedCall = CanTRE(F);
// Change any tail recursive calls to loops.
@@ -404,28 +400,19 @@ bool TailCallElim::runTRE(Function &F) {
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- SmallVector<BasicBlock*, 8> BBToErase;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
+ BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret) {
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
TailCallsAreMarkedTail, ArgumentPHIs,
!CanTRETailMarkedCall);
- // FoldReturnAndProcessPred may have emptied some BB. Remember to
- // erase them.
- if (Change && BB->empty())
- BBToErase.push_back(BB);
-
- }
MadeChange |= Change;
}
}
- for (auto BB: BBToErase)
- BB->eraseFromParent();
-
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
// with themselves. Check to see if we did and clean up our mess if so. This
@@ -435,7 +422,7 @@ bool TailCallElim::runTRE(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
+ if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -445,7 +432,7 @@ bool TailCallElim::runTRE(Function &F) {
}
-/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
@@ -464,7 +451,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// being loaded from.
if (CI->mayWriteToMemory() ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment(), DL))
+ L->getAlignment()))
return false;
}
}
@@ -480,13 +467,11 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
return true;
}
-// isDynamicConstant - Return true if the specified value is the same when the
-// return would exit as it was when the initial iteration of the recursive
-// function was executed.
-//
-// We currently handle static constants and arguments that are not modified as
-// part of the recursion.
-//
+/// Return true if the specified value is the same when the return would exit
+/// as it was when the initial iteration of the recursive function was executed.
+///
+/// We currently handle static constants and arguments that are not modified as
+/// part of the recursion.
static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
@@ -518,10 +503,9 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
return false;
}
-// getCommonReturnValue - Check to see if the function containing the specified
-// tail call consistently returns the same runtime-constant value at all exit
-// points except for IgnoreRI. If so, return the returned value.
-//
+/// Check to see if the function containing the specified tail call consistently
+/// returns the same runtime-constant value at all exit points except for
+/// IgnoreRI. If so, return the returned value.
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = nullptr;
@@ -545,10 +529,9 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
return ReturnedValue;
}
-/// CanTransformAccumulatorRecursion - If the specified instruction can be
-/// transformed using accumulator recursion elimination, return the constant
-/// which is the start of the accumulator value. Otherwise return null.
-///
+/// If the specified instruction can be transformed using accumulator recursion
+/// elimination, return the constant which is the start of the accumulator
+/// value. Otherwise return null.
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
if (!I->isAssociative() || !I->isCommutative()) return nullptr;
@@ -836,14 +819,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
// Cleanup: if all predecessors of BB have been eliminated by
- // FoldReturnIntoUncondBranch, we would like to delete it, but we
- // can not just nuke it as it is being used as an iterator by our caller.
- // Just empty it, and the caller will erase it when it is safe to do so.
- // It is important to empty it, because the ret instruction in there is
- // still using a value which EliminateRecursiveTailCall will attempt
- // to remove.
+ // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
+ // because the ret instruction in there is still using a value which
+ // EliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->getInstList().clear();
+ BB->eraseFromParent();
EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,
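
Worth noting in the runTRE hunk: FoldReturnAndProcessPred may now erase the
block it was handed, so the BBToErase side table is gone and the loop instead
advances its iterator before the body runs. The idiom, reduced to its core
(process() is a stand-in for the real work):

    for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E;
         /* in loop */) {
      BasicBlock *BB = BBI++; // advance first: the body may erase *BB
      process(BB);            // may call BB->eraseFromParent()
    }

Because BBI already points at the next block when process() runs, erasing BB
cannot invalidate the loop's position.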
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index cce016a..03c3a80 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
#include <algorithm>
namespace llvm {
@@ -33,11 +34,6 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
static const size_t kMinAlignment = 16;
-static size_t RoundUpTo(size_t X, size_t RoundTo) {
- assert((RoundTo & (RoundTo - 1)) == 0);
- return (X + RoundTo - 1) & ~(RoundTo - 1);
-}
-
// The larger the variable Size, the larger the redzone.
// The resulting frame size is a multiple of Alignment.
static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
@@ -48,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
- return RoundUpTo(Res, Alignment);
+ return RoundUpToAlignment(Res, Alignment);
}
void
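
The local RoundUpTo helper is dropped in favor of RoundUpToAlignment from the
newly included Support/MathExtras.h; both compute the usual power-of-two
round-up (X + RoundTo - 1) & ~(RoundTo - 1). A quick worked check of the bit
trick, assuming RoundTo is a power of two:

    // RoundUpToAlignment(13, 16): 13 + 15 = 28 = 0b11100;
    // 28 & ~15 clears the low four bits, giving 16.
    // RoundUpToAlignment(32, 16): 32 + 15 = 47; 47 & ~15 == 32 (unchanged).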
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 820544b..e9f6239 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *B = I;
TerminatorInst *Last = B->getTerminator();
- DebugLoc LastLoc = Last->getDebugLoc();
- if (LastLoc.isUnknown()) continue;
- DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+ const DILocation *LastDIL = Last->getDebugLoc();
+ if (!LastDIL)
+ continue;
for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
BasicBlock *Succ = Last->getSuccessor(I);
Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
- DebugLoc FirstLoc = First->getDebugLoc();
- if (FirstLoc.isUnknown()) continue;
- DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+ const DILocation *FirstDIL = First->getDebugLoc();
+ if (!FirstDIL)
+ continue;
// If the first instruction (First) of Succ is at the same file
// location as B's last instruction (Last), add a new
// discriminator for First's location and all the instructions
// in Succ that share the same location with First.
- if (FirstDIL.atSameLineAs(LastDIL)) {
+ if (!FirstDIL->canDiscriminate(*LastDIL)) {
// Create a new lexical scope and compute a new discriminator
// number for it.
- StringRef Filename = FirstDIL.getFilename();
- DIScope Scope = FirstDIL.getScope();
- DIFile File = Builder.createFile(Filename, Scope.getDirectory());
- unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
- DILexicalBlockFile NewScope =
+ StringRef Filename = FirstDIL->getFilename();
+ auto *Scope = FirstDIL->getScope();
+ auto *File = Builder.createFile(Filename, Scope->getDirectory());
+
+ // FIXME: Calculate the discriminator here, based on local information,
+ // and delete DILocation::computeNewDiscriminator(). The current
+ // solution gives different results depending on other modules in the
+ // same context. All we really need is to discriminate between
+ // FirstDIL and LastDIL -- a local map would suffice.
+ unsigned Discriminator = FirstDIL->computeNewDiscriminator();
+ auto *NewScope =
Builder.createLexicalBlockFile(Scope, File, Discriminator);
- DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
- DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
+ auto *NewDIL =
+ DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
+ NewScope, FirstDIL->getInlinedAt());
+ DebugLoc newDebugLoc = NewDIL;
// Attach this new debug location to First and every
// instruction following First that shares the same location.
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
++I1) {
- if (I1->getDebugLoc() != FirstLoc) break;
+ if (I1->getDebugLoc().get() != FirstDIL)
+ break;
I1->setDebugLoc(newDebugLoc);
- DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
- << ":" << NewDIL.getColumnNumber() << ":"
- << NewDIL.getDiscriminator() << *I1 << "\n");
+ DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
+ << ":" << NewDIL->getColumn() << ":"
+ << NewDIL->getDiscriminator() << *I1 << "\n");
}
DEBUG(dbgs() << "\n");
Changed = true;
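
The hunk above is a straight port from the old DIDescriptor wrappers to the
new Metadata-class debug-info API: the DILocation now comes directly off the
instruction's DebugLoc, and the discriminated copy is built with
DILocation::get. Stripped of the pass's bookkeeping (Inst is a stand-in;
Ctx and NewScope as in the hunk above):

    if (const DILocation *Loc = Inst->getDebugLoc()) {
      auto *NewLoc = DILocation::get(Ctx, Loc->getLine(), Loc->getColumn(),
                                     NewScope, Loc->getInlinedAt());
      Inst->setDebugLoc(NewLoc); // DebugLoc converts from DILocation *
    }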
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 983f025..f3c8013 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -65,16 +65,10 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
- AliasAnalysis *AA = nullptr;
- MemoryDependenceAnalysis *MemDep = nullptr;
- if (P) {
- AA = P->getAnalysisIfAvailable<AliasAnalysis>();
- MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- }
-
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
@@ -113,7 +107,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
+ LoopInfo *LI, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
  // Don't merge away blocks that have their address taken.
if (BB->hasAddressTaken()) return false;
@@ -140,8 +136,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Can't merge if there is PHI loop.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == PN)
+ for (Value *IncValue : PN->incoming_values())
+ if (IncValue == PN)
return false;
} else
break;
@@ -149,7 +145,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
- FoldSingleEntryPHINodes(BB, P);
+ FoldSingleEntryPHINodes(BB, AA, MemDep);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -166,28 +162,23 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
PredBB->takeName(BB);
// Finally, erase the old block and update dominator info.
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- if (DomTreeNode *DTN = DT.getNode(BB)) {
- DomTreeNode *PredDTN = DT.getNode(PredBB);
- SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
- DE = Children.end(); DI != DE; ++DI)
- DT.changeImmediateDominator(*DI, PredDTN);
-
- DT.eraseNode(BB);
- }
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
+ DE = Children.end();
+ DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
- LI->removeBlock(BB);
+ if (LI)
+ LI->removeBlock(BB);
- if (MemoryDependenceAnalysis *MD =
- P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
- MD->invalidateCachedPredecessors();
- }
- }
+ if (MemDep)
+ MemDep->invalidateCachedPredecessors();
BB->eraseFromParent();
return true;
@@ -240,12 +231,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
/// SplitEdge - Split the edge connecting the two specified blocks, updating
/// the dominator tree and loop info if they are provided.
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
+ LoopInfo *LI) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
+ .setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -255,23 +248,25 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, Succ->begin(), P);
+ return SplitBlock(Succ, Succ->begin(), DT, LI);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), P);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI);
}
-unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
+unsigned
+llvm::SplitAllCriticalEdges(Function &F,
+ const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (SplitCriticalEdge(TI, i, P))
+ if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
}
return NumBroken;
@@ -282,7 +277,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
/// to a new block. The two blocks are joined by an unconditional branch and
/// the loop info is updated.
///
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
++SplitIt;
@@ -290,26 +286,23 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (LI)
if (Loop *L = LI->getLoopFor(Old))
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT.getNode(Old)) {
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
I != E; ++I)
Children.push_back(*I);
- DomTreeNode *NewNode = DT.addNewBlock(New, Old);
+ DomTreeNode *NewNode = DT->addNewBlock(New, Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
- DT.changeImmediateDominator(*I, NewNode);
+ DT->changeImmediateDominator(*I, NewNode);
}
- }
return New;
}
@@ -318,45 +311,46 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
- Pass *P, bool &HasLoopExit) {
- if (!P) return;
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA, bool &HasLoopExit) {
+ // Update dominator tree if available.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // The rest of the logic is only relevant for updating the loop structures.
+ if (!LI)
+ return;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr;
+ Loop *L = LI->getLoopFor(OldBB);
// If we need to preserve loop analyses, collect some information about how
// this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- if (LI) {
- bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
- for (ArrayRef<BasicBlock*>::iterator
- i = Preds.begin(), e = Preds.end(); i != e; ++i) {
- BasicBlock *Pred = *i;
-
- // If we need to preserve LCSSA, determine if any of the preds is a loop
- // exit.
- if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Pred))
- if (!PL->contains(OldBB))
- HasLoopExit = true;
-
- // If we need to preserve LoopInfo, note whether any of the preds crosses
- // an interesting loop boundary.
- if (!L) continue;
- if (L->contains(Pred))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
+ for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end();
+ i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L)
+ continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
}
- // Update dominator tree if available.
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().splitBlock(NewBB);
-
- if (!L) return;
+ // Unless we have a loop for OldBB, nothing else to do here.
+ if (!L)
+ return;
if (IsLoopEntry) {
// Add the new block to the nearest enclosing loop (and not an adjacent
@@ -382,9 +376,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
- L->addBasicBlockToLoop(NewBB, LI->getBase());
+ L->addBasicBlockToLoop(NewBB, *LI);
if (SplitMakesNewLoopHeader)
L->moveToHeader(NewBB);
}
@@ -393,10 +387,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
/// from NewBB. This also updates AliasAnalysis, if available.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
- ArrayRef<BasicBlock*> Preds, BranchInst *BI,
- Pass *P, bool HasLoopExit) {
+ ArrayRef<BasicBlock *> Preds, BranchInst *BI,
+ AliasAnalysis *AA, bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr;
SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
@@ -461,11 +454,15 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'.
+/// SplitBlockPredecessors - This method introduces at least one new basic block
+/// into the function and moves some of the predecessors of BB to be
+/// predecessors of the new block. The new predecessors are indicated by the
+/// Preds array. The new block is given a suffix of 'Suffix'. Returns the new
+/// basic block to which the predecessors from Preds now point.
+///
+/// If BB is a landingpad block, an additional basic block may be introduced.
+/// It will have the suffix 'Suffix'+".split-lp".
+/// See SplitLandingPadPredecessors for more details on this case.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
@@ -473,8 +470,21 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock*> Preds,
- const char *Suffix, Pass *P) {
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ // For the landingpads we need to act a bit differently.
+ // Delegate this work to the SplitLandingPadPredecessors.
+ if (BB->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ std::string NewName = std::string(Suffix) + ".split-lp";
+
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(),
+ NewBBs, AA, DT, LI, PreserveLCSSA);
+ return NewBBs[0];
+ }
+
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -505,10 +515,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit);
return NewBB;
}
@@ -526,10 +537,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
/// exits).
///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock*> Preds,
+ ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
- Pass *P,
- SmallVectorImpl<BasicBlock*> &NewBBs) {
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -552,12 +564,12 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
}
- // Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit);
// Move the remaining edges from OrigBB to point to NewBB2.
SmallVector<BasicBlock*, 8> NewBB2Preds;
@@ -589,10 +601,11 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit);
}
LandingPadInst *LPad = OrigBB->getLandingPadInst();
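
The theme of this whole file is the retirement of the Pass * parameter: every
utility now takes exactly the analyses it can update (AliasAnalysis,
DominatorTree, LoopInfo, MemoryDependenceAnalysis, plus a PreserveLCSSA
flag), and a null pointer simply means "nothing to keep up to date". A caller
inside a FunctionPass would now fetch them the same way the
BreakCriticalEdges pass below does -- a sketch, with Old and SplitPt assumed:

    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
    auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
    LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;

    BasicBlock *New = SplitBlock(Old, SplitPt, DT, LI);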
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index eda22cf..7e83c9e 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -18,6 +18,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
@@ -41,14 +42,19 @@ namespace {
}
bool runOnFunction(Function &F) override {
- unsigned N = SplitAllCriticalEdges(F, this);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N =
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
NumBroken += N;
return N > 0;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -125,10 +131,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- Pass *P, bool MergeIdenticalEdges,
- bool DontDeleteUselessPhis,
- bool SplitLandingPads) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr;
+ const CriticalEdgeSplittingOptions &Options) {
+ if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
+ return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
@@ -179,29 +184,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
- if (MergeIdenticalEdges) {
+ if (Options.MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+ DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
-
-
- // If we don't have a pass object, we can't update anything...
- if (!P) return NewBB;
-
- DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
-
// If we have nothing to update, just return.
+ auto *AA = Options.AA;
+ auto *DT = Options.DT;
+ auto *LI = Options.LI;
if (!DT && !LI)
return NewBB;
@@ -268,13 +266,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
if (TIL == DestLoop) {
// Both in the same loop, the NewBB joins loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ TIL->addBasicBlockToLoop(NewBB, *LI);
} else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
// Edge from two loops with no containment relation. Because these
// are natural loops, we know that the destination block must be the
@@ -283,19 +281,20 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
assert(DestLoop->getHeader() == DestBB &&
"Should not create irreducible loops!");
if (Loop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NewBB, LI->getBase());
+ P->addBasicBlockToLoop(NewBB, *LI);
}
}
+
// If TIBB is in a loop and DestBB is outside of that loop, we may need
// to update LoopSimplify form and LCSSA form.
- if (!TIL->contains(DestBB) &&
- P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ if (!TIL->contains(DestBB)) {
assert(!TIL->contains(NewBB) &&
"Split point for loop exit is contained in loop!");
// Update LCSSA form in the newly created exit block.
- if (P->mustPreserveAnalysisID(LCSSAID))
+ if (Options.PreserveLCSSA) {
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+ }
  // The only way that we can break LoopSimplify form by splitting a critical
// edge is if after the split there exists some edge from TIL to DestBB
@@ -322,20 +321,12 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isLandingPad() &&
"We don't split edges to landing pads!");
- BasicBlock *NewExitBB =
- SplitBlockPredecessors(DestBB, LoopPreds, "split", P);
- if (P->mustPreserveAnalysisID(LCSSAID))
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
- // LCSSA form was updated above for the case where LoopSimplify is
- // available, which means that all predecessors of loop exit blocks
- // are within the loop. Without LoopSimplify form, it would be
- // necessary to insert a new phi.
- assert((!P->mustPreserveAnalysisID(LCSSAID) ||
- P->mustPreserveAnalysisID(LoopSimplifyID)) &&
- "SplitCriticalEdge doesn't know how to update LCCSA form "
- "without LoopSimplify!");
}
}
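
SplitCriticalEdge's trailing boolean flags are likewise folded into a single
CriticalEdgeSplittingOptions value that also carries the optional analyses.
Call sites after this change take the shape seen in the hunks above (AA, DT
and LI may each be null):

    BasicBlock *NewBB = SplitCriticalEdge(
        LatchTerm, SuccNum,
        CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());

    unsigned NumBroken =
        SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));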
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 322485d..8aa7b2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
return nullptr;
@@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction("strlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- nullptr);
+ Constant *StrLen = M->getOrInsertFunction(
+ "strlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
/// specified pointer. Ptr is required to be some pointer type, MaxLen must
/// be of size_t type, and the return value has 'intptr_t' type.
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
return nullptr;
@@ -73,14 +70,11 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen = M->getOrInsertFunction("strnlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- nullptr);
- CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ Constant *StrNLen =
+ M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
return nullptr;
@@ -106,17 +100,16 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
AttributeSet::get(M->getContext(),
AS),
I8Ptr, I8Ptr, I32Ty, nullptr);
- CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
- ConstantInt::get(I32Ty, C), "strchr");
+ CallInst *CI = B.CreateCall(
+ StrChr, {CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
return nullptr;
@@ -128,15 +121,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction("strncmp",
- AttributeSet::get(M->getContext(),
- AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
- CastToCStr(Ptr2, B), Len, "strncmp");
+ Value *StrNCmp = M->getOrInsertFunction(
+ "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(
+ StrNCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "strncmp");
if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI,
- StringRef Name) {
+ const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
return nullptr;
@@ -161,8 +149,8 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Value *StrCpy = M->getOrInsertFunction(Name,
AttributeSet::get(M->getContext(), AS),
I8Ptr, I8Ptr, I8Ptr, nullptr);
- CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
- Name);
+ CallInst *CI =
+ B.CreateCall(StrCpy, {CastToCStr(Dst, B), CastToCStr(Src, B)}, Name);
if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
return nullptr;
@@ -187,8 +174,8 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
AS),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), nullptr);
- CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
- Len, "strncpy");
+ CallInst *CI = B.CreateCall(
+ StrNCpy, {CastToCStr(Dst, B), CastToCStr(Src, B), Len}, "strncpy");
if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const DataLayout *TD,
+ IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
return nullptr;
@@ -208,16 +195,13 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), nullptr);
+ Value *MemCpy = M->getOrInsertFunction(
+ "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), nullptr);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
- CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
return nullptr;
@@ -236,14 +219,10 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction("memchr",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt32Ty(),
- TD->getIntPtrType(Context),
- nullptr);
- CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ Value *MemChr = M->getOrInsertFunction(
+ "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(MemChr, {CastToCStr(Ptr, B), Val, Len}, "memchr");
if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
}
/// EmitMemCmp - Emit a call to the memcmp function.
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
return nullptr;
@@ -266,14 +244,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction("memcmp",
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ Value *MemCmp = M->getOrInsertFunction(
+ "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(
+ MemCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "memcmp");
if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -329,7 +304,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
Op1->getType(), Op2->getType(), nullptr);
- CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name);
+ CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
return nullptr;
@@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
return nullptr;
@@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
return nullptr;
@@ -409,7 +384,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
File->getType(), nullptr);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
- CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall(F, {Char, File}, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
return nullptr;
@@ -441,7 +416,7 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), nullptr);
- CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall(F, {CastToCStr(Str, B), File}, "fputs");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
return nullptr;
@@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FWriteName,
- AttributeSet::get(M->getContext(), AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
+ F = M->getOrInsertFunction(
+ FWriteName, AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(), nullptr);
else
- F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
- CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+ F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(),
+ nullptr);
+ CallInst *CI =
+ B.CreateCall(F, {CastToCStr(Ptr, B), Size,
+ ConstantInt::get(DL.getIntPtrType(Context), 1), File});
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
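
Two mechanical changes run through BuildLibCalls: the DataLayout becomes a
mandatory reference instead of a nullable pointer wherever intptr_t is
needed, and IRBuilder's fixed-arity CreateCall2/3/4 helpers give way to a
single CreateCall taking an argument list. Every call-site rewrite has the
same shape:

    // before
    CallInst *CI = B.CreateCall2(Callee, Arg0, Arg1, "name");
    // after: the braced list binds to an ArrayRef<Value *>
    CallInst *CI = B.CreateCall(Callee, {Arg0, Arg1}, "name");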
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 96a763f..4f8d1df 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -34,7 +34,7 @@
#include <map>
using namespace llvm;
-// CloneBasicBlock - See comments in Cloning.h
+/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
@@ -154,23 +154,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
-// Find the MDNode which corresponds to the DISubprogram data that described F.
-static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
- for (DISubprogram Subprogram : Finder.subprograms()) {
- if (Subprogram.describes(F)) return Subprogram;
+// Find the MDNode which corresponds to the subprogram data that describes F.
+static DISubprogram *FindSubprogram(const Function *F,
+ DebugInfoFinder &Finder) {
+ for (DISubprogram *Subprogram : Finder.subprograms()) {
+ if (Subprogram->describes(F))
+ return Subprogram;
}
return nullptr;
}
// Add an operand to an existing MDNode. The new operand will be added at the
// back of the operand list.
-static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) {
+static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs,
+ Metadata *NewSP) {
SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs->getNumOperands() + 1);
- for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I)
- NewSPs.push_back(SPs->getOperand(I));
+ NewSPs.reserve(SPs.size() + 1);
+ for (auto *SP : SPs)
+ NewSPs.push_back(SP);
NewSPs.push_back(NewSP);
- CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs)));
+ CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
}
// Clone the module-level debug info associated with OldFunc. The cloned data
@@ -180,21 +183,21 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
DebugInfoFinder Finder;
Finder.processModule(*OldFunc->getParent());
- const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
+ const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
if (!OldSubprogramMDNode) return;
// Ensure that OldFunc appears in the map.
// (if it's already there it must point to NewFunc anyway)
VMap[OldFunc] = NewFunc;
- DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap));
-
- for (DICompileUnit CU : Finder.compile_units()) {
- DIArray Subprograms(CU.getSubprograms());
+ auto *NewSubprogram =
+ cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
+ for (auto *CU : Finder.compile_units()) {
+ auto Subprograms = CU->getSubprograms();
// If the compile unit's function list contains the old function, it should
// also contain the new one.
- for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
- if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+ for (auto *SP : Subprograms) {
+ if (SP == OldSubprogramMDNode) {
AddOperand(CU, Subprograms, NewSubprogram);
break;
}
@@ -202,7 +205,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
}
}
-/// CloneFunction - Return a copy of the specified function, but without
+/// Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
/// original one. If any of the arguments to the function are in the VMap,
@@ -250,8 +253,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
namespace {
- /// PruningFunctionCloner - This class is a private class used to implement
- /// the CloneAndPruneFunctionInto method.
+ /// This is a private class used to implement CloneAndPruneFunctionInto.
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
@@ -259,29 +261,40 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const DataLayout *DL;
+ CloningDirector *Director;
+ ValueMapTypeRemapper *TypeMapper;
+ ValueMaterializer *Materializer;
+
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- ValueToValueMapTy &valueMap,
- bool moduleLevelChanges,
- const char *nameSuffix,
- ClonedCodeInfo *codeInfo,
- const DataLayout *DL)
- : NewFunc(newFunc), OldFunc(oldFunc),
- VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) {
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo,
+ CloningDirector *Director)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo), Director(Director) {
+ // These are optional components. The Director may return null.
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ } else {
+ TypeMapper = nullptr;
+ Materializer = nullptr;
+ }
}
- /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// The specified block is found to be reachable, clone it and
/// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
+ void CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
}
-/// CloneBlock - The specified block is found to be reachable, clone it and
+/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
WeakVH &BBEntry = VMap[BB];
@@ -307,26 +320,45 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
const_cast<BasicBlock*>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
-
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
-
+
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
- for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
+ // If the "Director" remaps the instruction, don't clone it.
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, II, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ // If the cloning director says skip, continue to the next instruction.
+ // In this case, the cloning director is responsible for mapping the
+ // skipped instruction to some value that is defined in the new
+ // basic block.
+ if (Action == CloningDirector::SkipInstruction)
+ continue;
+ }
+
Instruction *NewInst = II->clone();
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
- if (Value *V = SimplifyInstruction(NewInst, DL)) {
+ if (Value *V =
+ SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
@@ -354,6 +386,26 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, OldTI, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ if (Action == CloningDirector::CloneSuccessors) {
+      // If the director says to skip with a terminator instruction, we still
+ // need to clone this block's successors.
+ const TerminatorInst *TI = NewBB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ return;
+ }
+ assert(Action != CloningDirector::SkipInstruction &&
+ "SkipInstruction is not valid for terminators.");
+ }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -409,39 +461,53 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
}
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
-/// except that it does some simple constant prop and DCE on the fly. The
-/// effect of this is to copy significantly less code in cases where (for
-/// example) a function call with constant arguments is inlined, and those
-/// constant arguments cause a significant amount of code in the callee to be
-/// dead. Since this doesn't produce an exact copy of the input, it can't be
-/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+/// This works like CloneAndPruneFunctionInto, except that it does not clone the
+/// entire function. Instead it starts at an instruction provided by the caller
+/// and copies (and prunes) only the code reachable from that instruction.
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+ const Instruction *StartingInst,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *DL,
- Instruction *TheCall) {
+ CloningDirector *Director) {
assert(NameSuffix && "NameSuffix cannot be null!");
-
+
+ ValueMapTypeRemapper *TypeMapper = nullptr;
+ ValueMaterializer *Materializer = nullptr;
+
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ }
+
#ifndef NDEBUG
- for (Function::const_arg_iterator II = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); II != E; ++II)
- assert(VMap.count(II) && "No mapping from source argument specified!");
+  // If the cloning starts at the beginning of the function, verify that
+ // the function arguments are mapped.
+ if (!StartingInst)
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, DL);
+ NameSuffix, CodeInfo, Director);
+ const BasicBlock *StartingBB;
+ if (StartingInst)
+ StartingBB = StartingInst->getParent();
+ else {
+ StartingBB = &OldFunc->getEntryBlock();
+ StartingInst = StartingBB->begin();
+ }
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
- PFC.CloneBlock(BB, CloneWorklist);
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
// Loop over all of the basic blocks in the old function. If the block was
@@ -461,16 +527,24 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
- if (const PHINode *PN = dyn_cast<PHINode>(I))
- PHIToResolve.push_back(PN);
- else
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
+ // PHI nodes may have been remapped to non-PHI nodes by the caller or
+ // during the cloning process.
+ if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(VMap[PN]))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+ } else {
break;
+ }
+ }
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
RemapInstruction(NewBB->getTerminator(), VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -563,13 +637,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// node).
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN, DL);
+ recursivelySimplifyInstruction(PN);
// Now that the inlined function body has been fully constructed, go through
- // and zap unconditional fall-through branches. This happen all the time when
+ // and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
@@ -617,12 +691,32 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Do not increment I, iteratively merge all things this block branches to.
}
- // Make a final pass over the basic blocks from theh old function to gather
+ // Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
Returns.push_back(RI);
}
+
+
+/// This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ Instruction *TheCall) {
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap,
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
+ nullptr);
+}
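
The new CloningDirector hook lets a client observe every instruction during pruned cloning and either let it be cloned, skip it (after recording its own mapping), stop the block entirely, or have only the successors cloned after handling a terminator itself. Below is a minimal sketch of a director that changes nothing, assuming the CloningDirector interface added to llvm/Transforms/Utils/Cloning.h in this merge; the action names follow the handlers above, and CloneInstruction is assumed to be the default "carry on" action:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Cloning.h"

    using namespace llvm;

    namespace {
    // A do-nothing director: every instruction is cloned verbatim. Real
    // clients would return SkipInstruction or StopCloningBB for selected
    // instructions and are then responsible for the VMap entries (or the
    // replacement terminator) themselves.
    class IdentityDirector : public CloningDirector {
      CloningAction handleInstruction(ValueToValueMapTy &VMap,
                                      const Instruction *Inst,
                                      BasicBlock *NewBB) override {
        return CloningDirector::CloneInstruction;
      }
    };
    }
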
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index fae9ff5..2693322 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -69,9 +69,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
auto *PTy = cast<PointerType>(I->getType());
- auto *GA =
- GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- I->getLinkage(), I->getName(), New);
+ auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New);
GA->copyAttributesFrom(I);
VMap[I] = GA;
}
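
The GlobalAlias::create change is an API cleanup: the alias's type is now passed as a single PointerType instead of a separate element type and address space. A sketch of the updated call, assuming the 3.7-era overloads; makeUnsetAlias is a hypothetical helper, and, as in CloneModule, the aliasee is expected to be wired up afterwards:

    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static GlobalAlias *makeUnsetAlias(GlobalValue *Target, Module &M) {
      // The alias takes its full pointer type from the target directly.
      auto *PTy = cast<PointerType>(Target->getType());
      return GlobalAlias::create(PTy, Target->getLinkage(),
                                 Target->getName() + ".alias", &M);
    }
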
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index e70a7d6..ab89b41 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
DEBUG(dbgs() << **i << ", ");
DEBUG(dbgs() << ")\n");
+ StructType *StructTy;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- PointerType *StructPtr =
- PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ StructTy = StructType::get(M->getContext(), paramTy);
paramTy.clear();
- paramTy.push_back(StructPtr);
+ paramTy.push_back(PointerType::getUnqual(StructTy));
}
FunctionType *funcType =
FunctionType::get(RetTy, paramTy, false);
@@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
}
+ StructType *StructArgTy = nullptr;
AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type*> ArgTypes;
@@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
ArgTypes.push_back((*v)->getType());
// Allocate a struct at the beginning of this function
- Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
new AllocaInst(StructArgTy, nullptr, "structArg",
codeReplacer->getParent()->begin()->begin());
@@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Struct, Idx,
- "gep_" + StructValues[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
StoreInst *SI = new StoreInst(StructValues[i], GEP);
codeReplacer->getInstList().push_back(SI);
@@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP
- = GetElementPtrInst::Create(Struct, Idx,
- "gep_reload_" + outputs[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
} else {
@@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(OAI, Idx,
- "gep_" + outputs[out]->getName(),
- NTRet);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(),
+ NTRet);
new StoreInst(outputs[out], GEP, NTRet);
} else {
new StoreInst(outputs[out], OAI, NTRet);
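
The common thread in the CodeExtractor changes is that GetElementPtrInst::Create now takes the pointee type explicitly, so the struct type is computed once and threaded to every GEP instead of being rederived from the pointer operand (groundwork for opaque pointers). A self-contained sketch of the new form, assuming 3.7-era headers; gepStructField is a hypothetical helper:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Build a GEP addressing field FieldNo of the struct that *StructPtr
    // points to, passing the struct type explicitly as the first argument.
    static GetElementPtrInst *gepStructField(StructType *STy, Value *StructPtr,
                                             unsigned FieldNo,
                                             Instruction *InsertBefore) {
      LLVMContext &Ctx = STy->getContext();
      Value *Idx[] = {Constant::getNullValue(Type::getInt32Ty(Ctx)),
                      ConstantInt::get(Type::getInt32Ty(Ctx), FieldNo)};
      return GetElementPtrInst::Create(STy, StructPtr, Idx,
                                       "gep_" + Twine(FieldNo), InsertBefore);
    }
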
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 26875e8..dc95089 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "ctor_utils"
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 9972b22..003da58 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,6 +39,19 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
F->getEntryBlock().begin());
}
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and create a basic block
+ // into which the store can be inserted.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ if (!II->getNormalDest()->getSinglePredecessor()) {
+ unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
+ assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ (void)BB;
+ }
+ }
+
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.user_back());
@@ -71,7 +84,6 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
}
}
-
// Insert stores of the computed value into the stack slot. We have to be
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
@@ -79,27 +91,13 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
if (!isa<TerminatorInst>(I)) {
InsertPt = &I;
++InsertPt;
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
- if (II.getNormalDest()->getSinglePredecessor())
- InsertPt = II.getNormalDest()->getFirstInsertionPt();
- else {
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical. Therefore, split the critical edge and insert the store
- // in the newly created basic block.
- unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
- TerminatorInst *TI = &cast<TerminatorInst>(I);
- assert (isCriticalEdge(TI, SuccNum) &&
- "Expected a critical edge!");
- BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
- assert (BB && "Unable to split critical edge.");
- InsertPt = BB->getFirstInsertionPt();
- }
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before PHI nodes or landingpad instrs.
-
new StoreInst(&I, Slot, InsertPt);
return Slot;
}
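
The DemoteRegToStack change hoists the invoke handling to the front: the critical normal edge is split before any users are rewritten, so the store of the demoted value always has a block to land in. In isolation the guard looks like the sketch below, assuming llvm/Analysis/CFG.h for GetSuccessorNumber and llvm/Transforms/Utils/BasicBlockUtils.h for SplitCriticalEdge; splitInvokeNormalEdge is a hypothetical name:

    #include "llvm/Analysis/CFG.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    #include <cassert>

    using namespace llvm;

    // Ensure the invoke's normal destination has a single predecessor so a
    // store can be inserted at its first insertion point.
    static void splitInvokeNormalEdge(InvokeInst *II) {
      if (II->getNormalDest()->getSinglePredecessor())
        return;
      unsigned SuccNum = GetSuccessorNumber(II->getParent(),
                                            II->getNormalDest());
      BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
      assert(BB && "Unable to split critical edge.");
      (void)BB;
    }
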
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 52e2d59..44b7d25 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
- } else if (ImmutableCallSite C = I) {
+ } else if (auto C = ImmutableCallSite(I)) {
if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
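
This one-line GlobalStatus change follows from CallSite's constructors becoming explicit: the implicit conversion ImmutableCallSite C = I no longer compiles, so the test-and-bind idiom is spelled with a direct construction. A sketch of the pattern, assuming llvm/IR/CallSite.h; isDirectCall is a hypothetical helper:

    #include "llvm/IR/CallSite.h"

    using namespace llvm;

    static bool isDirectCall(const Instruction *I) {
      // The call-site wrapper is false-y when I is not a call or invoke.
      if (auto CS = ImmutableCallSite(I))
        return CS.getCalledFunction() != nullptr;
      return false;
    }
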
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 2a86eb5..ddeaff0 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -88,7 +89,7 @@ namespace {
CallerLPad = cast<LandingPadInst>(I);
}
- /// getOuterResumeDest - The outer unwind destination is the target of
+ /// The outer unwind destination is the target of
/// unwind edges introduced for calls within the inlined function.
BasicBlock *getOuterResumeDest() const {
return OuterResumeDest;
@@ -98,17 +99,16 @@ namespace {
LandingPadInst *getLandingPadInst() const { return CallerLPad; }
- /// forwardResume - Forward the 'resume' instruction to the caller's landing
- /// pad block. When the landing pad block has only one predecessor, this is
+ /// Forward the 'resume' instruction to the caller's landing pad block.
+ /// When the landing pad block has only one predecessor, this is
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
void forwardResume(ResumeInst *RI,
SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
- /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
- /// destination block for the given basic block, using the values for the
- /// original invoke's source block.
+ /// Add incoming-PHI values to the unwind destination block for the given
+ /// basic block, using the values for the original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
addIncomingPHIValuesForInto(BB, OuterResumeDest);
}
@@ -123,7 +123,7 @@ namespace {
};
}
-/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+/// Get or create a target for the branch from ResumeInsts.
BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
@@ -158,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
return InnerResumeDest;
}
-/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
-/// block. When the landing pad block has only one predecessor, this is a simple
+/// Forward the 'resume' instruction to the caller's landing pad block.
+/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
@@ -177,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
RI->eraseFromParent();
}
-/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
-/// an invoke, we have to turn all of the calls that can throw into
-/// invokes. This function analyze BB to see if there are any calls, and if so,
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
@@ -227,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
}
}
-/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
@@ -278,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
-/// CloneAliasScopeMetadata - When inlining a function that contains noalias
-/// scope metadata, this metadata needs to be cloned so that the inlined blocks
+/// When inlining a function that contains noalias scope metadata,
+/// this metadata needs to be cloned so that the inlined blocks
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
@@ -319,13 +319,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// Now we have a complete set of all metadata in the chains used to specify
// the noalias scopes and the lists of those scopes.
- SmallVector<MDNode *, 16> DummyNodes;
+ SmallVector<TempMDTuple, 16> DummyNodes;
DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
I != IE; ++I) {
- MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None);
- DummyNodes.push_back(Dummy);
- MDMap[*I].reset(Dummy);
+ DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ MDMap[*I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
@@ -343,7 +342,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
}
MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
- MDNodeFwdDecl *TempM = cast<MDNodeFwdDecl>(MDMap[*I]);
+ MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+ assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
}
@@ -388,18 +388,14 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
NI->setMetadata(LLVMContext::MD_noalias, M);
}
}
-
- // Now that everything has been replaced, delete the dummy nodes.
- for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i)
- MDNode::deleteTemporary(DummyNodes[i]);
}
-/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then
-/// add new alias scopes for each noalias argument, tag the mapped noalias
+/// If the inlined function has noalias arguments,
+/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
- const DataLayout *DL, AliasAnalysis *AA) {
+ const DataLayout &DL, AliasAnalysis *AA) {
if (!EnableNoAliasConversion)
return;
@@ -625,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
- if (!PreserveAlignmentAssumptions || !IFI.DL)
+ if (!PreserveAlignmentAssumptions)
return;
+ auto &DL = CS.getCaller()->getParent()->getDataLayout();
// To avoid inserting redundant assumptions, we should check for assumptions
// already in the caller. To do this, we might need a DT of the caller.
@@ -648,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, IFI.DL,
+ if (getKnownAlignment(Arg, DL, CS.getInstruction(),
&IFI.ACT->getAssumptionCache(*CalledFunc),
- CS.getInstruction(), &DT) >= Align)
+ &DT) >= Align)
continue;
- IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
- Align);
+ IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, Arg, Align);
}
}
}
-/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
-/// into the caller, update the specified callgraph to reflect the changes we
-/// made. Note that it's possible that not all code was copied over, so only
+/// Once we have cloned code over from a callee into the caller,
+/// update the specified callgraph to reflect the changes we made.
+/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
@@ -696,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (!NewCall) continue;
+ if (!NewCall)
+ continue;
+ // We do not treat intrinsic calls like real function calls because we
+ // expect them to become inline code; do not add an edge for an intrinsic.
+ CallSite CS = CallSite(NewCall);
+ if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ continue;
+
// Remember that this call site got inlined for the client of
// InlineFunction.
IFI.InlinedCalls.push_back(NewCall);
@@ -729,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
IRBuilder<> Builder(InsertBlock->begin());
- Value *Size;
- if (IFI.DL == nullptr)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy));
+ Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -741,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
}
-/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
@@ -762,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
return Arg;
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL,
- &IFI.ACT->getAssumptionCache(*Caller),
- TheCall) >= ByValAlignment)
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall,
+ &IFI.ACT->getAssumptionCache(*Caller)) >=
+ ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -774,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align = 1;
- if (IFI.DL)
- Align = IFI.DL->getPrefTypeAlignment(AggTy);
-
+ unsigned Align =
+ Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy);
+
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
@@ -792,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
return NewAlloca;
}
-// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
-// intrinsic.
+// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
for (User *U : V->users()) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
@@ -808,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) {
return false;
}
-// hasLifetimeMarkers - Check whether the given alloca already has
+// Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
Type *Ty = AI->getType();
@@ -827,35 +827,69 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
-/// recursively update InlinedAtEntry of a DebugLoc.
-static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
- const DebugLoc &InlinedAtDL,
- LLVMContext &Ctx) {
- if (MDNode *IA = DL.getInlinedAt(Ctx)) {
- DebugLoc NewInlinedAtDL
- = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- NewInlinedAtDL.getAsMDNode(Ctx));
+/// Rebuild the entire inlined-at chain for this instruction so that the top of
+/// the chain now is inlined-at the new call site.
+static DebugLoc
+updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
+ DenseMap<const DILocation *, DILocation *> &IANodes) {
+ SmallVector<DILocation *, 3> InlinedAtLocations;
+ DILocation *Last = InlinedAtNode;
+ DILocation *CurInlinedAt = DL;
+
+ // Gather all the inlined-at nodes
+ while (DILocation *IA = CurInlinedAt->getInlinedAt()) {
+ // Skip any we've already built nodes for
+ if (DILocation *Found = IANodes[IA]) {
+ Last = Found;
+ break;
+ }
+
+ InlinedAtLocations.push_back(IA);
+ CurInlinedAt = IA;
}
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- InlinedAtDL.getAsMDNode(Ctx));
+ // Starting from the top, rebuild the nodes to point to the new inlined-at
+ // location (then rebuilding the rest of the chain behind it) and update the
+ // map of already-constructed inlined-at nodes.
+ for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend();
+ I != E; ++I) {
+ const DILocation *MD = *I;
+ Last = IANodes[MD] = DILocation::getDistinct(
+ Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
+ }
+
+ // And finally create the normal location for this instruction, referring to
+ // the new inlined-at chain.
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
}
-/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// Update inlined instructions' line numbers
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
DebugLoc TheCallDL = TheCall->getDebugLoc();
- if (TheCallDL.isUnknown())
+ if (!TheCallDL)
return;
+ auto &Ctx = Fn->getContext();
+ DILocation *InlinedAtNode = TheCallDL;
+
+ // Create a unique call site, not to be confused with any other call from the
+ // same location.
+ InlinedAtNode = DILocation::getDistinct(
+ Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
+ InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
+
+  // Cache the inlined-at nodes as they're built so they are reused; without
+  // this, every instruction's inlined-at chain would become distinct from
+  // every other's.
+ DenseMap<const DILocation *, DILocation *> IANodes;
+
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (DL.isUnknown()) {
+ if (!DL) {
// If the inlined instruction has no line number, make it look as if it
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
@@ -868,29 +902,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setDebugLoc(TheCallDL);
} else {
- BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DVI->setOperand(2, MetadataAsValue::get(
- Ctx, createInlinedVariable(DVI->getVariable(),
- InlinedAt, Ctx)));
- } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DDI->setOperand(1, MetadataAsValue::get(
- Ctx, createInlinedVariable(DDI->getVariable(),
- InlinedAt, Ctx)));
- }
+        BI->setDebugLoc(
+            updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
}
}
}
}
-/// InlineFunction - This function inlines the called function into the basic
-/// block of the caller. This returns false if it is not possible to inline
-/// this call. The program is still in a well defined state if this occurs
-/// though.
+/// This function inlines the called function into the basic block of the
+/// caller. This returns false if it is not possible to inline this call.
+/// The program is still in a well defined state if this occurs though.
///
/// Note that this only does one level of inlining. For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
@@ -975,6 +995,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Keep a list of pair (dst, src) to emit byval initializations.
SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ auto &DL = Caller->getParent()->getDataLayout();
+
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -1009,9 +1031,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.DL, TheCall);
+ &InlinedFunctionInfo, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -1032,7 +1054,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAliasScopeMetadata(CS, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA);
+ AddAliasScopeMetadata(CS, VMap, DL, IFI.AA);
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -1079,6 +1101,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
FirstNewBlock->getInstList(),
AI, I);
}
+ // Move any dbg.declares describing the allocas into the entry basic block.
+ DIBuilder DIB(*Caller->getParent());
+ for (auto &AI : IFI.StaticAllocas)
+ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
}
bool InlinedMustTailCalls = false;
@@ -1136,18 +1162,21 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.DL) {
- Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
- uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
- assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
- // Check that array size doesn't saturate uint64_t and doesn't
- // overflow when it's multiplied by type size.
- if (AllocaArraySize != ~0ULL &&
- UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
- AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
- AllocaArraySize * AllocaTypeSize);
- }
+ auto &DL = Caller->getParent()->getDataLayout();
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+
+ // Don't add markers for zero-sized allocas.
+ if (AllocaArraySize == 0)
+ continue;
+
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
}
}
@@ -1173,7 +1202,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert the llvm.stacksave.
CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
- .CreateCall(StackSave, "savedstack");
+ .CreateCall(StackSave, {}, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
@@ -1408,7 +1437,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr,
+ auto &DL = Caller->getParent()->getDataLayout();
+ if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr,
&IFI.ACT->getAssumptionCache(*Caller))) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
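
Most consequential for debug info is the rewritten fixupLineNumbers: every inline site first gets a distinct DILocation, so two inlined copies of the same callee from the same source line stay distinguishable, and rebuilt inlined-at nodes are memoized in IANodes so instructions sharing a chain keep sharing it. The "unique call site" step in isolation looks like the sketch below, using the new DILocation API from llvm/IR/DebugInfoMetadata.h; makeDistinctCallSite is a hypothetical name:

    #include "llvm/IR/DebugInfoMetadata.h"

    using namespace llvm;

    // Clone the call's location as a *distinct* node: same line, column,
    // scope, and inlined-at, but a fresh identity for this inline instance.
    static DILocation *makeDistinctCallSite(DILocation *CallDL,
                                            LLVMContext &Ctx) {
      return DILocation::getDistinct(Ctx, CallDL->getLine(),
                                     CallDL->getColumn(), CallDL->getScope(),
                                     CallDL->getInlinedAt());
    }
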
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 0ae746c..30edf3b 100644
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -252,8 +252,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
- Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True);
- Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True);
+ Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True});
+ Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
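
The CreateCall2-to-CreateCall rewrite here is part of a merge-wide migration: the numbered IRBuilder helpers give way to a single CreateCall taking a braced argument list. A sketch of the new spelling for the ctlz call above, assuming the 3.7-era Intrinsic::getDeclaration; emitCtlz is a hypothetical helper:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static Value *emitCtlz(IRBuilder<> &B, Module *M, Value *V) {
      Value *CTLZ = Intrinsic::getDeclaration(M, Intrinsic::ctlz, V->getType());
      // The i1 'true' operand means the result is undef for a zero input.
      return B.CreateCall(CTLZ, {V, B.getTrue()});
    }
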
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 3f9b702..9d40b69 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -112,17 +112,17 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
- PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB),
+ PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
Inst.getName() + ".lcssa", ExitBB->begin());
// Add inputs from inside the loop for this PHI.
- for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
- PN->addIncoming(&Inst, *PI);
+ for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+ PN->addIncoming(&Inst, Pred);
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!L.contains(*PI))
+ if (!L.contains(Pred))
UsesToRewrite.push_back(
&PN->getOperandUse(PN->getOperandNumForIncomingValue(
PN->getNumIncomingValues() - 1)));
@@ -294,21 +294,18 @@ struct LCSSA : public FunctionPass {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
}
-
-private:
- void verifyAnalysis() const override;
};
}
char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
@@ -318,7 +315,7 @@ char &llvm::LCSSAID = LCSSA::ID;
/// Process all loops in the function, inner-most out.
bool LCSSA::runOnFunction(Function &F) {
bool Changed = false;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
@@ -329,18 +326,3 @@ bool LCSSA::runOnFunction(Function &F) {
return Changed;
}
-static void verifyLoop(Loop &L, DominatorTree &DT) {
- // Recurse depth-first through inner loops.
- for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- verifyLoop(**LI, DT);
-
- // Check the special guarantees that LCSSA makes.
- //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!");
-}
-
-void LCSSA::verifyAnalysis() const {
- // Verify each loop nest in the function, assuming LI still points at that
- // function's loop info.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- verifyLoop(**I, *DT);
-}
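
The LCSSA changes track the split of LoopInfo into an analysis result plus a LoopInfoWrapperPass: passes now require the wrapper and unwrap the result at use sites (and the pass-local verifyAnalysis hook is dropped outright). The migration pattern, sketched for a hypothetical ExamplePass:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace {
    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // Depend on the wrapper pass, not on LoopInfo itself.
        AU.addRequired<LoopInfoWrapperPass>();
      }

      bool runOnFunction(Function &F) override {
        // Unwrap the analysis result where it is used.
        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
        (void)LI;
        return false;
      }
    };
    }
    char ExamplePass::ID = 0;
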
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 2a84d7e..70c77b0 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -110,11 +111,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
- // If we are switching on a constant, we can convert the switch into a
- // single branch instruction!
+ // If we are switching on a constant, we can convert the switch to an
+ // unconditional branch.
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getDefaultDest();
- BasicBlock *DefaultDest = TheOnlyDest;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ BasicBlock *TheOnlyDest = DefaultDest;
+
+ // If the default is unreachable, ignore it when searching for TheOnlyDest.
+ if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
+ SI->getNumCases() > 0) {
+ TheOnlyDest = SI->case_begin().getCaseSuccessor();
+ }
// Figure out which case it goes to.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
@@ -410,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
///
/// This returns true if it changed the code, note that it can delete
/// instructions in other blocks as well in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI) {
bool MadeChange = false;
@@ -427,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
+ if (recursivelySimplifyInstruction(Inst, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -457,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
///
/// ... and delete the predecessor corresponding to the '1'; this will attempt to
/// recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DataLayout *TD) {
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -473,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
Value *OldPhiIt = PhiIt;
- if (!recursivelySimplifyInstruction(PN, TD))
+ if (!recursivelySimplifyInstruction(PN))
continue;
// If recursive simplification ended up deleting the next PHI node we would
@@ -489,7 +495,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
@@ -525,14 +531,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
if (PredBB == &DestBB->getParent()->getEntryBlock())
DestBB->moveAfter(PredBB);
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock();
- DT.changeImmediateDominator(DestBB, PredBBIDom);
- DT.eraseNode(PredBB);
- }
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -897,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign, const DataLayout *TD) {
+ unsigned PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
return Align;
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
@@ -942,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *DL,
- AssumptionCache *AC,
+ const DataLayout &DL,
const Instruction *CxtI,
+ AssumptionCache *AC,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
@@ -975,7 +978,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
///
/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) {
// Since we can't guarantee that the original dbg.declare intrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
@@ -995,17 +998,13 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, SI))
return true;
- Instruction *DbgVal = nullptr;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
Argument *ExtendedArg = nullptr;
@@ -1014,11 +1013,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg)
- DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
else
- DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
- DIExpr, SI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
return true;
}
@@ -1026,19 +1025,15 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, LI))
return true;
- Instruction *DbgVal =
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), LI);
return true;
}
@@ -1080,10 +1075,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- auto DbgVal = DIB.insertDbgValueIntrinsic(
- AI, 0, DIVariable(DDI->getVariable()),
- DIExpression(DDI->getExpression()), CI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
+ DDI->getExpression(), DDI->getDebugLoc(),
+ CI);
}
DDI->eraseFromParent();
}
@@ -1104,32 +1098,31 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder) {
+ DIBuilder &Builder, bool Deref) {
DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
if (!DDI)
return false;
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
-
- // Create a copy of the original DIDescriptor for user variable, prepending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<int64_t, 4> NewDIExpr;
- NewDIExpr.push_back(dwarf::DW_OP_deref);
- if (DIExpr)
- for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
- NewDIExpr.push_back(DIExpr.getElement(i));
+ DebugLoc Loc = DDI->getDebugLoc();
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (Deref) {
+ // Create a copy of the original DIDescriptor for the user variable,
+ // prepending a "deref" operation to the list of address elements, since the
+ // new llvm.dbg.declare will take a value storing the address of the
+ // variable's memory, not the alloca itself.
+ SmallVector<uint64_t, 4> NewDIExpr;
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (DIExpr)
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpr = Builder.createExpression(NewDIExpr);
+ }
// Insert llvm.dbg.declare in the same basic block as the original alloca,
// and remove old llvm.dbg.declare.
BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar,
- Builder.createExpression(NewDIExpr), BB);
+ Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
DDI->eraseFromParent();
return true;
}
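The new Deref flag decides whether the variable's location is the memory itself or a pointer to it; when it is a pointer, a deref opcode must come first in the expression. A standalone sketch of just that element-list rewrite (plain C++, not LLVM code; 0x06 stands in for dwarf::DW_OP_deref):

    #include <cstdint>
    #include <vector>

    // Prepend a deref opcode so the consumer loads the address first, then
    // applies whatever expression ops were already present.
    std::vector<uint64_t> prependDeref(const std::vector<uint64_t> &Old) {
      std::vector<uint64_t> New;
      New.push_back(0x06); // dwarf::DW_OP_deref
      New.insert(New.end(), Old.begin(), Old.end());
      return New;
    }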
@@ -1254,7 +1247,7 @@ static bool markAliveBlocks(BasicBlock *BB,
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
Changed = true;
- } else if (II->doesNotThrow()) {
+ } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
BranchInst::Create(II->getNormalDest(), II);
@@ -1350,3 +1343,23 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
}
}
}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlockEdge &Root) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ) {
+ Use &U = *UI++;
+ if (DT.dominates(Root, U)) {
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '"
+ << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ }
+ return Count;
+}
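A hedged sketch of how a client pass might use the new replaceDominatedUsesWith helper, for example to propagate an equality known to hold only on one CFG edge. The surrounding function and names are illustrative, not part of the patch:

    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    // After proving that X equals Repl on the edge Pred->Succ, rewrite every
    // use of X that the edge dominates. Returns the number of uses rewritten.
    static unsigned propagateOnEdge(Value *X, Value *Repl, DominatorTree &DT,
                                    BasicBlock *Pred, BasicBlock *Succ) {
      BasicBlockEdge Edge(Pred, Succ);
      return replaceDominatedUsesWith(X, Repl, DT, Edge);
    }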
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index c832a4b..90dfaba 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -57,8 +57,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -113,6 +115,14 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
+ // Get analyses that we try to update.
+ auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>();
+ auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -131,15 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
- if (!Header->isLandingPad()) {
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- PP);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", PP, NewBBs);
- PreheaderBB = NewBBs[0];
- }
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ AA, DT, LI, PreserveLCSSA);
PreheaderBB->getTerminator()->setDebugLoc(
Header->getFirstNonPHI()->getDebugLoc());
@@ -157,7 +160,9 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
///
/// This method is used to split exit blocks that have predecessors outside of
/// the loop.
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
+static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, Pass *PP) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -172,15 +177,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewExitBB = nullptr;
- if (Exit->isLandingPad()) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, LoopBlocks,
- ".loopexit", ".nonloopexit",
- PP, NewBBs);
- NewExitBB = NewBBs[0];
- } else {
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
- }
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT,
+ LI, PreserveLCSSA);
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
<< NewExitBB->getName() << "\n");
@@ -211,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
DominatorTree *DT,
AssumptionCache *AC) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -287,9 +288,11 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (SE)
SE->forgetLoop(L);
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+ AA, DT, LI, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -460,7 +463,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update Loop Information - we know that this block is now in the current
// loop and all parent loops.
- L->addBasicBlockToLoop(BEBlock, LI->getBase());
+ L->addBasicBlockToLoop(BEBlock, *LI);
// Update dominator information
DT->splitBlock(BEBlock);
@@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP, const DataLayout *DL,
+ ScalarEvolution *SE, Pass *PP,
AssumptionCache *AC) {
bool Changed = false;
ReprocessLoop:
@@ -567,7 +570,7 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
++NumInserted;
Changed = true;
}
@@ -608,13 +611,15 @@ ReprocessLoop:
}
}
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// Scan over the PHI nodes in the loop header. Since they now have only two
// incoming values (the loop is canonicalized), we may have simplified the PHI
// down to 'X = phi [X, Y]', which should be replaced with 'Y'.
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -676,7 +681,8 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, DL)) continue;
+ if (!FoldBranchToCommonDest(BI))
+ continue;
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and delete it.
@@ -714,7 +720,7 @@ ReprocessLoop:
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
AliasAnalysis *AA, ScalarEvolution *SE,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -726,13 +732,12 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
// order. We can use this simple process because loops form a tree.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
Loop *L2 = Worklist[Idx];
- for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
- Worklist.push_back(*I);
+ Worklist.append(L2->begin(), L2->end());
}
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, DL, AC);
+ SE, PP, AC);
return Changed;
}
@@ -750,7 +755,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
AssumptionCache *AC;
bool runOnFunction(Function &F) override;
@@ -762,8 +766,8 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
@@ -781,7 +785,7 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -795,16 +799,14 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
AA = getAnalysisIfAvailable<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC);
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 5745920..1dbce47 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -26,8 +26,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile-time constant) trip count. Unrolling these
+/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
- Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+ bool AllowRuntime, bool AllowExpensiveTripCount,
+ unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+ LPPassManager *LPM, AssumptionCache *AC) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
- if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ if (RuntimeTripCount &&
+ !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
@@ -311,7 +320,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Tell LI about New.
if (*BB == Header) {
assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
} else {
// Figure out which loop New is in.
const Loop *OldLoop = LI->getLoopFor(*BB);
@@ -333,7 +342,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (SE)
SE->forgetLoop(OldLoop);
}
- NewLoop->addBasicBlockToLoop(New, LI->getBase());
+ NewLoop->addBasicBlockToLoop(New, *LI);
}
if (*BB == Header)
@@ -500,6 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
+ const DataLayout &DL = Header->getModule()->getDataLayout();
const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
@@ -508,7 +518,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Value *V = SimplifyInstruction(Inst))
+ else if (Value *V = SimplifyInstruction(Inst, DL))
if (LI->replacementPreservesLCSSAForm(Inst, V)) {
Inst->replaceAllUsesWith(V);
(*BB)->getInstList().erase(Inst);
@@ -531,9 +541,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
@@ -549,3 +557,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return true;
}
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
+MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+ return nullptr;
+}
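A sketch of a typical consumer of the new GetUnrollMetadata helper, assuming the usual !llvm.loop layout !{!"llvm.loop.unroll.count", i32 N} that clang emits for #pragma clang loop unroll_count(N):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/UnrollLoop.h"

    using namespace llvm;

    // Returns the requested unroll count, or 0 if none is attached.
    static unsigned getUnrollCount(MDNode *LoopID) {
      MDNode *MD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count");
      if (!MD || MD->getNumOperands() != 2)
        return 0;
      ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
      return CI ? (unsigned)CI->getZExtValue() : 0;
    }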
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 8a32215..d1774df 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,14 +23,18 @@
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
@@ -58,7 +62,8 @@ STATISTIC(NumRuntimeUnrolled,
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -122,13 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
- if (!Exit->isLandingPad()) {
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
- P, NewBBs);
- }
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
+ P->mustPreserveAnalysisID(LCSSAID));
// Add the branch to the exit block (around the unrolled loop)
BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
InsertPt->eraseFromParent();
@@ -167,9 +167,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
NewBlocks.push_back(NewBB);
if (NewLoop)
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
else if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
if (Header == *BB) {
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount, LoopInfo *LI,
LPPassManager *LPM) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
+ BasicBlock *Header = L->getHeader();
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
+ if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+ return false;
+
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
if (!isPowerOf2_32(Count))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
- // comment on ModVal below. This check is equivalent to `Log2(Count) <
- // BEWidth`.
- if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+ // comment on ModVal below.
+ if (Log2_32(Count) > BEWidth)
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -328,18 +334,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+ // Grab analyses that we preserve.
+ auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
BasicBlock *PH = L->getLoopPreheader();
- BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
// It helps to split the original preheader twice: once for the end of the
// prolog code and once for the new loop preheader.
- BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % Count (the loop unroll factor)
- SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
@@ -408,7 +416,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
- LPM->getAsPass());
+ /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
NumRuntimeUnrolled++;
return true;
}
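The prolog arithmetic that the power-of-two and Log2_32(Count) <= BEWidth guards protect can be illustrated standalone (plain C++, not LLVM code): with Count a power of two, the modulo reduces to a mask, which is what makes the narrow backedge-count width safe to use.

    #include <cassert>
    #include <cstdint>

    // Iterations the prolog must execute before entering the unrolled body.
    static uint64_t prologIterations(uint64_t TripCount, uint64_t Count) {
      assert(Count != 0 && (Count & (Count - 1)) == 0 &&
             "unroll count must be a power of two");
      return TripCount & (Count - 1); // == TripCount % Count
    }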
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
new file mode 100644
index 0000000..a5890c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -0,0 +1,499 @@
+//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common loop utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "loop-utils"
+
+bool ReductionDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ ReductionDescriptor &RedDes) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction and the variables that form the
+ // cycle ending in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we count
+ // the instructions we saw from the recognized min/max pattern, to make sure
+ // we see exactly the two expected instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ ReductionInstDesc ReduxDesc(false, nullptr);
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By one instruction outside of the loop (safe).
+ // - By further instructions outside of the loop (not safe).
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds.
+ ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isReduction())
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // Exit if you find multiple outside users or if the header phi node is
+ // being used. In this case the user uses the value of the previous
+ // iteration, in which case we would lose "VF-1" iterations of the
+ // reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isReduction()))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+ // The ExitInstruction (the instruction which is allowed to have
+ // out-of-loop users) is saved as part of the ReductionDescriptor.
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.getMinMaxKind());
+
+ RedDes = RD;
+
+ return true;
+}
+
+/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+/// pattern corresponding to a min(X, Y) or max(X, Y).
+ReductionInstDesc
+ReductionDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single-use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return ReductionInstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return ReductionInstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return ReductionInstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+
+ return ReductionInstDesc(false, I);
+}
+
+ReductionInstDesc ReductionDescriptor::isReductionInstr(Instruction *I,
+ ReductionKind Kind,
+ ReductionInstDesc &Prev,
+ bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ bool FastMath = FP && I->hasUnsafeAlgebra();
+ switch (I->getOpcode()) {
+ default:
+ return ReductionInstDesc(false, I);
+ case Instruction::PHI:
+ if (FP &&
+ (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(I, Prev.getMinMaxKind());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return ReductionInstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return ReductionInstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return ReductionInstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return ReductionInstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return ReductionInstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool ReductionDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
+ }
+
+ return false;
+}
+
+bool ReductionDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ ReductionDescriptor &RedDes) {
+
+ bool HasFunNoNaNAttr = false;
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
+ RedDes)) {
+ DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
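A hedged usage sketch for the new isReductionPHI entry point, along the lines of what a vectorizer legality check might do; Phi and TheLoop are assumed to come from the caller's loop analysis:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"

    using namespace llvm;

    static bool isVectorizableHeaderPhi(PHINode *Phi, Loop *TheLoop) {
      ReductionDescriptor RedDes;
      // Tries every supported kind (add, mul, and/or/xor, min/max, FP forms)
      // and fills RedDes with the start value, exit instruction and kind.
      return ReductionDescriptor::isReductionPHI(Phi, TheLoop, RedDes);
    }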
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *ReductionDescriptor::getReductionIdentity(ReductionKind K, Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown reduction kind");
+ }
+}
+
+/// This function translates the reduction kind to an LLVM binary operator.
+unsigned ReductionDescriptor::getReductionBinOp(ReductionKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+}
+
+Value *
+ReductionDescriptor::createMinMaxOp(IRBuilder<> &Builder,
+ ReductionInstDesc::MinMaxReductionKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max reduction kind");
+ case ReductionInstDesc::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case ReductionInstDesc::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case ReductionInstDesc::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case ReductionInstDesc::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case ReductionInstDesc::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case ReductionInstDesc::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ Value *Cmp;
+ if (RK == ReductionInstDesc::MRK_FloatMin ||
+ RK == ReductionInstDesc::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
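An illustrative call to createMinMaxOp: emitting a signed maximum expands to an "icmp sgt" feeding a "select". Builder, A and B are assumed to be supplied by the caller:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/LoopUtils.h"

    using namespace llvm;

    static Value *emitSMax(IRBuilder<> &Builder, Value *A, Value *B) {
      // Produces: %rdx.minmax.cmp = icmp sgt A, B
      //           %rdx.minmax.select = select %rdx.minmax.cmp, A, B
      return ReductionDescriptor::createMinMaxOp(
          Builder, ReductionInstDesc::MRK_SIntMax, A, B);
    }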
+
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ ConstantInt *&StepValue) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return false;
+
+ ConstantInt *CV = C->getValue();
+ if (PhiTy->isIntegerTy()) {
+ StepValue = CV;
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
+ StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+ return true;
+}
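The pointer normalization at the end of isInductionPHI deserves a worked example: an i32* PHI advancing by 8 bytes per iteration has element size 4, so the reported step is 2 elements, while a 6-byte step would be rejected. A standalone sketch of just that arithmetic (plain C++, not LLVM code):

    #include <cstdint>

    static bool normalizeStep(int64_t ByteStep, int64_t ElemSize,
                              int64_t &ElemStep) {
      if (ByteStep % ElemSize)
        return false;                 // not a whole number of elements
      ElemStep = ByteStep / ElemSize; // e.g. 8 bytes / 4 bytes -> step of 2
      return true;
    }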
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 04b9130..e0e0e90 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,17 +14,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <algorithm>
using namespace llvm;
@@ -32,6 +32,23 @@ using namespace llvm;
#define DEBUG_TYPE "lower-switch"
namespace {
+ struct IntRange {
+ int64_t Low, High;
+ };
+ // Return true iff R is covered by Ranges.
+ static bool IsInRanges(const IntRange &R,
+ const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), R,
+ [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ return I != Ranges.end() && I->Low <= R.Low;
+ }
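IsInRanges leans on the invariant stated in its comment: because Ranges is sorted by High and the ranges are non-overlapping and non-adjacent, a single lower_bound plus one Low comparison decides containment in O(log n). A self-contained copy for illustration:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Range { int64_t Low, High; };

    static bool isInRanges(const Range &R, const std::vector<Range> &Ranges) {
      // Find the first range whose High can possibly cover R.High...
      auto I = std::lower_bound(
          Ranges.begin(), Ranges.end(), R,
          [](const Range &A, const Range &B) { return A.High < B.High; });
      // ...it covers R iff its Low also reaches down to R.Low.
      return I != Ranges.end() && I->Low <= R.Low;
    }
    // E.g. with Ranges = {{0,3},{10,20}}: {11,15} -> true, {3,5} -> false.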
+
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions.
class LowerSwitch : public FunctionPass {
@@ -50,13 +67,12 @@ namespace {
}
struct CaseRange {
- Constant* Low;
- Constant* High;
+ ConstantInt* Low;
+ ConstantInt* High;
BasicBlock* BB;
- CaseRange(Constant *low = nullptr, Constant *high = nullptr,
- BasicBlock *bb = nullptr) :
- Low(low), High(high), BB(bb) { }
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
};
typedef std::vector<CaseRange> CaseVector;
@@ -67,7 +83,8 @@ namespace {
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default);
+ BasicBlock *OrigBlock, BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges);
BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
@@ -158,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
// Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
+ SmallVector<unsigned, 8> Indices;
for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
if (PN->getIncomingBlock(Idx) == OrigBB) {
- PN->removeIncomingValue(Idx);
+ Indices.push_back(Idx);
LocalNumMergedCases--;
}
+ // Remove the incoming values in reverse order to avoid invalidating the
+ // indices recorded above.
+ for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III)
+ PN->removeIncomingValue(*III);
}
}
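The reverse iteration in the fix above matters because each removeIncomingValue shifts the later operands down by one position. A standalone illustration of the same pattern (plain C++, not LLVM code):

    #include <vector>

    // Erase the elements at the given ascending positions; walking the
    // positions back to front keeps the not-yet-erased indices valid.
    static void eraseAt(std::vector<int> &V,
                        const std::vector<unsigned> &Idx) {
      for (auto I = Idx.rbegin(), E = Idx.rend(); I != E; ++I)
        V.erase(V.begin() + *I);
    }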
@@ -171,12 +193,12 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
// LowerBound and UpperBound are used to keep track of the bounds for Val
// that have already been checked by a block emitted by one of the previous
// calls to switchConvert in the call stack.
-BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
+BasicBlock *
+LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
unsigned Size = End - Begin;
if (Size == 1) {
@@ -203,32 +225,32 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
CaseRange &Pivot = *(Begin + Mid);
DEBUG(dbgs() << "Pivot ==> "
- << cast<ConstantInt>(Pivot.Low)->getValue()
- << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+ << Pivot.Low->getValue()
+ << " -" << Pivot.High->getValue() << "\n");
// NewLowerBound here should never be the minimal integer value.
// This is because it is computed from a case range that is never
// the smallest, so there is always a case range with a smaller value.
- ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low);
- ConstantInt *NewUpperBound;
-
- // If we don't have a Default block then it means that we can never
- // have a value outside of a case range, so set the UpperBound to the highest
- // value in the LHS part of the case ranges.
- if (Default != nullptr) {
- // Because NewLowerBound is never the smallest representable integer
- // it is safe here to subtract one.
- NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
- NewLowerBound->getValue() - 1);
- } else {
- CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
- NewUpperBound = cast<ConstantInt>(LastLHS->High);
+ ConstantInt *NewLowerBound = Pivot.Low;
+
+ // Because NewLowerBound is never the smallest representable integer
+ // it is safe here to subtract one.
+ ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+
+ if (!UnreachableRanges.empty()) {
+ // Check if the gap between LHS's highest and NewLowerBound is unreachable.
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1;
+ int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
+ IntRange Gap = { GapLow, GapHigh };
+ if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
+ NewUpperBound = LHS.back().High;
}
DEBUG(dbgs() << "LHS Bounds ==> ";
if (LowerBound) {
- dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
+ dbgs() << LowerBound->getSExtValue();
} else {
dbgs() << "NONE";
}
@@ -236,7 +258,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
dbgs() << "RHS Bounds ==> ";
dbgs() << NewLowerBound->getSExtValue() << " - ";
if (UpperBound) {
- dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
+ dbgs() << UpperBound->getSExtValue() << "\n";
} else {
dbgs() << "NONE\n";
});
@@ -251,10 +273,10 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
NewUpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
UpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
@@ -287,11 +309,11 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
- if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ if (Leaf.Low->isMinValue(true /*isSigned*/)) {
// Val >= Min && Val <= Hi --> Val <= Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
"SwitchLeaf");
- } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ } else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
"SwitchLeaf");
@@ -316,8 +338,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode* PN = cast<PHINode>(I);
// Remove all but one incoming entries from the cluster
- uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
- cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ uint64_t Range = Leaf.High->getSExtValue() -
+ Leaf.Low->getSExtValue();
for (uint64_t j = 0; j < Range; ++j) {
PN->removeIncomingValue(OrigBlock);
}
@@ -345,8 +367,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
if (Cases.size()>=2)
for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
J != Cases.end();) {
- int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
- int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ int64_t nextValue = J->Low->getSExtValue();
+ int64_t currentValue = I->High->getSExtValue();
BasicBlock* nextBB = J->BB;
BasicBlock* currentBB = I->BB;
@@ -379,26 +401,102 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
- // If there is only the default destination, don't bother with the code below.
+ // If there is only the default destination, just branch.
if (!SI->getNumCases()) {
- BranchInst::Create(SI->getDefaultDest(), CurBlock);
- CurBlock->getInstList().erase(SI);
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
return;
}
- const bool DefaultIsUnreachable =
- Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ ConstantInt *LowerBound = nullptr;
+ ConstantInt *UpperBound = nullptr;
+ std::vector<IntRange> UnreachableRanges;
+
+ if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
+ // Make the bounds tightly fitted around the case value range, because we
+ // know that the value passed to the switch must be exactly one of the case
+ // values.
+ assert(!Cases.empty());
+ LowerBound = Cases.front().Low;
+ UpperBound = Cases.back().High;
+
+ DenseMap<BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ BasicBlock *PopSucc = nullptr;
+
+ IntRange R = { INT64_MIN, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ for (const auto &I : Cases) {
+ int64_t Low = I.Low->getSExtValue();
+ int64_t High = I.High->getSExtValue();
+
+ IntRange &LastRange = UnreachableRanges.back();
+ if (LastRange.Low == Low) {
+ // There is nothing left of the previous range.
+ UnreachableRanges.pop_back();
+ } else {
+ // Terminate the previous range.
+ assert(Low > LastRange.Low);
+ LastRange.High = Low - 1;
+ }
+ if (High != INT64_MAX) {
+ IntRange R = { High + 1, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ }
+
+ // Count popularity.
+ int64_t N = High - Low + 1;
+ unsigned &Pop = Popularity[I.BB];
+ if ((Pop += N) > MaxPop) {
+ MaxPop = Pop;
+ PopSucc = I.BB;
+ }
+ }
+#ifndef NDEBUG
+ /* UnreachableRanges should be sorted and the ranges non-adjacent. */
+ for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
+ I != E; ++I) {
+ assert(I->Low <= I->High);
+ auto Next = I + 1;
+ if (Next != E) {
+ assert(Next->Low > I->High);
+ }
+ }
+#endif
+
+ // Use the most popular block as the new default, reducing the number of
+ // cases.
+ assert(MaxPop > 0 && PopSucc);
+ Default = PopSucc;
+ for (CaseItr I = Cases.begin(); I != Cases.end();) {
+ if (I->BB == PopSucc)
+ I = Cases.erase(I);
+ else
+ ++I;
+ }
+
+ // If there are no cases left, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
+ return;
+ }
+ }
+
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- // if the default block is set as an unreachable we avoid creating one
- // because will never be a valid target.
- BasicBlock *NewDefault = nullptr;
- if (!DefaultIsUnreachable) {
- NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
-
- BranchInst::Create(Default, NewDefault);
- }
+ BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+ BranchInst::Create(Default, NewDefault);
+
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@@ -408,40 +506,18 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
}
- // Prepare cases vector.
- CaseVector Cases;
- unsigned numCmps = Clusterify(Cases, SI);
-
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- DEBUG(dbgs() << "Cases: " << Cases << "\n");
- (void)numCmps;
-
- ConstantInt *UpperBound = nullptr;
- ConstantInt *LowerBound = nullptr;
-
- // Optimize the condition where Default is an unreachable block. In this case
- // we can make the bounds tightly fitted around the case value ranges,
- // because we know that the value passed to the switch should always be
- // exactly one of the case values.
- if (DefaultIsUnreachable) {
- CaseItr LastCase = Cases.begin() + Cases.size() - 1;
- UpperBound = cast<ConstantInt>(LastCase->High);
- LowerBound = cast<ConstantInt>(Cases.begin()->Low);
- }
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, OrigBlock, NewDefault);
+ OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
+ BasicBlock *OldDefault = SI->getDefaultDest();
CurBlock->getInstList().erase(SI);
- pred_iterator PI = pred_begin(Default), E = pred_end(Default);
- // If the Default block has no more predecessors just remove it
- if (PI == E) {
- DeleteDeadBlock(Default);
- }
+ // If the Default block has no more predecessors just remove it.
+ if (pred_begin(OldDefault) == pred_end(OldDefault))
+ DeleteDeadBlock(OldDefault);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..d69a81e 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -93,3 +94,34 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
}
return GV;
}
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+ if (isa<Function>(FuncOrBitcast))
+ return cast<Function>(FuncOrBitcast);
+ FuncOrBitcast->dump();
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+ report_fatal_error(Stream.str());
+}
+
+std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) {
+ assert(!InitName.empty() && "Expected init function name");
+ assert(InitArgs.size() == InitArgTypes.size() &&
+ "Sanitizer's init function must take as many arguments as argument types");
+ Function *Ctor = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::InternalLinkage, CtorName, &M);
+ BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
+ IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
+ Function *InitFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false),
+ AttributeSet()));
+ InitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(InitFunction, InitArgs);
+ return std::make_pair(Ctor, InitFunction);
+}
+
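A hedged sketch of how a sanitizer pass might drive the two new helpers above. The ctor/init names here are made up for illustration, and appendToGlobalCtors is the existing ModuleUtils helper for registering the constructor:

    #include <tuple>
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"

    using namespace llvm;

    static void installFooRuntime(Module &M) {
      Function *Ctor, *InitFn;
      std::tie(Ctor, InitFn) = createSanitizerCtorAndInitFunctions(
          M, /*CtorName=*/"foo.module_ctor", /*InitName=*/"__foo_init",
          /*InitArgTypes=*/{}, /*InitArgs=*/{});
      appendToGlobalCtors(M, Ctor, /*Priority=*/0);
    }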
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index dabadb7..623dbc9 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -13,16 +13,6 @@
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
-// The algorithm used here is based on:
-//
-// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-// Programming Languages
-// POPL '95. ACM, New York, NY, 62-73.
-//
-// It has been modified to not explicitly use the DJ graph data structure and to
-// directly compute pruned SSA using per-variable liveness information.
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -34,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -45,9 +36,9 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
-#include <queue>
using namespace llvm;
#define DEBUG_TYPE "mem2reg"
@@ -274,9 +265,6 @@ struct PromoteMem2Reg {
/// behavior.
DenseMap<BasicBlock *, unsigned> BBNumbers;
- /// Maps DomTreeNodes to their level in the dominator tree.
- DenseMap<DomTreeNode *, unsigned> DomLevels;
-
/// Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock *, unsigned> BBNumPreds;
@@ -303,8 +291,6 @@ private:
return NP - 1;
}
- void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info);
void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
@@ -531,6 +517,7 @@ void PromoteMem2Reg::run() {
AllocaInfo Info;
LargeBlockInfo LBI;
+ IDFCalculator IDF(DT);
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
@@ -578,31 +565,12 @@ void PromoteMem2Reg::run() {
continue;
}
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- SmallVector<DomTreeNode *, 32> Worklist;
-
- DomTreeNode *Root = DT.getRootNode();
- DomLevels[Root] = 0;
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- unsigned ChildLevel = DomLevels[Node] + 1;
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
- CI != CE; ++CI) {
- DomLevels[*CI] = ChildLevel;
- Worklist.push_back(*CI);
- }
- }
- }
-
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
unsigned ID = 0;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- BBNumbers[I] = ID++;
+ for (auto &BB : F)
+ BBNumbers[&BB] = ID++;
}
// If we have an AST to keep updated, remember some pointer value that is
@@ -621,7 +589,34 @@ void PromoteMem2Reg::run() {
// the standard SSA construction algorithm. Determine which blocks need PHI
// nodes and see if we can optimize out some work by avoiding insertion of
// dead phi nodes.
- DetermineInsertionPoint(AI, AllocaNum, Info);
+
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need phi
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ SmallVector<BasicBlock *, 32> PHIBlocks;
+ IDF.calculate(PHIBlocks);
+ if (PHIBlocks.size() > 1)
+ std::sort(PHIBlocks.begin(), PHIBlocks.end(),
+ [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
+ QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
}
if (Allocas.empty())
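For reference, the whole insertion-point computation that used to live in DetermineInsertionPoint now reduces to the small driver below. A sketch, assuming a populated DominatorTree DT and the two block sets computed in the loop above:

    // Sketch: PHI placement via the new IDFCalculator. Defs holds the blocks
    // that store to the alloca; LiveIn holds the blocks where its value is
    // live on entry. The result is the set of blocks that need PHI nodes.
    void placePhis(DominatorTree &DT,
                   const SmallPtrSet<BasicBlock *, 32> &Defs,
                   const SmallPtrSet<BasicBlock *, 32> &LiveIn,
                   SmallVectorImpl<BasicBlock *> &PHIBlocks) {
      IDFCalculator IDF(DT);
      IDF.setDefiningBlocks(Defs);
      IDF.setLiveInBlocks(LiveIn);   // optional: prunes dead PHIs
      IDF.calculate(PHIBlocks);      // iterated dominance frontier of Defs
    }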
@@ -667,6 +662,8 @@ void PromoteMem2Reg::run() {
A->eraseFromParent();
}
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Remove alloca's dbg.declare intrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -691,7 +688,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -841,95 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
-/// At this point, we're committed to promoting the alloca using IDF's, and the
-/// standard SSA construction algorithm. Determine which blocks need phi nodes
-/// and see if we can optimize out some work by avoiding insertion of dead phi
-/// nodes.
-void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info) {
- // Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks;
- DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
-
- // Determine which blocks the value is live in. These are blocks which lead
- // to uses.
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
-
- // Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards.
- typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- less_second> IDFPriorityQueue;
- IDFPriorityQueue PQ;
-
- for (BasicBlock *BB : DefBlocks) {
- if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push(std::make_pair(Node, DomLevels[Node]));
- }
-
- SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode *, 32> Visited;
- SmallVector<DomTreeNode *, 32> Worklist;
- while (!PQ.empty()) {
- DomTreeNodePair RootPair = PQ.top();
- PQ.pop();
- DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second;
-
- // Walk all dominator tree children of Root, inspecting their CFG edges with
- // targets elsewhere on the dominator tree. Only targets whose level is at
- // most Root's level are added to the iterated dominance frontier of the
- // definition set.
-
- Worklist.clear();
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- BasicBlock *BB = Node->getBlock();
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
- ++SI) {
- DomTreeNode *SuccNode = DT.getNode(*SI);
-
- // Quickly skip all CFG edges that are also dominator tree edges instead
- // of catching them below.
- if (SuccNode->getIDom() == Node)
- continue;
-
- unsigned SuccLevel = DomLevels[SuccNode];
- if (SuccLevel > RootLevel)
- continue;
-
- if (!Visited.insert(SuccNode).second)
- continue;
-
- BasicBlock *SuccBB = SuccNode->getBlock();
- if (!LiveInBlocks.count(SuccBB))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
- if (!DefBlocks.count(SuccBB))
- PQ.push(std::make_pair(SuccNode, SuccLevel));
- }
-
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
- ++CI) {
- if (!Visited.count(*CI))
- Worklist.push_back(*CI);
- }
- }
- }
-
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- unsigned CurrentVersion = 0;
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
- QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
-}
-
/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 3fcb789..88b39dd 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -150,12 +151,13 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
ProtoName, &BB->front());
// Fill in all the predecessors of the PHI.
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+ for (const auto &PredValue : PredValues)
+ InsertedPHI->addIncoming(PredValue.second, PredValue.first);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
- if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ if (Value *V =
+ SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
InsertedPHI->eraseFromParent();
return V;
}
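For context, GetValueInMiddleOfBlock is the core of the SSAUpdater client workflow. A sketch of typical use, assuming DefBB/DefVal/UseBB come from the caller; after the change above, the PHI this may create is simplified with the module's real DataLayout instead of a null pointer:

    // Sketch: rewriting one cross-block use with SSAUpdater.
    Value *rewriteUse(Type *Ty, BasicBlock *DefBB, Value *DefVal,
                      BasicBlock *UseBB) {
      SSAUpdater SSA;
      SSA.Initialize(Ty, "promoted");
      SSA.AddAvailableValue(DefBB, DefVal);      // one call per defining block
      return SSA.GetValueInMiddleOfBlock(UseBB); // inserts a PHI if needed
    }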
@@ -245,8 +247,7 @@ public:
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
- Preds->push_back(SomePhi->getIncomingBlock(PI));
+ Preds->append(SomePhi->block_begin(), SomePhi->block_end());
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Preds->push_back(*PI);
@@ -321,12 +322,12 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
LoadAndStorePromoter::
-LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
- Value *SomeVal;
- if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ const Value *SomeVal;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
SomeVal = LI;
else
SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
@@ -344,20 +345,17 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
- }
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
// Walk the uses in the use-list order to be deterministic.
SmallVector<LoadInst*, 32> LiveInLoads;
DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
@@ -380,8 +378,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Otherwise, check to see if this block is all loads.
bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
+ for (Instruction *I : BlockUses) {
+ if (isa<StoreInst>(I)) {
HasStore = true;
break;
}
@@ -391,8 +389,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// efficient way to tell which one is first in the block and don't want to
// scan large blocks, so just add all loads as live ins.
if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ for (Instruction *I : BlockUses)
+ LiveInLoads.push_back(cast<LoadInst>(I));
BlockUses.clear();
continue;
}
@@ -403,8 +401,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// block is a load, then it uses the live in value. The last store defines
// the live out value. We handle this by doing a linear scan of the block.
Value *StoredValue = nullptr;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ for (Instruction &I : *BB) {
+ if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
// If this is a load from an unrelated pointer, ignore it.
if (!isInstInList(L, Insts)) continue;
@@ -419,8 +417,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
}
continue;
}
-
- if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// If this is a store to an unrelated pointer, ignore it.
if (!isInstInList(SI, Insts)) continue;
updateDebugInfo(SI);
@@ -438,8 +436,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Okay, now we rewrite all loads that use live-in values in the loop,
// inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
+ for (LoadInst *ALoad : LiveInLoads) {
Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
replaceLoadWithValue(ALoad, NewVal);
@@ -454,9 +451,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
-
+ for (Instruction *User : Insts) {
// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdate data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f6867c2..60ac271 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -53,9 +53,13 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
static cl::opt<unsigned>
-PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
- cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc("Control the amount of phi node folding to perform (default = 2)"));
static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
@@ -106,8 +110,8 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ const DataLayout &DL;
unsigned BonusInstThreshold;
- const DataLayout *const DL;
AssumptionCache *AC;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -127,9 +131,9 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
- const DataLayout *DL, AssumptionCache *AC)
- : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
+ unsigned BonusInstThreshold, AssumptionCache *AC)
+ : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
bool run(BasicBlock *BB);
};
}
@@ -216,45 +220,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
-/// given instruction, which is assumed to be safe to speculate. 1 means
-/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
-static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
- assert(isSafeToSpeculativelyExecute(I, DL) &&
+/// given instruction, which is assumed to be safe to speculate. TCC_Free means
+/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// expensive.
+static unsigned ComputeSpeculationCost(const User *I,
+ const TargetTransformInfo &TTI) {
+ assert(isSafeToSpeculativelyExecute(I) &&
"Instruction is not safe to speculatively execute!");
- switch (Operator::getOpcode(I)) {
- default:
- // In doubt, be conservative.
- return UINT_MAX;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return UINT_MAX;
- return 1;
- case Instruction::ExtractValue:
- case Instruction::Load:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::BitCast:
- case Instruction::ExtractElement:
- case Instruction::InsertElement:
- return 1; // These are all cheap.
-
- case Instruction::Call:
- case Instruction::Select:
- return 2;
- }
+ return TTI.getUserCost(I);
}
-
/// DominatesMergePoint - If we have a merge point of an "if condition" as
/// accepted above, return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
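The replacement cost model is TTI::getUserCost, whose results are expressed in TargetTransformInfo's generic cost constants. A sketch of the scale; the enum values quoted are an assumption worth checking against include/llvm/Analysis/TargetTransformInfo.h in this tree:

    // Sketch: reading the new speculation cost against the TTI constants,
    // where TCC_Free == 0, TCC_Basic == 1 and TCC_Expensive == 4.
    unsigned Cost = TTI.getUserCost(I);
    bool CheapEnough = Cost <= TargetTransformInfo::TCC_Basic; // add, icmp, ...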
@@ -275,7 +249,7 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const DataLayout *DL) {
+ const TargetTransformInfo &TTI) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -308,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!isSafeToSpeculativelyExecute(I, DL))
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = ComputeSpeculationCost(I, DL);
+ unsigned Cost = ComputeSpeculationCost(I, TTI);
if (Cost > CostRemaining)
return false;
@@ -321,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -330,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -371,23 +345,22 @@ namespace {
/// while for a chain of '&&' it will build the set elements that make the test
/// fail.
struct ConstantComparesGatherer {
-
+ const DataLayout &DL;
Value *CompValue; /// Value found for the switch comparison
Value *Extra; /// Extra clause to be checked before the switch
SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
/// Construct and compute the result for the comparison instruction Cond
- ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL)
- : CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
- gather(Cond, DL);
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
+ : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ gather(Cond);
}
/// Prevent copy
- ConstantComparesGatherer(const ConstantComparesGatherer &)
- LLVM_DELETED_FUNCTION;
+ ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
ConstantComparesGatherer &
- operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION;
+ operator=(const ConstantComparesGatherer &) = delete;
private:
@@ -406,7 +379,7 @@ private:
/// against is placed in CompValue.
/// If CompValue is already set, the function is expected to fail if a match
/// is found but the value compared to is different.
- bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) {
+ bool matchInstruction(Instruction *I, bool isEQ) {
// If this is an icmp against a constant, handle this as one of the cases.
ICmpInst *ICI;
ConstantInt *C;
@@ -448,8 +421,8 @@ private:
}
// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
- ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(),
- C->getValue());
+ ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
+ ICI->getPredicate(), C->getValue());
// Shift the range if the compare is fed by an add. This is the range
// compare idiom as emitted by instcombine.
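A concrete instance of the renamed helper for the comment's "x ult 3" case; a sketch:

    // Sketch: for "x ult 3" the allowed region is [0, 3), i.e. {0, 1, 2}.
    // The APInt converts implicitly to a single-element ConstantRange.
    ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
        CmpInst::ICMP_ULT, APInt(/*numBits=*/32, 3));
    // Span.getLower() == 0, Span.getUpper() == 3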
@@ -488,7 +461,7 @@ private:
/// the value being compared, and stick the list constants into the Vals
/// vector.
/// One "Extra" case is allowed to differ from the other.
- void gather(Value *V, const DataLayout *DL) {
+ void gather(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
bool isEQ = (I->getOpcode() == Instruction::Or);
@@ -510,7 +483,7 @@ private:
}
// Try to match the current instruction
- if (matchInstruction(I, DL, isEQ))
+ if (matchInstruction(I, isEQ))
// Match succeed, continue the loop
continue;
}
@@ -558,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
CV = ICI->getOperand(0);
+ }
// Unwrap any lossless ptrtoint cast.
- if (DL && CV) {
+ if (CV) {
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == DL->getIntPtrType(Ptr->getType()))
+ if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
CV = Ptr;
}
}
@@ -1007,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()),
+ CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1079,7 +1052,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
+static bool HoistThenElseCodeToIf(BranchInst *BI,
+ const TargetTransformInfo &TTI) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1114,6 +1088,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
if (isa<TerminatorInst>(I1))
goto HoistTerminator;
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return Changed;
+
// For a normal instruction, we just move one to right before the branch,
// then replace all uses of the other with the first. Finally, we remove
// the now redundant second instruction.
@@ -1167,9 +1144,9 @@ HoistTerminator:
passingValueIsAlwaysUndefined(BB2V, PN))
return Changed;
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL))
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL))
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
return Changed;
}
}
@@ -1489,7 +1466,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
///
/// \returns true if the conditional block is removed.
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const DataLayout *DL) {
+ const TargetTransformInfo &TTI) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -1525,20 +1502,20 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Only speculatively execution a single instruction (not counting the
+ // Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculationCost;
if (SpeculationCost > 1)
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !(HoistCondStores &&
- (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
- EndBB))))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
+ I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold)
+ ComputeSpeculationCost(I, TTI) >
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
// Store the store speculation candidate.
@@ -1594,12 +1571,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL)))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0;
- if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
+ unsigned MaxCost = 2 * PHINodeFoldingThreshold *
+ TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
@@ -1706,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1804,7 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1835,6 +1815,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
+ MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
+ MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -1845,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
- MaxCostVal0, DL) ||
+ MaxCostVal0, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
- MaxCostVal1, DL))
+ MaxCostVal1, TTI))
return false;
}
@@ -2067,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
- unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = nullptr;
@@ -2124,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL))
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
return false;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
@@ -2536,17 +2517,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// The weight to CommonDest should be PredCommon * SuccTotal +
// PredOther * SuccCommon.
// The weight to OtherDest should be PredOther * SuccOther.
- SmallVector<uint64_t, 2> NewWeights;
- NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
- PredOther * SuccCommon);
- NewWeights.push_back(PredOther * SuccOther);
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon,
+ PredOther * SuccOther};
// Halve the weights if any of them cannot fit in an uint32_t
FitWeights(NewWeights);
- SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
PBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext()).
- createBranchWeights(MDWeights));
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
}
// OtherDest may have phi nodes. If so, add an entry from PBI's
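Worked numbers for the weight formula above, using made-up branch weights:

    // Sketch: PredCommon:PredOther = 3:1, SuccCommon:SuccOther = 5:2.
    uint64_t PredCommon = 3, PredOther = 1, SuccCommon = 5, SuccOther = 2;
    uint64_t ToCommon = PredCommon * (SuccCommon + SuccOther) +
                        PredOther * SuccCommon;   // 3*7 + 1*5 = 26
    uint64_t ToOther = PredOther * SuccOther;     // 1*2 = 2
    // FitWeights then halves both until they fit in uint32_t (a no-op here).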
@@ -2719,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) {
+ ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
+ const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
+ AssumptionCache *AC) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2753,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2769,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2825,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
- IRBuilder<> &Builder) {
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond) return false;
@@ -2901,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CompVal = Builder.CreatePtrToInt(CompVal,
- DL->getIntPtrType(CompVal->getType()),
- "magicptr");
+ CompVal = Builder.CreatePtrToInt(
+ CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
}
// Create the new switch instruction now.
@@ -2949,20 +2927,9 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
- bool InvokeRequiresTableEntry = false;
- bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
-
- if (II->hasFnAttr(Attribute::UWTable)) {
- // Don't remove an `invoke' instruction if the ABI requires an entry into
- // the table.
- InvokeRequiresTableEntry = true;
- continue;
- }
-
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2982,14 +2949,11 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
- Changed = true;
}
- if (!InvokeRequiresTableEntry)
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
-
- return Changed;
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3121,55 +3085,6 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- // If the default value is unreachable, figure out the most popular
- // destination and make it the default.
- if (SI->getDefaultDest() == BB) {
- std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- std::pair<unsigned, unsigned> &entry =
- Popularity[i.getCaseSuccessor()];
- if (entry.first == 0) {
- entry.first = 1;
- entry.second = i.getCaseIndex();
- } else {
- entry.first++;
- }
- }
-
- // Find the most popular block.
- unsigned MaxPop = 0;
- unsigned MaxIndex = 0;
- BasicBlock *MaxBlock = nullptr;
- for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
- I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
- (I->second.first == MaxPop && MaxIndex > I->second.second)) {
- MaxPop = I->second.first;
- MaxIndex = I->second.second;
- MaxBlock = I->first;
- }
- }
- if (MaxBlock) {
- // Make this the new default, allowing us to delete any explicit
- // edges to it.
- SI->setDefaultDest(MaxBlock);
- Changed = true;
-
- // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
- // it.
- if (isa<PHINode>(MaxBlock->begin()))
- for (unsigned i = 0; i != MaxPop-1; ++i)
- MaxBlock->removePredecessor(SI->getParent());
-
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseSuccessor() == MaxBlock) {
- SI->removeCase(i);
- --i; --e;
- }
- }
- }
} else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
// Convert the invoke to a call instruction. This would be a good
@@ -3203,70 +3118,122 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
return Changed;
}
-/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
-/// integer range comparison into a sub, an icmp and a branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 1 && "Degenerate switch?");
+static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
+ assert(Cases.size() >= 1);
- // Make sure all cases point to the same destination and gather the values.
- SmallVector<ConstantInt *, 16> Cases;
- SwitchInst::CaseIt I = SI->case_begin();
- Cases.push_back(I.getCaseValue());
- SwitchInst::CaseIt PrevI = I++;
- for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
- if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (size_t I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
return false;
- Cases.push_back(I.getCaseValue());
}
- assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
+ return true;
+}
- // Sort the case values, then check if they form a range we can transform.
- array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
- for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
- if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
- return false;
+/// Turn a switch with two reachable destinations into an integer range
+/// comparison and branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ // Partition the cases into two sets with different destinations.
+ BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
+ BasicBlock *DestB = nullptr;
+ SmallVector <ConstantInt *, 16> CasesA;
+ SmallVector <ConstantInt *, 16> CasesB;
+
+ for (SwitchInst::CaseIt I : SI->cases()) {
+ BasicBlock *Dest = I.getCaseSuccessor();
+ if (!DestA) DestA = Dest;
+ if (Dest == DestA) {
+ CasesA.push_back(I.getCaseValue());
+ continue;
+ }
+ if (!DestB) DestB = Dest;
+ if (Dest == DestB) {
+ CasesB.push_back(I.getCaseValue());
+ continue;
+ }
+ return false; // More than two destinations.
}
- Constant *Offset = ConstantExpr::getNeg(Cases.back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
+ assert(DestA && DestB && "Single-destination switch should have been folded.");
+ assert(DestA != DestB);
+ assert(DestB != SI->getDefaultDest());
+ assert(!CasesB.empty() && "There must be non-default cases.");
+ assert(!CasesA.empty() || HasDefault);
+
+ // Figure out if one of the sets of cases form a contiguous range.
+ SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
+ BasicBlock *ContiguousDest = nullptr;
+ BasicBlock *OtherDest = nullptr;
+ if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
+ ContiguousCases = &CasesA;
+ ContiguousDest = DestA;
+ OtherDest = DestB;
+ } else if (CasesAreContiguous(CasesB)) {
+ ContiguousCases = &CasesB;
+ ContiguousDest = DestB;
+ OtherDest = DestA;
+ } else
+ return false;
+
+ // Start building the compare and branch.
+
+ Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
- Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
+
Value *Cmp;
// If NumCases overflowed, then all possible values jump to the successor.
- if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ if (NumCases->isNullValue() && !ContiguousCases->empty())
Cmp = ConstantInt::getTrue(SI->getContext());
else
Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- BranchInst *NewBI = Builder.CreateCondBr(
- Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
// Update weight for the newly-created conditional branch.
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
+ if (HasBranchWeights(SI)) {
+ SmallVector<uint64_t, 8> Weights;
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
- // Combine all weights for the cases to be the true weight of NewBI.
- // We assume that the sum of all weights for a Terminator can fit into 32
- // bits.
- uint32_t NewTrueWeight = 0;
- for (unsigned I = 1, E = Weights.size(); I != E; ++I)
- NewTrueWeight += (uint32_t)Weights[I];
+ uint64_t TrueWeight = 0;
+ uint64_t FalseWeight = 0;
+ for (size_t I = 0, E = Weights.size(); I != E; ++I) {
+ if (SI->getSuccessor(I) == ContiguousDest)
+ TrueWeight += Weights[I];
+ else
+ FalseWeight += Weights[I];
+ }
+ while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
+ TrueWeight /= 2;
+ FalseWeight /= 2;
+ }
NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).
- createBranchWeights(NewTrueWeight,
- (uint32_t)Weights[0]));
+ MDBuilder(SI->getContext()).createBranchWeights(
+ (uint32_t)TrueWeight, (uint32_t)FalseWeight));
}
}
- // Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
- isa<PHINode>(BBI); ++BBI) {
- for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
+ // Prune obsolete incoming values off the successors' PHI nodes.
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = ContiguousCases->size();
+ if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
+ if (OtherDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
+
+ // Drop the switch.
SI->eraseFromParent();
return true;
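The shape of the replacement, on a made-up example: cases {5, 6, 7} branch to DestA and everything else to DestB. Note that CasesAreContiguous sorts descending, so ContiguousCases->back() is the smallest case value.

    // Sketch: what the rewritten routine emits for that switch. Offset is
    // minus the smallest contiguous case value; NumCases is the set's size.
    //   %off    = add i32 %cond, -5
    //   %switch = icmp ult i32 %off, 3
    //   br i1 %switch, label %DestA, label %DestB
    // Equivalent builder calls (Ty is the condition's integer type):
    Value *Sub = Builder.CreateAdd(Cond, ConstantInt::getSigned(Ty, -5), "off");
    Value *Cmp = Builder.CreateICmpULT(Sub, ConstantInt::get(Ty, 3), "switch");
    Builder.CreateCondBr(Cmp, DestA, DestB);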
@@ -3274,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL,
- AssumptionCache *AC) {
+static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+ const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
@@ -3426,9 +3393,8 @@ static Constant *LookupConstant(Value *V,
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
-ConstantFold(Instruction *I,
- const SmallDenseMap<Value *, Constant *> &ConstantPool,
- const DataLayout *DL) {
+ConstantFold(Instruction *I, const DataLayout &DL,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3448,9 +3414,10 @@ ConstantFold(Instruction *I,
return nullptr;
}
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
COps[1], DL);
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
@@ -3460,12 +3427,10 @@ ConstantFold(Instruction *I,
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
-GetCaseResults(SwitchInst *SI,
- ConstantInt *CaseVal,
- BasicBlock *CaseDest,
+GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
- const DataLayout *DL) {
+ SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
+ const DataLayout &DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3484,7 +3449,7 @@ GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
+ } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
@@ -3555,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal,
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool InitializeUniqueCases(
- SwitchInst *SI, const DataLayout *DL, PHINode *&PHI,
- BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult) {
+static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
@@ -3666,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC, const DataLayout &DL) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
Constant *DefaultResult;
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults,
- DefaultResult))
+ if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL))
return false;
// Selects choose between maximum two values.
if (UniqueResults.size() != 2)
@@ -3701,12 +3666,10 @@ namespace {
/// SwitchLookupTable - Create a lookup table to use as a switch replacement
/// with the contents of Values, using DefaultValue to fill any holes in the
/// table.
- SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL);
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -3714,8 +3677,7 @@ namespace {
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout *DL,
- uint64_t TableSize,
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
const Type *ElementType);
private:
@@ -3757,12 +3719,10 @@ namespace {
};
}
-SwitchLookupTable::SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL)
+SwitchLookupTable::SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
@@ -3924,19 +3884,17 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
"switch.tableidx.zext");
Value *GEPIndices[] = { Builder.getInt32(0), Index };
- Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
- "switch.gep");
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
return Builder.CreateLoad(GEP, "switch.load");
}
}
llvm_unreachable("Unknown lookup table kind!");
}
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
const Type *ElementType) {
- if (!DL)
- return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
@@ -3946,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
if (TableSize >= UINT_MAX/IT->getBitWidth())
return false;
- return DL->fitsInLegalInteger(TableSize * IT->getBitWidth());
+ return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
-static bool ShouldBuildLookupTable(SwitchInst *SI,
- uint64_t TableSize,
- const TargetTransformInfo &TTI,
- const DataLayout *DL,
- const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+static bool
+ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
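The register-fit check in concrete numbers; a sketch assuming a target whose widest legal integer is 64 bits:

    // Sketch: a 7-entry table of i8 results needs 7 * 8 = 56 bits, so it can
    // be encoded as a bitmap in one 64-bit register; 40 entries (320 bits)
    // cannot, and such a table falls back to an array in memory.
    bool SmallFits = DL.fitsInLegalInteger(7 * 8);   // true on such a target
    bool BigFits   = DL.fitsInLegalInteger(40 * 8);  // false on such a target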
@@ -4079,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder,
- const TargetTransformInfo &TTI,
- const DataLayout* DL) {
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
@@ -4153,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
- &CommonDest, DefaultResultsList, DL);
+ &CommonDest, DefaultResultsList, DL);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
return false;
- if (!(DL && DL->fitsInLegalInteger(TableSize)))
+ if (!DL.fitsInLegalInteger(TableSize))
return false;
}
@@ -4193,19 +4149,18 @@ static bool SwitchToLookupTable(SwitchInst *SI,
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
- // If we have a fully covered lookup table, unconditionally branch to the
- // lookup table BB. Otherwise, check if the condition value is within the case
- // range. If it is so, branch to the new BB. Otherwise branch to SI's default
- // destination.
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
BranchInst *RangeCheckBranch = nullptr;
- const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
- if (GeneratingCoveredLookupTable) {
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
- // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
- // do not delete PHINodes here.
- SI->getDefaultDest()->removePredecessor(SI->getParent(),
- true/*DontDeleteUselessPHIs*/);
+    // Note: We call removePredecessor later since we need to be able to get the
+ // PHI value for the default case in case we're using a bit mask.
} else {
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
MinCaseVal->getType(), TableSize));
@@ -4257,6 +4212,13 @@ static bool SwitchToLookupTable(SwitchInst *SI,
AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
}
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
+ // do not delete PHINodes here.
+ SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
+ }
+
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
@@ -4317,12 +4279,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -4332,25 +4294,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (EliminateDeadSwitchCases(SI, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToSelect(SI, Builder, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToSelect(SI, Builder, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, TTI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4387,11 +4349,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
return Changed;
}
+/// Given a block with only a single landing pad and an unconditional branch,
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception-dense code (see instcombine, gvn, and the
+/// if-else sinking in this file).
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't loose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+ // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSet<BasicBlock *, 16> Preds;
+ Preds.insert(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB &&
+ II->getUnwindDest() == BB && "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (auto I = OtherPred->begin(), E = OtherPred->end();
+ I != E;) {
+ Instruction &Inst = *I; I++;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst.eraseFromParent();
+ }
+
+ SmallSet<BasicBlock *, 16> Succs;
+ Succs.insert(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : Succs) {
+ Succ->removePredecessor(BB);
+ }
+
+ IRBuilder<> Builder(BI);
+ Builder.CreateUnreachable();
+ BI->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
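In practice the pattern comes from exception-dense C++; a plausible source-level illustration (not taken from the patch) is two calls under one non-trivial cleanup, which lowers to two invokes whose landing pads are identical empty blocks branching to a shared handler:

    struct Guard { ~Guard(); };  // non-trivial destructor forces landing pads
    void f();
    void g();
    void caller() {
      Guard cleanup;
      f();  // invoke; unwinds to an empty cleanup landing pad
      g();  // invoke; its landing pad is identical to the first
    }

After TryToMergeLandingPad runs, both invokes unwind to the same block and the duplicate is replaced by unreachable.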
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
@@ -4411,17 +4449,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI,
- BonusInstThreshold, DL, AC))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
+ BonusInstThreshold, AC))
return true;
}
+ // See if we can merge an empty landing pad block with another which is
+ // equivalent.
+ if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ if (I->isTerminator() &&
+ TryToMergeLandingPad(LPad, BI, BB))
+ return true;
+ }
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4436,7 +4483,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -4446,26 +4493,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
}
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, DL, Builder))
+ if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -4473,16 +4520,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -4490,8 +4537,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -4499,14 +4546,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4618,7 +4665,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, DL);
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4650,7 +4697,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL,
- AssumptionCache *AC) {
- return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB);
+ unsigned BonusInstThreshold, AssumptionCache *AC) {
+ return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
+ BonusInstThreshold, AC).run(BB);
}
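This rewritten entry point captures the theme of the merge: DataLayout is no longer an optional pointer threaded through every signature but is recovered from the IR itself, which is what lets the many "if (!DL) return nullptr" guards further down disappear. A minimal sketch of the new convention, assuming only a function attached to a module:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    unsigned pointerBits(Function &F) {
      const DataLayout &DL = F.getParent()->getDataLayout();
      return DL.getPointerSizeInBits();  // always available, never null
    }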
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d54c09a..3757a80 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -48,22 +47,15 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
bool Changed;
public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) :
- L(Loop),
- LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
- SE(SE),
- DeadInsts(Dead),
- Changed(false) {
- DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI,
+ SmallVectorImpl<WeakVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -277,95 +269,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
- // Currently we only handle instructions of the form "add <indvar> <value>"
- unsigned Op = BO->getOpcode();
- if (Op != Instruction::Add)
+ // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
return false;
- // If BO is already both nuw and nsw then there is nothing left to do
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
+ const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags);
+
+ switch (BO->getOpcode()) {
+ default:
return false;
- IntegerType *IT = cast<IntegerType>(IVOperand->getType());
- Value *OtherOperand = nullptr;
- if (BO->getOperand(0) == IVOperand) {
- OtherOperand = BO->getOperand(1);
- } else {
- assert(BO->getOperand(1) == IVOperand && "only other use!");
- OtherOperand = BO->getOperand(0);
+ case Instruction::Add:
+ GetExprForBO = &ScalarEvolution::getAddExpr;
+ break;
+
+ case Instruction::Sub:
+ GetExprForBO = &ScalarEvolution::getMinusSCEV;
+ break;
+
+ case Instruction::Mul:
+ GetExprForBO = &ScalarEvolution::getMulExpr;
+ break;
}
- bool Changed = false;
- const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand);
- if (OtherOpSCEV == SE->getCouldNotCompute())
- return false;
+ unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
+ const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
- const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
- const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
+ bool Changed = false;
- if (!BO->hasNoSignedWrap()) {
- // Upgrade the add to an "add nsw" if we can prove that it will never
- // sign-overflow or sign-underflow.
-
- const SCEV *SignedMax =
- SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth()));
- const SCEV *SignedMin =
- SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth()));
-
- // The addition "IVOperand + OtherOp" does not sign-overflow if the result
- // is sign-representable in 2's complement in the given bit-width.
- //
- // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp,
- // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp].
- // Everything in [SignedMin, SignedMax + OtherOp] is representable since
- // SignedMax + OtherOp is at least -1.
- //
- // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax -
- // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax].
- // Everything in [SignedMin + OtherOp, SignedMax] is representable since
- // SignedMin + OtherOp is at most -1.
- //
- // It follows that for all values of IVOperand in [SignedMin - smin(0,
- // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is
- // representable (i.e. there is no sign-overflow).
-
- const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta);
-
- bool NeverSignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit);
-
- if (NeverSignedOverflows) {
- const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta);
-
- bool NeverSignedUnderflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit);
- if (NeverSignedUnderflows) {
- BO->setHasNoSignedWrap(true);
- Changed = true;
- }
+ if (!BO->hasNoUnsignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
}
- if (!BO->hasNoUnsignedWrap()) {
- // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can
- // prove that it will never unsigned-overflow (i.e. the result will always
- // be representable in the given bit-width).
- //
- // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it
- // does not produce a carry. "IVOperand + OtherOp" produces no carry iff
- // IVOperand ULE (UnsignedMax - OtherOp).
-
- const SCEV *UnsignedMax =
- SE->getConstant(APInt::getMaxValue(IT->getBitWidth()));
- const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV);
-
- bool NeverUnsignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit);
-
- if (NeverUnsignedOverflows) {
- BO->setHasNoUnsignedWrap(true);
+ if (!BO->hasNoSignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
Changed = true;
}
}
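The new strengthenOverflowingOperation above replaces the hand-rolled range proofs with one uniform criterion: a width-w add, sub, or mul cannot wrap exactly when extending to 2w bits commutes with the operation, and ScalarEvolution proves that identity symbolically over all values the IV takes. A pointwise scalar model of the unsigned (nuw) case for i8 widened to i16:

    #include <cassert>
    #include <cstdint>

    // zext16(a + b) == zext16(a) + zext16(b) holds iff the i8 add is nuw.
    bool addIsNUW(uint8_t a, uint8_t b) {
      uint16_t extendAfterOp = uint16_t(uint8_t(a + b));  // zext(a + b)
      uint16_t opAfterExtend = uint16_t(a) + uint16_t(b); // zext(a) + zext(b)
      return extendAfterOp == opAfterExtend;
    }

    int main() {
      assert(addIsNUW(100, 100));   // 200 still fits in 8 bits
      assert(!addIsNUW(200, 100));  // 300 wraps to 44
    }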
@@ -562,8 +516,8 @@ void IVVisitor::anchor() { }
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
{
- LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index cc97098..c499c87 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -25,7 +25,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -43,7 +43,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
/// runOnFunction - Remove instructions that simplify.
@@ -51,9 +51,9 @@ namespace {
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
@@ -106,7 +106,7 @@ char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
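This hunk shows the migration pattern repeated across the merge: TargetLibraryInfo is now a result held by TargetLibraryInfoWrapperPass rather than being a pass itself. A hedged sketch of a legacy-pass consumer (ExamplePass is invented for illustration):

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<TargetLibraryInfoWrapperPass>();  // new spelling
      }
      bool runOnFunction(Function &F) override {
        const TargetLibraryInfo &TLI =
            getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
        return TLI.has(LibFunc::memchr) && false;  // query only, change nothing
      }
    };
    } // namespace
    char ExamplePass::ID = 0;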
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5a0d52e..6bbf828 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,7 +30,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
@@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
/// string/memory copying library function \p Func.
/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
/// Their fortified (_chk) counterparts are also accepted.
-static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
- const DataLayout *DL) {
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
FunctionType *FT = F->getFunctionType();
LLVMContext &Context = F->getContext();
Type *PCharTy = Type::getInt8PtrTy(Context);
- Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+ Type *SizeTTy = DL.getIntPtrType(Context);
unsigned NumParams = FT->getNumParams();
// All string libfuncs return the same type as the first parameter.
@@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -226,13 +222,13 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
// the string ... we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+ Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(
- CpyDst, Src,
- ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
+ 1);
return Dst;
}
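The CreateGEP rewrite here, and through the rest of this file, is part of the move to explicitly typed GEPs: IRBuilder now takes the source element type as a leading argument instead of deriving it from the pointer operand's pointee type. A minimal before/after sketch (advance, Ptr, and Idx are illustrative names):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    Value *advance(IRBuilder<> &B, Value *Ptr, Value *Idx) {
      // Old form, being phased out: B.CreateGEP(Ptr, Idx, "advance");
      return B.CreateGEP(B.getInt8Ty(), Ptr, Idx, "advance");
    }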
@@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// We don't optimize this case
if (Len < SrcLen)
return nullptr;
@@ -297,25 +289,21 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return EmitMemChr(
- SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI);
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant; see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
- if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return nullptr;
}
@@ -328,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
}
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (DL && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, DL, TLI);
+ if (CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TLI);
return nullptr;
}
@@ -363,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
}
Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
std::min(Len1, Len2)),
B, DL, TLI);
}
@@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
@@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len == 0)
@@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1);
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
return Dst;
}
@@ -490,17 +470,13 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL))
- return nullptr;
-
- // These optimizations require DataLayout.
- if (!DL)
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
@@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
- B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
+ B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
return nullptr;
Value *Dst = CI->getArgOperand(0);
@@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst; // strncpy(x, y, 0) -> x
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Let strncpy handle the zero padding
if (Len > SrcLen + 1)
return nullptr;
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
+ B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
return Dst;
}
@@ -625,12 +597,12 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
- if (DL && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
+ if (HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
return nullptr;
}
@@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
}
// strcspn(s, "") -> strlen(s)
- if (DL && HasS2 && S2.empty())
+ if (HasS2 && S2.empty())
return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
return nullptr;
@@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return nullptr;
@@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
+ Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
return nullptr;
}
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy(32) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return nullptr;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+
+ // memchr(x, y, 0) -> null
+ if (LenC && LenC->isNullValue())
+ return Constant::getNullValue(CI->getType());
+
+ // From now on we need at least constant length and string.
+ StringRef Str;
+ if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // Truncate the string to LenC. If Str is smaller than LenC, we will still
+ // only scan the string, as reading past its end is undefined and we can just
+ // return null if we don't find the char.
+ Str = Str.substr(0, LenC->getZExtValue());
+
+ // If the char is variable but the input str and length are not, we can turn
+ // this memchr call into a simple bit field test. Of course this only works
+ // when the return value is checked solely against null.
+ //
+ // It would be really nice to reuse switch lowering here but we can't change
+ // the CFG at this point.
+ //
+ // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // after bounds check.
+ if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
+
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64-bit architecture this prevents us from using the
+ // interesting range of alpha ASCII chars. We could do better by emitting
+ // two bitfields or shifting the range by 64 if no lower chars are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
+
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
+
+ // First check that the bit field access is within bounds.
+ Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
+
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
+ }
+
+ // Check if all arguments are constants. If so, we can constant fold.
+ if (!CharC)
+ return nullptr;
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue() & 0xFF);
+ if (I == StringRef::npos) // Didn't find the char. memchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // memchr(s+n,c,l) -> gep(s+n+i,c)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
+}
+
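A worked instance of the bit-field rewrite above: for memchr("\r\n", C, 2) != nullptr, Max is '\r' (13), NextPowerOf2 gives a 16-bit field, and the mask has bits 10 and 13 set, so the call reduces to one bounds check plus one AND. A standalone scalar model, checked against libc memchr:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    bool memchrAsBitTest(unsigned char c) {
      const uint16_t Mask = (1u << '\r') | (1u << '\n');  // bits 13 and 10
      const unsigned Width = 16;                          // NextPowerOf2(13)
      return c < Width && ((uint16_t(1) << c) & Mask) != 0;
    }

    int main() {
      const char S[] = {'\r', '\n'};
      for (unsigned c = 0; c < 256; ++c)
        assert(memchrAsBitTest(c) == (memchr(S, c, sizeof S) != nullptr));
    }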
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
@@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy))
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove))
return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset))
return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -924,7 +973,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
Module *M = CI->getParent()->getParent()->getParent();
- Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
+ Intrinsic::ID IID = Callee->getIntrinsicID();
Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
V = B.CreateCall(F, V);
} else {
@@ -1101,7 +1150,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *Callee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
Op->getType(), B.getInt32Ty(), nullptr);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall(Callee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1387,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, DL, TLI);
+ Value *NewCI = EmitPutS(GV, B, TLI);
return (CI->use_empty() || !NewCI)
? NewCI
: ConstantInt::get(CI->getType(), FormatStr.size() + 1);
@@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI);
if (CI->use_empty() || !Res)
return Res;
@@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
+ return EmitPutS(CI->getArgOperand(1), B, TLI);
}
return nullptr;
}
@@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%')
return nullptr; // we found a format specifier, bail out.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(
- CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
- FormatStr.size() + 1),
- 1); // Copy the null byte.
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1),
+ 1); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1627,17 +1671,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return nullptr; // We found a format specifier.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitFWrite(
CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
CI->getArgOperand(0), B, DL, TLI);
}
@@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
return nullptr;
}
@@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Known to have no uses (see above).
return EmitFWrite(
CI->getArgOperand(0),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
CI->getArgOperand(1), B, DL, TLI);
}
@@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrCSpn(CI, Builder);
case LibFunc::strstr:
return optimizeStrStr(CI, Builder);
+ case LibFunc::memchr:
+ return optimizeMemChr(CI, Builder);
case LibFunc::memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc::memcpy:
@@ -2088,15 +2122,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI) :
- FortifiedSimplifier(DL, TLI),
- DL(DL),
- TLI(TLI),
- UnsafeFPShrink(false) {
+LibCallSimplifier::LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<void(Instruction *, Value *)> Replacer)
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
+ Replacer(Replacer) {}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
+ // Indirect through the replacer used in this instance.
+ Replacer(I, With);
}
-void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I,
+ Value *With) {
I->replaceAllUsesWith(With);
I->eraseFromParent();
}
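The function_ref indirection lets a caller (InstCombine, for instance) decide how replaced calls are disposed of instead of having them erased eagerly. A hedged sketch of a caller that supplies the default policy explicitly (runSimplifier and replaceAndErase are invented names; the constructor signature is the one introduced above):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
    using namespace llvm;

    static void replaceAndErase(Instruction *I, Value *With) {
      I->replaceAllUsesWith(With);  // mirrors replaceAllUsesWithDefault
      I->eraseFromParent();
    }

    Value *runSimplifier(CallInst *CI, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
      LibCallSimplifier Simplifier(DL, TLI, replaceAndErase);
      return Simplifier.optimizeCall(CI);  // may call back into replaceAndErase
    }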
@@ -2183,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2197,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2211,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2227,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
+ const DataLayout &DL = CI->getModule()->getDataLayout();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
@@ -2237,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will
@@ -2245,29 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else if (!OnlyLowerUnknownSize) {
- // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
+ if (isFortifiedCallFoldable(CI, 2, 1, true))
+ return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
- // This optimization requires DataLayout.
- if (!DL)
- return nullptr;
+ if (OnlyLowerUnknownSize)
+ return nullptr;
- Type *SizeTTy = DL->getIntPtrType(CI->getContext());
- Value *LenV = ConstantInt::get(SizeTTy, Len);
- Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
- // If the function was an __stpcpy_chk, and we were able to fold it into
- // a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc::stpcpy_chk)
- return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
- return Ret;
- }
- return nullptr;
+ // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc::stpcpy_chk)
+ return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return Ret;
}
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2276,20 +2311,29 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret =
- EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
return Ret;
}
return nullptr;
}
Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin())
- return nullptr;
+ // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+ // Some clang users checked for _chk libcall availability using:
+ // __has_builtin(__builtin___memcpy_chk)
+ // When compiling with -fno-builtin, this is always true.
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+ // end up with fortified libcalls, which isn't acceptable in a freestanding
+ // environment that only provides their non-fortified counterparts.
+ //
+ // Until we change clang and/or teach external users to check for availability
+ // differently, disregard the "nobuiltin" attribute and TLI::has.
+ //
+ // PR23093.
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
@@ -2298,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// First, check that this is a known library function.
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+ if (!TLI->getLibFunc(FuncName, Func))
return nullptr;
// We never change the calling convention.
@@ -2324,8 +2368,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-FortifiedLibCallSimplifier::
-FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
- bool OnlyLowerUnknownSize)
- : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
-}
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index d36283e..a2a54da 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,7 +60,8 @@
#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -72,15 +73,15 @@
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
+using namespace SymbolRewriter;
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
cl::desc("Symbol Rewrite Map"),
cl::value_desc("filename"));
-namespace llvm {
-namespace SymbolRewriter {
-void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
- const std::string &Target) {
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();
@@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
}
}
+namespace {
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
&llvm::Module::getNamedAlias,
&llvm::Module::aliases>
PatternRewriteNamedAliasDescriptor;
+} // namespace
bool RewriteMapParser::parse(const std::string &MapFile,
RewriteDescriptorList *DL) {
@@ -489,8 +492,6 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return true;
}
-}
-}
namespace {
class RewriteSymbols : public ModulePass {
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 65f2ae2..cac80ac 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
@@ -155,13 +156,12 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer);
-static Metadata *mapMetadataOp(Metadata *Op,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -182,157 +182,85 @@ static Metadata *mapMetadataOp(Metadata *Op,
return nullptr;
}
-static Metadata *cloneMDTuple(const MDTuple *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer,
- bool IsDistinct) {
- // Distinct MDTuples have their own code path.
- assert(!IsDistinct && "Unexpected distinct tuple");
- (void)IsDistinct;
-
- SmallVector<Metadata *, 4> Elts;
- Elts.reserve(Node->getNumOperands());
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
- Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags,
- TypeMapper, Materializer));
-
- return MDTuple::get(Node->getContext(), Elts);
-}
-
-static Metadata *cloneMDLocation(const MDLocation *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer,
- bool IsDistinct) {
- return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)(
- Node->getContext(), Node->getLine(), Node->getColumn(),
- mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper,
- Materializer),
- mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper,
- Materializer));
-}
-
-static Metadata *cloneMDNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer, bool IsDistinct) {
- switch (Node->getMetadataID()) {
- default:
- llvm_unreachable("Invalid UniquableMDNode subclass");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
- case Metadata::CLASS##Kind: \
- return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper, \
- Materializer, IsDistinct);
-#include "llvm/IR/Metadata.def"
+/// \brief Remap nodes.
+///
+/// Insert \c NewNode in the value map, and then remap \c OldNode's operands.
+/// Assumes that \c NewNode is already a clone of \c OldNode.
+///
+/// \pre \c NewNode is a clone of \c OldNode.
+static bool remap(const MDNode *OldNode, MDNode *NewNode,
+ SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
+ "Expected nodes to match");
+ assert(OldNode->isResolved() && "Expected resolved node");
+ assert(!NewNode->isUniqued() && "Expected non-uniqued node");
+
+ // Map the node upfront so it's available for cyclic references.
+ mapToMetadata(VM, OldNode, NewNode);
+ bool AnyChanged = false;
+ for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
+ Metadata *Old = OldNode->getOperand(I);
+ assert(NewNode->getOperand(I) == Old &&
+ "Expected old operands to already be in place");
+
+ Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
+ TypeMapper, Materializer);
+ if (Old != New) {
+ AnyChanged = true;
+ NewNode->replaceOperandWith(I, New);
+ }
}
-}
-static void
-trackCyclesUnderDistinct(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles) {
- // Track any cycles beneath this node.
- for (Metadata *Op : Node->operands())
- if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op))
- if (!N->isResolved())
- Cycles.push_back(N);
+ return AnyChanged;
}
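The essential trick in remap is publishing NewNode in the value map before visiting the operands, so a metadata cycle that reaches back to OldNode resolves to the clone instead of recursing forever. The same scheme on a toy graph (Node and cloneGraph are invented; allocations are leaked for brevity):

    #include <cassert>
    #include <map>
    #include <vector>

    struct Node { std::vector<Node *> Ops; };

    Node *cloneGraph(Node *N, std::map<Node *, Node *> &VM) {
      std::map<Node *, Node *>::iterator It = VM.find(N);
      if (It != VM.end())
        return It->second;
      Node *Clone = new Node(*N);  // operands still point at the originals
      VM[N] = Clone;               // map upfront, before touching operands
      for (size_t I = 0; I != Clone->Ops.size(); ++I)
        Clone->Ops[I] = cloneGraph(Clone->Ops[I], VM);
      return Clone;
    }

    int main() {
      Node A, B;
      A.Ops.push_back(&B);
      B.Ops.push_back(&A);         // a cycle, like self-referential metadata
      std::map<Node *, Node *> VM;
      Node *A2 = cloneGraph(&A, VM);
      assert(A2->Ops[0]->Ops[0] == A2);  // cycle reproduced among clones
    }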
/// \brief Map a distinct MDNode.
///
/// Distinct nodes are not uniqued, so they must always be recreated.
-static Metadata *mapDistinctNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapDistinctNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(Node->isDistinct() && "Expected distinct node");
- // Optimization for MDTuples.
- if (isa<MDTuple>(Node)) {
- // Create the node first so it's available for cyclical references.
- SmallVector<Metadata *, 4> EmptyOps(Node->getNumOperands());
- MDTuple *NewMD = MDTuple::getDistinct(Node->getContext(), EmptyOps);
- mapToMetadata(VM, Node, NewMD);
-
- // Fix the operands.
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
- NewMD->replaceOperandWith(I,
- mapMetadataOp(Node->getOperand(I), Cycles, VM,
- Flags, TypeMapper, Materializer));
-
- trackCyclesUnderDistinct(NewMD, Cycles);
- return NewMD;
- }
+ MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
+ remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
- // In general we need a dummy node, since whether the operands are null can
- // affect the size of the node.
- std::unique_ptr<MDNodeFwdDecl> Dummy(
- MDNode::getTemporary(Node->getContext(), None));
- mapToMetadata(VM, Node, Dummy.get());
- auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags,
- TypeMapper, Materializer,
- /* IsDistinct */ true));
- Dummy->replaceAllUsesWith(NewMD);
- trackCyclesUnderDistinct(NewMD, Cycles);
- return mapToMetadata(VM, Node, NewMD);
-}
+ // Track any cycles beneath this node.
+ for (Metadata *Op : NewMD->operands())
+ if (auto *Node = dyn_cast_or_null<MDNode>(Op))
+ if (!Node->isResolved())
+ Cycles.push_back(Node);
-/// \brief Check whether a uniqued node needs to be remapped.
-///
-/// Check whether a uniqued node needs to be remapped (due to any operands
-/// changing).
-static bool shouldRemapUniquedNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- // Check all operands to see if any need to be remapped.
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
- Metadata *Op = Node->getOperand(I);
- if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer))
- return true;
- }
- return false;
+ return NewMD;
}
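
Because distinct nodes are never re-uniqued, the mapping policy above is unconditional: clone, publish, remap, then queue any operand that still sits on an unresolved cycle. A hedged model of that bookkeeping, with illustrative types rather than the LLVM classes:

#include <map>
#include <vector>

// Toy node: operands plus a resolved flag (stand-in for MDNode::isResolved).
struct Node {
  std::vector<Node *> Ops;
  bool Resolved = true;
};

// Model of mapDistinctNode: always recreate the node, then record operands
// that are still unresolved so the caller can run resolveCycles() later.
// (Operand remapping itself is elided; see the remap() sketch above.)
Node *mapDistinct(Node *Old, std::map<Node *, Node *> &VM,
                  std::vector<Node *> &Cycles) {
  Node *New = new Node(*Old);   // clone() + replaceWithDistinct() in spirit
  VM[Old] = New;                // visible to cyclic references while remapping
  for (Node *Op : New->Ops)
    if (Op && !Op->Resolved)
      Cycles.push_back(Op);
  return New;
}
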
/// \brief Map a uniqued MDNode.
///
/// Uniqued nodes may not need to be recreated (they may map to themselves).
-static Metadata *mapUniquedNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapUniquedNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- assert(!Node->isDistinct() && "Expected uniqued node");
-
- // Create a dummy node in case we have a metadata cycle.
- MDNodeFwdDecl *Dummy = MDNode::getTemporary(Node->getContext(), None);
- mapToMetadata(VM, Node, Dummy);
-
- // Check all operands to see if any need to be remapped.
- if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper,
- Materializer)) {
- // Use an identity mapping.
- mapToSelf(VM, Node);
- MDNode::deleteTemporary(Dummy);
- return const_cast<Metadata *>(static_cast<const Metadata *>(Node));
- }
+ assert(Node->isUniqued() && "Expected uniqued node");
- // At least one operand needs remapping.
- Metadata *NewMD =
- cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer,
- /* IsDistinct */ false);
- Dummy->replaceAllUsesWith(NewMD);
- MDNode::deleteTemporary(Dummy);
- return mapToMetadata(VM, Node, NewMD);
+ // Create a temporary node upfront in case we have a metadata cycle.
+ auto ClonedMD = Node->clone();
+ if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
+ // No operands changed, so use the identity mapping.
+ return mapToSelf(VM, Node);
+
+ // At least one operand has changed, so uniquify the cloned node.
+ return mapToMetadata(VM, Node,
+ MDNode::replaceWithUniqued(std::move(ClonedMD)));
}
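
For uniqued nodes the interesting case is the cheap one: if remapping changed no operand, the original node stands for itself and no clone is ever committed. A small standalone model of that policy (plain C++, with the re-uniquing step elided):

#include <map>
#include <vector>

struct Node {
  std::vector<Node *> Ops;
};

// Model of mapUniquedNode: remap tentatively, and only keep a new node when
// an operand actually changed; otherwise use the identity mapping.
Node *mapUniqued(Node *Old, std::map<Node *, Node *> &VM,
                 const std::map<Node *, Node *> &OperandMap) {
  std::vector<Node *> NewOps = Old->Ops;
  bool AnyChanged = false;
  for (Node *&Op : NewOps) {
    auto It = OperandMap.find(Op);
    if (It != OperandMap.end() && It->second != Op) {
      Op = It->second;
      AnyChanged = true;
    }
  }
  if (!AnyChanged)
    return VM[Old] = Old;              // mapToSelf: no new node needed
  return VM[Old] = new Node{NewOps};   // replaceWithUniqued() elided
}
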
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -364,14 +292,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
return nullptr;
}
- const UniquableMDNode *Node = cast<UniquableMDNode>(MD);
- assert(Node->isResolved() && "Unexpected unresolved node");
+ // Note: this cast precedes the Flags check so we always get its associated
+ // assertion.
+ const MDNode *Node = cast<MDNode>(MD);
// If this is a module-level metadata and we know that nothing at the
// module level is changing, then use an identity mapping.
if (Flags & RF_NoModuleLevelChanges)
return mapToSelf(VM, MD);
+ // Require resolved nodes whenever metadata might be remapped.
+ assert(Node->isResolved() && "Unexpected unresolved node");
+
if (Node->isDistinct())
return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
@@ -381,17 +313,19 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- SmallVector<UniquableMDNode *, 8> Cycles;
+ SmallVector<MDNode *, 8> Cycles;
Metadata *NewMD =
MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
// Resolve cycles underneath MD.
if (NewMD && NewMD != MD) {
- if (auto *N = dyn_cast<UniquableMDNode>(NewMD))
- N->resolveCycles();
+ if (auto *N = dyn_cast<MDNode>(NewMD))
+ if (!N->isResolved())
+ N->resolveCycles();
- for (UniquableMDNode *N : Cycles)
- N->resolveCycles();
+ for (MDNode *N : Cycles)
+ if (!N->isResolved())
+ N->resolveCycles();
} else {
// Shouldn't get unresolved cycles if nothing was remapped.
assert(Cycles.empty() && "Expected no unresolved cycles");
@@ -450,7 +384,24 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
I->setMetadata(MI->first, New);
}
+ if (!TypeMapper)
+ return;
+
// If the instruction's type is being remapped, do so now.
- if (TypeMapper)
- I->mutateType(TypeMapper->remapType(I->getType()));
+ if (auto CS = CallSite(I)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *FTy = CS.getFunctionType();
+ Tys.reserve(FTy->getNumParams());
+ for (Type *Ty : FTy->params())
+ Tys.push_back(TypeMapper->remapType(Ty));
+ CS.mutateFunctionType(FunctionType::get(
+ TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+ return;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType()));
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEP->setSourceElementType(
+ TypeMapper->remapType(GEP->getSourceElementType()));
+ I->mutateType(TypeMapper->remapType(I->getType()));
}
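
The new tail of RemapInstruction exists because a call's function type, an alloca's allocated type, and a GEP's source element type are carried on the instruction itself and cannot be recovered from operand types alone, so each must be remapped explicitly. A hedged standalone model of the dispatch (toy types, not the LLVM classes; the real code returns early for call sites because mutateFunctionType already covers the result type):

#include <functional>

struct Type {};
struct Instr {
  enum Kind { Call, Alloca, GEP, Other } K = Other;
  Type *ResultTy = nullptr;  // every instruction has a result type
  Type *CarriedTy = nullptr; // function type / allocated type / source elt
};

// Remap the explicitly carried type first, then the result type.
void remapInstrTypes(Instr &I, const std::function<Type *(Type *)> &Remap) {
  switch (I.K) {
  case Instr::Call:
  case Instr::Alloca:
  case Instr::GEP:
    I.CarriedTy = Remap(I.CarriedTy);
    break;
  case Instr::Other:
    break;
  }
  I.ResultTy = Remap(I.ResultTy);
}
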
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index a0ccf9d..215d6f9 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -201,14 +202,14 @@ namespace {
initializeBBVectorizePass(*PassRegistry::getPassRegistry());
}
- BBVectorize(Pass *P, const VectorizeConfig &C)
+ BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
AA = &P->getAnalysis<AliasAnalysis>();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfo>();
+ TTI = IgnoreTargetInfo
+ ? nullptr
+ : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
typedef std::pair<Value *, Value *> ValuePair;
@@ -220,7 +221,6 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -440,9 +440,10 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfo>();
+ TTI = IgnoreTargetInfo
+ ? nullptr
+ : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *BB.getParent());
return vectorizeBB(BB);
}
@@ -452,7 +453,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
@@ -637,19 +638,19 @@ namespace {
dyn_cast<SCEVConstant>(OffsetSCEV)) {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
-
+ const DataLayout &DL = I->getModule()->getDataLayout();
Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy);
+ int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2);
+ int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
- return (abs64(Offset) % VTy2TSS) == 0;
+ return (std::abs(Offset) % VTy2TSS) == 0;
}
OffsetInElmts = Offset/VTyTSS;
- return (abs64(Offset) % VTyTSS) == 0;
+ return (std::abs(Offset) % VTyTSS) == 0;
}
return false;
@@ -661,7 +662,7 @@ namespace {
Function *F = I->getCalledFunction();
if (!F) return false;
- Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ Intrinsic::ID IID = F->getIntrinsicID();
if (!IID) return false;
switch(IID) {
@@ -841,7 +842,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
return true;
}
@@ -895,10 +896,6 @@ namespace {
return false;
}
- // We can't vectorize memory operations without target data
- if (!DL && IsSimpleLoadStore)
- return false;
-
Type *T1, *T2;
getInstructionTypes(I, T1, T2);
@@ -933,9 +930,8 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || !DL) &&
- (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
+ if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
+ T2->getScalarType()->isPointerTy()))
return false;
if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
@@ -980,8 +976,8 @@ namespace {
unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace,
- OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ IAddressSpace, JAddressSpace, OffsetInElmts) &&
+ std::abs(OffsetInElmts) == 1) {
FixedOrder = (int) OffsetInElmts;
unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment;
@@ -996,8 +992,8 @@ namespace {
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
-
- unsigned VecAlignment = DL->getPrefTypeAlignment(VType);
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
@@ -1102,7 +1098,7 @@ namespace {
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
if (CI && (FI = CI->getCalledFunction())) {
- Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ Intrinsic::ID IID = FI->getIntrinsicID();
if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
IID == Intrinsic::cttz) {
Value *A1I = CI->getArgOperand(1),
@@ -1277,7 +1273,7 @@ namespace {
CostSavings, FixedOrder)) continue;
// J is a candidate for merging with I.
- if (!PairableInsts.size() ||
+ if (PairableInsts.empty() ||
PairableInsts[PairableInsts.size()-1] != I) {
PairableInsts.push_back(I);
}
@@ -2774,7 +2770,7 @@ namespace {
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
- Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ Intrinsic::ID IID = F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
@@ -3107,7 +3103,17 @@ namespace {
else if (H->hasName())
K->takeName(H);
- if (!isa<StoreInst>(K))
+ if (auto CS = CallSite(K)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *Old = CS.getFunctionType();
+ unsigned NumOld = Old->getNumParams();
+ assert(NumOld <= ReplacedOperands.size());
+ for (unsigned i = 0; i != NumOld; ++i)
+ Tys.push_back(ReplacedOperands[i]->getType());
+ CS.mutateFunctionType(
+ FunctionType::get(getVecTypeForPair(L->getType(), H->getType()),
+ Tys, Old->isVarArg()));
+ } else if (!isa<StoreInst>(K))
K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
unsigned KnownIDs[] = {
@@ -3192,7 +3198,7 @@ char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
@@ -3203,7 +3209,7 @@ BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
bool
llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
- BBVectorize BBVectorizer(P, C);
+ BBVectorize BBVectorizer(P, *BB.getParent(), C);
return BBVectorizer.vectorizeBB(BB);
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 47b92a3..011fd0f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,6 +58,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -92,6 +93,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>
@@ -105,15 +107,6 @@ using namespace llvm::PatternMatch;
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
-static cl::opt<unsigned>
-VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
- cl::desc("Sets the SIMD width. Zero is autoselect."));
-
-static cl::opt<unsigned>
-VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
- cl::desc("Sets the vectorization interleave count. "
- "Zero is autoselect."));
-
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
@@ -144,13 +137,6 @@ static cl::opt<bool> EnableMemAccessVersioning(
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// When performing memory disambiguation checks at runtime do not make more
-/// than this number of comparisons.
-static const unsigned RuntimeMemoryCheckThreshold = 8;
-
-/// Maximum simd width.
-static const unsigned MaxVectorWidth = 64;
-
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's number of scalar registers."));
@@ -218,29 +204,30 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizeHints;
-/// Optimization analysis message produced during vectorization. Messages inform
-/// the user why vectorization did not occur.
-class Report {
- std::string Message;
- raw_string_ostream Out;
- Instruction *Instr;
-
+/// \brief This modifies LoopAccessReport to initialize message with
+/// loop-vectorizer-specific part.
+class VectorizationReport : public LoopAccessReport {
public:
- Report(Instruction *I = nullptr) : Out(Message), Instr(I) {
- Out << "loop not vectorized: ";
- }
-
- template <typename A> Report &operator<<(const A &Value) {
- Out << Value;
- return *this;
- }
-
- Instruction *getInstr() { return Instr; }
-
- std::string &str() { return Out.str(); }
- operator Twine() { return Out.str(); }
+ VectorizationReport(Instruction *I = nullptr)
+ : LoopAccessReport("loop not vectorized: ", I) {}
+
+ /// \brief This allows promotion of the loop-access analysis report into the
+ /// loop-vectorizer report. It modifies the message to add the
+ /// loop-vectorizer-specific part of the message.
+ explicit VectorizationReport(const LoopAccessReport &R)
+ : LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
+ R.getInstr()) {}
};
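
The promotion constructor above is purely cosmetic: it re-wraps a loop-access message so the user-visible remark carries the vectorizer's prefix. A minimal model of the same idea (standalone C++; Report is an illustrative stand-in for LoopAccessReport):

#include <iostream>
#include <string>

struct Report {
  std::string Msg;
  explicit Report(std::string M) : Msg(std::move(M)) {}
};

// Subclass whose only job is to prepend the vectorizer-specific prefix,
// both for fresh messages and when promoting an analysis report.
struct VectorizeReport : Report {
  VectorizeReport() : Report("loop not vectorized: ") {}
  explicit VectorizeReport(const Report &R)
      : Report("loop not vectorized: " + R.Msg) {}
};

int main() {
  Report LA("cannot identify array bounds"); // e.g. from loop-access analysis
  std::cout << VectorizeReport(LA).Msg << '\n';
  return 0;
}
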
+/// A helper function for converting Scalar types to vector types.
+/// If the incoming type is void, we return void. If the VF is 1, we return
+/// the scalar type.
+static Type* ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
+
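
The widening rule above is simple but worth a concrete model: void stays void, VF == 1 keeps the scalar, and anything else becomes a VF-wide vector of the scalar. A sketch with a toy type representation (not llvm::Type):

#include <cassert>
#include <string>

// Toy type: a scalar name plus an element count, where 1 means scalar.
struct Ty {
  std::string Scalar;
  unsigned Elts = 1;
};

Ty toVectorTy(const Ty &Scalar, unsigned VF) {
  assert(Scalar.Elts == 1 && "expected a scalar input type");
  if (Scalar.Scalar == "void" || VF == 1)
    return Scalar;
  return {Scalar.Scalar, VF}; // e.g. ("i32", 4) models <4 x i32>
}

int main() {
  assert(toVectorTy({"i32"}, 4).Elts == 4);
  assert(toVectorTy({"void"}, 4).Elts == 1);
  return 0;
}
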
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -258,13 +245,13 @@ public:
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned VecWidth,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr) {}
+ Legal(nullptr), AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -278,6 +265,11 @@ public:
updateAnalysis();
}
+ // Return true if any runtime check was added.
+ bool IsSafetyChecksAdded() {
+ return AddedSafetyChecks;
+ }
+
virtual ~InnerLoopVectorizer() {}
protected:
@@ -288,19 +280,12 @@ protected:
/// originated from one scalar instruction.
typedef SmallVector<Value*, 2> VectorParts;
- // When we if-convert we need create edge masks. We have to cache values so
- // that we don't end up with exponential recursion/IR.
+ // When we if-convert we need to create edge masks. We have to cache values
+ // so that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// \brief Add code that checks at runtime if the accessed arrays overlap.
- ///
- /// Returns a pair of instructions where the first element is the first
- /// instruction generated in possibly a sequence of instructions and the
- /// second value is the final comparator value or NULL if no check is needed.
- std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
-
- /// \brief Add checks for strides that where assumed to be 1.
+ /// \brief Add checks for strides that were assumed to be 1.
///
/// Returns the last check instruction and the first check instruction in the
/// pair as (first, last).
@@ -355,10 +340,9 @@ protected:
/// element.
virtual Value *getBroadcastInstrs(Value *V);
- /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
- /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
- /// The sequence starts at StartIndex.
- virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
+ /// to each vector element of Val. The sequence starts at StartIndex.
+ virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -420,10 +404,10 @@ protected:
DominatorTree *DT;
/// Alias Analysis.
AliasAnalysis *AA;
- /// Data Layout.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
@@ -465,21 +449,24 @@ protected:
EdgeMaskCache MaskCache;
LoopVectorizationLegality *Legal;
+
+ // Record whether a runtime check was added.
+ bool AddedSafetyChecks;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
- InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
- Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override;
+ Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
Value *reverseVector(Value *Vec) override;
};
@@ -517,9 +504,8 @@ static std::string getDebugLocString(const Loop *L) {
std::string Result;
if (L) {
raw_string_ostream OS(Result);
- const DebugLoc LoopDbgLoc = L->getStartLoc();
- if (!LoopDbgLoc.isUnknown())
- LoopDbgLoc.print(L->getHeader()->getContext(), OS);
+ if (const DebugLoc LoopDbgLoc = L->getStartLoc())
+ LoopDbgLoc.print(OS);
else
// Just print the module name.
OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
@@ -574,135 +560,84 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- unsigned NumLoads;
- unsigned NumStores;
- unsigned NumPredStores;
-
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- AliasAnalysis *AA, Function *F,
- const TargetTransformInfo *TTI)
- : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), AA(AA), TheFunction(F), TTI(TTI), Induction(nullptr),
- WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
- }
-
- /// This enum represents the kinds of reductions that we support.
- enum ReductionKind {
- RK_NoReduction, ///< Not a reduction.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ Function *F, const TargetTransformInfo *TTI,
+ LoopAccessAnalysis *LAA)
+ : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of inductions that we support.
enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = 1.
- IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
- IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
- IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
- };
-
- // This enum represents the kind of minmax reduction.
- enum MinMaxReductionKind {
- MRK_Invalid,
- MRK_UIntMin,
- MRK_UIntMax,
- MRK_SIntMin,
- MRK_SIntMax,
- MRK_FloatMin,
- MRK_FloatMax
- };
-
- /// This struct holds information about reduction variables.
- struct ReductionDescriptor {
- ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr),
- Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
-
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
- MinMaxReductionKind MK)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
-
- // The starting value of the reduction.
- // It does not have to be zero!
- TrackingVH<Value> StartValue;
- // The instruction who's value is used outside the loop.
- Instruction *LoopExitInstr;
- // The kind of the reduction.
- ReductionKind Kind;
- // If this a min/max reduction the kind of reduction.
- MinMaxReductionKind MinMaxKind;
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = C.
+ IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
};
- /// This POD struct holds information about a potential reduction operation.
- struct ReductionInstDesc {
- ReductionInstDesc(bool IsRedux, Instruction *I) :
- IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
-
- ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
- IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
-
- // Is this instruction a reduction candidate.
- bool IsReduction;
- // The last instruction in a min/max pattern (select of the select(icmp())
- // pattern), or the current reduction instruction otherwise.
- Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxReductionKind MinMaxKind;
- };
-
- /// This struct holds information about the memory runtime legality
- /// check that a group of pointers do not overlap.
- struct RuntimePointerCheck {
- RuntimePointerCheck() : Need(false) {}
-
- /// Reset the state of the pointer runtime information.
- void reset() {
- Need = false;
- Pointers.clear();
- Starts.clear();
- Ends.clear();
- IsWritePtr.clear();
- DependencySetId.clear();
- AliasSetId.clear();
+ /// A struct for saving information about induction variables.
+ struct InductionInfo {
+ InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
+ : StartValue(Start), IK(K), StepValue(Step) {
+ assert(IK != IK_NoInduction && "Not an induction");
+ assert(StartValue && "StartValue is null");
+ assert(StepValue && !StepValue->isZero() && "StepValue is zero");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+ assert(StepValue->getType()->isIntegerTy() &&
+ "StepValue is not an integer");
+ }
+ InductionInfo()
+ : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+
+ /// Get the consecutive direction. Returns:
+ /// 0 - unknown or non-consecutive.
+ /// 1 - consecutive and increasing.
+ /// -1 - consecutive and decreasing.
+ int getConsecutiveDirection() const {
+ if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
+ return StepValue->getSExtValue();
+ return 0;
}
- /// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
-
- /// This flag indicates if we need to add the runtime check.
- bool Need;
- /// Holds the pointers that we need to check.
- SmallVector<TrackingVH<Value>, 2> Pointers;
- /// Holds the pointer value at the beginning of the loop.
- SmallVector<const SCEV*, 2> Starts;
- /// Holds the pointer value at the end of the loop.
- SmallVector<const SCEV*, 2> Ends;
- /// Holds the information if this pointer is used for writing to memory.
- SmallVector<bool, 2> IsWritePtr;
- /// Holds the id of the set of pointers that could be dependent because of a
- /// shared underlying object.
- SmallVector<unsigned, 2> DependencySetId;
- /// Holds the id of the disjoint alias set to which this pointer belongs.
- SmallVector<unsigned, 2> AliasSetId;
- };
+ /// Compute the transformed value of Index at offset StartValue using step
+ /// StepValue.
+ /// For integer induction, returns StartValue + Index * StepValue.
+ /// For pointer induction, returns StartValue[Index * StepValue].
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
+ /// flags, which can be found from the original scalar operations.
+ Value *transform(IRBuilder<> &B, Value *Index) const {
+ switch (IK) {
+ case IK_IntInduction:
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (StepValue->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateAdd(StartValue, Index);
+
+ case IK_PtrInduction:
+ if (StepValue->isMinusOne())
+ Index = B.CreateNeg(Index);
+ else if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateGEP(nullptr, StartValue, Index);
+
+ case IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+ }
- /// A struct for saving information about induction variables.
- struct InductionInfo {
- InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
- InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
+ /// Step value.
+ ConstantInt *StepValue;
};
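
A worked instance of transform() above: for an integer induction, value(Index) = StartValue + Index * StepValue; for a pointer induction the index is scaled by StepValue before being fed to the GEP. A scalar check of both formulas (plain C++, with the GEP modeled as pointer arithmetic):

#include <cassert>
#include <cstdint>

// Integer case: value(Index) = StartValue + Index * StepValue.
int64_t intInduction(int64_t Start, int64_t Step, int64_t Index) {
  return Start + Index * Step;
}

// Pointer case: StartValue[Index * StepValue], i.e. a scaled GEP.
const int *ptrInduction(const int *Start, int64_t Step, int64_t Index) {
  return Start + Index * Step;
}

int main() {
  assert(intInduction(100, -3, 4) == 88); // 100 + 4 * (-3)
  static const int A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  assert(*ptrInduction(A + 7, -2, 3) == 1); // A[7 - 6]
  return 0;
}
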
/// ReductionList contains the reduction descriptors for all
@@ -754,13 +689,15 @@ public:
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
- RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+ const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
+ return LAI->getRuntimePointerCheck();
+ }
- /// This function returns the identity element (or neutral element) for
- /// the operation K.
- static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
+ const LoopAccessInfo *getLAI() const {
+ return LAI;
+ }
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+ unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
bool mustCheckStrides() { return !StrideSet.empty(); }
@@ -784,6 +721,15 @@ public:
bool isMaskRequired(const Instruction* I) {
return (MaskedOp.count(I) != 0);
}
+ unsigned getNumStores() const {
+ return LAI->getNumStores();
+ }
+ unsigned getNumLoads() const {
+ return LAI->getNumLoads();
+ }
+ unsigned getNumPredStores() const {
+ return NumPredStores;
+ }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -808,23 +754,9 @@ private:
/// and we know that we can read from them without segfault.
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
- /// Returns True, if 'Phi' is the kind of reduction variable for type
- /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
- bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
- /// Returns a struct describing if the instruction 'I' can be a reduction
- /// variable of type 'Kind'. If the reduction is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
- ReductionInstDesc &Desc);
- /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
- /// pattern corresponding to a min(X, Y) or max(X, Y).
- static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev);
- /// Returns the induction kind of Phi. This function may return NoInduction
- /// if the PHI is not an induction variable.
- InductionKind isInductionVariable(PHINode *Phi);
+ /// Returns the induction kind of Phi and record the step. This function may
+ /// return NoInduction if the PHI is not an induction variable.
+ InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
/// \brief Collect memory accesses with loop invariant strides.
///
@@ -833,31 +765,32 @@ private:
void collectStridedAccess(Value *LoadOrStoreInst);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(Report &Message) {
- DebugLoc DL = TheLoop->getStartLoc();
- if (Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
- *TheFunction, DL, Message.str());
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
+ unsigned NumPredStores;
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
- /// DataLayout analysis.
- const DataLayout *DL;
- /// Dominators.
- DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
- /// Alias analysis.
- AliasAnalysis *AA;
/// Parent function
Function *TheFunction;
/// Target Transform Info
const TargetTransformInfo *TTI;
+ /// Dominator Tree.
+ DominatorTree *DT;
+ // LoopAccess analysis.
+ LoopAccessAnalysis *LAA;
+ // And the loop-accesses info corresponding to this loop. This pointer is
+ // null until canVectorizeMemory sets it up.
+ const LoopAccessInfo *LAI;
// --- vectorization state --- //
@@ -879,17 +812,13 @@ private:
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
- /// We need to check that all of the pointers in this list are disjoint
- /// at runtime.
- RuntimePointerCheck PtrRtCheck;
+
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
- unsigned MaxSafeDepDistBytes;
-
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
-
+
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
SmallPtrSet<const Instruction*, 8> MaskedOp;
@@ -907,10 +836,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- AssumptionCache *AC, const Function *F,
- const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const Function *F, const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
TheFunction(F), Hints(Hints) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
}
@@ -963,23 +891,16 @@ private:
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
- /// A helper function for converting Scalar types to vector types.
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(Report &Message) {
- DebugLoc DL = TheLoop->getStartLoc();
- if (Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
- *TheFunction, DL, Message.str());
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
/// Values used only by @llvm.assume calls.
@@ -995,8 +916,6 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
- /// Target data layout information.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
const Function *TheFunction;
@@ -1032,7 +951,7 @@ class LoopVectorizeHints {
bool validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
case HK_UNROLL:
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
case HK_FORCE:
@@ -1060,7 +979,8 @@ public:
};
LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
- : Width("vectorize.width", VectorizationFactor, HK_WIDTH),
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor,
+ HK_WIDTH),
Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
TheLoop(L) {
@@ -1068,8 +988,8 @@ public:
getHintsFromMetadata();
// force-vector-interleave overrides DisableInterleaving.
- if (VectorizationInterleave.getNumOccurrences() > 0)
- Interleave.Value = VectorizationInterleave;
+ if (VectorizerParams::isInterleaveForced())
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
@@ -1084,7 +1004,7 @@ public:
/// Dumps all the hint information.
std::string emitRemark() const {
- Report R;
+ VectorizationReport R;
if (Force.Value == LoopVectorizeHints::FK_Disabled)
R << "vectorization is explicitly disabled";
else {
@@ -1260,7 +1180,6 @@ struct LoopVectorize : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
@@ -1268,6 +1187,7 @@ struct LoopVectorize : public FunctionPass {
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
AssumptionCache *AC;
+ LoopAccessAnalysis *LAA;
bool DisableUnrolling;
bool AlwaysVectorize;
@@ -1275,15 +1195,15 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- LI = &getAnalysis<LoopInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BFI = &getAnalysis<BlockFrequencyInfo>();
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ LAA = &getAnalysis<LoopAccessAnalysis>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -1295,12 +1215,6 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (!DL) {
- DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
- << ": Missing data layout\n");
- return false;
- }
-
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -1320,6 +1234,40 @@ struct LoopVectorize : public FunctionPass {
return Changed;
}
+ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+ bool IsUnrollMetadata = false;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ // First find existing loop unrolling disable metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata =
+ S && S->getString().startswith("llvm.loop.unroll.disable");
+ }
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ if (!IsUnrollMetadata) {
+ // Add runtime unroll disable metadata.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(
+ MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ }
+ }
+
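
The shape AddRuntimeUnrollDisableMetaData builds is a loop-ID list whose operand 0 refers back to the node itself, followed by any pre-existing hints and finally the disable string. A hedged model of that self-reference with toy metadata types (not the LLVM MDNode API):

#include <string>
#include <variant>
#include <vector>

struct MDNodeT;
using MDOp = std::variant<MDNodeT *, std::string>;
struct MDNodeT { std::vector<MDOp> Ops; };

// Build a loop ID: reserve slot 0 for the self reference, copy existing
// hints, append the runtime-unroll-disable hint, then point slot 0 at the
// node itself (mirroring replaceOperandWith(0, NewLoopID)).
MDNodeT *makeLoopID(const std::vector<MDOp> &ExistingHints) {
  auto *LoopID = new MDNodeT;
  LoopID->Ops.emplace_back(static_cast<MDNodeT *>(nullptr));
  for (const MDOp &Hint : ExistingHints)
    LoopID->Ops.push_back(Hint);
  LoopID->Ops.emplace_back(std::string("llvm.loop.unroll.runtime.disable"));
  LoopID->Ops[0] = LoopID;
  return LoopID;
}
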
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -1394,7 +1342,7 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
+ LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1402,8 +1350,7 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F,
- &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1467,14 +1414,20 @@ struct LoopVectorize : public FunctionPass {
// We decided not to vectorize, but we may want to unroll.
- InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF);
LB.vectorize(&LVL);
++LoopsVectorized;
+ // Add metadata to disable runtime unrolling of the scalar loop when there
+ // is no runtime check for strides and memory: in that situation the scalar
+ // loop is rarely executed and not worth unrolling.
+ if (!LB.IsSafetyChecksAdded())
+ AddRuntimeUnrollDisableMetaData(L);
+
// Report the vectorization decision.
emitOptimizationRemark(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1495,11 +1448,12 @@ struct LoopVectorize : public FunctionPass {
AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AliasAnalysis>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AliasAnalysis>();
}
@@ -1513,65 +1467,6 @@ struct LoopVectorize : public FunctionPass {
// LoopVectorizationCostModel.
//===----------------------------------------------------------------------===//
-static Value *stripIntegerCast(Value *V) {
- if (CastInst *CI = dyn_cast<CastInst>(V))
- if (CI->getOperand(0)->getType()->isIntegerTy())
- return CI->getOperand(0);
- return V;
-}
-
-///\brief Replaces the symbolic stride in a pointer SCEV expression by one.
-///
-/// If \p OrigPtr is not null, use it to look up the stride value instead of
-/// \p Ptr.
-static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
- ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = nullptr) {
-
- const SCEV *OrigSCEV = SE->getSCEV(Ptr);
-
- // If there is an entry in the map return the SCEV of the pointer with the
- // symbolic stride replaced by one.
- ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
- if (SI != PtrToStride.end()) {
- Value *StrideVal = SI->second;
-
- // Strip casts.
- StrideVal = stripIntegerCast(StrideVal);
-
- // Replace symbolic stride by one.
- Value *One = ConstantInt::get(StrideVal->getType(), 1);
- ValueToValueMap RewriteMap;
- RewriteMap[StrideVal] = One;
-
- const SCEV *ByOne =
- SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
- DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
- << "\n");
- return ByOne;
- }
-
- // Otherwise, just return the SCEV of the original pointer.
- return SE->getSCEV(Ptr);
-}
-
-void LoopVectorizationLegality::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, ValueToValueMap &Strides) {
- // Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
- assert(AR && "Invalid addrec expression");
- const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
- const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
-}
-
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
@@ -1591,11 +1486,13 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
- bool Negate) {
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
+ Value *Step) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer");
+ assert(Step->getType() == Val->getType()->getScalarType() &&
+ "Step has wrong type");
// Create the types.
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
@@ -1603,24 +1500,27 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
SmallVector<Constant*, 8> Indices;
// Create a vector of consecutive numbers from zero to VF.
- for (int i = 0; i < VLen; ++i) {
- int64_t Idx = Negate ? (-i) : i;
- Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
- }
+ for (int i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
- return Builder.CreateAdd(Val, Cv, "induction");
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ assert(Step->getType() == Val->getType() && "Invalid step vec");
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
+ Step = Builder.CreateMul(Cv, Step);
+ return Builder.CreateAdd(Val, Step, "induction");
}
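
Concretely, after the splat-and-multiply above, lane i of the result is Val[i] + (StartIdx + i) * Step, with the StartIdx offset folded into the constant index vector. A scalar model with one worked case:

#include <cassert>
#include <cstddef>
#include <vector>

// Scalar model of getStepVector: lane i = Val[i] + (StartIdx + i) * Step.
std::vector<int> stepVector(const std::vector<int> &Val, int StartIdx,
                            int Step) {
  std::vector<int> R(Val.size());
  for (std::size_t i = 0; i < Val.size(); ++i)
    R[i] = Val[i] + (StartIdx + static_cast<int>(i)) * Step;
  return R;
}

int main() {
  // A splat of 10 with StartIdx 0 and Step -2 yields (10, 8, 6, 4).
  assert((stepVector({10, 10, 10, 10}, 0, -2) ==
          std::vector<int>{10, 8, 6, 4}));
  return 0;
}
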
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(const DataLayout *DL,
- const GetElementPtrInst *Gep) {
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL->getTypeAllocSize(
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
@@ -1631,7 +1531,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL,
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
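
In isolation, the peeling loop does this: starting from the last GEP index, drop indices that are constant zero and whose indexed type has the same allocation size as the GEP result, since they cannot affect the stride. A standalone sketch of that walk (illustrative parallel arrays standing in for gep_type_iterator):

#include <cstddef>
#include <vector>

// Indices[i] holds the i-th GEP index value (only zero-ness matters here);
// AllocSizes[i] holds the allocation size of the type indexed at step i.
std::size_t inductionOperand(const std::vector<long> &Indices,
                             const std::vector<unsigned> &AllocSizes,
                             unsigned GEPAllocSize) {
  std::size_t Last = Indices.size() - 1;
  while (Last > 0 && Indices[Last] == 0 && AllocSizes[Last] == GEPAllocSize)
    --Last;                 // a zero index into a same-sized type is inert
  return Last;
}
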
@@ -1649,10 +1549,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
InductionInfo II = Inductions[Phi];
- if (IK_PtrInduction == II.IK)
- return 1;
- else if (IK_ReversePtrInduction == II.IK)
- return -1;
+ return II.getConsecutiveDirection();
}
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
@@ -1677,13 +1574,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
InductionInfo II = Inductions[Phi];
- if (IK_PtrInduction == II.IK)
- return 1;
- else if (IK_ReversePtrInduction == II.IK)
- return -1;
+ return II.getConsecutiveDirection();
}
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -1730,7 +1624,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
}
bool LoopVectorizationLegality::isUniform(Value *V) {
- return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+ return LAI->isUniform(V);
}
InnerLoopVectorizer::VectorParts&
@@ -1776,11 +1670,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
+ const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarDataTy);
+ Alignment = DL.getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF;
if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
!Legal->isMaskRequired(SI))
@@ -1821,7 +1716,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -1864,7 +1759,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If we store to reverse consecutive memory locations then we need
@@ -1872,8 +1768,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -1896,13 +1792,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If the address is consecutive but reversed, then the
// wide load needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -1992,7 +1889,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(CondBlock, *LI);
// Update Builder with newly created basic block.
Builder.SetInsertPoint(InsertPt);
}
@@ -2021,7 +1918,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
if (IfPredicateStore) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -2078,102 +1975,6 @@ InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
return std::make_pair(FirstInst, TheCheck);
}
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
- LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
- Legal->getRuntimePointerCheck();
-
- Instruction *tnullptr = nullptr;
- if (!PtrRtCheck->Need)
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
- unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
-
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, "induction");
- Instruction *FirstInst = nullptr;
-
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck->Pointers[i];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, OrigLoop)) {
- DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
- Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
-
- IRBuilder<> ChkBuilder(Loc);
- // Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- // No need to check if two readonly pointers intersect.
- if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
- continue;
-
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
- continue;
-
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
- }
- }
-
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- FirstInst = getFirstInst(FirstInst, Check, Loc);
- return std::make_pair(FirstInst, Check);
-}
-
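// A minimal C++ sketch (not LLVM API; names are illustrative) of the
// interval-overlap predicate that addRuntimeCheck emitted per pointer pair:
// two half-open byte ranges conflict iff each one starts no later than the
// other one ends, mirroring the "bound0"/"bound1" ULE compares ANDed into
// "found.conflict".
static bool rangesMayConflict(const char *Start0, const char *End0,
                              const char *Start1, const char *End1) {
  return Start0 <= End1 && Start1 <= End0;
}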
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
@@ -2238,9 +2039,11 @@ void InnerLoopVectorizer::createEmptyLoop() {
ExitCount = SE->getAddExpr(BackedgeTakeCount,
SE->getConstant(BackedgeTakeCount->getType(), 1));
+ const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
+
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
+ SCEVExpander Exp(*SE, DL, "induction");
// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
@@ -2299,13 +2102,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
- ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(ScalarPH, *LI);
+ ParentLoop->addBasicBlockToLoop(VectorPH, *LI);
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI);
} else {
LI->addTopLevelLoop(Lp);
}
- Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+ Lp->addBasicBlockToLoop(VecBody, *LI);
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
@@ -2360,7 +2163,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
@@ -2376,11 +2179,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
std::tie(FirstCheckInst, StrideCheck) =
addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
@@ -2398,13 +2202,14 @@ void InnerLoopVectorizer::createEmptyLoop() {
// faster.
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- addRuntimeCheck(LastBypassBlock->getTerminator());
+ Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
if (MemRuntimeCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the memory check.
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
@@ -2495,33 +2300,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
- EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
- break;
- }
- case LoopVectorizationLegality::IK_ReverseIntInduction: {
- // Convert the CountRoundDown variable to the PHI size.
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StartValue->getType(),
- "cast.crd");
- // Handle reverse integer induction counter.
- EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
+ EndValue = II.transform(BypassBuilder, CRD);
+ EndValue->setName("ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
- // For pointer induction variables, calculate the offset using
- // the end index.
- EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
- "ptr.ind.end");
- break;
- }
- case LoopVectorizationLegality::IK_ReversePtrInduction: {
- // The value at the end of the loop for the reverse pointer is calculated
- // by creating a GEP with a negative index starting from the start value.
- Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
- Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
- "rev.ind.end");
- EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
- "rev.ptr.ind.end");
+ EndValue = II.transform(BypassBuilder, CountRoundDown);
+ EndValue->setName("ptr.ind.end");
break;
}
}// end of case
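// Note: II.transform(B, Index) is assumed here to materialize
// StartValue + Index * StepValue (an add/sub for integer inductions, a GEP
// for pointer inductions), which is why the removed reverse-induction cases
// fold into the two generic ones via a negative step value.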
@@ -2604,99 +2389,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
Hints.setAlreadyVectorized();
}
-/// This function returns the identity element (or neutral element) for
-/// the operation K.
-Constant*
-LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
- switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
- // Adding, XORing, or ORing zero to a number does not change it.
- return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
- // Multiplying a number by 1 does not change it.
- return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
- // AND-ing a number with an all-1 value does not change it.
- return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
- // Multiplying a number by 1 does not change it.
- return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
- // Adding zero to a number does not change it.
- return ConstantFP::get(Tp, 0.0L);
- default:
- llvm_unreachable("Unknown reduction kind");
- }
-}
-
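// A small self-contained illustration of the identities listed above:
// folding the identity element into a reduction leaves any value unchanged.
#include <cassert>
static void reductionIdentityExamples() {
  int X = 42;
  assert((X + 0) == X);  // add / or / xor identity: 0
  assert((X * 1) == X);  // mul identity: 1
  assert((X & -1) == X); // and identity: all ones
}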
-/// This function translates the reduction kind to an LLVM binary operator.
-static unsigned
-getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
- switch (Kind) {
- case LoopVectorizationLegality::RK_IntegerAdd:
- return Instruction::Add;
- case LoopVectorizationLegality::RK_IntegerMult:
- return Instruction::Mul;
- case LoopVectorizationLegality::RK_IntegerOr:
- return Instruction::Or;
- case LoopVectorizationLegality::RK_IntegerAnd:
- return Instruction::And;
- case LoopVectorizationLegality::RK_IntegerXor:
- return Instruction::Xor;
- case LoopVectorizationLegality::RK_FloatMult:
- return Instruction::FMul;
- case LoopVectorizationLegality::RK_FloatAdd:
- return Instruction::FAdd;
- case LoopVectorizationLegality::RK_IntegerMinMax:
- return Instruction::ICmp;
- case LoopVectorizationLegality::RK_FloatMinMax:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unknown reduction operation");
- }
-}
-
-Value *createMinMaxOp(IRBuilder<> &Builder,
- LoopVectorizationLegality::MinMaxReductionKind RK,
- Value *Left,
- Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max reduction kind");
- case LoopVectorizationLegality::MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case LoopVectorizationLegality::MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case LoopVectorizationLegality::MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case LoopVectorizationLegality::MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case LoopVectorizationLegality::MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case LoopVectorizationLegality::MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- Value *Cmp;
- if (RK == LoopVectorizationLegality::MRK_FloatMin ||
- RK == LoopVectorizationLegality::MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
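// Plain C++ model of the removed cmp+select lowering: a signed-min step
// (MRK_SIntMin) compares with "<" (ICMP_SLT) and selects the smaller value,
// exactly one compare and one select per reduction step.
static int signedMinStep(int Left, int Right) {
  bool Cmp = Left < Right;   // "rdx.minmax.cmp"
  return Cmp ? Left : Right; // "rdx.minmax.select"
}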
namespace {
struct CSEDenseMapInfo {
static bool canHandle(Instruction *I) {
@@ -2772,6 +2464,95 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+/// Estimate the overhead of scalarizing a value. Insert and Extract are set if
+/// the result needs to be inserted and/or extracted from vectors.
+static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
+ const TargetTransformInfo &TTI) {
+ if (Ty->isVoidTy())
+ return 0;
+
+ assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
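// A toy model of the overhead computed above under assumed unit costs
// (real values come from TargetTransformInfo and differ per target):
// a <4 x float> that is both inserted and extracted costs 4 * (1 + 1) = 8.
static unsigned scalarizationOverheadModel(unsigned NumElts, bool Insert,
                                           bool Extract) {
  unsigned Cost = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    Cost += (Insert ? 1u : 0u) + (Extract ? 1u : 0u); // assumed cost 1 per op
  return Cost;
}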
+// Estimate cost of a call instruction CI if it were vectorized with factor VF.
+// Return the cost of the instruction, including scalarization overhead if it's
+// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
+// i.e. either a vector version isn't available or it is too expensive.
+static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI,
+ bool &NeedToScalarize) {
+ Function *F = CI->getCalledFunction();
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Type *ScalarRetTy = CI->getType();
+ SmallVector<Type *, 4> Tys, ScalarTys;
+ for (auto &ArgOp : CI->arg_operands())
+ ScalarTys.push_back(ArgOp->getType());
+
+ // Estimate cost of scalarized vector call. The source operands are assumed
+ // to be vectors, so we need to extract individual elements from them,
+ // execute VF scalar calls, and then gather the result into the vector return
+ // value.
+ unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
+ if (VF == 1)
+ return ScalarCallCost;
+
+ // Compute corresponding vector type for return value and arguments.
+ Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+ for (unsigned i = 0, ie = ScalarTys.size(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(ScalarTys[i], VF));
+
+ // Compute costs of unpacking argument values for the scalar calls and
+ // packing the return values to a vector.
+ unsigned ScalarizationCost =
+ getScalarizationOverhead(RetTy, true, false, TTI);
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i)
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true, TTI);
+
+ unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
+
+ // If we can't emit a vector call for this function, then the currently found
+ // cost is the cost we need to return.
+ NeedToScalarize = true;
+ if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin())
+ return Cost;
+
+ // If the corresponding vector cost is cheaper, return its cost.
+ unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
+ if (VectorCallCost < Cost) {
+ NeedToScalarize = false;
+ return VectorCallCost;
+ }
+ return Cost;
+}
+
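// Worked example of the decision above with hypothetical numbers (real costs
// come from TTI/TLI): VF = 4, scalar call cost 10, pack/unpack overhead 8
// gives a scalarized estimate of 10 * 4 + 8 = 48; a known vector variant
// costing 20 therefore wins and clears NeedToScalarize.
static unsigned vectorCallCostModel(bool HasVectorVariant,
                                    bool &NeedToScalarize) {
  const unsigned VF = 4, ScalarCallCost = 10, ScalarizationCost = 8;
  unsigned Cost = ScalarCallCost * VF + ScalarizationCost; // 48
  NeedToScalarize = true;
  const unsigned VectorCallCost = 20; // assumed TTI estimate
  if (HasVectorVariant && VectorCallCost < Cost) {
    NeedToScalarize = false;
    return VectorCallCost;
  }
  return Cost;
}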
+// Estimate cost of an intrinsic call instruction CI if it were vectorized with
+// factor VF. Return the cost of the instruction, including scalarization
+// overhead if it's needed.
+static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI) {
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Expected intrinsic call!");
+
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -2819,10 +2600,14 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
- LoopVectorizationLegality::ReductionDescriptor RdxDesc =
- (*Legal->getReductionVars())[RdxPhi];
+ ReductionDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
- setDebugLocFromInst(Builder, RdxDesc.StartValue);
+ ReductionDescriptor::ReductionKind RK = RdxDesc.getReductionKind();
+ TrackingVH<Value> ReductionStartValue = RdxDesc.getReductionStartValue();
+ Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
+ ReductionInstDesc::MinMaxReductionKind MinMaxKind =
+ RdxDesc.getMinMaxReductionKind();
+ setDebugLocFromInst(Builder, ReductionStartValue);
// We need to generate a reduction vector from the incoming scalar.
// To do so, we need to generate the 'identity' vector and override
@@ -2831,40 +2616,38 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ VectorParts &VectorExit = getVectorValue(LoopExitInst);
Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity value: zero for addition, OR and XOR;
// one for multiplication; -1 (all ones) for AND.
Value *Identity;
Value *VectorStart;
- if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
- RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ if (RK == ReductionDescriptor::RK_IntegerMinMax ||
+ RK == ReductionDescriptor::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
if (VF == 1) {
- VectorStart = Identity = RdxDesc.StartValue;
+ VectorStart = Identity = ReductionStartValue;
} else {
- VectorStart = Identity = Builder.CreateVectorSplat(VF,
- RdxDesc.StartValue,
- "minmax.ident");
+ VectorStart = Identity =
+ Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
}
} else {
// Handle other reduction kinds:
Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType());
+ ReductionDescriptor::getReductionIdentity(RK, VecTy->getScalarType());
if (VF == 1) {
Identity = Iden;
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
- VectorStart = RdxDesc.StartValue;
+ VectorStart = ReductionStartValue;
} else {
Identity = ConstantVector::getSplat(VF, Iden);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
- VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ VectorStart =
+ Builder.CreateInsertElement(Identity, ReductionStartValue, Zero);
}
}
@@ -2893,11 +2676,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
VectorParts RdxParts;
- setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
+ setDebugLocFromInst(Builder, LoopExitInst);
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
- VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
+ VectorParts &RdxExitVal = getVectorValue(LoopExitInst);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
@@ -2909,7 +2692,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
- unsigned Op = getReductionBinOp(RdxDesc.Kind);
+ unsigned Op = ReductionDescriptor::getReductionBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
@@ -2918,8 +2701,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
ReducedPartRdx, "bin.rdx"));
else
- ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
- ReducedPartRdx, RdxParts[part]);
+ ReducedPartRdx = ReductionDescriptor::createMinMaxOp(
+ Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
}
if (VF > 1) {
@@ -2950,7 +2733,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
TmpVec = addFastMathFlag(Builder.CreateBinOp(
(Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
else
- TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+ TmpVec = ReductionDescriptor::createMinMaxOp(Builder, MinMaxKind,
+ TmpVec, Shuf);
}
// The result is in the first element of the vector.
@@ -2962,7 +2746,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// block and the middle block.
PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
LoopScalarPreHeader->getTerminator());
- BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]);
+ BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]);
BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Now, we need to fix the users of the reduction variable
@@ -2980,7 +2764,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// We found our reduction value exit-PHI. Update it with the
// incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
// Add an edge coming from the bypass.
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
@@ -2995,7 +2779,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
- (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}// end of for each redux variable.
fixLCSSAPHIs();
@@ -3136,6 +2920,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P);
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
@@ -3153,80 +2939,42 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
"normalized.idx");
NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
- "offset.idx");
+ Broadcasted = II.transform(Builder, NormalizedIdx);
+ Broadcasted->setName("offset.idx");
}
Broadcasted = getBroadcastInstrs(Broadcasted);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
return;
}
- case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction:
- case LoopVectorizationLegality::IK_ReversePtrInduction:
- // Handle reverse integer and pointer inductions.
- Value *StartIdx = ExtendedIdx;
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
-
- // Handle the reverse integer induction variable case.
- if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
- IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
- Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
- "resize.norm.idx");
- Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
- "reverse.idx");
-
- // This is a new value so do not hoist it out.
- Value *Broadcasted = getBroadcastInstrs(ReverseInd);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding ... -3, -2, -1, 0.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
- true);
- return;
- }
-
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- // Is this a reverse induction ptr or a consecutive induction ptr.
- bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
- II.IK);
-
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx =
+ Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
- int EltIndex = (part) * (Reverse ? -1 : 1);
+ int EltIndex = part;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (Reverse)
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
- else
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
Entry[part] = SclrGep;
continue;
}
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ int EltIndex = i + part * VF;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (!Reverse)
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- else
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i),
"insert.gep");
@@ -3246,7 +2994,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions.
continue;
- case Instruction::PHI:{
+ case Instruction::PHI: {
// Vectorize PHINodes.
widenPHIInstruction(it, Entry, UF, VF, PV);
continue;
@@ -3367,8 +3115,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(OldInduction);
+ Constant *Step =
+ ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+ Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
propagateMetadata(Entry, it);
break;
}
@@ -3391,37 +3143,71 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
+
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Function *F = CI->getCalledFunction();
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- switch (ID) {
- case Intrinsic::assume:
- case Intrinsic::lifetime_end:
- case Intrinsic::lifetime_start:
+ if (ID &&
+ (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
scalarizeInstruction(it);
break;
- default:
- bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (HasScalarOpd && i == 1) {
- Args.push_back(CI->getArgOperand(i));
- continue;
- }
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
- }
- Type *Tys[] = {CI->getType()};
- if (VF > 1)
- Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ }
+ // The flag shows whether we use an intrinsic or a plain call for the
+ // vectorized version of the instruction.
+ // Is it more beneficial to use an intrinsic call than a library call?
+ bool NeedToScalarize;
+ unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ bool UseVectorIntrinsic =
+ ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ if (!UseVectorIntrinsic && NeedToScalarize) {
+ scalarizeInstruction(it);
+ break;
+ }
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ Value *Arg = CI->getArgOperand(i);
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
+ }
+ Args.push_back(Arg);
}
- propagateMetadata(Entry, it);
- break;
+ Function *VectorF;
+ if (UseVectorIntrinsic) {
+ // Use vector version of the intrinsic.
+ Type *TysForDecl[] = {CI->getType()};
+ if (VF > 1)
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+ } else {
+ // Use vector version of the library call.
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
+ assert(!VFnName.empty() && "Vector function name is empty.");
+ VectorF = M->getFunction(VFnName);
+ if (!VectorF) {
+ // Generate a declaration
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, false);
+ VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFnName, M);
+ VectorF->copyAttributesFrom(F);
+ }
+ }
+ assert(VectorF && "Can't create vector function.");
+ Entry[Part] = Builder.CreateCall(VectorF, Args);
}
+
+ propagateMetadata(Entry, it);
break;
}
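// Illustrative end-to-end effect of this case (all names hypothetical):
// with VF = 4, %r = call float @sinf(float %x) becomes either
//   %rv = call <4 x float> @llvm.sin.v4f32(<4 x float> %xv)
// when the intrinsic is cheaper, or a library variant supplied by TLI, e.g.
//   %rv = call <4 x float> @vsinf4(<4 x float> %xv)
// whichever of the two cost queries above ranks lower.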
@@ -3484,7 +3270,7 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
- emitAnalysis(Report() << "if-conversion is disabled");
+ emitAnalysis(VectorizationReport() << "if-conversion is disabled");
return false;
}
@@ -3517,7 +3303,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "loop contains a switch statement");
return false;
}
@@ -3525,12 +3311,12 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointes)) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "control flow cannot be substituted for a select");
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "control flow cannot be substituted for a select");
return false;
}
@@ -3545,27 +3331,30 @@ bool LoopVectorizationLegality::canVectorize() {
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
// We can only vectorize innermost loops.
- if (TheLoop->getSubLoopsVector().size()) {
- emitAnalysis(Report() << "loop is not the innermost loop");
+ if (!TheLoop->getSubLoopsVector().empty()) {
+ emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
@@ -3574,7 +3363,8 @@ bool LoopVectorizationLegality::canVectorize() {
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
@@ -3592,7 +3382,8 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(Report() << "could not determine number of loop iterations");
+ emitAnalysis(VectorizationReport() <<
+ "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3613,7 +3404,8 @@ bool LoopVectorizationLegality::canVectorize() {
collectLoopUniforms();
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
- (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
+ (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
+ "")
<<"!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
@@ -3667,10 +3459,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (F.hasFnAttribute("no-nans-fp-math"))
- HasFunNoNaNAttr = F.getAttributes().getAttribute(
- AttributeSet::FunctionIndex,
- "no-nans-fp-math").getValueAsString() == "true";
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -3686,7 +3478,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
@@ -3700,14 +3492,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// identified reduction value with an outside user.
if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
- emitAnalysis(Report(it) << "value could not be identified as "
- "an induction or reduction variable");
+ emitAnalysis(VectorizationReport(it) <<
+ "value could not be identified as "
+ "an induction or reduction variable");
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
@@ -3715,18 +3508,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+ ConstantInt *StepValue = nullptr;
// Check if this is an induction variable.
- InductionKind IK = isInductionVariable(Phi);
+ InductionKind IK = isInductionVariable(Phi, StepValue);
if (IK_NoInduction != IK) {
// Get the widest type.
if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
else
- WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
- if (IK == IK_IntInduction) {
+ if (IK == IK_IntInduction && StepValue->isOne()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient).
@@ -3735,69 +3529,44 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
- Inductions[Phi] = InductionInfo(StartValue, IK);
+ Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(Report(it) << "use of induction value outside of the "
- "loop is not handled by vectorizer");
+ emitAnalysis(VectorizationReport(it) <<
+ "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
}
continue;
}
- if (AddReductionVar(Phi, RK_IntegerAdd)) {
- DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerMult)) {
- DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerOr)) {
- DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerAnd)) {
- DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerXor)) {
- DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerMinMax)) {
- DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatMult)) {
- DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatAdd)) {
- DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatMinMax)) {
- DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<
- "\n");
+ if (ReductionDescriptor::isReductionPHI(Phi, TheLoop,
+ Reductions[Phi])) {
+ AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
continue;
}
- emitAnalysis(Report(it) << "value that could not be identified as "
- "reduction is used outside the loop");
+ emitAnalysis(VectorizationReport(it) <<
+ "value that could not be identified as "
+ "reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
- // We still don't handle functions. However, we can ignore dbg intrinsic
- // calls and we do handle certain intrinsic and libm functions.
+ // We handle calls that:
+ // * Are debug info intrinsics.
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
- emitAnalysis(Report(it) << "call instruction cannot be vectorized");
- DEBUG(dbgs() << "LV: Found a call site.\n");
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+ !(CI->getCalledFunction() && TLI &&
+ TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
+ emitAnalysis(VectorizationReport(it) <<
+ "call instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -3806,7 +3575,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
@@ -3817,7 +3586,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
@@ -3827,7 +3596,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(ST) <<
+ "store instruction cannot be vectorized");
return false;
}
if (EnableMemAccessVersioning)
@@ -3841,7 +3611,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(Report(it) << "value cannot be used outside the loop");
+ emitAnalysis(VectorizationReport(it) <<
+ "value cannot be used outside the loop");
return false;
}
@@ -3852,7 +3623,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
- emitAnalysis(Report()
+ emitAnalysis(VectorizationReport()
<< "loop induction variable could not be identified");
return false;
}
@@ -3863,13 +3634,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
///\brief Remove GEPs whose indices, except for the last one, are loop
/// invariant, and return the induction operand of the gep pointer.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
- unsigned InductionOperand = getGEPInductionOperand(DL, GEP);
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -3898,8 +3668,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
///\brief Get the stride of a pointer access in a loop.
/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
/// pointer to the Value, or null otherwise.
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
@@ -3912,7 +3681,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// The size of the pointer access.
int64_t PtrAccessSize = 1;
- Ptr = stripGetElementPtr(Ptr, SE, DL, Lp);
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
@@ -3931,7 +3700,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
- DL->getTypeAllocSize(PtrTy->getElementType());
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ DL.getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
@@ -3983,7 +3753,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
if (!Stride)
return;
@@ -4012,7 +3782,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
if (I->getType()->isPointerTy() && isConsecutivePtr(I))
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
- while (Worklist.size()) {
+ while (!Worklist.empty()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
@@ -4030,1305 +3800,46 @@ void LoopVectorizationLegality::collectLoopUniforms() {
}
}
-namespace {
-/// \brief Analyses memory accesses in a loop.
-///
-/// Checks whether run time pointer checks are needed and builds sets for data
-/// dependence checking.
-class AccessAnalysis {
-public:
- /// \brief Read or write access location.
- typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
-
- /// \brief Set of potential dependent memory accesses.
- typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
-
- AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
- DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
-
- /// \brief Register a load and whether it is only read from.
- void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, false));
- if (IsReadOnly)
- ReadOnlyPtr.insert(Ptr);
- }
-
- /// \brief Register a store.
- void addStore(AliasAnalysis::Location &Loc) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, true));
- }
-
- /// \brief Check whether we can check the pointers at runtime for
- /// non-intersection.
- bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop, ValueToValueMap &Strides,
- bool ShouldCheckStride = false);
-
- /// \brief Goes over all memory accesses, checks whether a RT check is needed
- /// and builds sets of dependent accesses.
- void buildDependenceSets() {
- processMemAccesses();
- }
-
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
- bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
- void resetDepChecks() { CheckDeps.clear(); }
-
- MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
-
-private:
- typedef SetVector<MemAccessInfo> PtrAccessSet;
-
- /// \brief Go over all memory accesses and check whether runtime pointer
- /// checks are needed and build sets of dependency check candidates.
- void processMemAccesses();
-
- /// Set of all accesses.
- PtrAccessSet Accesses;
-
- /// Set of accesses that need a further dependence check.
- MemAccessInfoSet CheckDeps;
-
- /// Set of pointers that are read only.
- SmallPtrSet<Value*, 16> ReadOnlyPtr;
-
- const DataLayout *DL;
-
- /// An alias set tracker to partition the access set by underlying object and
- //intrinsic property (such as TBAA metadata).
- AliasSetTracker AST;
-
- /// Sets of potentially dependent accesses - members of one set share an
- /// underlying pointer. The set "CheckDeps" identifies which sets really need a
- /// dependence check.
- DepCandidates &DepCands;
-
- bool IsRTCheckNeeded;
-};
-
-} // end anonymous namespace
-
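// Minimal usage sketch of the MemAccessInfo encoding above: PointerIntPair
// packs the read/write flag into the pointer's spare low bit, so a load and
// a store to the same address remain distinct set members.
#include "llvm/ADT/PointerIntPair.h"
static bool sameAccess(llvm::Value *Ptr) {
  llvm::PointerIntPair<llvm::Value *, 1, bool> Read(Ptr, false);
  llvm::PointerIntPair<llvm::Value *, 1, bool> Write(Ptr, true);
  return Read == Write; // false: same pointer, different access kind
}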
-/// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides,
- Value *Ptr) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
- if (!AR)
- return false;
-
- return AR->isAffine();
-}
-
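// SCEV-free sketch of what "computable bounds" buys us: an affine access
// &Base[Offset + Step * i] over i in [0, N) has a closed-form byte range
// (assuming Step >= 0 here; the real code handles either sign via SCEV).
struct AffineRange {
  const char *Base;
  long Offset, Step;
  const char *start() const { return Base + Offset; }
  const char *end(long N) const { return Base + Offset + Step * N; }
};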
-/// \brief Check the stride of the pointer and ensure that it does not wrap in
-/// the address space.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, ValueToValueMap &StridesMap);
-
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
- ValueToValueMap &StridesMap, bool ShouldCheckStride) {
- // Find pointers with computable bounds. We are going to use this information
- // to place a runtime bound check.
- bool CanDoRT = true;
-
- bool IsDepCheckNeeded = isDependencyCheckNeeded();
- NumComparisons = 0;
-
- // We assign a consecutive id to accesses from different alias sets.
- // Accesses between different groups don't need to be checked.
- unsigned ASId = 1;
- for (auto &AS : AST) {
- unsigned NumReadPtrChecks = 0;
- unsigned NumWritePtrChecks = 0;
-
- // We assign consecutive ids to accesses from different dependence sets.
- // Accesses within the same set don't need a runtime check.
- unsigned RunningDepId = 1;
- DenseMap<Value *, unsigned> DepSetId;
-
- for (auto A : AS) {
- Value *Ptr = A.getValue();
- bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
- MemAccessInfo Access(Ptr, IsWrite);
-
- if (IsWrite)
- ++NumWritePtrChecks;
- else
- ++NumReadPtrChecks;
-
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
- // When we run after a failing dependency check we have to make sure we
- // don't have wrapping pointers.
- (!ShouldCheckStride ||
- isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
- // The id of the dependence set.
- unsigned DepId;
-
- if (IsDepCheckNeeded) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
-
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
-
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
- } else {
- CanDoRT = false;
- }
- }
-
- if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
- NumComparisons += 0; // Only one dependence set.
- else {
- NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
- NumWritePtrChecks - 1));
- }
-
- ++ASId;
- }
-
- // If the pointers that we would use for the bounds comparison have different
- // address spaces, assume the values aren't directly comparable, so we can't
- // use them for the runtime check. We also have to assume they could
- // overlap. In the future there should be metadata for whether address spaces
- // are disjoint.
- unsigned NumPointers = RtCheck.Pointers.size();
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i + 1; j < NumPointers; ++j) {
- // Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
- continue;
-
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
-
- unsigned ASi = PtrI->getType()->getPointerAddressSpace();
- unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
- if (ASi != ASj) {
- DEBUG(dbgs() << "LV: Runtime check would require comparison between"
- " different address spaces\n");
- return false;
- }
- }
- }
-
- return CanDoRT;
-}
-
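// Worked example of the comparison count above: one alias set with 2 write
// pointers and 3 read-only pointers (and more than one dependence set) adds
// 2 * (3 + 2 - 1) = 8 pairwise bound checks; a single dependence set adds 0.
static unsigned runtimeCheckComparisons(unsigned NumWrites, unsigned NumReads,
                                        bool SingleDepSet) {
  if (SingleDepSet)
    return 0;
  return NumWrites * (NumReads + NumWrites - 1);
}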
-void AccessAnalysis::processMemAccesses() {
- // We process the set twice: first we process read-write pointers, then we
- // process read-only pointers. This allows us to skip dependence tests for
- // read-only pointers.
-
- DEBUG(dbgs() << "LV: Processing memory accesses...\n");
- DEBUG(dbgs() << " AST: "; AST.dump());
- DEBUG(dbgs() << "LV: Accesses:\n");
- DEBUG({
- for (auto A : Accesses)
- dbgs() << "\t" << *A.getPointer() << " (" <<
- (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
- "read-only" : "read")) << ")\n";
- });
-
- // The AliasSetTracker has nicely partitioned our pointers by metadata
- // compatibility and potential for underlying-object overlap. As a result, we
- // only need to check for potential pointer dependencies within each alias
- // set.
- for (auto &AS : AST) {
- // Note that both the alias-set tracker and the alias sets themselves use
- // linked lists internally and so the iteration order here is deterministic
- // (matching the original instruction order within each set).
-
- bool SetHasWrite = false;
-
- // Map of pointers to last access encountered.
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
- UnderlyingObjToAccessMap ObjToLastAccess;
-
- // Set of accesses to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
-
- // Iterate over each alias set twice, once to process read/write pointers,
- // and then to process read-only pointers.
- for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
- bool UseDeferred = SetIteration > 0;
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
-
- for (auto AV : AS) {
- Value *Ptr = AV.getValue();
-
- // For a single memory access in AliasSetTracker, Accesses may contain
- // both a read and a write, and both need to be handled for CheckDeps.
- for (auto AC : S) {
- if (AC.getPointer() != Ptr)
- continue;
-
- bool IsWrite = AC.getInt();
-
- // If we're using the deferred access set, then it contains only
- // reads.
- bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
- if (UseDeferred && !IsReadOnlyPtr)
- continue;
- // Otherwise, the pointer must be in the PtrAccessSet, either as a
- // read or a write.
- assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
- S.count(MemAccessInfo(Ptr, false))) &&
- "Alias-set pointer not in the access set?");
-
- MemAccessInfo Access(Ptr, IsWrite);
- DepCands.insert(Access);
-
- // Memorize read-only pointers for later processing and skip them in
- // the first round (they need to be checked after we have seen all
- // write pointers). Note: we also mark pointers that are not
- // consecutive as "read-only" pointers (so that we check
- // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
- if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
- continue;
- }
-
- // If this is a write, check other reads and writes for conflicts. If
- // this is a read, only check other writes for conflicts (but only if
- // there is no other write to the ptr - this is an optimization to
- // catch "a[i] = a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
- CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
- }
-
- if (IsWrite)
- SetHasWrite = true;
-
- // Create sets of pointers connected by a shared alias set and
- // underlying object.
- typedef SmallVector<Value *, 16> ValueVector;
- ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
- for (Value *UnderlyingObj : TempObjects) {
- UnderlyingObjToAccessMap::iterator Prev =
- ObjToLastAccess.find(UnderlyingObj);
- if (Prev != ObjToLastAccess.end())
- DepCands.unionSets(Access, Prev->second);
-
- ObjToLastAccess[UnderlyingObj] = Access;
- }
- }
- }
- }
- }
-}
-
-namespace {
-/// \brief Checks memory dependences among accesses to the same underlying
-/// object to determine whether vectorization is legal or not (and at
-/// which vectorization factor).
-///
-/// This class works under the assumption that we already checked that memory
-/// locations with different underlying pointers are "must-not alias".
-/// We use the ScalarEvolution framework to symbolically evaluate pairs of
-/// access functions. Since we currently don't restructure the loop we can rely
-/// on the program order of memory accesses to determine their safety.
-/// At the moment we will only deem accesses as safe for:
-/// * A negative constant distance assuming program order.
-///
-/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
-/// a[i] = tmp; y = a[i];
-///
-/// The latter case is safe because later checks guarantee that there can't
-/// be a cycle through a phi node (that is, we check that "x" and "y" are not
-/// the same variable: a header phi can only be an induction or a reduction, a
-/// reduction can't have a memory sink, an induction can't have a memory
-/// source). This is important and must not be violated (or we have to
-/// resort to checking for cycles through memory).
-///
-/// * A positive constant distance assuming program order that is bigger
-/// than the biggest memory access.
-///
-/// tmp = a[i] OR b[i] = x
-/// a[i+2] = tmp y = b[i+2];
-///
-/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
-///
-/// * Zero distances and all accesses have the same size.
-///
-class MemoryDepChecker {
-public:
- typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
-
- MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
- : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
- ShouldRetryWithRuntimeCheck(false) {}
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a write access.
- void addAccess(StoreInst *SI) {
- Value *Ptr = SI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
- InstMap.push_back(SI);
- ++AccessIdx;
- }
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a read access.
- void addAccess(LoadInst *LI) {
- Value *Ptr = LI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
- InstMap.push_back(LI);
- ++AccessIdx;
- }
-
- /// \brief Check whether the dependencies between the accesses are safe.
- ///
- /// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides);
-
- /// \brief The maximum number of bytes of a vector register we can vectorize
- /// the accesses safely with.
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
-
- /// \brief In some cases, when the dependency check fails, we can still
- /// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
-
-private:
- ScalarEvolution *SE;
- const DataLayout *DL;
- const Loop *InnermostLoop;
-
- /// \brief Maps access locations (ptr, read/write) to program order.
- DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
-
- /// \brief Memory access instructions in program order.
- SmallVector<Instruction *, 16> InstMap;
-
- /// \brief The program order index to be used for the next instruction.
- unsigned AccessIdx;
-
- // We can access this many bytes in parallel safely.
- unsigned MaxSafeDepDistBytes;
-
- /// \brief If we see a non-constant dependence distance we can still try to
- /// vectorize this loop with runtime checks.
- bool ShouldRetryWithRuntimeCheck;
-
- /// \brief Check whether there is a plausible dependence between the two
- /// accesses.
- ///
- /// Access \p A must happen before \p B in program order. The two indices
- /// identify the index into the program order map.
- ///
- /// This function checks whether there is a plausible dependence (or the
- /// absence of such can't be proved) between the two accesses. If there is a
- /// plausible dependence whose distance is bigger than one element access,
- /// it records this distance in \p MaxSafeDepDistBytes (if this distance is
- /// smaller than any other distance encountered so far). If the dependence
- /// cannot be shown safe, this function returns true, signaling a possible
- /// dependence.
- bool isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- ValueToValueMap &Strides);
-
- /// \brief Check whether the data dependence could prevent store-load
- /// forwarding.
- bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
-};
-
-} // end anonymous namespace
-
-static bool isInBoundsGep(Value *Ptr) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- return GEP->isInBounds();
- return false;
-}
-
-/// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Unexpected non-ptr");
-
- // Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
- if (PtrTy->getElementType()->isAggregateType()) {
- DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
- "\n");
- return 0;
- }
-
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
-
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
- if (!AR) {
- DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- // The access function must stride over the innermost loop.
- if (Lp != AR->getLoop()) {
- DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
- *Ptr << " SCEV: " << *PtrScev << "\n");
- }
-
- // The address calculation must not wrap. Otherwise, a dependence could be
- // inverted.
- // An inbounds getelementptr that is an AddRec with a unit stride
- // cannot wrap by definition. The unit stride requirement is checked later.
- // A getelementptr without the inbounds attribute but with a unit stride
- // would have to access the pointer value "0", which is undefined behavior
- // in address space 0, so we can also vectorize this case.
- bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
- bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
- if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
- DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- // Check the step is constant.
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C) {
- DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
- " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
- const APInt &APStepVal = C->getValue()->getValue();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return 0;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- // Strided access.
- int64_t Stride = StepVal / Size;
- int64_t Rem = StepVal % Size;
- if (Rem)
- return 0;
-
- // If the SCEV could wrap but we have an inbounds gep with a unit stride we
- // know we can't "wrap around the address space". In case of address space
- // zero we know that this won't happen without triggering undefined behavior.
- if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
- Stride != 1 && Stride != -1)
- return 0;
-
- return Stride;
-}
-
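// A self-contained sketch of the stride arithmetic used above (the helper
// name and the plain-integer signature are invented for illustration):
// the stride is the constant SCEV step divided by the element size, and a
// non-zero remainder disqualifies the access.
static int64_t strideFromStep(int64_t StepBytes, int64_t ElemSizeBytes) {
  if (StepBytes % ElemSizeBytes) // e.g. a 6-byte step over i32 elements
    return 0;                    // not a strided access
  return StepBytes / ElemSizeBytes; // e.g. A[2*i] on i32: 8 / 4 == 2
}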
-bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
- unsigned TypeByteSize) {
- // If loads occur at a distance that is not a multiple of a feasible vector
- // factor, store-load forwarding does not take place.
- // Positive dependences might cause trouble because vectorizing them might
- // prevent store-load forwarding, making vectorized code run a lot slower.
- // a[i] = a[i-3] ^ a[i-8];
- // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2], and
- // hence on your typical architecture store-load forwarding does not take
- // place. Vectorizing in such cases does not make sense.
- // Store-load forwarding distance.
- const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
- // Maximum vector factor.
- unsigned MaxVFWithoutSLForwardIssues = MaxVectorWidth*TypeByteSize;
- if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
- MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
-
- for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
- vf *= 2) {
- if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
- MaxVFWithoutSLForwardIssues = (vf >>= 1);
- break;
- }
- }
-
- if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
- DEBUG(dbgs() << "LV: Distance " << Distance <<
- " that could cause a store-load forwarding conflict\n");
- return true;
- }
-
- if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
- MaxVFWithoutSLForwardIssues != MaxVectorWidth*TypeByteSize)
- MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
- return false;
-}
-
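// A hedged, self-contained replica of the search above (the helper name is
// invented, and MaxVectorWidth is assumed to be 64 here). For Distance = 12
// and TypeByteSize = 4 it returns true: vf = 8 gives 12 % 8 != 0 with
// 12 / 8 < 32, which shrinks the safe VF below two elements.
static bool forwardingConflict(unsigned Distance, unsigned TypeByteSize) {
  const unsigned Window = 8 * TypeByteSize; // store-load forwarding window
  unsigned MaxVF = 64 * TypeByteSize;       // assumed MaxVectorWidth == 64
  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVF; vf *= 2)
    if (Distance % vf && Distance / vf < Window) {
      MaxVF = vf / 2;                       // misaligned and too close
      break;
    }
  return MaxVF < 2 * TypeByteSize;
}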
-bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- ValueToValueMap &Strides) {
- assert(AIdx < BIdx && "Must pass arguments in program order");
-
- Value *APtr = A.getPointer();
- Value *BPtr = B.getPointer();
- bool AIsWrite = A.getInt();
- bool BIsWrite = B.getInt();
-
- // Two reads are independent.
- if (!AIsWrite && !BIsWrite)
- return false;
-
- // We cannot check pointers in different address spaces.
- if (APtr->getType()->getPointerAddressSpace() !=
- BPtr->getType()->getPointerAddressSpace())
- return true;
-
- const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
- const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
-
- int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
-
- const SCEV *Src = AScev;
- const SCEV *Sink = BScev;
-
- // If the induction step is negative, we have to invert the source and sink
- // of the dependence.
- if (StrideAPtr < 0) {
- //Src = BScev;
- //Sink = AScev;
- std::swap(APtr, BPtr);
- std::swap(Src, Sink);
- std::swap(AIsWrite, BIsWrite);
- std::swap(AIdx, BIdx);
- std::swap(StrideAPtr, StrideBPtr);
- }
-
- const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
-
- DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
- DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
-
- // Need consecutive accesses. We don't want to vectorize
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
- // the address space.
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
- DEBUG(dbgs() << "Non-consecutive pointer access\n");
- return true;
- }
-
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
- if (!C) {
- DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
- ShouldRetryWithRuntimeCheck = true;
- return true;
- }
-
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
- unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
-
- // Negative distances are not plausible dependencies.
- const APInt &Val = C->getValue()->getValue();
- if (Val.isNegative()) {
- bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
- if (IsTrueDataDependence &&
- (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
- ATy != BTy))
- return true;
-
- DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
- return false;
- }
-
- // Write to the same location with the same size.
- // Could be improved to assert type sizes are the same (i32 == float, etc).
- if (Val == 0) {
- if (ATy == BTy)
- return false;
- DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
- return true;
- }
-
- assert(Val.isStrictlyPositive() && "Expect a positive value");
-
- // Positive distance bigger than max vectorization factor.
- if (ATy != BTy) {
- DEBUG(dbgs() <<
- "LV: ReadWrite-Write positive dependency with different types\n");
- return false;
- }
-
- unsigned Distance = (unsigned) Val.getZExtValue();
-
- // Bail out early if passed-in parameters make vectorization not feasible.
- unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
- unsigned ForcedUnroll = VectorizationInterleave ? VectorizationInterleave : 1;
-
- // The distance must be bigger than the size needed for a vectorized version
- // of the operation, and the size of the vectorized operation must not be
- // bigger than the current maximum size.
- if (Distance < 2*TypeByteSize ||
- 2*TypeByteSize > MaxSafeDepDistBytes ||
- Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
- DEBUG(dbgs() << "LV: Failure because of Positive distance "
- << Val.getSExtValue() << '\n');
- return true;
- }
-
- MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
- Distance : MaxSafeDepDistBytes;
-
- bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
- if (IsTrueDataDependence &&
- couldPreventStoreLoadForward(Distance, TypeByteSize))
- return true;
-
- DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
- " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
-
- return false;
-}
-
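// A worked instance of the distance test above (ForcedFactor and
// ForcedUnroll taken as 1): for a loop body like
//   A[i + 3] = A[i] + 1;   // i32 elements
// the sink-minus-source distance is 12 bytes and TypeByteSize is 4, so
// none of the bail-out conditions fire (12 >= 2*4, 2*4 <= MaxSafeDepDistBytes,
// 12 >= 4). MaxSafeDepDistBytes drops to 12, i.e. a max VF of
// 12 / 4 == 3 elements.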
-bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps,
- ValueToValueMap &Strides) {
-
- MaxSafeDepDistBytes = -1U;
- while (!CheckDeps.empty()) {
- MemAccessInfo CurAccess = *CheckDeps.begin();
-
- // Get the relevant memory access set.
- EquivalenceClasses<MemAccessInfo>::iterator I =
- AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
-
- // Check accesses within this set.
- EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
- AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
-
- // Check every access pair.
- while (AI != AE) {
- CheckDeps.erase(*AI);
- EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
- while (OI != AE) {
- // Check every accessing instruction pair in program order.
- for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
- I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
- for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
- I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
- if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
- return false;
- if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
- return false;
- }
- ++OI;
- }
- AI++;
- }
- }
- return true;
-}
-
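// The walk above is quadratic within each alias set: for members
// {M0, M1, M2} it visits the pairs (M0,M1), (M0,M2), (M1,M2) and tests
// each instruction pair in both program orders. A hedged sketch of the
// same shape over plain indices:
//   for (size_t i = 0; i < N; ++i)
//     for (size_t j = i + 1; j < N; ++j)
//       if (dependent(i, j) || dependent(j, i))
//         return false; // one unsafe pair sinks the whole loop
//   return true;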
bool LoopVectorizationLegality::canVectorizeMemory() {
-
- typedef SmallVector<Value*, 16> ValueVector;
- typedef SmallPtrSet<Value*, 16> ValueSet;
-
- // Holds the Load and Store *instructions*.
- ValueVector Loads;
- ValueVector Stores;
-
- // Holds all the different accesses in the loop.
- unsigned NumReads = 0;
- unsigned NumReadWrites = 0;
-
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
-
- const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
- MemoryDepChecker DepChecker(SE, DL, TheLoop);
-
- // For each block.
- for (Loop::block_iterator bb = TheLoop->block_begin(),
- be = TheLoop->block_end(); bb != be; ++bb) {
-
- // Scan the BB and collect legal loads and stores.
- for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
- ++it) {
-
- // If this is a load, save it. If this instruction can read from memory
- // but is not a load, then we quit. Notice that we don't handle function
- // calls that read or write.
- if (it->mayReadFromMemory()) {
- // Many math library functions read the rounding mode. We will only
- // vectorize a loop if it contains known function calls that don't set
- // the flag. Therefore, it is safe to ignore this read from memory.
- CallInst *Call = dyn_cast<CallInst>(it);
- if (Call && getIntrinsicIDForCall(Call, TLI))
- continue;
-
- LoadInst *Ld = dyn_cast<LoadInst>(it);
- if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
- emitAnalysis(Report(Ld)
- << "read with atomic ordering or volatile read");
- DEBUG(dbgs() << "LV: Found a non-simple load.\n");
- return false;
- }
- NumLoads++;
- Loads.push_back(Ld);
- DepChecker.addAccess(Ld);
- continue;
- }
-
- // Save 'store' instructions. Abort if other instructions write to memory.
- if (it->mayWriteToMemory()) {
- StoreInst *St = dyn_cast<StoreInst>(it);
- if (!St) {
- emitAnalysis(Report(it) << "instruction cannot be vectorized");
- return false;
- }
- if (!St->isSimple() && !IsAnnotatedParallel) {
- emitAnalysis(Report(St)
- << "write with atomic ordering or volatile write");
- DEBUG(dbgs() << "LV: Found a non-simple store.\n");
- return false;
- }
- NumStores++;
- Stores.push_back(St);
- DepChecker.addAccess(St);
- }
- } // Next instr.
- } // Next block.
-
- // Now we have two lists that hold the loads and the stores.
- // Next, we find the pointers that they use.
-
- // Check if we see any stores. If there are no stores, then we don't
- // care if the pointers are *restrict*.
- if (!Stores.size()) {
- DEBUG(dbgs() << "LV: Found a read-only loop!\n");
- return true;
- }
-
- AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, AA, DependentAccesses);
-
- // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
- // multiple times on the same object. If the ptr is accessed twice, once
- // for read and once for write, it will only appear once (on the write
- // list). This is okay, since we are going to check for conflicts between
- // writes and between reads and writes, but not between reads and reads.
- ValueSet Seen;
-
- ValueVector::iterator I, IE;
- for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
- StoreInst *ST = cast<StoreInst>(*I);
- Value* Ptr = ST->getPointerOperand();
-
- if (isUniform(Ptr)) {
- emitAnalysis(
- Report(ST)
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
- return false;
- }
-
- // If we did *not* see this pointer before, insert it into the read-write
- // list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr).second) {
- ++NumReadWrites;
-
- AliasAnalysis::Location Loc = AA->getLocation(ST);
- // The TBAA metadata could have a control dependency on the predication
- // condition, so we cannot rely on it when determining whether or not we
- // need runtime pointer checks.
- if (blockNeedsPredication(ST->getParent()))
- Loc.AATags.TBAA = nullptr;
-
- Accesses.addStore(Loc);
- }
- }
-
- if (IsAnnotatedParallel) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
- for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
- LoadInst *LD = cast<LoadInst>(*I);
- Value* Ptr = LD->getPointerOperand();
- // If we did *not* see this pointer before, insert it into the
- // read list. If we *did* see it before, then it is already in
- // the read-write list. This allows us to vectorize expressions
- // such as A[i] += x, because the address of A[i] is a read-write
- // pointer. This only works if the index of A[i] is consecutive.
- // If the index is unknown (for example A[B[i]]), then we may
- // read a few words, modify, and write a few words, and some of the
- // words may be written to the same address.
- bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
- !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
- ++NumReads;
- IsReadOnlyPtr = true;
- }
-
- AliasAnalysis::Location Loc = AA->getLocation(LD);
- // The TBAA metadata could have a control dependency on the predication
- // condition, so we cannot rely on it when determining whether or not we
- // need runtime pointer checks.
- if (blockNeedsPredication(LD->getParent()))
- Loc.AATags.TBAA = nullptr;
-
- Accesses.addLoad(Loc, IsReadOnlyPtr);
- }
-
- // If we write (or read-write) to a single destination and there are no
- // other reads in this loop, then it is safe to vectorize.
- if (NumReadWrites == 1 && NumReads == 0) {
- DEBUG(dbgs() << "LV: Found a write-only loop!\n");
- return true;
- }
-
- // Build dependence sets and check whether we need a runtime pointer bounds
- // check.
- Accesses.buildDependenceSets();
- bool NeedRTCheck = Accesses.isRTCheckNeeded();
-
- // Find pointers with computable bounds. We are going to use this information
- // to place a runtime bound check.
- unsigned NumComparisons = 0;
- bool CanDoRT = false;
- if (NeedRTCheck)
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
- Strides);
-
- DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
- " pointer comparisons.\n");
-
- // If we only have one set of dependences to check pointers among, we don't
- // need a runtime check.
- if (NumComparisons == 0 && NeedRTCheck)
- NeedRTCheck = false;
-
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- PtrRtCheck.reset();
- CanDoRT = false;
- }
-
- if (CanDoRT) {
- DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
- }
-
- if (NeedRTCheck && !CanDoRT) {
- emitAnalysis(Report() << "cannot identify array bounds");
- DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
- return false;
- }
-
- PtrRtCheck.Need = NeedRTCheck;
-
- bool CanVecMem = true;
- if (Accesses.isDependencyCheckNeeded()) {
- DEBUG(dbgs() << "LV: Checking memory dependencies\n");
- CanVecMem = DepChecker.areDepsSafe(
- DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
- MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
-
- if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
- DEBUG(dbgs() << "LV: Retrying with memory checks\n");
- NeedRTCheck = true;
-
- // Clear the dependency checks. We assume they are not needed.
- Accesses.resetDepChecks();
-
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
-
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
- TheLoop, Strides, true);
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- if (!CanDoRT && NumComparisons > 0)
- emitAnalysis(Report()
- << "cannot check memory dependencies at runtime");
- else
- emitAnalysis(Report()
- << NumComparisons << " exceeds limit of "
- << RuntimeMemoryCheckThreshold
- << " dependent memory operations checked at runtime");
- DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
- return false;
- }
-
- CanVecMem = true;
- }
- }
-
- if (!CanVecMem)
- emitAnalysis(Report() << "unsafe dependent memory operations in loop");
-
- DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
- " need a runtime memory check.\n");
-
- return CanVecMem;
-}
-
-static bool hasMultipleUsesOf(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Insts) {
- unsigned NumUses = 0;
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
- if (Insts.count(dyn_cast<Instruction>(*Use)))
- ++NumUses;
- if (NumUses > 1)
- return true;
- }
-
- return false;
-}
-
-static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
- return false;
- return true;
-}
-
-bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
- ReductionKind Kind) {
- if (Phi->getNumIncomingValues() != 2)
+ LAI = &LAA->getInfo(TheLoop, Strides);
+ auto &OptionalReport = LAI->getReport();
+ if (OptionalReport)
+ emitAnalysis(VectorizationReport(*OptionalReport));
+ if (!LAI->canVectorizeMemory())
return false;
- // Reduction variables are only found in the loop header block.
- if (Phi->getParent() != TheLoop->getHeader())
+ if (LAI->hasStoreToLoopInvariantAddress()) {
+ emitAnalysis(
+ VectorizationReport()
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
-
- // Obtain the reduction start value from the value that comes from the loop
- // preheader.
- Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
-
- // ExitInstruction is the single value which is used outside the loop.
- // We only allow for a single reduction value to be used outside the loop.
- // This includes users of the reduction and the variables that form the
- // cycle which ends in the phi node.
- Instruction *ExitInstruction = nullptr;
- // Indicates that we found a reduction operation in our scan.
- bool FoundReduxOp = false;
-
- // We start with the PHI node and scan for all of the users of this
- // instruction. All users must be instructions that can be used as reduction
- // variables (such as ADD). We must have a single out-of-block user. The cycle
- // must include the original PHI.
- bool FoundStartPHI = false;
-
- // To recognize min/max patterns formed by an icmp/select sequence, we store
- // the number of instructions we saw from the recognized min/max pattern,
- // to make sure we only see exactly those two instructions.
- unsigned NumCmpSelectPatternInst = 0;
- ReductionInstDesc ReduxDesc(false, nullptr);
-
- SmallPtrSet<Instruction *, 8> VisitedInsts;
- SmallVector<Instruction *, 8> Worklist;
- Worklist.push_back(Phi);
- VisitedInsts.insert(Phi);
-
- // A value in the reduction can be used:
- // - By the reduction:
- // - Reduction operation:
- // - One use of reduction value (safe).
- // - Multiple use of reduction value (not safe).
- // - PHI:
- // - All uses of the PHI must be the reduction (safe).
- // - Otherwise, not safe.
- // - By one instruction outside of the loop (safe).
- // - By further instructions outside of the loop (not safe).
- // - By an instruction that is not part of the reduction (not safe).
- // This is either:
- // * An instruction type other than PHI or the reduction operation.
- // * A PHI in the header other than the initial PHI.
- while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
-
- // No Users.
- // If the instruction has no users then this is a broken chain and can't be
- // a reduction variable.
- if (Cur->use_empty())
- return false;
-
- bool IsAPhi = isa<PHINode>(Cur);
-
- // A header PHI use other than the original PHI.
- if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
- return false;
-
- // Reductions of instructions such as Div and Sub are only possible if the
- // LHS is the reduction variable.
- if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
- !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
- !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
- return false;
-
- // Any reduction instruction must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
- if (!ReduxDesc.IsReduction)
- return false;
-
- // A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
- hasMultipleUsesOf(Cur, VisitedInsts))
- return false;
-
- // All inputs to a PHI node must be a reduction value.
- if(IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
- return false;
-
- if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
- isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
- isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
-
- // Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi;
-
- // Process users of current instruction. Push non-PHI nodes after PHI nodes
- // onto the stack. This way we are going to have seen all inputs to PHI
- // nodes once we get to them.
- SmallVector<Instruction *, 8> NonPHIs;
- SmallVector<Instruction *, 8> PHIs;
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Check if we found the exit user.
- BasicBlock *Parent = UI->getParent();
- if (!TheLoop->contains(Parent)) {
- // Exit if you find multiple outside users or if the header phi node is
- // being used. In this case the user uses the value of the previous
- // iteration, and we would lose "VF-1" iterations of the
- // reduction operation if we vectorize.
- if (ExitInstruction != nullptr || Cur == Phi)
- return false;
-
- // The instruction used by an outside user must be the last instruction
- // before we feed back to the reduction phi. Otherwise, we lose VF-1
- // operations on the value.
- if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
- return false;
-
- ExitInstruction = Cur;
- continue;
- }
-
- // Process instructions only once (termination). Each reduction cycle
- // value must only be used once, except by phi nodes and min/max
- // reductions which are represented as a cmp followed by a select.
- ReductionInstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
- PHIs.push_back(UI);
- else
- NonPHIs.push_back(UI);
- } else if (!isa<PHINode>(UI) &&
- ((!isa<FCmpInst>(UI) &&
- !isa<ICmpInst>(UI) &&
- !isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).IsReduction))
- return false;
-
- // Remember that we completed the cycle.
- if (UI == Phi)
- FoundStartPHI = true;
- }
- Worklist.append(PHIs.begin(), PHIs.end());
- Worklist.append(NonPHIs.begin(), NonPHIs.end());
}
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
-
- if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ if (LAI->getNumRuntimePointerChecks() >
+ VectorizerParams::RuntimeMemoryCheckThreshold) {
+ emitAnalysis(VectorizationReport()
+ << LAI->getNumRuntimePointerChecks() << " exceeds limit of "
+ << VectorizerParams::RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
return false;
-
- // We found a reduction var if we have reached the original phi node and we
- // only have a single instruction with out-of-loop users.
-
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
-
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.MinMaxKind);
- Reductions[Phi] = RD;
- // We've ended the cycle. This is a reduction variable if we have an
- // outside user and it has a binary op.
-
- return true;
-}
-
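// A concrete instance of the pattern the scan above accepts:
//   int s = 0;
//   for (int i = 0; i < n; ++i)
//     s += a[i];
// The header PHI for s and the add form the cycle, the add is the single
// reduction operation (Kind == RK_IntegerAdd), and it doubles as the
// ExitInstruction because only its value is used after the loop.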
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
-LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
-
- // We must handle the select(cmp()) as a single instruction. Advance to the
- // select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(Select, Prev.MinMaxKind);
- }
-
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return ReductionInstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return ReductionInstDesc(false, I);
- if (!Cmp->hasOneUse())
- return ReductionInstDesc(false, I);
-
- Value *CmpLeft;
- Value *CmpRight;
-
- // Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMax);
-
- return ReductionInstDesc(false, I);
-}
-
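// The shape this matcher accepts, written as IR (a signed-max step; the
// value names are invented):
//   %cmp = icmp sgt i32 %x, %y
//   %sel = select i1 %cmp, i32 %x, i32 %y   ; m_SMax -> MRK_SIntMax
// The cmp must have the select as its sole user, which is why the code
// above advances from a cmp to its single select use before matching.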
-LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isReductionInstr(Instruction *I,
- ReductionKind Kind,
- ReductionInstDesc &Prev) {
- bool FP = I->getType()->isFloatingPointTy();
- bool FastMath = FP && I->hasUnsafeAlgebra();
- switch (I->getOpcode()) {
- default:
- return ReductionInstDesc(false, I);
- case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
- Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(I, Prev.MinMaxKind);
- case Instruction::Sub:
- case Instruction::Add:
- return ReductionInstDesc(Kind == RK_IntegerAdd, I);
- case Instruction::Mul:
- return ReductionInstDesc(Kind == RK_IntegerMult, I);
- case Instruction::And:
- return ReductionInstDesc(Kind == RK_IntegerAnd, I);
- case Instruction::Or:
- return ReductionInstDesc(Kind == RK_IntegerOr, I);
- case Instruction::Xor:
- return ReductionInstDesc(Kind == RK_IntegerXor, I);
- case Instruction::FMul:
- return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
- case Instruction::FSub:
- case Instruction::FAdd:
- return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
- case Instruction::FCmp:
- case Instruction::ICmp:
- case Instruction::Select:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
- return isMinMaxSelectCmpPattern(I, Prev);
}
+ return true;
}
LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
- Type *PhiTy = Phi->getType();
- // We only handle integer and pointer induction variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return IK_NoInduction;
-
- // Check that the PHI is consecutive.
- const SCEV *PhiScev = SE->getSCEV(Phi);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
- if (!AR) {
- DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return IK_NoInduction;
- }
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Integer inductions need to have a stride of one.
- if (PhiTy->isIntegerTy()) {
- if (Step->isOne())
- return IK_IntInduction;
- if (Step->isAllOnesValue())
- return IK_ReverseIntInduction;
- return IK_NoInduction;
- }
-
- // Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return IK_NoInduction;
-
- assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+ ConstantInt *&StepValue) {
+ if (!isInductionPHI(Phi, SE, StepValue))
return IK_NoInduction;
- uint64_t Size = DL->getTypeAllocSize(PointerElementType);
- if (C->getValue()->equalsInt(Size))
- return IK_PtrInduction;
- else if (C->getValue()->equalsInt(0 - Size))
- return IK_ReversePtrInduction;
-
- return IK_NoInduction;
+ Type *PhiTy = Phi->getType();
+ // Found an Integer induction variable.
+ if (PhiTy->isIntegerTy())
+ return IK_IntInduction;
+ // Found a Pointer induction variable.
+ return IK_PtrInduction;
}
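// The canonical case the isInductionPHI-based check classifies as
// IK_IntInduction (IR sketch; names invented):
//   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
//   %i.next = add i64 %i, 1        ; StepValue comes back as 1
// A pointer PHI advancing by the element size per iteration classifies as
// IK_PtrInduction instead.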
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@@ -5341,11 +3852,7 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
- assert(TheLoop->contains(BB) && "Unknown block used");
-
- // Blocks that do not dominate the latch need predication.
- BasicBlock* Latch = TheLoop->getLoopLatch();
- return !DT->dominates(BB, Latch);
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
@@ -5421,13 +3928,17 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorization.
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
- emitAnalysis(Report() << "runtime pointer checks needed. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os");
+ emitAnalysis(VectorizationReport() <<
+ "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
return Factor;
}
- if (!EnableCondStoresVectorization && Legal->NumPredStores) {
- emitAnalysis(Report() << "store that is conditionally executed prevents vectorization");
+ if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
+ emitAnalysis(VectorizationReport() <<
+ "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor;
}
@@ -5462,7 +3973,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
- emitAnalysis(Report() << "unable to calculate the loop count due to complex control flow");
+ emitAnalysis
+ (VectorizationReport() <<
+ "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
@@ -5476,10 +3989,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
if (VF < 2) {
- emitAnalysis(Report() << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os");
+ emitAnalysis(VectorizationReport() <<
+ "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
@@ -5532,6 +4046,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -5566,7 +4081,7 @@ unsigned LoopVectorizationCostModel::getWidestType() {
continue;
MaxWidth = std::max(MaxWidth,
- (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
@@ -5645,7 +4160,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the unroll factor ranges to reasonable factors.
- unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor();
+ unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF);
// Check if the user has overridden the unroll max.
if (VF == 1) {
@@ -5692,8 +4207,10 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// Unroll until store/load ports (estimated by max unroll factor) are
// saturated.
- unsigned StoresUF = UF / (Legal->NumStores ? Legal->NumStores : 1);
- unsigned LoadsUF = UF / (Legal->NumLoads ? Legal->NumLoads : 1);
+ unsigned NumStores = Legal->getNumStores();
+ unsigned NumLoads = Legal->getNumLoads();
+ unsigned StoresUF = UF / (NumStores ? NumStores : 1);
+ unsigned LoadsUF = UF / (NumLoads ? NumLoads : 1);
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
@@ -5716,6 +4233,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
return SmallUF;
}
+ // Unroll if this is a large loop (small loops are already dealt with by this
+ // point) that could benefit from interleaved unrolling.
+ bool HasReductions = (Legal->getReductionVars()->size() > 0);
+ if (TTI.enableAggressiveInterleaving(HasReductions)) {
+ DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n");
+ return UF;
+ }
+
DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
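// A worked instance of the port-saturation estimate above: with UF = 8,
// two stores and one load in the loop body, StoresUF = 8 / 2 == 4 and
// LoadsUF = 8 / 1 == 8; the elided surrounding code weighs these per-port
// factors against the small-loop unroll factor when picking the result.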
@@ -6053,8 +4578,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
@@ -6081,7 +4607,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Wide load/stores.
unsigned Cost = TTI.getAddressComputationCost(VectorTy);
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ if (Legal->isMaskRequired(I))
+ Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
+ AS);
+ else
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
@@ -6111,14 +4641,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
+ bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+ unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ if (getIntrinsicIDForCall(CI, TLI))
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return CallCost;
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
@@ -6145,24 +4673,19 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
}// end of switch.
}
-Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
- if (Scalar->isVoidTy() || VF == 1)
- return Scalar;
- return VectorType::get(Scalar, VF);
-}
-
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -6259,7 +4782,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
ConstantInt::get(Cond[Part]->getType(), 1));
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(CondBlock, *LI);
// Update Builder with newly created basic block.
Builder.SetInsertPoint(InsertPt);
}
@@ -6285,7 +4808,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
if (IfPredicateStore) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -6310,11 +4833,10 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
return V;
}
-Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
- bool Negate) {
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType();
assert(!ITy->isVectorTy() && "Val must be a scalar");
- Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
- return Builder.CreateAdd(Val, C, "induction");
+ Constant *C = ConstantInt::get(ITy, StartIdx);
+ return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
}
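// For the unroller's scalar case above, a call such as (values invented)
//   getStepVector(IV, 3, ConstantInt::get(ITy, 2))
// emits IV + 3 * 2 as the "induction" add; the scalar IV is advanced by
// StartIdx steps of size Step, generalizing the removed
// getConsecutiveVector, which could only add (or negate) StartIdx itself.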
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bd8a4b3..504425e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -75,6 +75,15 @@ static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
+// Limit the number of alias checks. The limit is chosen so that
+// it has no negative effect on the llvm benchmarks.
+static const unsigned AliasedCheckLimit = 10;
+
+// Another limit for the alias checks: The maximum distance between load/store
+// instructions where alias checks are done.
+// This limit is useful for very large basic blocks.
+static const unsigned MaxMemDepDistance = 160;
+
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -278,104 +287,6 @@ static bool CanReuseExtract(ArrayRef<Value *> VL) {
return true;
}
-static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
-
- SmallVector<Value *, 16> OrigLeft, OrigRight;
-
- bool AllSameOpcodeLeft = true;
- bool AllSameOpcodeRight = true;
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- Value *V0 = I->getOperand(0);
- Value *V1 = I->getOperand(1);
-
- OrigLeft.push_back(V0);
- OrigRight.push_back(V1);
-
- Instruction *I0 = dyn_cast<Instruction>(V0);
- Instruction *I1 = dyn_cast<Instruction>(V1);
-
- // Check whether all operands on one side have the same opcode. In this case
- // we want to preserve the original order and not make things worse by
- // reordering.
- AllSameOpcodeLeft = I0;
- AllSameOpcodeRight = I1;
-
- if (i && AllSameOpcodeLeft) {
- if(Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
- if(P0->getOpcode() != I0->getOpcode())
- AllSameOpcodeLeft = false;
- } else
- AllSameOpcodeLeft = false;
- }
- if (i && AllSameOpcodeRight) {
- if(Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
- if(P1->getOpcode() != I1->getOpcode())
- AllSameOpcodeRight = false;
- } else
- AllSameOpcodeRight = false;
- }
-
- // Sort two opcodes. In the code below we try to preserve the ability to use
- // broadcast of values instead of individual inserts.
- // vl1 = load
- // vl2 = phi
- // vr1 = load
- // vr2 = vr1
- // = vl1 x vr1
- // = vl2 x vr2
- // If we just sorted according to opcode we would leave the first line
- // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
- // = vl1 x vr1
- // = vr2 x vl2
- // Because vr2 and vr1 are from the same load we lose the opportunity of a
- // broadcast for the packed right side in the backend: we have [vr1, vl2]
- // instead of [vr1, vr2=vr1].
- if (I0 && I1) {
- if(!i && I0->getOpcode() > I1->getOpcode()) {
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
- // Try not to destroy a broadcast for no apparent benefit.
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
- // Try to preserve broadcasts.
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
- // Try to preserve broadcasts.
- Left.push_back(I1);
- Right.push_back(I0);
- } else {
- Left.push_back(I0);
- Right.push_back(I1);
- }
- continue;
- }
- // One opcode, put the instruction on the right.
- if (I0) {
- Left.push_back(V1);
- Right.push_back(I0);
- continue;
- }
- Left.push_back(V0);
- Right.push_back(V1);
- }
-
- bool LeftBroadcast = isSplat(Left);
- bool RightBroadcast = isSplat(Right);
-
- // Don't reorder if the operands were good to begin with.
- if (!(LeftBroadcast || RightBroadcast) &&
- (AllSameOpcodeRight || AllSameOpcodeLeft)) {
- Left = OrigLeft;
- Right = OrigRight;
- }
-}
-
/// \returns True if in-tree use also needs extract. This refers to
/// possible scalar operand in vectorized instruction.
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
@@ -412,6 +323,17 @@ static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
return AliasAnalysis::Location();
}
+/// \returns True if the instruction is not a volatile or atomic load/store.
+static bool isSimple(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return !MI->isVolatile();
+ return true;
+}
+
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
@@ -420,11 +342,11 @@ public:
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
- BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+ BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+ TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt, AssumptionCache *AC)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
- SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
}
@@ -461,7 +383,7 @@ public:
}
/// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B);
+ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -518,6 +440,16 @@ private:
/// be beneficial even the tree height is tiny.
bool isFullyVectorizableTinyTree();
+ /// \brief Reorder commutative operands in an alt shuffle if doing so
+ /// results in vectorized code.
+ void reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right);
+ /// \brief Reorder commutative operands to get a better probability of
+ /// generating vectorized code.
+ void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right);
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(nullptr),
NeedToGather(0) {}
@@ -594,7 +526,7 @@ private:
}
AliasAnalysis::Location Loc2 = getLocation(Inst2, AA);
bool aliased = true;
- if (Loc1.Ptr && Loc2.Ptr) {
+ if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
// Do the alias check.
aliased = AA->alias(Loc1, Loc2);
}
@@ -945,7 +877,6 @@ private:
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -1198,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
}
BS.cancelScheduling(VL);
@@ -1251,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -1368,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Store: {
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1451,6 +1384,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+
+ // Reorder operands if reordering would enable vectorization.
+ if (isa<BinaryOperator>(VL0)) {
+ ValueList Left, Right;
+ reorderAltShuffleOperands(VL, Left, Right);
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ return;
+ }
+
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1774,7 +1717,7 @@ int BoUpSLP::getTreeCost() {
// We only vectorize tiny trees if it is fully vectorizable.
if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
- if (!VectorizableTree.size()) {
+ if (VectorizableTree.empty()) {
assert(!ExternalUses.size() && "We should not have any external users");
}
return INT_MAX;
@@ -1847,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
return -1;
}
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
Value *PtrA = getPointerOperand(A);
Value *PtrB = getPointerOperand(B);
unsigned ASA = getAddressSpaceOperand(A);
@@ -1861,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
APInt OffsetDelta = OffsetB - OffsetA;
@@ -1888,6 +1831,198 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
return X == PtrSCEVB;
}
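// A worked example for the offset math above: for two i32 loads through
// &A[1] and &A[2], stripping in-bounds constant offsets leaves the same
// base with OffsetA = 4 and OffsetB = 8, so Size = 4 and
// OffsetDelta = 8 - 4 == 4 == Size; the elided tail of the function
// compares exactly these quantities (falling back to SCEV when the bases
// differ) before declaring the pair consecutive.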
+// Reorder commutative operations in alternate shuffle if the resulting vectors
+// are consecutive loads. This would allow us to vectorize the tree.
+// If we have something like:
+// load a[0] - load b[0]
+// load b[1] + load a[1]
+// load a[2] - load b[2]
+// load a[3] + load b[3]
+// then reordering the operands of the second row (load b[1], load a[1])
+// would allow us to vectorize this code.
+void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Push left and right operands of binary operation into Left and Right
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Left.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Right.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ // Reorder if we have a commutative operation and consecutive accesses
+ // are on either side of the alternate instructions.
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ }
+}
+
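// Tracing the function above on its motivating example: for
//   VL = { a[0]-b[0], b[1]+a[1], a[2]-b[2], a[3]+b[3] }
// the initial split puts b[1] in Left[1] and a[1] in Right[1]. At j = 0,
// Left[0] (load a[0]) and Right[1] (load a[1]) are consecutive and VL[1]
// is a commutative add, so Left[1] and Right[1] are swapped, leaving all
// a[] loads on the left and all b[] loads on the right.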
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ bool AllSameOpcodeLeft = true;
+ bool AllSameOpcodeRight = true;
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ Value *VLeft = I->getOperand(0);
+ Value *VRight = I->getOperand(1);
+
+ OrigLeft.push_back(VLeft);
+ OrigRight.push_back(VRight);
+
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ if (i && AllSameOpcodeLeft && ILeft) {
+ if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
+ if (PLeft->getOpcode() != ILeft->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (i && AllSameOpcodeRight && IRight) {
+ if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
+ if (PRight->getOpcode() != IRight->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+ // vr2 = vr1
+ // = vl1 x vr1
+ // = vl2 x vr2
+ // If we just sorted according to opcode we would leave the first line
+ // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+ // Because vr2 and vr1 are from the same load we lose the opportunity of a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (ILeft && IRight) {
+ if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
+ Right[i - 1] != IRight) {
+ // Try not to destroy a broadcast for no apparent benefit.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
+ Right[i - 1] == ILeft) {
+ // Try to preserve broadcasts.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
+ Left[i - 1] == IRight) {
+ // Try to preserve broadcasts.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else {
+ Left.push_back(ILeft);
+ Right.push_back(IRight);
+ }
+ continue;
+ }
+ // One opcode, put the instruction on the right.
+ if (ILeft) {
+ Left.push_back(VRight);
+ Right.push_back(ILeft);
+ continue;
+ }
+ Left.push_back(VLeft);
+ Right.push_back(VRight);
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+ // If the operands end up being broadcasts, return this operand order.
+ if (LeftBroadcast || RightBroadcast)
+ return;
+
+ // Don't reorder if the operands were good to begin with.
+ if (AllSameOpcodeRight || AllSameOpcodeLeft) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Finally check if we can get longer vectorizable chain by reordering
+ // without breaking the good operand order detected above.
+ // E.g., if we have something like:
+ // load a[0] load b[0]
+ // load b[1] load a[1]
+ // load a[2] load b[2]
+ // load a[3] load b[3]
+ // then reordering the second row (load b[1], load a[1]) would allow us to
+ // vectorize this code while still retaining the AllSameOpcode property.
+ // FIXME: This load reordering might break AllSameOpcode in some rare cases
+ // such as:
+ // add a[0],c[0] load b[0]
+ // add a[1],c[2] load b[1]
+ // b[2] load b[2]
+ // add a[3],c[3] load b[3]
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ // else unchanged
+ }
+}
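The swap rules above are dense, so here is a minimal standalone C++ sketch of just the lane-swap conditions for lanes 1 and up. It is not LLVM code: values are modeled as plain (id, opcode) pairs, the opcode ordering is arbitrary, reorderLanes is an invented helper, and both the lane-0 sort and the consecutive-load pass are omitted.

// Simplified model of the lane reordering above. A lane is swapped when
// that fixes the opcode order without breaking a broadcast on the right,
// or when the opcodes match and a swap extends a broadcast on either side.
#include <cstdio>
#include <utility>
#include <vector>

struct Val { int id; int opcode; };

static void reorderLanes(std::vector<Val> &Left, std::vector<Val> &Right) {
  for (size_t i = 1; i < Left.size(); ++i) {
    bool Swap = false;
    if (Left[i].opcode > Right[i].opcode && Right[i - 1].id != Right[i].id)
      Swap = true; // sort by opcode, but don't destroy a right-side broadcast
    else if (Left[i].opcode == Right[i].opcode &&
             (Right[i - 1].id == Left[i].id || Left[i - 1].id == Right[i].id))
      Swap = true; // equal opcodes: swap only if it extends a broadcast
    if (Swap)
      std::swap(Left[i], Right[i]);
  }
}

int main() {
  // The vl/vr example from the comment: vr2 is the same value as vr1.
  enum { Load = 1, Phi = 2 }; // assume opcode(phi) > opcode(load)
  std::vector<Val> L = {{/*vl1*/ 1, Load}, {/*vl2*/ 2, Phi}};
  std::vector<Val> R = {{/*vr1*/ 3, Load}, {/*vr2*/ 3, Load}};
  reorderLanes(L, R);
  std::printf("right side: [%d, %d] -> broadcast %s\n", R[0].id, R[1].id,
              R[0].id == R[1].id ? "kept" : "lost");
  return 0;
}

Run on that example, the Right[i - 1] guard blocks the opcode-driven swap in lane 1, so the right side stays the splat [vr1, vr2=vr1] instead of degrading to [vr1, vl2].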
+
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
BasicBlock::iterator NextInst = VL0;
@@ -1974,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return Gather(E->Scalars, VecTy);
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
@@ -2066,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Value *V = alreadyVectorized(E->Scalars))
return V;
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V;
if (Opcode == Instruction::FCmp)
V = Builder.CreateFCmp(P0, L, R);
@@ -2170,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarLoadTy);
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(ScalarLoadTy);
+ }
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
++NumVectorInstructions;
@@ -2200,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(
ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
+ }
S->setAlignment(Alignment);
E->VectorizedValue = S;
++NumVectorInstructions;
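Both alignment hunks encode the same fallback: an alignment of 0 on a load or store means "unspecified", so the ABI alignment of the accessed type is substituted. A minimal sketch of that rule, with a hypothetical natural-alignment function standing in for DataLayout::getABITypeAlignment:

// Sketch of the alignment fallback: 0 means "unspecified", so substitute
// the ABI alignment the data layout would report for the type.
#include <cstdio>

// Stand-in for DataLayout::getABITypeAlignment: assume natural alignment.
static unsigned abiTypeAlignment(unsigned typeSizeInBytes) {
  return typeSizeInBytes;
}

static unsigned effectiveAlignment(unsigned declared, unsigned typeSizeInBytes) {
  return declared ? declared : abiTypeAlignment(typeSizeInBytes);
}

int main() {
  std::printf("%u\n", effectiveAlignment(0, 4));  // unspecified -> ABI (4)
  std::printf("%u\n", effectiveAlignment(16, 4)); // explicit 16 is kept
  return 0;
}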
@@ -2227,7 +2365,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
OpVecs.push_back(OpVec);
}
- Value *V = Builder.CreateGEP(Op0, OpVecs);
+ Value *V = Builder.CreateGEP(
+ cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -2243,7 +2382,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Intrinsic::ID IID = Intrinsic::not_intrinsic;
Value *ScalarArg = nullptr;
if (CI && (FI = CI->getCalledFunction())) {
- IID = (Intrinsic::ID) FI->getIntrinsicID();
+ IID = FI->getIntrinsicID();
}
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
@@ -2284,10 +2423,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ShuffleVector: {
ValueList LHSVL, RHSVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
- }
+ assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
+ reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars);
Value *LHS = vectorizeTree(LHSVL);
@@ -2768,23 +2905,57 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
Instruction *SrcInst = BundleMember->Inst;
AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
+ unsigned numAliased = 0;
+ unsigned DistToSrc = 1;
while (DepDest) {
assert(isInSchedulingRegion(DepDest));
- if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) {
- if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) {
- DepDest->MemoryDependencies.push_back(BundleMember);
- BundleMember->Dependencies++;
- ScheduleData *DestBundle = DepDest->FirstInBundle;
- if (!DestBundle->IsScheduled) {
- BundleMember->incrementUnscheduledDeps(1);
- }
- if (!DestBundle->hasValidDependencies()) {
- WorkList.push_back(DestBundle);
- }
+
+ // We have two limits to reduce the complexity:
+ // 1) AliasedCheckLimit: It's a small limit to reduce calls to
+ // SLP->isAliased (which is the expensive part in this loop).
+ // 2) MaxMemDepDistance: It's for very large blocks and it aborts
+ // the whole loop (even if each iteration is fast, the loop is quadratic).
+ // It's important for the loop break condition (see below) to
+ // check this limit even between two read-only instructions.
+ if (DistToSrc >= MaxMemDepDistance ||
+ ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
+ (numAliased >= AliasedCheckLimit ||
+ SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
+
+ // We increment the counter only if the locations are aliased
+ // (instead of counting all alias checks). This gives a better
+ // balance between reduced runtime and accurate dependencies.
+ numAliased++;
+
+ DepDest->MemoryDependencies.push_back(BundleMember);
+ BundleMember->Dependencies++;
+ ScheduleData *DestBundle = DepDest->FirstInBundle;
+ if (!DestBundle->IsScheduled) {
+ BundleMember->incrementUnscheduledDeps(1);
+ }
+ if (!DestBundle->hasValidDependencies()) {
+ WorkList.push_back(DestBundle);
}
}
DepDest = DepDest->NextLoadStore;
+
+ // Example, explaining the loop break condition: Let's assume our
+ // starting instruction is i0 and MaxMemDepDistance = 3.
+ //
+ // +--------v--v--v
+ // i0,i1,i2,i3,i4,i5,i6,i7,i8
+ // +--------^--^--^
+ //
+ // MaxMemDepDistance lets us stop alias-checking at i3 and we add
+ // dependencies from i0 to i3,i4,... (even if they are not aliased).
+ // Previously we already added dependencies from i3 to i6,i7,i8
+ // (because of MaxMemDepDistance). As we added a dependency from
+ // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
+ // and we can abort this loop at i6.
+ if (DistToSrc >= 2 * MaxMemDepDistance)
+ break;
+ DistToSrc++;
}
}
}
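The throttling logic is easier to read outside the scheduler. Below is a standalone C++ model; it is a sketch under simplifying assumptions: the read/write distinction is dropped, mayAlias is an arbitrary stand-in for SLP->isAliased, and the limit values are illustrative.

// Model of the throttled dependence scan: expensive alias queries are
// capped, everything past the distance limit is conservatively treated
// as dependent, and the walk stops at twice that distance because the
// remaining instructions are covered by transitive dependencies.
#include <cstdio>
#include <functional>

static const unsigned AliasedCheckLimit = 10;  // illustrative values
static const unsigned MaxMemDepDistance = 160;

static unsigned countDependencies(unsigned numLaterInsts,
                                  const std::function<bool(unsigned)> &mayAlias) {
  unsigned numAliased = 0, deps = 0;
  for (unsigned distToSrc = 1; distToSrc <= numLaterInsts; ++distToSrc) {
    if (distToSrc >= MaxMemDepDistance ||
        numAliased >= AliasedCheckLimit || mayAlias(distToSrc)) {
      ++numAliased; // counts recorded dependencies, not alias queries
      ++deps;       // stand-in for pushing a MemoryDependencies entry
    }
    if (distToSrc >= 2 * MaxMemDepDistance)
      break; // transitive dependencies cover everything further away
  }
  return deps;
}

int main() {
  // Pretend every 7th later instruction really aliases the source.
  unsigned deps =
      countDependencies(1000, [](unsigned d) { return d % 7 == 0; });
  std::printf("recorded dependencies: %u\n", deps);
  return 0;
}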
@@ -2888,7 +3059,6 @@ struct SLPVectorizer : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -2901,12 +3071,11 @@ struct SLPVectorizer : public FunctionPass {
return false;
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = &getAnalysis<TargetTransformInfo>();
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -2918,11 +3087,6 @@ struct SLPVectorizer : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- // Must have DataLayout. We can't require it because some tests run w/o
- // triple.
- if (!DL)
- return false;
-
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -2931,15 +3095,13 @@ struct SLPVectorizer : public FunctionPass {
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
// Scan the blocks in the function in post order.
- for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
- e = po_end(&F.getEntryBlock()); it != e; ++it) {
- BasicBlock *BB = *it;
+ for (auto BB : post_order(&F.getEntryBlock())) {
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;
@@ -2964,10 +3126,10 @@ struct SLPVectorizer : public FunctionPass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetTransformInfo>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
@@ -3014,15 +3176,11 @@ private:
/// the WeakVH array.
/// Vectorization of part of the VL array may cause later values in the VL array
/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
- SmallVectorImpl<WeakVH> &VH,
- unsigned SliceBegin,
- unsigned SliceSize) {
- for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
- if (VH[i] != VL[i])
- return true;
-
- return false;
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
+ unsigned SliceBegin, unsigned SliceSize) {
+ VL = VL.slice(SliceBegin, SliceSize);
+ VH = VH.slice(SliceBegin, SliceSize);
+ return !std::equal(VL.begin(), VL.end(), VH.begin());
}
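The rewritten helper is a slice-and-compare over two parallel arrays. A standalone model, with plain ints standing in for Value* and WeakVH:

// Model of hasValueBeenRAUWed: compare a slice of the live values against
// the snapshot taken before vectorization; a mismatch means RAUW happened.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

static bool sliceChanged(const std::vector<int> &live,
                         const std::vector<int> &snapshot,
                         std::size_t begin, std::size_t size) {
  return !std::equal(live.begin() + begin, live.begin() + begin + size,
                     snapshot.begin() + begin);
}

int main() {
  std::vector<int> snapshot = {1, 2, 3, 4}; // WeakVH copies of the values
  std::vector<int> live     = {1, 2, 9, 4}; // value 3 was replaced by 9
  std::printf("slice [0,2): %s\n",
              sliceChanged(live, snapshot, 0, 2) ? "RAUWed" : "intact");
  std::printf("slice [2,4): %s\n",
              sliceChanged(live, snapshot, 2, 2) ? "RAUWed" : "intact");
  return 0;
}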
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
@@ -3031,7 +3189,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
+ unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned VF = MinVecRegSize / Sz;
if (!isPowerOf2_32(Sz) || VF < 2)
@@ -3074,8 +3233,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
int costThreshold, BoUpSLP &R) {
- SetVector<Value *> Heads, Tails;
- SmallDenseMap<Value *, Value *> ConsecutiveChain;
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
@@ -3088,8 +3247,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
for (unsigned j = 0; j < e; ++j) {
if (i == j)
continue;
-
- if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
Tails.insert(Stores[j]);
Heads.insert(Stores[i]);
ConsecutiveChain[Stores[i]] = Stores[j];
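The Heads/Tails/ConsecutiveChain bookkeeping reads more easily with integers as stand-ins for store addresses. A simplified standalone model ("consecutive" is just addr+1 here, replacing the isConsecutiveAccess query):

// Model of consecutive-store chain collection: record head->tail links,
// then walk chains starting from heads that are not also tails.
#include <cstdio>
#include <map>
#include <set>
#include <vector>

int main() {
  std::vector<int> stores = {40, 12, 10, 11, 41}; // store "addresses"
  std::set<int> heads, tails;
  std::map<int, int> chain; // ConsecutiveChain: head -> next store
  for (int a : stores)
    for (int b : stores)
      if (a != b && b == a + 1) { // stand-in for isConsecutiveAccess
        heads.insert(a);
        tails.insert(b);
        chain[a] = b;
      }
  for (int h : heads) {
    if (tails.count(h))
      continue; // mid-chain store; not the start of a chain
    std::printf("chain:");
    for (int cur = h;; cur = chain[cur]) {
      std::printf(" %d", cur);
      if (!chain.count(cur))
        break;
    }
    std::printf("\n");
  }
  return 0; // prints "chain: 10 11 12" and "chain: 40 41"
}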
@@ -3098,7 +3257,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
}
// For stores that start but don't end a link in the chain:
- for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
@@ -3106,7 +3265,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
- Value *I = *it;
+ StoreInst *I = *it;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (VectorizedStores.count(I))
@@ -3131,6 +3290,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
StoreInst *SI = dyn_cast<StoreInst>(it);
if (!SI)
@@ -3176,9 +3336,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
return false;
unsigned Opcode0 = I0->getOpcode();
+ const DataLayout &DL = I0->getModule()->getDataLayout();
Type *Ty0 = I0->getType();
- unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned Sz = DL.getTypeSizeInBits(Ty0);
unsigned VF = MinVecRegSize / Sz;
for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3380,8 +3541,7 @@ public:
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- const DataLayout *DL) {
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
assert((!Phi ||
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");
@@ -3406,9 +3566,10 @@ public:
if (!isValidElementType(Ty))
return false;
+ const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
- ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
ReductionPHI = Phi;
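Here, as in vectorizeStoreChain above, the width is the number of scalars of the element type that fit into one minimum-size vector register. A tiny sketch of that computation, assuming the 128-bit MinVecRegSize this file uses:

// Sketch of the width computation: how many scalars of a given bit-width
// fit into one minimum-size vector register.
#include <cstdio>

static const unsigned MinVecRegSize = 128; // as in this file

static unsigned reduxWidth(unsigned scalarSizeInBits) {
  return MinVecRegSize / scalarSizeInBits;
}

int main() {
  std::printf("i32: width %u\n", reduxWidth(32)); // 4 lanes
  std::printf("f64: width %u\n", reduxWidth(64)); // 2 lanes
  return 0;
}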
@@ -3718,8 +3879,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to match and vectorize a horizontal reduction.
HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
HorRdx.tryToReduce(R, TTI)) {
Changed = true;
it = BB->begin();
@@ -3749,7 +3909,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;
@@ -3793,6 +3953,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// and the iterator may become invalid value.
it = BB->begin();
e = BB->end();
+ break;
}
}
}
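The added break is the interesting part of this hunk: vectorizing a chain can erase instructions from the block, so after resetting the basic-block iterators the inner scan must be abandoned rather than resumed. A standalone model of that restart-after-mutation pattern, with std::list standing in for the basic block:

// Model of the restart-after-mutation pattern: once the container is
// changed mid-scan, abandon the scan (break) and start over from the
// refreshed begin()/end() instead of continuing with stale iterators.
#include <cstdio>
#include <list>

int main() {
  std::list<int> block = {1, 2, 3, 4};
  bool restart = true;
  while (restart) {
    restart = false;
    for (auto it = block.begin(), e = block.end(); it != e; ++it) {
      if (*it % 2 == 0) { // a "vectorization" that mutates the block
        block.erase(it);  // invalidates it
        restart = true;
        break;            // restart the scan from scratch
      }
    }
  }
  for (int v : block)
    std::printf("%d ", v); // prints: 1 3
  std::printf("\n");
  return 0;
}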
@@ -3849,7 +4010,7 @@ char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index d459bcf..6e002fd 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -19,7 +19,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;